/*-
 * Copyright (c) 2010 Isilon Systems, Inc.
 * Copyright (c) 2010 iX Systems, Inc.
 * Copyright (c) 2010 Panasas, Inc.
 * Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/sglist.h>
#include <sys/sleepqueue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bus.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/rwlock.h>
#include <sys/mman.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

#include <machine/stdarg.h>

#if defined(__i386__) || defined(__amd64__)
#include <machine/md_var.h>
#endif

#include <linux/kobject.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include <linux/sysfs.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/timer.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/list.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
#include <linux/compat.h>
#include <linux/poll.h>
#include <linux/smp.h>

#if defined(__i386__) || defined(__amd64__)
#include <asm/smp.h>
#endif

SYSCTL_NODE(_compat, OID_AUTO, linuxkpi, CTLFLAG_RW, 0, "LinuxKPI parameters");

MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat");

#include <linux/rbtree.h>
/* Undo Linux compat changes. */
#undef RB_ROOT
#undef file
#undef cdev
#define	RB_ROOT(head)	(head)->rbh_root

static struct vm_area_struct *linux_cdev_handle_find(void *handle);

struct kobject linux_class_root;
struct device linux_root_device;
struct class linux_class_misc;
struct list_head pci_drivers;
struct list_head pci_devices;
spinlock_t pci_lock;

unsigned long linux_timer_hz_mask;

int
panic_cmp(struct rb_node *one, struct rb_node *two)
{
	panic("no cmp");
}

RB_GENERATE(linux_root, rb_node, __entry, panic_cmp);

int
kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args)
{
	va_list tmp_va;
	int len;
	char *old;
	char *name;
	char dummy;

	old = kobj->name;

	if (old && fmt == NULL)
		return (0);

	/* compute length of string */
	va_copy(tmp_va, args);
	len = vsnprintf(&dummy, 0, fmt, tmp_va);
	va_end(tmp_va);

	/* account for zero termination */
	len++;

	/* check for error */
	if (len < 1)
		return (-EINVAL);

	/* allocate memory for string */
	name = kzalloc(len, GFP_KERNEL);
	if (name == NULL)
		return (-ENOMEM);
	vsnprintf(name, len, fmt, args);
	kobj->name = name;

	/* free old string */
	kfree(old);

	/* filter new string */
	for (; *name != '\0'; name++)
		if (*name == '/')
			*name = '!';
	return (0);
}

int
kobject_set_name(struct kobject *kobj, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);

	return (error);
}

static int
kobject_add_complete(struct kobject *kobj, struct kobject *parent)
{
	const struct kobj_type *t;
	int error;

	kobj->parent = parent;
	error = sysfs_create_dir(kobj);
	if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) {
		struct attribute **attr;
		t = kobj->ktype;

		for (attr = t->default_attrs; *attr != NULL; attr++) {
			error = sysfs_create_file(kobj, *attr);
			if (error)
				break;
		}
		if (error)
			sysfs_remove_dir(kobj);

	}
	return (error);
}

int
kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);

	return kobject_add_complete(kobj, parent);
}

void
linux_kobject_release(struct kref *kref)
{
	struct kobject *kobj;
	char *name;

	kobj = container_of(kref, struct kobject, kref);
	sysfs_remove_dir(kobj);
	name = kobj->name;
	if (kobj->ktype && kobj->ktype->release)
		kobj->ktype->release(kobj);
	kfree(name);
}

static void
linux_kobject_kfree(struct kobject *kobj)
{
	kfree(kobj);
}

static void
linux_kobject_kfree_name(struct kobject *kobj)
{
	if (kobj) {
		kfree(kobj->name);
	}
}

const struct kobj_type linux_kfree_type = {
	.release = linux_kobject_kfree
};

static void
linux_device_release(struct device *dev)
{
	pr_debug("linux_device_release: %s\n", dev_name(dev));
	kfree(dev);
}

static ssize_t
linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct class, kobj),
		    dattr, buf);
	return (error);
}

static ssize_t
linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct class, kobj),
		    dattr, buf, count);
	return (error);
}

static void
linux_class_release(struct kobject *kobj)
{
	struct class *class;

	class = container_of(kobj, struct class, kobj);
	if (class->class_release)
		class->class_release(class);
}

static const struct sysfs_ops linux_class_sysfs = {
	.show = linux_class_show,
	.store = linux_class_store,
};

const struct kobj_type linux_class_ktype = {
	.release = linux_class_release,
	.sysfs_ops = &linux_class_sysfs
};

static void
linux_dev_release(struct kobject *kobj)
{
	struct device *dev;

	dev = container_of(kobj, struct device, kobj);
	/* This is the precedence defined by linux. */
	if (dev->release)
		dev->release(dev);
	else if (dev->class && dev->class->dev_release)
		dev->class->dev_release(dev);
}

static ssize_t
linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct device, kobj),
		    dattr, buf);
	return (error);
}

static ssize_t
linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct device, kobj),
		    dattr, buf, count);
	return (error);
}

static const struct sysfs_ops linux_dev_sysfs = {
	.show = linux_dev_show,
	.store = linux_dev_store,
};

const struct kobj_type linux_dev_ktype = {
	.release = linux_dev_release,
	.sysfs_ops = &linux_dev_sysfs
};

struct device *
device_create(struct class *class, struct device *parent, dev_t devt,
    void *drvdata, const char *fmt, ...)
{
	struct device *dev;
	va_list args;

	dev = kzalloc(sizeof(*dev), M_WAITOK);
	dev->parent = parent;
	dev->class = class;
	dev->devt = devt;
	dev->driver_data = drvdata;
	dev->release = linux_device_release;
	va_start(args, fmt);
	kobject_set_name_vargs(&dev->kobj, fmt, args);
	va_end(args);
	device_register(dev);

	return (dev);
}

int
kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype,
    struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	kobject_init(kobj, ktype);
	kobj->ktype = ktype;
	kobj->parent = parent;
	kobj->name = NULL;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);
	return kobject_add_complete(kobj, parent);
}

static void
linux_kq_lock(void *arg)
{
	spinlock_t *s = arg;

	spin_lock(s);
}
static void
linux_kq_unlock(void *arg)
{
	spinlock_t *s = arg;

	spin_unlock(s);
}

static void
linux_kq_lock_owned(void *arg)
{
#ifdef INVARIANTS
	spinlock_t *s = arg;

	mtx_assert(&s->m, MA_OWNED);
#endif
}

static void
linux_kq_lock_unowned(void *arg)
{
#ifdef INVARIANTS
	spinlock_t *s = arg;

	mtx_assert(&s->m, MA_NOTOWNED);
#endif
}

static void
linux_file_kqfilter_poll(struct linux_file *, int);

struct linux_file *
linux_file_alloc(void)
{
	struct linux_file *filp;

	filp = kzalloc(sizeof(*filp), GFP_KERNEL);

	/* set initial refcount */
	filp->f_count = 1;

	/* setup fields needed by kqueue support */
	spin_lock_init(&filp->f_kqlock);
	knlist_init(&filp->f_selinfo.si_note, &filp->f_kqlock,
	    linux_kq_lock, linux_kq_unlock,
	    linux_kq_lock_owned, linux_kq_lock_unowned);

	return (filp);
}

void
linux_file_free(struct linux_file *filp)
{
	if (filp->_file == NULL) {
		if (filp->f_shmem != NULL)
			vm_object_deallocate(filp->f_shmem);
		kfree(filp);
	} else {
		/*
		 * The close method of the character device or file
		 * will free the linux_file structure:
		 */
		_fdrop(filp->_file, curthread);
	}
}

static int
linux_cdev_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
    vm_page_t *mres)
{
	struct vm_area_struct *vmap;

	vmap = linux_cdev_handle_find(vm_obj->handle);

	MPASS(vmap != NULL);
	MPASS(vmap->vm_private_data == vm_obj->handle);

	if (likely(vmap->vm_ops != NULL && offset < vmap->vm_len)) {
		vm_paddr_t paddr = IDX_TO_OFF(vmap->vm_pfn) + offset;
		vm_page_t page;

		if (((*mres)->flags & PG_FICTITIOUS) != 0) {
			/*
			 * If the passed in result page is a fake
			 * page, update it with the new physical
			 * address.
			 */
			page = *mres;
			vm_page_updatefake(page, paddr, vm_obj->memattr);
		} else {
			/*
			 * Replace the passed in "mres" page with our
			 * own fake page and free up all of the
			 * original pages.
			 */
495 */ 496 VM_OBJECT_WUNLOCK(vm_obj); 497 page = vm_page_getfake(paddr, vm_obj->memattr); 498 VM_OBJECT_WLOCK(vm_obj); 499 500 vm_page_replace_checked(page, vm_obj, 501 (*mres)->pindex, *mres); 502 503 vm_page_lock(*mres); 504 vm_page_free(*mres); 505 vm_page_unlock(*mres); 506 *mres = page; 507 } 508 page->valid = VM_PAGE_BITS_ALL; 509 return (VM_PAGER_OK); 510 } 511 return (VM_PAGER_FAIL); 512 } 513 514 static int 515 linux_cdev_pager_populate(vm_object_t vm_obj, vm_pindex_t pidx, int fault_type, 516 vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last) 517 { 518 struct vm_area_struct *vmap; 519 int err; 520 521 linux_set_current(curthread); 522 523 /* get VM area structure */ 524 vmap = linux_cdev_handle_find(vm_obj->handle); 525 MPASS(vmap != NULL); 526 MPASS(vmap->vm_private_data == vm_obj->handle); 527 528 VM_OBJECT_WUNLOCK(vm_obj); 529 530 down_write(&vmap->vm_mm->mmap_sem); 531 if (unlikely(vmap->vm_ops == NULL)) { 532 err = VM_FAULT_SIGBUS; 533 } else { 534 struct vm_fault vmf; 535 536 /* fill out VM fault structure */ 537 vmf.virtual_address = (void *)((uintptr_t)pidx << PAGE_SHIFT); 538 vmf.flags = (fault_type & VM_PROT_WRITE) ? FAULT_FLAG_WRITE : 0; 539 vmf.pgoff = 0; 540 vmf.page = NULL; 541 542 vmap->vm_pfn_count = 0; 543 vmap->vm_pfn_pcount = &vmap->vm_pfn_count; 544 vmap->vm_obj = vm_obj; 545 546 err = vmap->vm_ops->fault(vmap, &vmf); 547 548 while (vmap->vm_pfn_count == 0 && err == VM_FAULT_NOPAGE) { 549 kern_yield(PRI_USER); 550 err = vmap->vm_ops->fault(vmap, &vmf); 551 } 552 } 553 554 /* translate return code */ 555 switch (err) { 556 case VM_FAULT_OOM: 557 err = VM_PAGER_AGAIN; 558 break; 559 case VM_FAULT_SIGBUS: 560 err = VM_PAGER_BAD; 561 break; 562 case VM_FAULT_NOPAGE: 563 /* 564 * By contract the fault handler will return having 565 * busied all the pages itself. If pidx is already 566 * found in the object, it will simply xbusy the first 567 * page and return with vm_pfn_count set to 1. 
568 */ 569 *first = vmap->vm_pfn_first; 570 *last = *first + vmap->vm_pfn_count - 1; 571 err = VM_PAGER_OK; 572 break; 573 default: 574 err = VM_PAGER_ERROR; 575 break; 576 } 577 up_write(&vmap->vm_mm->mmap_sem); 578 VM_OBJECT_WLOCK(vm_obj); 579 return (err); 580 } 581 582 static struct rwlock linux_vma_lock; 583 static TAILQ_HEAD(, vm_area_struct) linux_vma_head = 584 TAILQ_HEAD_INITIALIZER(linux_vma_head); 585 586 static void 587 linux_cdev_handle_free(struct vm_area_struct *vmap) 588 { 589 /* Drop reference on vm_file */ 590 if (vmap->vm_file != NULL) 591 fput(vmap->vm_file); 592 593 /* Drop reference on mm_struct */ 594 mmput(vmap->vm_mm); 595 596 kfree(vmap); 597 } 598 599 static void 600 linux_cdev_handle_remove(struct vm_area_struct *vmap) 601 { 602 rw_wlock(&linux_vma_lock); 603 TAILQ_REMOVE(&linux_vma_head, vmap, vm_entry); 604 rw_wunlock(&linux_vma_lock); 605 } 606 607 static struct vm_area_struct * 608 linux_cdev_handle_find(void *handle) 609 { 610 struct vm_area_struct *vmap; 611 612 rw_rlock(&linux_vma_lock); 613 TAILQ_FOREACH(vmap, &linux_vma_head, vm_entry) { 614 if (vmap->vm_private_data == handle) 615 break; 616 } 617 rw_runlock(&linux_vma_lock); 618 return (vmap); 619 } 620 621 static int 622 linux_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 623 vm_ooffset_t foff, struct ucred *cred, u_short *color) 624 { 625 626 MPASS(linux_cdev_handle_find(handle) != NULL); 627 *color = 0; 628 return (0); 629 } 630 631 static void 632 linux_cdev_pager_dtor(void *handle) 633 { 634 const struct vm_operations_struct *vm_ops; 635 struct vm_area_struct *vmap; 636 637 vmap = linux_cdev_handle_find(handle); 638 MPASS(vmap != NULL); 639 640 /* 641 * Remove handle before calling close operation to prevent 642 * other threads from reusing the handle pointer. 
643 */ 644 linux_cdev_handle_remove(vmap); 645 646 down_write(&vmap->vm_mm->mmap_sem); 647 vm_ops = vmap->vm_ops; 648 if (likely(vm_ops != NULL)) 649 vm_ops->close(vmap); 650 up_write(&vmap->vm_mm->mmap_sem); 651 652 linux_cdev_handle_free(vmap); 653 } 654 655 static struct cdev_pager_ops linux_cdev_pager_ops[2] = { 656 { 657 /* OBJT_MGTDEVICE */ 658 .cdev_pg_populate = linux_cdev_pager_populate, 659 .cdev_pg_ctor = linux_cdev_pager_ctor, 660 .cdev_pg_dtor = linux_cdev_pager_dtor 661 }, 662 { 663 /* OBJT_DEVICE */ 664 .cdev_pg_fault = linux_cdev_pager_fault, 665 .cdev_pg_ctor = linux_cdev_pager_ctor, 666 .cdev_pg_dtor = linux_cdev_pager_dtor 667 }, 668 }; 669 670 #define OPW(fp,td,code) ({ \ 671 struct file *__fpop; \ 672 __typeof(code) __retval; \ 673 \ 674 __fpop = (td)->td_fpop; \ 675 (td)->td_fpop = (fp); \ 676 __retval = (code); \ 677 (td)->td_fpop = __fpop; \ 678 __retval; \ 679 }) 680 681 static int 682 linux_dev_fdopen(struct cdev *dev, int fflags, struct thread *td, struct file *file) 683 { 684 struct linux_cdev *ldev; 685 struct linux_file *filp; 686 int error; 687 688 ldev = dev->si_drv1; 689 690 filp = linux_file_alloc(); 691 filp->f_dentry = &filp->f_dentry_store; 692 filp->f_op = ldev->ops; 693 filp->f_mode = file->f_flag; 694 filp->f_flags = file->f_flag; 695 filp->f_vnode = file->f_vnode; 696 filp->_file = file; 697 698 linux_set_current(td); 699 700 if (filp->f_op->open) { 701 error = -filp->f_op->open(file->f_vnode, filp); 702 if (error) { 703 kfree(filp); 704 return (error); 705 } 706 } 707 708 /* hold on to the vnode - used for fstat() */ 709 vhold(filp->f_vnode); 710 711 /* release the file from devfs */ 712 finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops); 713 return (ENXIO); 714 } 715 716 #define LINUX_IOCTL_MIN_PTR 0x10000UL 717 #define LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + IOCPARM_MAX) 718 719 static inline int 720 linux_remap_address(void **uaddr, size_t len) 721 { 722 uintptr_t uaddr_val = (uintptr_t)(*uaddr); 723 724 if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR && 725 uaddr_val < LINUX_IOCTL_MAX_PTR)) { 726 struct task_struct *pts = current; 727 if (pts == NULL) { 728 *uaddr = NULL; 729 return (1); 730 } 731 732 /* compute data offset */ 733 uaddr_val -= LINUX_IOCTL_MIN_PTR; 734 735 /* check that length is within bounds */ 736 if ((len > IOCPARM_MAX) || 737 (uaddr_val + len) > pts->bsd_ioctl_len) { 738 *uaddr = NULL; 739 return (1); 740 } 741 742 /* re-add kernel buffer address */ 743 uaddr_val += (uintptr_t)pts->bsd_ioctl_data; 744 745 /* update address location */ 746 *uaddr = (void *)uaddr_val; 747 return (1); 748 } 749 return (0); 750 } 751 752 int 753 linux_copyin(const void *uaddr, void *kaddr, size_t len) 754 { 755 if (linux_remap_address(__DECONST(void **, &uaddr), len)) { 756 if (uaddr == NULL) 757 return (-EFAULT); 758 memcpy(kaddr, uaddr, len); 759 return (0); 760 } 761 return (-copyin(uaddr, kaddr, len)); 762 } 763 764 int 765 linux_copyout(const void *kaddr, void *uaddr, size_t len) 766 { 767 if (linux_remap_address(&uaddr, len)) { 768 if (uaddr == NULL) 769 return (-EFAULT); 770 memcpy(uaddr, kaddr, len); 771 return (0); 772 } 773 return (-copyout(kaddr, uaddr, len)); 774 } 775 776 size_t 777 linux_clear_user(void *_uaddr, size_t _len) 778 { 779 uint8_t *uaddr = _uaddr; 780 size_t len = _len; 781 782 /* make sure uaddr is aligned before going into the fast loop */ 783 while (((uintptr_t)uaddr & 7) != 0 && len > 7) { 784 if (subyte(uaddr, 0)) 785 return (_len); 786 uaddr++; 787 len--; 788 } 789 790 /* zero 8 bytes at a time */ 791 
	while (len > 7) {
#ifdef __LP64__
		if (suword64(uaddr, 0))
			return (_len);
#else
		if (suword32(uaddr, 0))
			return (_len);
		if (suword32(uaddr + 4, 0))
			return (_len);
#endif
		uaddr += 8;
		len -= 8;
	}

	/* zero fill end, if any */
	while (len > 0) {
		if (subyte(uaddr, 0))
			return (_len);
		uaddr++;
		len--;
	}
	return (0);
}

int
linux_access_ok(int rw, const void *uaddr, size_t len)
{
	uintptr_t saddr;
	uintptr_t eaddr;

	/* get start and end address */
	saddr = (uintptr_t)uaddr;
	eaddr = (uintptr_t)uaddr + len;

	/* verify addresses are valid for userspace */
	return ((saddr == eaddr) ||
	    (eaddr > saddr && eaddr <= VM_MAXUSER_ADDRESS));
}

static int
linux_file_ioctl_sub(struct file *fp, struct linux_file *filp,
    u_long cmd, caddr_t data, struct thread *td)
{
	unsigned size;
	int error;

	size = IOCPARM_LEN(cmd);
	/* refer to logic in sys_ioctl() */
	if (size > 0) {
		/*
		 * Setup hint for linux_copyin() and linux_copyout().
		 *
		 * Background: Linux code expects a user-space address
		 * while FreeBSD supplies a kernel-space address.
		 */
		current->bsd_ioctl_data = data;
		current->bsd_ioctl_len = size;
		data = (void *)LINUX_IOCTL_MIN_PTR;
	} else {
		/* fetch user-space pointer */
		data = *(void **)data;
	}
#if defined(__amd64__)
	if (td->td_proc->p_elf_machine == EM_386) {
		/* try the compat IOCTL handler first */
		if (filp->f_op->compat_ioctl != NULL)
			error = -OPW(fp, td, filp->f_op->compat_ioctl(filp, cmd, (u_long)data));
		else
			error = ENOTTY;

		/* fallback to the regular IOCTL handler, if any */
		if (error == ENOTTY && filp->f_op->unlocked_ioctl != NULL)
			error = -OPW(fp, td, filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data));
	} else
#endif
	if (filp->f_op->unlocked_ioctl != NULL)
		error = -OPW(fp, td, filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data));
	else
		error = ENOTTY;
	if (size > 0) {
		current->bsd_ioctl_data = NULL;
		current->bsd_ioctl_len = 0;
	}

	if (error == EWOULDBLOCK) {
		/* update kqfilter status, if any */
		linux_file_kqfilter_poll(filp,
		    LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE);
	} else if (error == ERESTARTSYS)
		error = ERESTART;
	return (error);
}

#define	LINUX_POLL_TABLE_NORMAL ((poll_table *)1)

/*
 * This function atomically updates the poll wakeup state and returns
 * the previous state at the time of update.
 */
889 */ 890 static uint8_t 891 linux_poll_wakeup_state(atomic_t *v, const uint8_t *pstate) 892 { 893 int c, old; 894 895 c = v->counter; 896 897 while ((old = atomic_cmpxchg(v, c, pstate[c])) != c) 898 c = old; 899 900 return (c); 901 } 902 903 904 static int 905 linux_poll_wakeup_callback(wait_queue_t *wq, unsigned int wq_state, int flags, void *key) 906 { 907 static const uint8_t state[LINUX_FWQ_STATE_MAX] = { 908 [LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_INIT, /* NOP */ 909 [LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_NOT_READY, /* NOP */ 910 [LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_READY, 911 [LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_READY, /* NOP */ 912 }; 913 struct linux_file *filp = container_of(wq, struct linux_file, f_wait_queue.wq); 914 915 switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) { 916 case LINUX_FWQ_STATE_QUEUED: 917 linux_poll_wakeup(filp); 918 return (1); 919 default: 920 return (0); 921 } 922 } 923 924 void 925 linux_poll_wait(struct linux_file *filp, wait_queue_head_t *wqh, poll_table *p) 926 { 927 static const uint8_t state[LINUX_FWQ_STATE_MAX] = { 928 [LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_NOT_READY, 929 [LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_NOT_READY, /* NOP */ 930 [LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_QUEUED, /* NOP */ 931 [LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_QUEUED, 932 }; 933 934 /* check if we are called inside the select system call */ 935 if (p == LINUX_POLL_TABLE_NORMAL) 936 selrecord(curthread, &filp->f_selinfo); 937 938 switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) { 939 case LINUX_FWQ_STATE_INIT: 940 /* NOTE: file handles can only belong to one wait-queue */ 941 filp->f_wait_queue.wqh = wqh; 942 filp->f_wait_queue.wq.func = &linux_poll_wakeup_callback; 943 add_wait_queue(wqh, &filp->f_wait_queue.wq); 944 atomic_set(&filp->f_wait_queue.state, LINUX_FWQ_STATE_QUEUED); 945 break; 946 default: 947 break; 948 } 949 } 950 951 static void 952 linux_poll_wait_dequeue(struct linux_file *filp) 953 { 954 static const uint8_t state[LINUX_FWQ_STATE_MAX] = { 955 [LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_INIT, /* NOP */ 956 [LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_INIT, 957 [LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_INIT, 958 [LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_INIT, 959 }; 960 961 seldrain(&filp->f_selinfo); 962 963 switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) { 964 case LINUX_FWQ_STATE_NOT_READY: 965 case LINUX_FWQ_STATE_QUEUED: 966 case LINUX_FWQ_STATE_READY: 967 remove_wait_queue(filp->f_wait_queue.wqh, &filp->f_wait_queue.wq); 968 break; 969 default: 970 break; 971 } 972 } 973 974 void 975 linux_poll_wakeup(struct linux_file *filp) 976 { 977 /* this function should be NULL-safe */ 978 if (filp == NULL) 979 return; 980 981 selwakeup(&filp->f_selinfo); 982 983 spin_lock(&filp->f_kqlock); 984 filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ | 985 LINUX_KQ_FLAG_NEED_WRITE; 986 987 /* make sure the "knote" gets woken up */ 988 KNOTE_LOCKED(&filp->f_selinfo.si_note, 1); 989 spin_unlock(&filp->f_kqlock); 990 } 991 992 static void 993 linux_file_kqfilter_detach(struct knote *kn) 994 { 995 struct linux_file *filp = kn->kn_hook; 996 997 spin_lock(&filp->f_kqlock); 998 knlist_remove(&filp->f_selinfo.si_note, kn, 1); 999 spin_unlock(&filp->f_kqlock); 1000 } 1001 1002 static int 1003 linux_file_kqfilter_read_event(struct knote *kn, long hint) 1004 { 1005 struct linux_file *filp = kn->kn_hook; 1006 1007 mtx_assert(&filp->f_kqlock.m, MA_OWNED); 1008 1009 return ((filp->f_kqflags & 
}

static int
linux_file_kqfilter_write_event(struct knote *kn, long hint)
{
	struct linux_file *filp = kn->kn_hook;

	mtx_assert(&filp->f_kqlock.m, MA_OWNED);

	return ((filp->f_kqflags & LINUX_KQ_FLAG_NEED_WRITE) ? 1 : 0);
}

static struct filterops linux_dev_kqfiltops_read = {
	.f_isfd = 1,
	.f_detach = linux_file_kqfilter_detach,
	.f_event = linux_file_kqfilter_read_event,
};

static struct filterops linux_dev_kqfiltops_write = {
	.f_isfd = 1,
	.f_detach = linux_file_kqfilter_detach,
	.f_event = linux_file_kqfilter_write_event,
};

static void
linux_file_kqfilter_poll(struct linux_file *filp, int kqflags)
{
	int temp;

	if (filp->f_kqflags & kqflags) {
		struct thread *td = curthread;

		/* get the latest polling state */
		temp = OPW(filp->_file, td, filp->f_op->poll(filp, NULL));

		spin_lock(&filp->f_kqlock);
		/* clear kqflags */
		filp->f_kqflags &= ~(LINUX_KQ_FLAG_NEED_READ |
		    LINUX_KQ_FLAG_NEED_WRITE);
		/* update kqflags */
		if (temp & (POLLIN | POLLOUT)) {
			if (temp & POLLIN)
				filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ;
			if (temp & POLLOUT)
				filp->f_kqflags |= LINUX_KQ_FLAG_NEED_WRITE;

			/* make sure the "knote" gets woken up */
			KNOTE_LOCKED(&filp->f_selinfo.si_note, 0);
		}
		spin_unlock(&filp->f_kqlock);
	}
}

static int
linux_file_kqfilter(struct file *file, struct knote *kn)
{
	struct linux_file *filp;
	struct thread *td;
	int error;

	td = curthread;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	if (filp->f_op->poll == NULL)
		return (EINVAL);

	spin_lock(&filp->f_kqlock);
	switch (kn->kn_filter) {
	case EVFILT_READ:
		filp->f_kqflags |= LINUX_KQ_FLAG_HAS_READ;
		kn->kn_fop = &linux_dev_kqfiltops_read;
		kn->kn_hook = filp;
		knlist_add(&filp->f_selinfo.si_note, kn, 1);
		error = 0;
		break;
	case EVFILT_WRITE:
		filp->f_kqflags |= LINUX_KQ_FLAG_HAS_WRITE;
		kn->kn_fop = &linux_dev_kqfiltops_write;
		kn->kn_hook = filp;
		knlist_add(&filp->f_selinfo.si_note, kn, 1);
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}
	spin_unlock(&filp->f_kqlock);

	if (error == 0) {
		linux_set_current(td);

		/* update kqfilter status, if any */
		linux_file_kqfilter_poll(filp,
		    LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE);
	}
	return (error);
}

static int
linux_file_mmap_single(struct file *fp, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot,
    struct thread *td)
{
	struct vm_area_struct *vmap;
	struct mm_struct *mm;
	struct linux_file *filp;
	vm_memattr_t attr;
	int error;

	filp = (struct linux_file *)fp->f_data;
	filp->f_flags = fp->f_flag;

	if (filp->f_op->mmap == NULL)
		return (EOPNOTSUPP);

	linux_set_current(td);

	/*
	 * The same VM object might be shared by multiple processes
	 * and the mm_struct is usually freed when a process exits.
	 *
	 * The atomic reference below makes sure the mm_struct is
	 * available as long as the vmap is in the linux_vma_head.
	 */
1133 */ 1134 mm = current->mm; 1135 if (atomic_inc_not_zero(&mm->mm_users) == 0) 1136 return (EINVAL); 1137 1138 vmap = kzalloc(sizeof(*vmap), GFP_KERNEL); 1139 vmap->vm_start = 0; 1140 vmap->vm_end = size; 1141 vmap->vm_pgoff = *offset / PAGE_SIZE; 1142 vmap->vm_pfn = 0; 1143 vmap->vm_flags = vmap->vm_page_prot = (nprot & VM_PROT_ALL); 1144 vmap->vm_ops = NULL; 1145 vmap->vm_file = get_file(filp); 1146 vmap->vm_mm = mm; 1147 1148 if (unlikely(down_write_killable(&vmap->vm_mm->mmap_sem))) { 1149 error = EINTR; 1150 } else { 1151 error = -OPW(fp, td, filp->f_op->mmap(filp, vmap)); 1152 if (error == ERESTARTSYS) 1153 error = ERESTART; 1154 up_write(&vmap->vm_mm->mmap_sem); 1155 } 1156 1157 if (error != 0) { 1158 linux_cdev_handle_free(vmap); 1159 return (error); 1160 } 1161 1162 attr = pgprot2cachemode(vmap->vm_page_prot); 1163 1164 if (vmap->vm_ops != NULL) { 1165 struct vm_area_struct *ptr; 1166 void *vm_private_data; 1167 bool vm_no_fault; 1168 1169 if (vmap->vm_ops->open == NULL || 1170 vmap->vm_ops->close == NULL || 1171 vmap->vm_private_data == NULL) { 1172 /* free allocated VM area struct */ 1173 linux_cdev_handle_free(vmap); 1174 return (EINVAL); 1175 } 1176 1177 vm_private_data = vmap->vm_private_data; 1178 1179 rw_wlock(&linux_vma_lock); 1180 TAILQ_FOREACH(ptr, &linux_vma_head, vm_entry) { 1181 if (ptr->vm_private_data == vm_private_data) 1182 break; 1183 } 1184 /* check if there is an existing VM area struct */ 1185 if (ptr != NULL) { 1186 /* check if the VM area structure is invalid */ 1187 if (ptr->vm_ops == NULL || 1188 ptr->vm_ops->open == NULL || 1189 ptr->vm_ops->close == NULL) { 1190 error = ESTALE; 1191 vm_no_fault = 1; 1192 } else { 1193 error = EEXIST; 1194 vm_no_fault = (ptr->vm_ops->fault == NULL); 1195 } 1196 } else { 1197 /* insert VM area structure into list */ 1198 TAILQ_INSERT_TAIL(&linux_vma_head, vmap, vm_entry); 1199 error = 0; 1200 vm_no_fault = (vmap->vm_ops->fault == NULL); 1201 } 1202 rw_wunlock(&linux_vma_lock); 1203 1204 if (error != 0) { 1205 /* free allocated VM area struct */ 1206 linux_cdev_handle_free(vmap); 1207 /* check for stale VM area struct */ 1208 if (error != EEXIST) 1209 return (error); 1210 } 1211 1212 /* check if there is no fault handler */ 1213 if (vm_no_fault) { 1214 *object = cdev_pager_allocate(vm_private_data, OBJT_DEVICE, 1215 &linux_cdev_pager_ops[1], size, nprot, *offset, 1216 td->td_ucred); 1217 } else { 1218 *object = cdev_pager_allocate(vm_private_data, OBJT_MGTDEVICE, 1219 &linux_cdev_pager_ops[0], size, nprot, *offset, 1220 td->td_ucred); 1221 } 1222 1223 /* check if allocating the VM object failed */ 1224 if (*object == NULL) { 1225 if (error == 0) { 1226 /* remove VM area struct from list */ 1227 linux_cdev_handle_remove(vmap); 1228 /* free allocated VM area struct */ 1229 linux_cdev_handle_free(vmap); 1230 } 1231 return (EINVAL); 1232 } 1233 } else { 1234 struct sglist *sg; 1235 1236 sg = sglist_alloc(1, M_WAITOK); 1237 sglist_append_phys(sg, 1238 (vm_paddr_t)vmap->vm_pfn << PAGE_SHIFT, vmap->vm_len); 1239 1240 *object = vm_pager_allocate(OBJT_SG, sg, vmap->vm_len, 1241 nprot, 0, td->td_ucred); 1242 1243 linux_cdev_handle_free(vmap); 1244 1245 if (*object == NULL) { 1246 sglist_free(sg); 1247 return (EINVAL); 1248 } 1249 } 1250 1251 if (attr != VM_MEMATTR_DEFAULT) { 1252 VM_OBJECT_WLOCK(*object); 1253 vm_object_set_memattr(*object, attr); 1254 VM_OBJECT_WUNLOCK(*object); 1255 } 1256 *offset = 0; 1257 return (0); 1258 } 1259 1260 struct cdevsw linuxcdevsw = { 1261 .d_version = D_VERSION, 1262 .d_fdopen = linux_dev_fdopen, 
	.d_name = "lkpidev",
};

static int
linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct linux_file *filp;
	ssize_t bytes;
	int error;

	error = 0;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
		return (EINVAL);
	linux_set_current(td);
	if (filp->f_op->read) {
		bytes = OPW(file, td, filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset));
		if (bytes >= 0) {
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else {
			error = -bytes;
			if (error == ERESTARTSYS)
				error = ERESTART;
		}
	} else
		error = ENXIO;

	/* update kqfilter status, if any */
	linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ);

	return (error);
}

static int
linux_file_write(struct file *file, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct linux_file *filp;
	ssize_t bytes;
	int error;

	error = 0;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
		return (EINVAL);
	linux_set_current(td);
	if (filp->f_op->write) {
		bytes = OPW(file, td, filp->f_op->write(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset));
		if (bytes >= 0) {
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else {
			error = -bytes;
			if (error == ERESTARTSYS)
				error = ERESTART;
		}
	} else
		error = ENXIO;

	/* update kqfilter status, if any */
	linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_WRITE);

	return (error);
}

static int
linux_file_poll(struct file *file, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct linux_file *filp;
	int revents;

	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	if (filp->f_op->poll != NULL)
		revents = OPW(file, td, filp->f_op->poll(filp, LINUX_POLL_TABLE_NORMAL)) & events;
	else
		revents = 0;

	return (revents);
}

static int
linux_file_close(struct file *file, struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file *)file->f_data;

	KASSERT(file_count(filp) == 0, ("File refcount(%d) is not zero", file_count(filp)));

	filp->f_flags = file->f_flag;
	linux_set_current(td);
	linux_poll_wait_dequeue(filp);
	error = -OPW(file, td, filp->f_op->release(filp->f_vnode, filp));
	funsetown(&filp->f_sigio);
	if (filp->f_vnode != NULL)
		vdrop(filp->f_vnode);
	kfree(filp);

	return (error);
}

static int
linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred,
    struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file *)fp->f_data;
	filp->f_flags = fp->f_flag;
	error = 0;

	linux_set_current(td);
	switch (cmd) {
	case FIONBIO:
		break;
	case FIOASYNC:
		if (filp->f_op->fasync == NULL)
			break;
		error = -OPW(fp, td, filp->f_op->fasync(0, filp, fp->f_flag & FASYNC));
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &filp->f_sigio);
		if (error == 0) {
			if (filp->f_op->fasync == NULL)
				break;
			error = -OPW(fp, td, filp->f_op->fasync(0, filp,
			    fp->f_flag & FASYNC));
		}
		break;
	case FIOGETOWN:
		*(int *)data = fgetown(&filp->f_sigio);
		break;
	default:
		error = linux_file_ioctl_sub(fp, filp, cmd, data, td);
		break;
	}
	return (error);
}

static int
linux_file_mmap_sub(struct thread *td, vm_size_t objsize, vm_prot_t prot,
    vm_prot_t *maxprotp, int *flagsp, struct file *fp,
    vm_ooffset_t *foff, vm_object_t *objp)
{
	/*
	 * Character devices do not provide private mappings
	 * of any kind:
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & VM_PROT_WRITE) != 0)
		return (EACCES);
	if ((*flagsp & (MAP_PRIVATE | MAP_COPY)) != 0)
		return (EINVAL);

	return (linux_file_mmap_single(fp, foff, objsize, objp, (int)prot, td));
}

static int
linux_file_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
    vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
    struct thread *td)
{
	struct linux_file *filp;
	struct mount *mp;
	struct vnode *vp;
	vm_object_t object;
	vm_prot_t maxprot;
	int error;

	filp = (struct linux_file *)fp->f_data;

	vp = filp->f_vnode;
	if (vp == NULL)
		return (EOPNOTSUPP);

	/*
	 * Ensure that file and memory protections are
	 * compatible.
	 */
	mp = vp->v_mount;
	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) {
		maxprot = VM_PROT_NONE;
		if ((prot & VM_PROT_EXECUTE) != 0)
			return (EACCES);
	} else
		maxprot = VM_PROT_EXECUTE;
	if ((fp->f_flag & FREAD) != 0)
		maxprot |= VM_PROT_READ;
	else if ((prot & VM_PROT_READ) != 0)
		return (EACCES);

	/*
	 * If we are sharing potential changes via MAP_SHARED and we
	 * are trying to get write permission although we opened it
	 * without asking for it, bail out.
	 *
	 * Note that most character devices always share mappings.
	 *
	 * Rely on linux_file_mmap_sub() to fail invalid MAP_PRIVATE
	 * requests rather than doing it here.
	 */
1484 */ 1485 if ((flags & MAP_SHARED) != 0) { 1486 if ((fp->f_flag & FWRITE) != 0) 1487 maxprot |= VM_PROT_WRITE; 1488 else if ((prot & VM_PROT_WRITE) != 0) 1489 return (EACCES); 1490 } 1491 maxprot &= cap_maxprot; 1492 1493 error = linux_file_mmap_sub(td, size, prot, &maxprot, &flags, fp, &foff, 1494 &object); 1495 if (error != 0) 1496 return (error); 1497 1498 error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, 1499 foff, FALSE, td); 1500 if (error != 0) 1501 vm_object_deallocate(object); 1502 return (error); 1503 } 1504 1505 static int 1506 linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, 1507 struct thread *td) 1508 { 1509 struct linux_file *filp; 1510 struct vnode *vp; 1511 int error; 1512 1513 filp = (struct linux_file *)fp->f_data; 1514 if (filp->f_vnode == NULL) 1515 return (EOPNOTSUPP); 1516 1517 vp = filp->f_vnode; 1518 1519 vn_lock(vp, LK_SHARED | LK_RETRY); 1520 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 1521 VOP_UNLOCK(vp, 0); 1522 1523 return (error); 1524 } 1525 1526 static int 1527 linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif, 1528 struct filedesc *fdp) 1529 { 1530 1531 return (0); 1532 } 1533 1534 unsigned int 1535 linux_iminor(struct inode *inode) 1536 { 1537 struct linux_cdev *ldev; 1538 1539 if (inode == NULL || inode->v_rdev == NULL || 1540 inode->v_rdev->si_devsw != &linuxcdevsw) 1541 return (-1U); 1542 ldev = inode->v_rdev->si_drv1; 1543 if (ldev == NULL) 1544 return (-1U); 1545 1546 return (minor(ldev->dev)); 1547 } 1548 1549 struct fileops linuxfileops = { 1550 .fo_read = linux_file_read, 1551 .fo_write = linux_file_write, 1552 .fo_truncate = invfo_truncate, 1553 .fo_kqfilter = linux_file_kqfilter, 1554 .fo_stat = linux_file_stat, 1555 .fo_fill_kinfo = linux_file_fill_kinfo, 1556 .fo_poll = linux_file_poll, 1557 .fo_close = linux_file_close, 1558 .fo_ioctl = linux_file_ioctl, 1559 .fo_mmap = linux_file_mmap, 1560 .fo_chmod = invfo_chmod, 1561 .fo_chown = invfo_chown, 1562 .fo_sendfile = invfo_sendfile, 1563 .fo_flags = DFLAG_PASSABLE, 1564 }; 1565 1566 /* 1567 * Hash of vmmap addresses. This is infrequently accessed and does not 1568 * need to be particularly large. This is done because we must store the 1569 * caller's idea of the map size to properly unmap. 
1570 */ 1571 struct vmmap { 1572 LIST_ENTRY(vmmap) vm_next; 1573 void *vm_addr; 1574 unsigned long vm_size; 1575 }; 1576 1577 struct vmmaphd { 1578 struct vmmap *lh_first; 1579 }; 1580 #define VMMAP_HASH_SIZE 64 1581 #define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) 1582 #define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK 1583 static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; 1584 static struct mtx vmmaplock; 1585 1586 static void 1587 vmmap_add(void *addr, unsigned long size) 1588 { 1589 struct vmmap *vmmap; 1590 1591 vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); 1592 mtx_lock(&vmmaplock); 1593 vmmap->vm_size = size; 1594 vmmap->vm_addr = addr; 1595 LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); 1596 mtx_unlock(&vmmaplock); 1597 } 1598 1599 static struct vmmap * 1600 vmmap_remove(void *addr) 1601 { 1602 struct vmmap *vmmap; 1603 1604 mtx_lock(&vmmaplock); 1605 LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) 1606 if (vmmap->vm_addr == addr) 1607 break; 1608 if (vmmap) 1609 LIST_REMOVE(vmmap, vm_next); 1610 mtx_unlock(&vmmaplock); 1611 1612 return (vmmap); 1613 } 1614 1615 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__) 1616 void * 1617 _ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) 1618 { 1619 void *addr; 1620 1621 addr = pmap_mapdev_attr(phys_addr, size, attr); 1622 if (addr == NULL) 1623 return (NULL); 1624 vmmap_add(addr, size); 1625 1626 return (addr); 1627 } 1628 #endif 1629 1630 void 1631 iounmap(void *addr) 1632 { 1633 struct vmmap *vmmap; 1634 1635 vmmap = vmmap_remove(addr); 1636 if (vmmap == NULL) 1637 return; 1638 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__) 1639 pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); 1640 #endif 1641 kfree(vmmap); 1642 } 1643 1644 1645 void * 1646 vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) 1647 { 1648 vm_offset_t off; 1649 size_t size; 1650 1651 size = count * PAGE_SIZE; 1652 off = kva_alloc(size); 1653 if (off == 0) 1654 return (NULL); 1655 vmmap_add((void *)off, size); 1656 pmap_qenter(off, pages, count); 1657 1658 return ((void *)off); 1659 } 1660 1661 void 1662 vunmap(void *addr) 1663 { 1664 struct vmmap *vmmap; 1665 1666 vmmap = vmmap_remove(addr); 1667 if (vmmap == NULL) 1668 return; 1669 pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); 1670 kva_free((vm_offset_t)addr, vmmap->vm_size); 1671 kfree(vmmap); 1672 } 1673 1674 char * 1675 kvasprintf(gfp_t gfp, const char *fmt, va_list ap) 1676 { 1677 unsigned int len; 1678 char *p; 1679 va_list aq; 1680 1681 va_copy(aq, ap); 1682 len = vsnprintf(NULL, 0, fmt, aq); 1683 va_end(aq); 1684 1685 p = kmalloc(len + 1, gfp); 1686 if (p != NULL) 1687 vsnprintf(p, len + 1, fmt, ap); 1688 1689 return (p); 1690 } 1691 1692 char * 1693 kasprintf(gfp_t gfp, const char *fmt, ...) 
{
	va_list ap;
	char *p;

	va_start(ap, fmt);
	p = kvasprintf(gfp, fmt, ap);
	va_end(ap);

	return (p);
}

static void
linux_timer_callback_wrapper(void *context)
{
	struct timer_list *timer;

	linux_set_current(curthread);

	timer = context;
	timer->function(timer->data);
}

void
mod_timer(struct timer_list *timer, int expires)
{

	timer->expires = expires;
	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(expires),
	    &linux_timer_callback_wrapper, timer);
}

void
add_timer(struct timer_list *timer)
{

	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(timer->expires),
	    &linux_timer_callback_wrapper, timer);
}

void
add_timer_on(struct timer_list *timer, int cpu)
{

	callout_reset_on(&timer->timer_callout,
	    linux_timer_jiffies_until(timer->expires),
	    &linux_timer_callback_wrapper, timer, cpu);
}

static void
linux_timer_init(void *arg)
{

	/*
	 * Compute an internal HZ value which can divide 2**32 to
	 * avoid timer rounding problems when the tick value wraps
	 * around 2**32:
	 */
	linux_timer_hz_mask = 1;
	while (linux_timer_hz_mask < (unsigned long)hz)
		linux_timer_hz_mask *= 2;
	linux_timer_hz_mask--;
}
SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL);

void
linux_complete_common(struct completion *c, int all)
{
	int wakeup_swapper;

	sleepq_lock(c);
	c->done++;
	if (all)
		wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
	else
		wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(c);
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * Indefinite wait for done != 0 with or without signals.
 */
int
linux_wait_for_common(struct completion *c, int flags)
{
	int error;

	if (SCHEDULER_STOPPED())
		return (0);

	DROP_GIANT();

	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;
	error = 0;
	for (;;) {
		sleepq_lock(c);
		if (c->done)
			break;
		sleepq_add(c, NULL, "completion", flags, 0);
		if (flags & SLEEPQ_INTERRUPTIBLE) {
			if (sleepq_wait_sig(c, 0) != 0) {
				error = -ERESTARTSYS;
				goto intr;
			}
		} else
			sleepq_wait(c, 0);
	}
	c->done--;
	sleepq_release(c);

intr:
	PICKUP_GIANT();

	return (error);
}

/*
 * Time limited wait for done != 0 with or without signals.
 */
1818 */ 1819 int 1820 linux_wait_for_timeout_common(struct completion *c, int timeout, int flags) 1821 { 1822 int end = jiffies + timeout; 1823 int error; 1824 int ret; 1825 1826 if (SCHEDULER_STOPPED()) 1827 return (0); 1828 1829 DROP_GIANT(); 1830 1831 if (flags != 0) 1832 flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; 1833 else 1834 flags = SLEEPQ_SLEEP; 1835 1836 error = 0; 1837 ret = 0; 1838 for (;;) { 1839 sleepq_lock(c); 1840 if (c->done) 1841 break; 1842 sleepq_add(c, NULL, "completion", flags, 0); 1843 sleepq_set_timeout(c, linux_timer_jiffies_until(end)); 1844 if (flags & SLEEPQ_INTERRUPTIBLE) 1845 ret = sleepq_timedwait_sig(c, 0); 1846 else 1847 ret = sleepq_timedwait(c, 0); 1848 if (ret != 0) { 1849 /* check for timeout or signal */ 1850 if (ret == EWOULDBLOCK) 1851 error = 0; 1852 else 1853 error = -ERESTARTSYS; 1854 goto intr; 1855 } 1856 } 1857 c->done--; 1858 sleepq_release(c); 1859 1860 intr: 1861 PICKUP_GIANT(); 1862 1863 /* return how many jiffies are left */ 1864 return (ret != 0 ? error : linux_timer_jiffies_until(end)); 1865 } 1866 1867 int 1868 linux_try_wait_for_completion(struct completion *c) 1869 { 1870 int isdone; 1871 1872 isdone = 1; 1873 sleepq_lock(c); 1874 if (c->done) 1875 c->done--; 1876 else 1877 isdone = 0; 1878 sleepq_release(c); 1879 return (isdone); 1880 } 1881 1882 int 1883 linux_completion_done(struct completion *c) 1884 { 1885 int isdone; 1886 1887 isdone = 1; 1888 sleepq_lock(c); 1889 if (c->done == 0) 1890 isdone = 0; 1891 sleepq_release(c); 1892 return (isdone); 1893 } 1894 1895 static void 1896 linux_cdev_release(struct kobject *kobj) 1897 { 1898 struct linux_cdev *cdev; 1899 struct kobject *parent; 1900 1901 cdev = container_of(kobj, struct linux_cdev, kobj); 1902 parent = kobj->parent; 1903 if (cdev->cdev) 1904 destroy_dev(cdev->cdev); 1905 kfree(cdev); 1906 kobject_put(parent); 1907 } 1908 1909 static void 1910 linux_cdev_static_release(struct kobject *kobj) 1911 { 1912 struct linux_cdev *cdev; 1913 struct kobject *parent; 1914 1915 cdev = container_of(kobj, struct linux_cdev, kobj); 1916 parent = kobj->parent; 1917 if (cdev->cdev) 1918 destroy_dev(cdev->cdev); 1919 kobject_put(parent); 1920 } 1921 1922 const struct kobj_type linux_cdev_ktype = { 1923 .release = linux_cdev_release, 1924 }; 1925 1926 const struct kobj_type linux_cdev_static_ktype = { 1927 .release = linux_cdev_static_release, 1928 }; 1929 1930 static void 1931 linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate) 1932 { 1933 struct notifier_block *nb; 1934 1935 nb = arg; 1936 if (linkstate == LINK_STATE_UP) 1937 nb->notifier_call(nb, NETDEV_UP, ifp); 1938 else 1939 nb->notifier_call(nb, NETDEV_DOWN, ifp); 1940 } 1941 1942 static void 1943 linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp) 1944 { 1945 struct notifier_block *nb; 1946 1947 nb = arg; 1948 nb->notifier_call(nb, NETDEV_REGISTER, ifp); 1949 } 1950 1951 static void 1952 linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp) 1953 { 1954 struct notifier_block *nb; 1955 1956 nb = arg; 1957 nb->notifier_call(nb, NETDEV_UNREGISTER, ifp); 1958 } 1959 1960 static void 1961 linux_handle_iflladdr_event(void *arg, struct ifnet *ifp) 1962 { 1963 struct notifier_block *nb; 1964 1965 nb = arg; 1966 nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp); 1967 } 1968 1969 static void 1970 linux_handle_ifaddr_event(void *arg, struct ifnet *ifp) 1971 { 1972 struct notifier_block *nb; 1973 1974 nb = arg; 1975 nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp); 1976 } 1977 1978 int 1979 
register_netdevice_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER(
	    ifnet_link_event, linux_handle_ifnet_link_event, nb, 0);
	nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0);
	nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0);
	nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER(
	    iflladdr_event, linux_handle_iflladdr_event, nb, 0);

	return (0);
}

int
register_inetaddr_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER(
	    ifaddr_event, linux_handle_ifaddr_event, nb, 0);
	return (0);
}

int
unregister_netdevice_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifnet_link_event,
	    nb->tags[NETDEV_UP]);
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
	    nb->tags[NETDEV_REGISTER]);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    nb->tags[NETDEV_UNREGISTER]);
	EVENTHANDLER_DEREGISTER(iflladdr_event,
	    nb->tags[NETDEV_CHANGEADDR]);

	return (0);
}

int
unregister_inetaddr_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifaddr_event,
	    nb->tags[NETDEV_CHANGEIFADDR]);

	return (0);
}

struct list_sort_thunk {
	int (*cmp)(void *, struct list_head *, struct list_head *);
	void *priv;
};

static inline int
linux_le_cmp(void *priv, const void *d1, const void *d2)
{
	struct list_head *le1, *le2;
	struct list_sort_thunk *thunk;

	thunk = priv;
	le1 = *(__DECONST(struct list_head **, d1));
	le2 = *(__DECONST(struct list_head **, d2));
	return ((thunk->cmp)(thunk->priv, le1, le2));
}

void
list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv,
    struct list_head *a, struct list_head *b))
{
	struct list_sort_thunk thunk;
	struct list_head **ar, *le;
	size_t count, i;

	count = 0;
	list_for_each(le, head)
		count++;
	ar = malloc(sizeof(struct list_head *) * count, M_KMALLOC, M_WAITOK);
	i = 0;
	list_for_each(le, head)
		ar[i++] = le;
	thunk.cmp = cmp;
	thunk.priv = priv;
	qsort_r(ar, count, sizeof(struct list_head *), &thunk, linux_le_cmp);
	INIT_LIST_HEAD(head);
	for (i = 0; i < count; i++)
		list_add_tail(ar[i], head);
	free(ar, M_KMALLOC);
}

void
linux_irq_handler(void *ent)
{
	struct irq_ent *irqe;

	linux_set_current(curthread);

	irqe = ent;
	irqe->handler(irqe->irq, irqe->arg);
}

#if defined(__i386__) || defined(__amd64__)
int
linux_wbinvd_on_all_cpus(void)
{

	pmap_invalidate_cache();
	return (0);
}
#endif

int
linux_on_each_cpu(void callback(void *), void *data)
{

	smp_rendezvous(smp_no_rendezvous_barrier, callback,
	    smp_no_rendezvous_barrier, data);
	return (0);
}

int
linux_in_atomic(void)
{

	return ((curthread->td_pflags & TDP_NOFAULTING) != 0);
}

struct linux_cdev *
linux_find_cdev(const char *name, unsigned major, unsigned minor)
{
	dev_t dev = MKDEV(major, minor);
	struct cdev *cdev;

	dev_lock();
	LIST_FOREACH(cdev, &linuxcdevsw.d_devs, si_list) {
		struct linux_cdev *ldev = cdev->si_drv1;
		if (ldev->dev == dev &&
		    strcmp(kobject_name(&ldev->kobj), name) == 0) {
			break;
		}
	}
	dev_unlock();

	return (cdev != NULL ? cdev->si_drv1 : NULL);
}

int
__register_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add(cdev, makedev(major, i), 1);
		if (ret != 0)
			break;
	}
	return (ret);
}

int
__register_chrdev_p(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops, uid_t uid,
    gid_t gid, int mode)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add_ext(cdev, makedev(major, i), uid, gid, mode);
		if (ret != 0)
			break;
	}
	return (ret);
}

void
__unregister_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name)
{
	struct linux_cdev *cdevp;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdevp = linux_find_cdev(name, major, i);
		if (cdevp != NULL)
			cdev_del(cdevp);
	}
}

#if defined(__i386__) || defined(__amd64__)
bool linux_cpu_has_clflush;
#endif

static void
linux_compat_init(void *arg)
{
	struct sysctl_oid *rootoid;
	int i;

#if defined(__i386__) || defined(__amd64__)
	linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH);
#endif
	rw_init(&linux_vma_lock, "lkpi-vma-lock");

	rootoid = SYSCTL_ADD_ROOT_NODE(NULL,
	    OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
	kobject_init(&linux_class_root, &linux_class_ktype);
	kobject_set_name(&linux_class_root, "class");
	linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid),
	    OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class");
	kobject_init(&linux_root_device.kobj, &linux_dev_ktype);
	kobject_set_name(&linux_root_device.kobj, "device");
	linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL,
	    SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL,
	    "device");
	linux_root_device.bsddev = root_bus;
	linux_class_misc.name = "misc";
	class_register(&linux_class_misc);
	INIT_LIST_HEAD(&pci_drivers);
	INIT_LIST_HEAD(&pci_devices);
	spin_lock_init(&pci_lock);
	mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF);
	for (i = 0; i < VMMAP_HASH_SIZE; i++)
		LIST_INIT(&vmmaphead[i]);
}
SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL);

static void
linux_compat_uninit(void *arg)
{
	linux_kobject_kfree_name(&linux_class_root);
	linux_kobject_kfree_name(&linux_root_device.kobj);
	linux_kobject_kfree_name(&linux_class_misc.kobj);

	mtx_destroy(&vmmaplock);
	spin_lock_destroy(&pci_lock);
	rw_destroy(&linux_vma_lock);
}
SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);

/*
 * NOTE: Linux frequently uses "unsigned long" for pointer to integer
 * conversion and vice versa, where in FreeBSD "uintptr_t" would be
 * used.  Assert these types have the same size, else some parts of the
 * LinuxKPI may not work as expected:
 */
CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t));