/*-
 * Copyright (c) 2010 Isilon Systems, Inc.
 * Copyright (c) 2010 iX Systems, Inc.
 * Copyright (c) 2010 Panasas, Inc.
 * Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/sglist.h>
#include <sys/sleepqueue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bus.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/rwlock.h>
#include <sys/mman.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

#include <machine/stdarg.h>

#if defined(__i386__) || defined(__amd64__)
#include <machine/md_var.h>
#endif

#include <linux/kobject.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include <linux/sysfs.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/timer.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/list.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
#include <linux/compat.h>
#include <linux/poll.h>
#include <linux/smp.h>

#if defined(__i386__) || defined(__amd64__)
#include <asm/smp.h>
#endif

SYSCTL_NODE(_compat, OID_AUTO, linuxkpi, CTLFLAG_RW, 0, "LinuxKPI parameters");

MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat");

#include <linux/rbtree.h>
/* Undo Linux compat changes. */
#undef RB_ROOT
#undef file
#undef cdev
#define	RB_ROOT(head)	(head)->rbh_root

static struct vm_area_struct *linux_cdev_handle_find(void *handle);

struct kobject linux_class_root;
struct device linux_root_device;
struct class linux_class_misc;
struct list_head pci_drivers;
struct list_head pci_devices;
spinlock_t pci_lock;

unsigned long linux_timer_hz_mask;

int
panic_cmp(struct rb_node *one, struct rb_node *two)
{
	panic("no cmp");
}

RB_GENERATE(linux_root, rb_node, __entry, panic_cmp);

int
kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args)
{
	va_list tmp_va;
	int len;
	char *old;
	char *name;
	char dummy;

	old = kobj->name;

	if (old && fmt == NULL)
		return (0);

	/* compute length of string */
	va_copy(tmp_va, args);
	len = vsnprintf(&dummy, 0, fmt, tmp_va);
	va_end(tmp_va);

	/* account for zero termination */
	len++;

	/* check for error */
	if (len < 1)
		return (-EINVAL);

	/* allocate memory for string */
	name = kzalloc(len, GFP_KERNEL);
	if (name == NULL)
		return (-ENOMEM);
	vsnprintf(name, len, fmt, args);
	kobj->name = name;

	/* free old string */
	kfree(old);

	/* filter new string */
	for (; *name != '\0'; name++)
		if (*name == '/')
			*name = '!';
	return (0);
}

int
kobject_set_name(struct kobject *kobj, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);

	return (error);
}

static int
kobject_add_complete(struct kobject *kobj, struct kobject *parent)
{
	const struct kobj_type *t;
	int error;

	kobj->parent = parent;
	error = sysfs_create_dir(kobj);
	if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) {
		struct attribute **attr;
		t = kobj->ktype;

		for (attr = t->default_attrs; *attr != NULL; attr++) {
			error = sysfs_create_file(kobj, *attr);
			if (error)
				break;
		}
		if (error)
			sysfs_remove_dir(kobj);
	}
	return (error);
}
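
/*
 * Illustrative example (not taken from a specific driver, all names such
 * as "sc", "my_ktype", "parent" and "unit" are hypothetical): a LinuxKPI
 * consumer normally pairs kobject_add() below with a final kobject_put(),
 * so the ktype release method can run and free the name allocated by
 * kobject_set_name_vargs():
 *
 *	kobject_init(&sc->kobj, &my_ktype);
 *	error = kobject_add(&sc->kobj, parent, "obj%d", unit);
 *	if (error == 0)
 *		...
 *	kobject_put(&sc->kobj);
 */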

int
kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);

	return kobject_add_complete(kobj, parent);
}

void
linux_kobject_release(struct kref *kref)
{
	struct kobject *kobj;
	char *name;

	kobj = container_of(kref, struct kobject, kref);
	sysfs_remove_dir(kobj);
	name = kobj->name;
	if (kobj->ktype && kobj->ktype->release)
		kobj->ktype->release(kobj);
	kfree(name);
}

static void
linux_kobject_kfree(struct kobject *kobj)
{
	kfree(kobj);
}

static void
linux_kobject_kfree_name(struct kobject *kobj)
{
	if (kobj) {
		kfree(kobj->name);
	}
}

const struct kobj_type linux_kfree_type = {
	.release = linux_kobject_kfree
};

static void
linux_device_release(struct device *dev)
{
	pr_debug("linux_device_release: %s\n", dev_name(dev));
	kfree(dev);
}

static ssize_t
linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct class, kobj),
		    dattr, buf);
	return (error);
}

static ssize_t
linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct class, kobj),
		    dattr, buf, count);
	return (error);
}

static void
linux_class_release(struct kobject *kobj)
{
	struct class *class;

	class = container_of(kobj, struct class, kobj);
	if (class->class_release)
		class->class_release(class);
}

static const struct sysfs_ops linux_class_sysfs = {
	.show = linux_class_show,
	.store = linux_class_store,
};

const struct kobj_type linux_class_ktype = {
	.release = linux_class_release,
	.sysfs_ops = &linux_class_sysfs
};

static void
linux_dev_release(struct kobject *kobj)
{
	struct device *dev;

	dev = container_of(kobj, struct device, kobj);
	/* This is the precedence defined by linux. */
	if (dev->release)
		dev->release(dev);
	else if (dev->class && dev->class->dev_release)
		dev->class->dev_release(dev);
}

static ssize_t
linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct device, kobj),
		    dattr, buf);
	return (error);
}

static ssize_t
linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct device, kobj),
		    dattr, buf, count);
	return (error);
}

static const struct sysfs_ops linux_dev_sysfs = {
	.show = linux_dev_show,
	.store = linux_dev_store,
};

const struct kobj_type linux_dev_ktype = {
	.release = linux_dev_release,
	.sysfs_ops = &linux_dev_sysfs
};

struct device *
device_create(struct class *class, struct device *parent, dev_t devt,
    void *drvdata, const char *fmt, ...)
{
	struct device *dev;
	va_list args;

	dev = kzalloc(sizeof(*dev), M_WAITOK);
	dev->parent = parent;
	dev->class = class;
	dev->devt = devt;
	dev->driver_data = drvdata;
	dev->release = linux_device_release;
	va_start(args, fmt);
	kobject_set_name_vargs(&dev->kobj, fmt, args);
	va_end(args);
	device_register(dev);

	return (dev);
}
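
/*
 * Illustrative use of device_create() above, e.g. from a hypothetical
 * driver attach path ("my_class", "sc", "major" and "unit" are
 * placeholders); the call is typically paired with device_destroy()
 * on detach:
 *
 *	sc->dev = device_create(&my_class, NULL, MKDEV(major, unit),
 *	    sc, "mydev%d", unit);
 *	...
 *	device_destroy(&my_class, MKDEV(major, unit));
 */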

int
kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype,
    struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	kobject_init(kobj, ktype);
	kobj->ktype = ktype;
	kobj->parent = parent;
	kobj->name = NULL;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);
	return kobject_add_complete(kobj, parent);
}

static void
linux_kq_lock(void *arg)
{
	spinlock_t *s = arg;

	spin_lock(s);
}
static void
linux_kq_unlock(void *arg)
{
	spinlock_t *s = arg;

	spin_unlock(s);
}

static void
linux_kq_lock_owned(void *arg)
{
#ifdef INVARIANTS
	spinlock_t *s = arg;

	mtx_assert(&s->m, MA_OWNED);
#endif
}

static void
linux_kq_lock_unowned(void *arg)
{
#ifdef INVARIANTS
	spinlock_t *s = arg;

	mtx_assert(&s->m, MA_NOTOWNED);
#endif
}

static void
linux_file_kqfilter_poll(struct linux_file *, int);

struct linux_file *
linux_file_alloc(void)
{
	struct linux_file *filp;

	filp = kzalloc(sizeof(*filp), GFP_KERNEL);

	/* set initial refcount */
	filp->f_count = 1;

	/* setup fields needed by kqueue support */
	spin_lock_init(&filp->f_kqlock);
	knlist_init(&filp->f_selinfo.si_note, &filp->f_kqlock,
	    linux_kq_lock, linux_kq_unlock,
	    linux_kq_lock_owned, linux_kq_lock_unowned);

	return (filp);
}

void
linux_file_free(struct linux_file *filp)
{
	if (filp->_file == NULL) {
		if (filp->f_shmem != NULL)
			vm_object_deallocate(filp->f_shmem);
		kfree(filp);
	} else {
		/*
		 * The close method of the character device or file
		 * will free the linux_file structure:
		 */
		_fdrop(filp->_file, curthread);
	}
}

static int
linux_cdev_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
    vm_page_t *mres)
{
	struct vm_area_struct *vmap;

	vmap = linux_cdev_handle_find(vm_obj->handle);

	MPASS(vmap != NULL);
	MPASS(vmap->vm_private_data == vm_obj->handle);

	if (likely(vmap->vm_ops != NULL && offset < vmap->vm_len)) {
		vm_paddr_t paddr = IDX_TO_OFF(vmap->vm_pfn) + offset;
		vm_page_t page;

		if (((*mres)->flags & PG_FICTITIOUS) != 0) {
			/*
			 * If the passed in result page is a fake
			 * page, update it with the new physical
			 * address.
			 */
			page = *mres;
			vm_page_updatefake(page, paddr, vm_obj->memattr);
		} else {
			/*
			 * Replace the passed in "mres" page with our
			 * own fake page and free up all of the
			 * original pages.
			 */
495 */ 496 VM_OBJECT_WUNLOCK(vm_obj); 497 page = vm_page_getfake(paddr, vm_obj->memattr); 498 VM_OBJECT_WLOCK(vm_obj); 499 500 vm_page_replace_checked(page, vm_obj, 501 (*mres)->pindex, *mres); 502 503 vm_page_lock(*mres); 504 vm_page_free(*mres); 505 vm_page_unlock(*mres); 506 *mres = page; 507 } 508 page->valid = VM_PAGE_BITS_ALL; 509 return (VM_PAGER_OK); 510 } 511 return (VM_PAGER_FAIL); 512 } 513 514 static int 515 linux_cdev_pager_populate(vm_object_t vm_obj, vm_pindex_t pidx, int fault_type, 516 vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last) 517 { 518 struct vm_area_struct *vmap; 519 int err; 520 521 linux_set_current(curthread); 522 523 /* get VM area structure */ 524 vmap = linux_cdev_handle_find(vm_obj->handle); 525 MPASS(vmap != NULL); 526 MPASS(vmap->vm_private_data == vm_obj->handle); 527 528 VM_OBJECT_WUNLOCK(vm_obj); 529 530 down_write(&vmap->vm_mm->mmap_sem); 531 if (unlikely(vmap->vm_ops == NULL)) { 532 err = VM_FAULT_SIGBUS; 533 } else { 534 struct vm_fault vmf; 535 536 /* fill out VM fault structure */ 537 vmf.virtual_address = (void *)((uintptr_t)pidx << PAGE_SHIFT); 538 vmf.flags = (fault_type & VM_PROT_WRITE) ? FAULT_FLAG_WRITE : 0; 539 vmf.pgoff = 0; 540 vmf.page = NULL; 541 542 vmap->vm_pfn_count = 0; 543 vmap->vm_pfn_pcount = &vmap->vm_pfn_count; 544 vmap->vm_obj = vm_obj; 545 546 err = vmap->vm_ops->fault(vmap, &vmf); 547 548 while (vmap->vm_pfn_count == 0 && err == VM_FAULT_NOPAGE) { 549 kern_yield(PRI_USER); 550 err = vmap->vm_ops->fault(vmap, &vmf); 551 } 552 } 553 554 /* translate return code */ 555 switch (err) { 556 case VM_FAULT_OOM: 557 err = VM_PAGER_AGAIN; 558 break; 559 case VM_FAULT_SIGBUS: 560 err = VM_PAGER_BAD; 561 break; 562 case VM_FAULT_NOPAGE: 563 /* 564 * By contract the fault handler will return having 565 * busied all the pages itself. If pidx is already 566 * found in the object, it will simply xbusy the first 567 * page and return with vm_pfn_count set to 1. 
568 */ 569 *first = vmap->vm_pfn_first; 570 *last = *first + vmap->vm_pfn_count - 1; 571 err = VM_PAGER_OK; 572 break; 573 default: 574 err = VM_PAGER_ERROR; 575 break; 576 } 577 up_write(&vmap->vm_mm->mmap_sem); 578 VM_OBJECT_WLOCK(vm_obj); 579 return (err); 580 } 581 582 static struct rwlock linux_vma_lock; 583 static TAILQ_HEAD(, vm_area_struct) linux_vma_head = 584 TAILQ_HEAD_INITIALIZER(linux_vma_head); 585 586 static void 587 linux_cdev_handle_free(struct vm_area_struct *vmap) 588 { 589 /* Drop reference on vm_file */ 590 if (vmap->vm_file != NULL) 591 fput(vmap->vm_file); 592 593 /* Drop reference on mm_struct */ 594 mmput(vmap->vm_mm); 595 596 kfree(vmap); 597 } 598 599 static void 600 linux_cdev_handle_remove(struct vm_area_struct *vmap) 601 { 602 rw_wlock(&linux_vma_lock); 603 TAILQ_REMOVE(&linux_vma_head, vmap, vm_entry); 604 rw_wunlock(&linux_vma_lock); 605 } 606 607 static struct vm_area_struct * 608 linux_cdev_handle_find(void *handle) 609 { 610 struct vm_area_struct *vmap; 611 612 rw_rlock(&linux_vma_lock); 613 TAILQ_FOREACH(vmap, &linux_vma_head, vm_entry) { 614 if (vmap->vm_private_data == handle) 615 break; 616 } 617 rw_runlock(&linux_vma_lock); 618 return (vmap); 619 } 620 621 static int 622 linux_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 623 vm_ooffset_t foff, struct ucred *cred, u_short *color) 624 { 625 626 MPASS(linux_cdev_handle_find(handle) != NULL); 627 *color = 0; 628 return (0); 629 } 630 631 static void 632 linux_cdev_pager_dtor(void *handle) 633 { 634 const struct vm_operations_struct *vm_ops; 635 struct vm_area_struct *vmap; 636 637 vmap = linux_cdev_handle_find(handle); 638 MPASS(vmap != NULL); 639 640 /* 641 * Remove handle before calling close operation to prevent 642 * other threads from reusing the handle pointer. 
643 */ 644 linux_cdev_handle_remove(vmap); 645 646 down_write(&vmap->vm_mm->mmap_sem); 647 vm_ops = vmap->vm_ops; 648 if (likely(vm_ops != NULL)) 649 vm_ops->close(vmap); 650 up_write(&vmap->vm_mm->mmap_sem); 651 652 linux_cdev_handle_free(vmap); 653 } 654 655 static struct cdev_pager_ops linux_cdev_pager_ops[2] = { 656 { 657 /* OBJT_MGTDEVICE */ 658 .cdev_pg_populate = linux_cdev_pager_populate, 659 .cdev_pg_ctor = linux_cdev_pager_ctor, 660 .cdev_pg_dtor = linux_cdev_pager_dtor 661 }, 662 { 663 /* OBJT_DEVICE */ 664 .cdev_pg_fault = linux_cdev_pager_fault, 665 .cdev_pg_ctor = linux_cdev_pager_ctor, 666 .cdev_pg_dtor = linux_cdev_pager_dtor 667 }, 668 }; 669 670 #define OPW(fp,td,code) ({ \ 671 struct file *__fpop; \ 672 int __retval; \ 673 \ 674 __fpop = (td)->td_fpop; \ 675 (td)->td_fpop = (fp); \ 676 __retval = (code); \ 677 (td)->td_fpop = __fpop; \ 678 __retval; \ 679 }) 680 681 static int 682 linux_dev_fdopen(struct cdev *dev, int fflags, struct thread *td, struct file *file) 683 { 684 struct linux_cdev *ldev; 685 struct linux_file *filp; 686 int error; 687 688 ldev = dev->si_drv1; 689 690 filp = linux_file_alloc(); 691 filp->f_dentry = &filp->f_dentry_store; 692 filp->f_op = ldev->ops; 693 filp->f_mode = file->f_flag; 694 filp->f_flags = file->f_flag; 695 filp->f_vnode = file->f_vnode; 696 filp->_file = file; 697 698 linux_set_current(td); 699 700 if (filp->f_op->open) { 701 error = -filp->f_op->open(file->f_vnode, filp); 702 if (error) { 703 kfree(filp); 704 return (error); 705 } 706 } 707 708 /* hold on to the vnode - used for fstat() */ 709 vhold(filp->f_vnode); 710 711 /* release the file from devfs */ 712 finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops); 713 return (ENXIO); 714 } 715 716 #define LINUX_IOCTL_MIN_PTR 0x10000UL 717 #define LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + IOCPARM_MAX) 718 719 static inline int 720 linux_remap_address(void **uaddr, size_t len) 721 { 722 uintptr_t uaddr_val = (uintptr_t)(*uaddr); 723 724 if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR && 725 uaddr_val < LINUX_IOCTL_MAX_PTR)) { 726 struct task_struct *pts = current; 727 if (pts == NULL) { 728 *uaddr = NULL; 729 return (1); 730 } 731 732 /* compute data offset */ 733 uaddr_val -= LINUX_IOCTL_MIN_PTR; 734 735 /* check that length is within bounds */ 736 if ((len > IOCPARM_MAX) || 737 (uaddr_val + len) > pts->bsd_ioctl_len) { 738 *uaddr = NULL; 739 return (1); 740 } 741 742 /* re-add kernel buffer address */ 743 uaddr_val += (uintptr_t)pts->bsd_ioctl_data; 744 745 /* update address location */ 746 *uaddr = (void *)uaddr_val; 747 return (1); 748 } 749 return (0); 750 } 751 752 int 753 linux_copyin(const void *uaddr, void *kaddr, size_t len) 754 { 755 if (linux_remap_address(__DECONST(void **, &uaddr), len)) { 756 if (uaddr == NULL) 757 return (-EFAULT); 758 memcpy(kaddr, uaddr, len); 759 return (0); 760 } 761 return (-copyin(uaddr, kaddr, len)); 762 } 763 764 int 765 linux_copyout(const void *kaddr, void *uaddr, size_t len) 766 { 767 if (linux_remap_address(&uaddr, len)) { 768 if (uaddr == NULL) 769 return (-EFAULT); 770 memcpy(uaddr, kaddr, len); 771 return (0); 772 } 773 return (-copyout(kaddr, uaddr, len)); 774 } 775 776 size_t 777 linux_clear_user(void *_uaddr, size_t _len) 778 { 779 uint8_t *uaddr = _uaddr; 780 size_t len = _len; 781 782 /* make sure uaddr is aligned before going into the fast loop */ 783 while (((uintptr_t)uaddr & 7) != 0 && len > 7) { 784 if (subyte(uaddr, 0)) 785 return (_len); 786 uaddr++; 787 len--; 788 } 789 790 /* zero 8 bytes at a time */ 791 while (len > 

size_t
linux_clear_user(void *_uaddr, size_t _len)
{
	uint8_t *uaddr = _uaddr;
	size_t len = _len;

	/* make sure uaddr is aligned before going into the fast loop */
	while (((uintptr_t)uaddr & 7) != 0 && len > 7) {
		if (subyte(uaddr, 0))
			return (_len);
		uaddr++;
		len--;
	}

	/* zero 8 bytes at a time */
	while (len > 7) {
#ifdef __LP64__
		if (suword64(uaddr, 0))
			return (_len);
#else
		if (suword32(uaddr, 0))
			return (_len);
		if (suword32(uaddr + 4, 0))
			return (_len);
#endif
		uaddr += 8;
		len -= 8;
	}

	/* zero fill end, if any */
	while (len > 0) {
		if (subyte(uaddr, 0))
			return (_len);
		uaddr++;
		len--;
	}
	return (0);
}

int
linux_access_ok(int rw, const void *uaddr, size_t len)
{
	uintptr_t saddr;
	uintptr_t eaddr;

	/* get start and end address */
	saddr = (uintptr_t)uaddr;
	eaddr = (uintptr_t)uaddr + len;

	/* verify addresses are valid for userspace */
	return ((saddr == eaddr) ||
	    (eaddr > saddr && eaddr <= VM_MAXUSER_ADDRESS));
}

static int
linux_file_ioctl_sub(struct file *fp, struct linux_file *filp,
    u_long cmd, caddr_t data, struct thread *td)
{
	unsigned size;
	int error;

	size = IOCPARM_LEN(cmd);
	/* refer to logic in sys_ioctl() */
	if (size > 0) {
		/*
		 * Setup hint for linux_copyin() and linux_copyout().
		 *
		 * Background: Linux code expects a user-space address
		 * while FreeBSD supplies a kernel-space address.
		 */
		current->bsd_ioctl_data = data;
		current->bsd_ioctl_len = size;
		data = (void *)LINUX_IOCTL_MIN_PTR;
	} else {
		/* fetch user-space pointer */
		data = *(void **)data;
	}
#if defined(__amd64__)
	if (td->td_proc->p_elf_machine == EM_386) {
		/* try the compat IOCTL handler first */
		if (filp->f_op->compat_ioctl != NULL)
			error = -OPW(fp, td, filp->f_op->compat_ioctl(filp, cmd, (u_long)data));
		else
			error = ENOTTY;

		/* fallback to the regular IOCTL handler, if any */
		if (error == ENOTTY && filp->f_op->unlocked_ioctl != NULL)
			error = -OPW(fp, td, filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data));
	} else
#endif
	if (filp->f_op->unlocked_ioctl != NULL)
		error = -OPW(fp, td, filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data));
	else
		error = ENOTTY;
	if (size > 0) {
		current->bsd_ioctl_data = NULL;
		current->bsd_ioctl_len = 0;
	}

	if (error == EWOULDBLOCK) {
		/* update kqfilter status, if any */
		linux_file_kqfilter_poll(filp,
		    LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE);
	} else if (error == ERESTARTSYS)
		error = ERESTART;
	return (error);
}

#define	LINUX_POLL_TABLE_NORMAL ((poll_table *)1)

/*
 * This function atomically updates the poll wakeup state and returns
 * the previous state at the time of update.
 */
static uint8_t
linux_poll_wakeup_state(atomic_t *v, const uint8_t *pstate)
{
	int c, old;

	c = v->counter;

	while ((old = atomic_cmpxchg(v, c, pstate[c])) != c)
		c = old;

	return (c);
}

static int
linux_poll_wakeup_callback(wait_queue_t *wq, unsigned int wq_state, int flags, void *key)
{
	static const uint8_t state[LINUX_FWQ_STATE_MAX] = {
		[LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_INIT,	/* NOP */
		[LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_NOT_READY,	/* NOP */
		[LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_READY,
		[LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_READY,	/* NOP */
	};
	struct linux_file *filp = container_of(wq, struct linux_file, f_wait_queue.wq);

	switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) {
	case LINUX_FWQ_STATE_QUEUED:
		linux_poll_wakeup(filp);
		return (1);
	default:
		return (0);
	}
}

void
linux_poll_wait(struct linux_file *filp, wait_queue_head_t *wqh, poll_table *p)
{
	static const uint8_t state[LINUX_FWQ_STATE_MAX] = {
		[LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_NOT_READY,
		[LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_NOT_READY,	/* NOP */
		[LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_QUEUED,	/* NOP */
		[LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_QUEUED,
	};

	/* check if we are called inside the select system call */
	if (p == LINUX_POLL_TABLE_NORMAL)
		selrecord(curthread, &filp->f_selinfo);

	switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) {
	case LINUX_FWQ_STATE_INIT:
		/* NOTE: file handles can only belong to one wait-queue */
		filp->f_wait_queue.wqh = wqh;
		filp->f_wait_queue.wq.func = &linux_poll_wakeup_callback;
		add_wait_queue(wqh, &filp->f_wait_queue.wq);
		atomic_set(&filp->f_wait_queue.state, LINUX_FWQ_STATE_QUEUED);
		break;
	default:
		break;
	}
}

static void
linux_poll_wait_dequeue(struct linux_file *filp)
{
	static const uint8_t state[LINUX_FWQ_STATE_MAX] = {
		[LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_INIT,	/* NOP */
		[LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_INIT,
		[LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_INIT,
		[LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_INIT,
	};

	seldrain(&filp->f_selinfo);

	switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) {
	case LINUX_FWQ_STATE_NOT_READY:
	case LINUX_FWQ_STATE_QUEUED:
	case LINUX_FWQ_STATE_READY:
		remove_wait_queue(filp->f_wait_queue.wqh, &filp->f_wait_queue.wq);
		break;
	default:
		break;
	}
}

void
linux_poll_wakeup(struct linux_file *filp)
{
	/* this function should be NULL-safe */
	if (filp == NULL)
		return;

	selwakeup(&filp->f_selinfo);

	spin_lock(&filp->f_kqlock);
	filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ |
	    LINUX_KQ_FLAG_NEED_WRITE;

	/* make sure the "knote" gets woken up */
	KNOTE_LOCKED(&filp->f_selinfo.si_note, 1);
	spin_unlock(&filp->f_kqlock);
}

static void
linux_file_kqfilter_detach(struct knote *kn)
{
	struct linux_file *filp = kn->kn_hook;

	spin_lock(&filp->f_kqlock);
	knlist_remove(&filp->f_selinfo.si_note, kn, 1);
	spin_unlock(&filp->f_kqlock);
}

static int
linux_file_kqfilter_read_event(struct knote *kn, long hint)
{
	struct linux_file *filp = kn->kn_hook;

	mtx_assert(&filp->f_kqlock.m, MA_OWNED);

	return ((filp->f_kqflags &
	    LINUX_KQ_FLAG_NEED_READ) ? 1 : 0);
}

static int
linux_file_kqfilter_write_event(struct knote *kn, long hint)
{
	struct linux_file *filp = kn->kn_hook;

	mtx_assert(&filp->f_kqlock.m, MA_OWNED);

	return ((filp->f_kqflags & LINUX_KQ_FLAG_NEED_WRITE) ? 1 : 0);
}

static struct filterops linux_dev_kqfiltops_read = {
	.f_isfd = 1,
	.f_detach = linux_file_kqfilter_detach,
	.f_event = linux_file_kqfilter_read_event,
};

static struct filterops linux_dev_kqfiltops_write = {
	.f_isfd = 1,
	.f_detach = linux_file_kqfilter_detach,
	.f_event = linux_file_kqfilter_write_event,
};

static void
linux_file_kqfilter_poll(struct linux_file *filp, int kqflags)
{
	int temp;

	if (filp->f_kqflags & kqflags) {
		struct thread *td = curthread;

		/* get the latest polling state */
		temp = OPW(filp->_file, td, filp->f_op->poll(filp, NULL));

		spin_lock(&filp->f_kqlock);
		/* clear kqflags */
		filp->f_kqflags &= ~(LINUX_KQ_FLAG_NEED_READ |
		    LINUX_KQ_FLAG_NEED_WRITE);
		/* update kqflags */
		if (temp & (POLLIN | POLLOUT)) {
			if (temp & POLLIN)
				filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ;
			if (temp & POLLOUT)
				filp->f_kqflags |= LINUX_KQ_FLAG_NEED_WRITE;

			/* make sure the "knote" gets woken up */
			KNOTE_LOCKED(&filp->f_selinfo.si_note, 0);
		}
		spin_unlock(&filp->f_kqlock);
	}
}

static int
linux_file_kqfilter(struct file *file, struct knote *kn)
{
	struct linux_file *filp;
	struct thread *td;
	int error;

	td = curthread;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	if (filp->f_op->poll == NULL)
		return (EINVAL);

	spin_lock(&filp->f_kqlock);
	switch (kn->kn_filter) {
	case EVFILT_READ:
		filp->f_kqflags |= LINUX_KQ_FLAG_HAS_READ;
		kn->kn_fop = &linux_dev_kqfiltops_read;
		kn->kn_hook = filp;
		knlist_add(&filp->f_selinfo.si_note, kn, 1);
		error = 0;
		break;
	case EVFILT_WRITE:
		filp->f_kqflags |= LINUX_KQ_FLAG_HAS_WRITE;
		kn->kn_fop = &linux_dev_kqfiltops_write;
		kn->kn_hook = filp;
		knlist_add(&filp->f_selinfo.si_note, kn, 1);
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}
	spin_unlock(&filp->f_kqlock);

	if (error == 0) {
		linux_set_current(td);

		/* update kqfilter status, if any */
		linux_file_kqfilter_poll(filp,
		    LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE);
	}
	return (error);
}

static int
linux_file_mmap_single(struct file *fp, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot,
    struct thread *td)
{
	struct vm_area_struct *vmap;
	struct mm_struct *mm;
	struct linux_file *filp;
	vm_memattr_t attr;
	int error;

	filp = (struct linux_file *)fp->f_data;
	filp->f_flags = fp->f_flag;

	if (filp->f_op->mmap == NULL)
		return (EOPNOTSUPP);

	linux_set_current(td);

	/*
	 * The same VM object might be shared by multiple processes
	 * and the mm_struct is usually freed when a process exits.
	 *
	 * The atomic reference below makes sure the mm_struct is
	 * available as long as the vmap is in the linux_vma_head.
	 */
	mm = current->mm;
	if (atomic_inc_not_zero(&mm->mm_users) == 0)
		return (EINVAL);

	vmap = kzalloc(sizeof(*vmap), GFP_KERNEL);
	vmap->vm_start = 0;
	vmap->vm_end = size;
	vmap->vm_pgoff = *offset / PAGE_SIZE;
	vmap->vm_pfn = 0;
	vmap->vm_flags = vmap->vm_page_prot = (nprot & VM_PROT_ALL);
	vmap->vm_ops = NULL;
	vmap->vm_file = get_file(filp);
	vmap->vm_mm = mm;

	if (unlikely(down_write_killable(&vmap->vm_mm->mmap_sem))) {
		error = EINTR;
	} else {
		error = -OPW(fp, td, filp->f_op->mmap(filp, vmap));
		up_write(&vmap->vm_mm->mmap_sem);
	}

	if (error != 0) {
		linux_cdev_handle_free(vmap);
		return (error);
	}

	attr = pgprot2cachemode(vmap->vm_page_prot);

	if (vmap->vm_ops != NULL) {
		struct vm_area_struct *ptr;
		void *vm_private_data;
		bool vm_no_fault;

		if (vmap->vm_ops->open == NULL ||
		    vmap->vm_ops->close == NULL ||
		    vmap->vm_private_data == NULL) {
			/* free allocated VM area struct */
			linux_cdev_handle_free(vmap);
			return (EINVAL);
		}

		vm_private_data = vmap->vm_private_data;

		rw_wlock(&linux_vma_lock);
		TAILQ_FOREACH(ptr, &linux_vma_head, vm_entry) {
			if (ptr->vm_private_data == vm_private_data)
				break;
		}
		/* check if there is an existing VM area struct */
		if (ptr != NULL) {
			/* check if the VM area structure is invalid */
			if (ptr->vm_ops == NULL ||
			    ptr->vm_ops->open == NULL ||
			    ptr->vm_ops->close == NULL) {
				error = ESTALE;
				vm_no_fault = 1;
			} else {
				error = EEXIST;
				vm_no_fault = (ptr->vm_ops->fault == NULL);
			}
		} else {
			/* insert VM area structure into list */
			TAILQ_INSERT_TAIL(&linux_vma_head, vmap, vm_entry);
			error = 0;
			vm_no_fault = (vmap->vm_ops->fault == NULL);
		}
		rw_wunlock(&linux_vma_lock);

		if (error != 0) {
			/* free allocated VM area struct */
			linux_cdev_handle_free(vmap);
			/* check for stale VM area struct */
			if (error != EEXIST)
				return (error);
		}

		/* check if there is no fault handler */
		if (vm_no_fault) {
			*object = cdev_pager_allocate(vm_private_data, OBJT_DEVICE,
			    &linux_cdev_pager_ops[1], size, nprot, *offset,
			    td->td_ucred);
		} else {
			*object = cdev_pager_allocate(vm_private_data, OBJT_MGTDEVICE,
			    &linux_cdev_pager_ops[0], size, nprot, *offset,
			    td->td_ucred);
		}

		/* check if allocating the VM object failed */
		if (*object == NULL) {
			if (error == 0) {
				/* remove VM area struct from list */
				linux_cdev_handle_remove(vmap);
				/* free allocated VM area struct */
				linux_cdev_handle_free(vmap);
			}
			return (EINVAL);
		}
	} else {
		struct sglist *sg;

		sg = sglist_alloc(1, M_WAITOK);
		sglist_append_phys(sg,
		    (vm_paddr_t)vmap->vm_pfn << PAGE_SHIFT, vmap->vm_len);

		*object = vm_pager_allocate(OBJT_SG, sg, vmap->vm_len,
		    nprot, 0, td->td_ucred);

		linux_cdev_handle_free(vmap);

		if (*object == NULL) {
			sglist_free(sg);
			return (EINVAL);
		}
	}

	if (attr != VM_MEMATTR_DEFAULT) {
		VM_OBJECT_WLOCK(*object);
		vm_object_set_memattr(*object, attr);
		VM_OBJECT_WUNLOCK(*object);
	}
	*offset = 0;
	return (0);
}

struct cdevsw linuxcdevsw = {
	.d_version = D_VERSION,
	.d_fdopen = linux_dev_fdopen,
	.d_name = "lkpidev",
};

static int
linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct linux_file *filp;
	ssize_t bytes;
	int error;

	error = 0;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->read) {
		bytes = OPW(file, td, filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset));
		if (bytes >= 0) {
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else {
			error = -bytes;
			if (error == ERESTARTSYS)
				error = ERESTART;
		}
	} else
		error = ENXIO;

	/* update kqfilter status, if any */
	linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ);

	return (error);
}

static int
linux_file_write(struct file *file, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct linux_file *filp;
	ssize_t bytes;
	int error;

	error = 0;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->write) {
		bytes = OPW(file, td, filp->f_op->write(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset));
		if (bytes >= 0) {
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else {
			error = -bytes;
			if (error == ERESTARTSYS)
				error = ERESTART;
		}
	} else
		error = ENXIO;

	/* update kqfilter status, if any */
	linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_WRITE);

	return (error);
}

static int
linux_file_poll(struct file *file, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct linux_file *filp;
	int revents;

	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	if (filp->f_op->poll != NULL)
		revents = OPW(file, td, filp->f_op->poll(filp, LINUX_POLL_TABLE_NORMAL)) & events;
	else
		revents = 0;

	return (revents);
}

static int
linux_file_close(struct file *file, struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file *)file->f_data;

	KASSERT(file_count(filp) == 0, ("File refcount(%d) is not zero", file_count(filp)));

	filp->f_flags = file->f_flag;
	linux_set_current(td);
	linux_poll_wait_dequeue(filp);
	error = -OPW(file, td, filp->f_op->release(filp->f_vnode, filp));
	funsetown(&filp->f_sigio);
	if (filp->f_vnode != NULL)
		vdrop(filp->f_vnode);
	kfree(filp);

	return (error);
}

static int
linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred,
    struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file *)fp->f_data;
	filp->f_flags = fp->f_flag;
	error = 0;

	linux_set_current(td);
	switch (cmd) {
	case FIONBIO:
		break;
	case FIOASYNC:
		if (filp->f_op->fasync == NULL)
			break;
		error = -OPW(fp, td, filp->f_op->fasync(0, filp, fp->f_flag & FASYNC));
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &filp->f_sigio);
		if (error == 0) {
			if (filp->f_op->fasync == NULL)
				break;
			error = -OPW(fp, td, filp->f_op->fasync(0, filp,
			    fp->f_flag & FASYNC));
		}
		break;
	case FIOGETOWN:
		*(int *)data = fgetown(&filp->f_sigio);
		break;
	default:
		error = linux_file_ioctl_sub(fp, filp, cmd, data, td);
		break;
	}
	return (error);
}

static int
linux_file_mmap_sub(struct thread *td, vm_size_t objsize, vm_prot_t prot,
    vm_prot_t *maxprotp, int *flagsp, struct file *fp,
    vm_ooffset_t *foff, vm_object_t *objp)
{
	/*
	 * Character devices do not provide private mappings
	 * of any kind:
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & VM_PROT_WRITE) != 0)
		return (EACCES);
	if ((*flagsp & (MAP_PRIVATE | MAP_COPY)) != 0)
		return (EINVAL);

	return (linux_file_mmap_single(fp, foff, objsize, objp, (int)prot, td));
}

static int
linux_file_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
    vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
    struct thread *td)
{
	struct linux_file *filp;
	struct mount *mp;
	struct vnode *vp;
	vm_object_t object;
	vm_prot_t maxprot;
	int error;

	filp = (struct linux_file *)fp->f_data;

	vp = filp->f_vnode;
	if (vp == NULL)
		return (EOPNOTSUPP);

	/*
	 * Ensure that file and memory protections are
	 * compatible.
	 */
	mp = vp->v_mount;
	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) {
		maxprot = VM_PROT_NONE;
		if ((prot & VM_PROT_EXECUTE) != 0)
			return (EACCES);
	} else
		maxprot = VM_PROT_EXECUTE;
	if ((fp->f_flag & FREAD) != 0)
		maxprot |= VM_PROT_READ;
	else if ((prot & VM_PROT_READ) != 0)
		return (EACCES);

	/*
	 * If we are sharing potential changes via MAP_SHARED and we
	 * are trying to get write permission although we opened it
	 * without asking for it, bail out.
	 *
	 * Note that most character devices always share mappings.
	 *
	 * Rely on linux_file_mmap_sub() to fail invalid MAP_PRIVATE
	 * requests rather than doing it here.
	 */
	if ((flags & MAP_SHARED) != 0) {
		if ((fp->f_flag & FWRITE) != 0)
			maxprot |= VM_PROT_WRITE;
		else if ((prot & VM_PROT_WRITE) != 0)
			return (EACCES);
	}
	maxprot &= cap_maxprot;

	error = linux_file_mmap_sub(td, size, prot, &maxprot, &flags, fp, &foff,
	    &object);
	if (error != 0)
		return (error);

	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
	    foff, FALSE, td);
	if (error != 0)
		vm_object_deallocate(object);
	return (error);
}

static int
linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{
	struct linux_file *filp;
	struct vnode *vp;
	int error;

	filp = (struct linux_file *)fp->f_data;
	if (filp->f_vnode == NULL)
		return (EOPNOTSUPP);

	vp = filp->f_vnode;

	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
	VOP_UNLOCK(vp, 0);

	return (error);
}

static int
linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{

	return (0);
}

unsigned int
linux_iminor(struct inode *inode)
{
	struct linux_cdev *ldev;

	if (inode == NULL || inode->v_rdev == NULL ||
	    inode->v_rdev->si_devsw != &linuxcdevsw)
		return (-1U);
	ldev = inode->v_rdev->si_drv1;
	if (ldev == NULL)
		return (-1U);

	return (minor(ldev->dev));
}

struct fileops linuxfileops = {
	.fo_read = linux_file_read,
	.fo_write = linux_file_write,
	.fo_truncate = invfo_truncate,
	.fo_kqfilter = linux_file_kqfilter,
	.fo_stat = linux_file_stat,
	.fo_fill_kinfo = linux_file_fill_kinfo,
	.fo_poll = linux_file_poll,
	.fo_close = linux_file_close,
	.fo_ioctl = linux_file_ioctl,
	.fo_mmap = linux_file_mmap,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
};

/*
 * Hash of vmmap addresses.  This is infrequently accessed and does not
 * need to be particularly large.  This is done because we must store the
 * caller's idea of the map size to properly unmap.
 */
struct vmmap {
	LIST_ENTRY(vmmap)	vm_next;
	void 			*vm_addr;
	unsigned long		vm_size;
};

struct vmmaphd {
	struct vmmap *lh_first;
};
#define	VMMAP_HASH_SIZE	64
#define	VMMAP_HASH_MASK	(VMMAP_HASH_SIZE - 1)
#define	VM_HASH(addr)	((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK
static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE];
static struct mtx vmmaplock;

static void
vmmap_add(void *addr, unsigned long size)
{
	struct vmmap *vmmap;

	vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL);
	mtx_lock(&vmmaplock);
	vmmap->vm_size = size;
	vmmap->vm_addr = addr;
	LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next);
	mtx_unlock(&vmmaplock);
}

static struct vmmap *
vmmap_remove(void *addr)
{
	struct vmmap *vmmap;

	mtx_lock(&vmmaplock);
	LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next)
		if (vmmap->vm_addr == addr)
			break;
	if (vmmap)
		LIST_REMOVE(vmmap, vm_next);
	mtx_unlock(&vmmaplock);

	return (vmmap);
}

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
void *
_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr)
{
	void *addr;

	addr = pmap_mapdev_attr(phys_addr, size, attr);
	if (addr == NULL)
		return (NULL);
	vmmap_add(addr, size);

	return (addr);
}
#endif

void
iounmap(void *addr)
{
	struct vmmap *vmmap;

	vmmap = vmmap_remove(addr);
	if (vmmap == NULL)
		return;
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size);
#endif
	kfree(vmmap);
}

void *
vmap(struct page **pages, unsigned int count, unsigned long flags, int prot)
{
	vm_offset_t off;
	size_t size;

	size = count * PAGE_SIZE;
	off = kva_alloc(size);
	if (off == 0)
		return (NULL);
	vmmap_add((void *)off, size);
	pmap_qenter(off, pages, count);

	return ((void *)off);
}

void
vunmap(void *addr)
{
	struct vmmap *vmmap;

	vmmap = vmmap_remove(addr);
	if (vmmap == NULL)
		return;
	pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE);
	kva_free((vm_offset_t)addr, vmmap->vm_size);
	kfree(vmmap);
}

char *
kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
{
	unsigned int len;
	char *p;
	va_list aq;

	va_copy(aq, ap);
	len = vsnprintf(NULL, 0, fmt, aq);
	va_end(aq);

	p = kmalloc(len + 1, gfp);
	if (p != NULL)
		vsnprintf(p, len + 1, fmt, ap);

	return (p);
}

char *
kasprintf(gfp_t gfp, const char *fmt, ...)
{
	va_list ap;
	char *p;

	va_start(ap, fmt);
	p = kvasprintf(gfp, fmt, ap);
	va_end(ap);

	return (p);
}

static void
linux_timer_callback_wrapper(void *context)
{
	struct timer_list *timer;

	linux_set_current(curthread);

	timer = context;
	timer->function(timer->data);
}

void
mod_timer(struct timer_list *timer, int expires)
{

	timer->expires = expires;
	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(expires),
	    &linux_timer_callback_wrapper, timer);
}

void
add_timer(struct timer_list *timer)
{

	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(timer->expires),
	    &linux_timer_callback_wrapper, timer);
}

void
add_timer_on(struct timer_list *timer, int cpu)
{

	callout_reset_on(&timer->timer_callout,
	    linux_timer_jiffies_until(timer->expires),
	    &linux_timer_callback_wrapper, timer, cpu);
}

static void
linux_timer_init(void *arg)
{

	/*
	 * Compute an internal HZ value which can divide 2**32 to
	 * avoid timer rounding problems when the tick value wraps
	 * around 2**32:
	 */
	linux_timer_hz_mask = 1;
	while (linux_timer_hz_mask < (unsigned long)hz)
		linux_timer_hz_mask *= 2;
	linux_timer_hz_mask--;
}
SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL);

void
linux_complete_common(struct completion *c, int all)
{
	int wakeup_swapper;

	sleepq_lock(c);
	c->done++;
	if (all)
		wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
	else
		wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(c);
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * Indefinite wait for done != 0 with or without signals.
 */
int
linux_wait_for_common(struct completion *c, int flags)
{
	int error;

	if (SCHEDULER_STOPPED())
		return (0);

	DROP_GIANT();

	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;
	error = 0;
	for (;;) {
		sleepq_lock(c);
		if (c->done)
			break;
		sleepq_add(c, NULL, "completion", flags, 0);
		if (flags & SLEEPQ_INTERRUPTIBLE) {
			if (sleepq_wait_sig(c, 0) != 0) {
				error = -ERESTARTSYS;
				goto intr;
			}
		} else
			sleepq_wait(c, 0);
	}
	c->done--;
	sleepq_release(c);

intr:
	PICKUP_GIANT();

	return (error);
}

/*
 * Time limited wait for done != 0 with or without signals.
 */
int
linux_wait_for_timeout_common(struct completion *c, int timeout, int flags)
{
	int end = jiffies + timeout;
	int error;
	int ret;

	if (SCHEDULER_STOPPED())
		return (0);

	DROP_GIANT();

	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;

	error = 0;
	ret = 0;
	for (;;) {
		sleepq_lock(c);
		if (c->done)
			break;
		sleepq_add(c, NULL, "completion", flags, 0);
		sleepq_set_timeout(c, linux_timer_jiffies_until(end));
		if (flags & SLEEPQ_INTERRUPTIBLE)
			ret = sleepq_timedwait_sig(c, 0);
		else
			ret = sleepq_timedwait(c, 0);
		if (ret != 0) {
			/* check for timeout or signal */
			if (ret == EWOULDBLOCK)
				error = 0;
			else
				error = -ERESTARTSYS;
			goto intr;
		}
	}
	c->done--;
	sleepq_release(c);

intr:
	PICKUP_GIANT();

	/* return how many jiffies are left */
	return (ret != 0 ? error : linux_timer_jiffies_until(end));
}

int
linux_try_wait_for_completion(struct completion *c)
{
	int isdone;

	isdone = 1;
	sleepq_lock(c);
	if (c->done)
		c->done--;
	else
		isdone = 0;
	sleepq_release(c);
	return (isdone);
}

int
linux_completion_done(struct completion *c)
{
	int isdone;

	isdone = 1;
	sleepq_lock(c);
	if (c->done == 0)
		isdone = 0;
	sleepq_release(c);
	return (isdone);
}

static void
linux_cdev_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kfree(cdev);
	kobject_put(parent);
}

static void
linux_cdev_static_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kobject_put(parent);
}

const struct kobj_type linux_cdev_ktype = {
	.release = linux_cdev_release,
};

const struct kobj_type linux_cdev_static_ktype = {
	.release = linux_cdev_static_release,
};

static void
linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate)
{
	struct notifier_block *nb;

	nb = arg;
	if (linkstate == LINK_STATE_UP)
		nb->notifier_call(nb, NETDEV_UP, ifp);
	else
		nb->notifier_call(nb, NETDEV_DOWN, ifp);
}

static void
linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_REGISTER, ifp);
}

static void
linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_UNREGISTER, ifp);
}

static void
linux_handle_iflladdr_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp);
}

static void
linux_handle_ifaddr_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp);
}

int
register_netdevice_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER(
	    ifnet_link_event, linux_handle_ifnet_link_event, nb, 0);
	nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0);
	nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0);
	nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER(
	    iflladdr_event, linux_handle_iflladdr_event, nb, 0);

	return (0);
}

int
register_inetaddr_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER(
	    ifaddr_event, linux_handle_ifaddr_event, nb, 0);
	return (0);
}

int
unregister_netdevice_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifnet_link_event,
	    nb->tags[NETDEV_UP]);
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
	    nb->tags[NETDEV_REGISTER]);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    nb->tags[NETDEV_UNREGISTER]);
	EVENTHANDLER_DEREGISTER(iflladdr_event,
	    nb->tags[NETDEV_CHANGEADDR]);

	return (0);
}

int
unregister_inetaddr_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifaddr_event,
	    nb->tags[NETDEV_CHANGEIFADDR]);

	return (0);
}

struct list_sort_thunk {
	int (*cmp)(void *, struct list_head *, struct list_head *);
	void *priv;
};

static inline int
linux_le_cmp(void *priv, const void *d1, const void *d2)
{
	struct list_head *le1, *le2;
	struct list_sort_thunk *thunk;

	thunk = priv;
	le1 = *(__DECONST(struct list_head **, d1));
	le2 = *(__DECONST(struct list_head **, d2));
	return ((thunk->cmp)(thunk->priv, le1, le2));
}

void
list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv,
    struct list_head *a, struct list_head *b))
{
	struct list_sort_thunk thunk;
	struct list_head **ar, *le;
	size_t count, i;

	count = 0;
	list_for_each(le, head)
		count++;
	ar = malloc(sizeof(struct list_head *) * count, M_KMALLOC, M_WAITOK);
	i = 0;
	list_for_each(le, head)
		ar[i++] = le;
	thunk.cmp = cmp;
	thunk.priv = priv;
	qsort_r(ar, count, sizeof(struct list_head *), &thunk, linux_le_cmp);
	INIT_LIST_HEAD(head);
	for (i = 0; i < count; i++)
		list_add_tail(ar[i], head);
	free(ar, M_KMALLOC);
}

void
linux_irq_handler(void *ent)
{
	struct irq_ent *irqe;

	linux_set_current(curthread);

	irqe = ent;
	irqe->handler(irqe->irq, irqe->arg);
}

#if defined(__i386__) || defined(__amd64__)
int
linux_wbinvd_on_all_cpus(void)
{

	pmap_invalidate_cache();
	return (0);
}
#endif

int
linux_on_each_cpu(void callback(void *), void *data)
{

	smp_rendezvous(smp_no_rendezvous_barrier, callback,
	    smp_no_rendezvous_barrier, data);
	return (0);
}

int
linux_in_atomic(void)
{

	return ((curthread->td_pflags & TDP_NOFAULTING) != 0);
}
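
/*
 * Illustrative example for the character-device helpers below (all
 * values such as "MY_MAJOR" and "my_fops" are hypothetical): a LinuxKPI
 * driver registering minors 0..3 of a major would call
 *
 *	error = __register_chrdev(MY_MAJOR, 0, 4, "mydev", &my_fops);
 *	...
 *	__unregister_chrdev(MY_MAJOR, 0, 4, "mydev");
 *
 * and each created cdev can later be looked up again by name and device
 * number through linux_find_cdev().
 */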

struct linux_cdev *
linux_find_cdev(const char *name, unsigned major, unsigned minor)
{
	dev_t dev = MKDEV(major, minor);
	struct cdev *cdev;

	dev_lock();
	LIST_FOREACH(cdev, &linuxcdevsw.d_devs, si_list) {
		struct linux_cdev *ldev = cdev->si_drv1;
		if (ldev->dev == dev &&
		    strcmp(kobject_name(&ldev->kobj), name) == 0) {
			break;
		}
	}
	dev_unlock();

	return (cdev != NULL ? cdev->si_drv1 : NULL);
}

int
__register_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add(cdev, makedev(major, i), 1);
		if (ret != 0)
			break;
	}
	return (ret);
}

int
__register_chrdev_p(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops, uid_t uid,
    gid_t gid, int mode)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add_ext(cdev, makedev(major, i), uid, gid, mode);
		if (ret != 0)
			break;
	}
	return (ret);
}

void
__unregister_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name)
{
	struct linux_cdev *cdevp;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdevp = linux_find_cdev(name, major, i);
		if (cdevp != NULL)
			cdev_del(cdevp);
	}
}

#if defined(__i386__) || defined(__amd64__)
bool linux_cpu_has_clflush;
#endif

static void
linux_compat_init(void *arg)
{
	struct sysctl_oid *rootoid;
	int i;

#if defined(__i386__) || defined(__amd64__)
	linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH);
#endif
	rw_init(&linux_vma_lock, "lkpi-vma-lock");

	rootoid = SYSCTL_ADD_ROOT_NODE(NULL,
	    OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
	kobject_init(&linux_class_root, &linux_class_ktype);
	kobject_set_name(&linux_class_root, "class");
	linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid),
	    OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class");
	kobject_init(&linux_root_device.kobj, &linux_dev_ktype);
	kobject_set_name(&linux_root_device.kobj, "device");
	linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL,
	    SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL,
	    "device");
	linux_root_device.bsddev = root_bus;
	linux_class_misc.name = "misc";
	class_register(&linux_class_misc);
	INIT_LIST_HEAD(&pci_drivers);
	INIT_LIST_HEAD(&pci_devices);
	spin_lock_init(&pci_lock);
	mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF);
	for (i = 0; i < VMMAP_HASH_SIZE; i++)
		LIST_INIT(&vmmaphead[i]);
}
SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL);

static void
linux_compat_uninit(void *arg)
{
	linux_kobject_kfree_name(&linux_class_root);
	linux_kobject_kfree_name(&linux_root_device.kobj);
	linux_kobject_kfree_name(&linux_class_misc.kobj);

	mtx_destroy(&vmmaplock);
	spin_lock_destroy(&pci_lock);
	rw_destroy(&linux_vma_lock);
}
SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);
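
/*
 * A small illustration of the assumption asserted below: LinuxKPI
 * consumers regularly round-trip pointers through "unsigned long",
 * for example
 *
 *	unsigned long cookie = (unsigned long)ptr;
 *	void *again = (void *)cookie;
 *
 * which only preserves the pointer value when "unsigned long" is as
 * wide as "uintptr_t".
 */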

/*
 * NOTE: Linux frequently uses "unsigned long" for pointer to integer
 * conversion and vice versa, where in FreeBSD "uintptr_t" would be
 * used.  Assert these types have the same size, else some parts of the
 * LinuxKPI may not work like expected:
 */
CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t));