/*-
 * Copyright (c) 2010 Isilon Systems, Inc.
 * Copyright (c) 2010 iX Systems, Inc.
 * Copyright (c) 2010 Panasas, Inc.
 * Copyright (c) 2013-2018 Mellanox Technologies, Ltd.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_stack.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/sglist.h>
#include <sys/sleepqueue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bus.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/rwlock.h>
#include <sys/mman.h>
#include <sys/stack.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

#include <machine/stdarg.h>

#if defined(__i386__) || defined(__amd64__)
#include <machine/md_var.h>
#endif

#include <linux/kobject.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include <linux/sysfs.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/timer.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/list.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
#include <linux/compat.h>
#include <linux/poll.h>
#include <linux/smp.h>

#if defined(__i386__) || defined(__amd64__)
#include <asm/smp.h>
#endif

SYSCTL_NODE(_compat, OID_AUTO, linuxkpi, CTLFLAG_RW, 0, "LinuxKPI parameters");

MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat");

#include <linux/rbtree.h>
/* Undo Linux compat changes. */
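/*
 * NOTE: the LinuxKPI headers included above install macros such as
 * RB_ROOT, "file" and "cdev" which shadow the native FreeBSD
 * identifiers of the same name; they are undefined here because the
 * code below needs the FreeBSD meanings.
 */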
#undef RB_ROOT
#undef file
#undef cdev
#define	RB_ROOT(head)	(head)->rbh_root

static struct vm_area_struct *linux_cdev_handle_find(void *handle);

struct kobject linux_class_root;
struct device linux_root_device;
struct class linux_class_misc;
struct list_head pci_drivers;
struct list_head pci_devices;
spinlock_t pci_lock;

unsigned long linux_timer_hz_mask;

int
panic_cmp(struct rb_node *one, struct rb_node *two)
{
	panic("no cmp");
}

RB_GENERATE(linux_root, rb_node, __entry, panic_cmp);

int
kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args)
{
	va_list tmp_va;
	int len;
	char *old;
	char *name;
	char dummy;

	old = kobj->name;

	if (old && fmt == NULL)
		return (0);

	/* compute length of string */
	va_copy(tmp_va, args);
	len = vsnprintf(&dummy, 0, fmt, tmp_va);
	va_end(tmp_va);

	/* account for zero termination */
	len++;

	/* check for error */
	if (len < 1)
		return (-EINVAL);

	/* allocate memory for string */
	name = kzalloc(len, GFP_KERNEL);
	if (name == NULL)
		return (-ENOMEM);
	vsnprintf(name, len, fmt, args);
	kobj->name = name;

	/* free old string */
	kfree(old);

	/* filter new string */
	for (; *name != '\0'; name++)
		if (*name == '/')
			*name = '!';
	return (0);
}

int
kobject_set_name(struct kobject *kobj, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);

	return (error);
}

static int
kobject_add_complete(struct kobject *kobj, struct kobject *parent)
{
	const struct kobj_type *t;
	int error;

	kobj->parent = parent;
	error = sysfs_create_dir(kobj);
	if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) {
		struct attribute **attr;
		t = kobj->ktype;

		for (attr = t->default_attrs; *attr != NULL; attr++) {
			error = sysfs_create_file(kobj, *attr);
			if (error)
				break;
		}
		if (error)
			sysfs_remove_dir(kobj);
	}
	return (error);
}

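/*
 * Name the kobject from "fmt", attach it under "parent" and create the
 * matching sysfs directory together with the type's default attributes.
 */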
int
kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);

	return kobject_add_complete(kobj, parent);
}

void
linux_kobject_release(struct kref *kref)
{
	struct kobject *kobj;
	char *name;

	kobj = container_of(kref, struct kobject, kref);
	sysfs_remove_dir(kobj);
	name = kobj->name;
	if (kobj->ktype && kobj->ktype->release)
		kobj->ktype->release(kobj);
	kfree(name);
}

static void
linux_kobject_kfree(struct kobject *kobj)
{
	kfree(kobj);
}

static void
linux_kobject_kfree_name(struct kobject *kobj)
{
	if (kobj) {
		kfree(kobj->name);
	}
}

const struct kobj_type linux_kfree_type = {
	.release = linux_kobject_kfree
};

static void
linux_device_release(struct device *dev)
{
	pr_debug("linux_device_release: %s\n", dev_name(dev));
	kfree(dev);
}

static ssize_t
linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct class, kobj),
		    dattr, buf);
	return (error);
}

static ssize_t
linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct class, kobj),
		    dattr, buf, count);
	return (error);
}

static void
linux_class_release(struct kobject *kobj)
{
	struct class *class;

	class = container_of(kobj, struct class, kobj);
	if (class->class_release)
		class->class_release(class);
}

static const struct sysfs_ops linux_class_sysfs = {
	.show = linux_class_show,
	.store = linux_class_store,
};

const struct kobj_type linux_class_ktype = {
	.release = linux_class_release,
	.sysfs_ops = &linux_class_sysfs
};

static void
linux_dev_release(struct kobject *kobj)
{
	struct device *dev;

	dev = container_of(kobj, struct device, kobj);
	/* This is the precedence defined by linux. */
	if (dev->release)
		dev->release(dev);
	else if (dev->class && dev->class->dev_release)
		dev->class->dev_release(dev);
}

static ssize_t
linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct device, kobj),
		    dattr, buf);
	return (error);
}

static ssize_t
linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct device, kobj),
		    dattr, buf, count);
	return (error);
}

static const struct sysfs_ops linux_dev_sysfs = {
	.show = linux_dev_show,
	.store = linux_dev_store,
};

const struct kobj_type linux_dev_ktype = {
	.release = linux_dev_release,
	.sysfs_ops = &linux_dev_sysfs
};

struct device *
device_create(struct class *class, struct device *parent, dev_t devt,
    void *drvdata, const char *fmt, ...)
{
	struct device *dev;
	va_list args;

	dev = kzalloc(sizeof(*dev), M_WAITOK);
	dev->parent = parent;
	dev->class = class;
	dev->devt = devt;
	dev->driver_data = drvdata;
	dev->release = linux_device_release;
	va_start(args, fmt);
	kobject_set_name_vargs(&dev->kobj, fmt, args);
	va_end(args);
	device_register(dev);

	return (dev);
}

int
kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype,
    struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	kobject_init(kobj, ktype);
	kobj->ktype = ktype;
	kobj->parent = parent;
	kobj->name = NULL;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);
	return kobject_add_complete(kobj, parent);
}

static void
linux_kq_lock(void *arg)
{
	spinlock_t *s = arg;

	spin_lock(s);
}
static void
linux_kq_unlock(void *arg)
{
	spinlock_t *s = arg;

	spin_unlock(s);
}

static void
linux_kq_lock_owned(void *arg)
{
#ifdef INVARIANTS
	spinlock_t *s = arg;

	mtx_assert(&s->m, MA_OWNED);
#endif
}

static void
linux_kq_lock_unowned(void *arg)
{
#ifdef INVARIANTS
	spinlock_t *s = arg;

	mtx_assert(&s->m, MA_NOTOWNED);
#endif
}

static void
linux_file_kqfilter_poll(struct linux_file *, int);

struct linux_file *
linux_file_alloc(void)
{
	struct linux_file *filp;

	filp = kzalloc(sizeof(*filp), GFP_KERNEL);

	/* set initial refcount */
	filp->f_count = 1;

	/* setup fields needed by kqueue support */
	spin_lock_init(&filp->f_kqlock);
	knlist_init(&filp->f_selinfo.si_note, &filp->f_kqlock,
	    linux_kq_lock, linux_kq_unlock,
	    linux_kq_lock_owned, linux_kq_lock_unowned);

	return (filp);
}

void
linux_file_free(struct linux_file *filp)
{
	if (filp->_file == NULL) {
		if (filp->f_shmem != NULL)
			vm_object_deallocate(filp->f_shmem);
		kfree(filp);
	} else {
		/*
		 * The close method of the character device or file
		 * will free the linux_file structure:
		 */
		_fdrop(filp->_file, curthread);
	}
}

static int
linux_cdev_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
    vm_page_t *mres)
{
	struct vm_area_struct *vmap;

	vmap = linux_cdev_handle_find(vm_obj->handle);

	MPASS(vmap != NULL);
	MPASS(vmap->vm_private_data == vm_obj->handle);

	if (likely(vmap->vm_ops != NULL && offset < vmap->vm_len)) {
		vm_paddr_t paddr = IDX_TO_OFF(vmap->vm_pfn) + offset;
		vm_page_t page;

		if (((*mres)->flags & PG_FICTITIOUS) != 0) {
			/*
			 * If the passed in result page is a fake
			 * page, update it with the new physical
			 * address.
			 */
			page = *mres;
			vm_page_updatefake(page, paddr, vm_obj->memattr);
		} else {
			/*
			 * Replace the passed in "mres" page with our
			 * own fake page and free up all of the
			 * original pages.
			 */
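			/*
			 * NOTE: the object lock is dropped across the
			 * fake page allocation below, because the
			 * allocation can sleep waiting for memory.
			 */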
498 */ 499 VM_OBJECT_WUNLOCK(vm_obj); 500 page = vm_page_getfake(paddr, vm_obj->memattr); 501 VM_OBJECT_WLOCK(vm_obj); 502 503 vm_page_replace_checked(page, vm_obj, 504 (*mres)->pindex, *mres); 505 506 vm_page_lock(*mres); 507 vm_page_free(*mres); 508 vm_page_unlock(*mres); 509 *mres = page; 510 } 511 page->valid = VM_PAGE_BITS_ALL; 512 return (VM_PAGER_OK); 513 } 514 return (VM_PAGER_FAIL); 515 } 516 517 static int 518 linux_cdev_pager_populate(vm_object_t vm_obj, vm_pindex_t pidx, int fault_type, 519 vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last) 520 { 521 struct vm_area_struct *vmap; 522 int err; 523 524 linux_set_current(curthread); 525 526 /* get VM area structure */ 527 vmap = linux_cdev_handle_find(vm_obj->handle); 528 MPASS(vmap != NULL); 529 MPASS(vmap->vm_private_data == vm_obj->handle); 530 531 VM_OBJECT_WUNLOCK(vm_obj); 532 533 down_write(&vmap->vm_mm->mmap_sem); 534 if (unlikely(vmap->vm_ops == NULL)) { 535 err = VM_FAULT_SIGBUS; 536 } else { 537 struct vm_fault vmf; 538 539 /* fill out VM fault structure */ 540 vmf.virtual_address = (void *)((uintptr_t)pidx << PAGE_SHIFT); 541 vmf.flags = (fault_type & VM_PROT_WRITE) ? FAULT_FLAG_WRITE : 0; 542 vmf.pgoff = 0; 543 vmf.page = NULL; 544 vmf.vma = vmap; 545 546 vmap->vm_pfn_count = 0; 547 vmap->vm_pfn_pcount = &vmap->vm_pfn_count; 548 vmap->vm_obj = vm_obj; 549 550 err = vmap->vm_ops->fault(vmap, &vmf); 551 552 while (vmap->vm_pfn_count == 0 && err == VM_FAULT_NOPAGE) { 553 kern_yield(PRI_USER); 554 err = vmap->vm_ops->fault(vmap, &vmf); 555 } 556 } 557 558 /* translate return code */ 559 switch (err) { 560 case VM_FAULT_OOM: 561 err = VM_PAGER_AGAIN; 562 break; 563 case VM_FAULT_SIGBUS: 564 err = VM_PAGER_BAD; 565 break; 566 case VM_FAULT_NOPAGE: 567 /* 568 * By contract the fault handler will return having 569 * busied all the pages itself. If pidx is already 570 * found in the object, it will simply xbusy the first 571 * page and return with vm_pfn_count set to 1. 
572 */ 573 *first = vmap->vm_pfn_first; 574 *last = *first + vmap->vm_pfn_count - 1; 575 err = VM_PAGER_OK; 576 break; 577 default: 578 err = VM_PAGER_ERROR; 579 break; 580 } 581 up_write(&vmap->vm_mm->mmap_sem); 582 VM_OBJECT_WLOCK(vm_obj); 583 return (err); 584 } 585 586 static struct rwlock linux_vma_lock; 587 static TAILQ_HEAD(, vm_area_struct) linux_vma_head = 588 TAILQ_HEAD_INITIALIZER(linux_vma_head); 589 590 static void 591 linux_cdev_handle_free(struct vm_area_struct *vmap) 592 { 593 /* Drop reference on vm_file */ 594 if (vmap->vm_file != NULL) 595 fput(vmap->vm_file); 596 597 /* Drop reference on mm_struct */ 598 mmput(vmap->vm_mm); 599 600 kfree(vmap); 601 } 602 603 static void 604 linux_cdev_handle_remove(struct vm_area_struct *vmap) 605 { 606 rw_wlock(&linux_vma_lock); 607 TAILQ_REMOVE(&linux_vma_head, vmap, vm_entry); 608 rw_wunlock(&linux_vma_lock); 609 } 610 611 static struct vm_area_struct * 612 linux_cdev_handle_find(void *handle) 613 { 614 struct vm_area_struct *vmap; 615 616 rw_rlock(&linux_vma_lock); 617 TAILQ_FOREACH(vmap, &linux_vma_head, vm_entry) { 618 if (vmap->vm_private_data == handle) 619 break; 620 } 621 rw_runlock(&linux_vma_lock); 622 return (vmap); 623 } 624 625 static int 626 linux_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 627 vm_ooffset_t foff, struct ucred *cred, u_short *color) 628 { 629 630 MPASS(linux_cdev_handle_find(handle) != NULL); 631 *color = 0; 632 return (0); 633 } 634 635 static void 636 linux_cdev_pager_dtor(void *handle) 637 { 638 const struct vm_operations_struct *vm_ops; 639 struct vm_area_struct *vmap; 640 641 vmap = linux_cdev_handle_find(handle); 642 MPASS(vmap != NULL); 643 644 /* 645 * Remove handle before calling close operation to prevent 646 * other threads from reusing the handle pointer. 
647 */ 648 linux_cdev_handle_remove(vmap); 649 650 down_write(&vmap->vm_mm->mmap_sem); 651 vm_ops = vmap->vm_ops; 652 if (likely(vm_ops != NULL)) 653 vm_ops->close(vmap); 654 up_write(&vmap->vm_mm->mmap_sem); 655 656 linux_cdev_handle_free(vmap); 657 } 658 659 static struct cdev_pager_ops linux_cdev_pager_ops[2] = { 660 { 661 /* OBJT_MGTDEVICE */ 662 .cdev_pg_populate = linux_cdev_pager_populate, 663 .cdev_pg_ctor = linux_cdev_pager_ctor, 664 .cdev_pg_dtor = linux_cdev_pager_dtor 665 }, 666 { 667 /* OBJT_DEVICE */ 668 .cdev_pg_fault = linux_cdev_pager_fault, 669 .cdev_pg_ctor = linux_cdev_pager_ctor, 670 .cdev_pg_dtor = linux_cdev_pager_dtor 671 }, 672 }; 673 674 #define OPW(fp,td,code) ({ \ 675 struct file *__fpop; \ 676 __typeof(code) __retval; \ 677 \ 678 __fpop = (td)->td_fpop; \ 679 (td)->td_fpop = (fp); \ 680 __retval = (code); \ 681 (td)->td_fpop = __fpop; \ 682 __retval; \ 683 }) 684 685 static int 686 linux_dev_fdopen(struct cdev *dev, int fflags, struct thread *td, struct file *file) 687 { 688 struct linux_cdev *ldev; 689 struct linux_file *filp; 690 int error; 691 692 ldev = dev->si_drv1; 693 694 filp = linux_file_alloc(); 695 filp->f_dentry = &filp->f_dentry_store; 696 filp->f_op = ldev->ops; 697 filp->f_mode = file->f_flag; 698 filp->f_flags = file->f_flag; 699 filp->f_vnode = file->f_vnode; 700 filp->_file = file; 701 702 linux_set_current(td); 703 704 if (filp->f_op->open) { 705 error = -filp->f_op->open(file->f_vnode, filp); 706 if (error) { 707 kfree(filp); 708 return (error); 709 } 710 } 711 712 /* hold on to the vnode - used for fstat() */ 713 vhold(filp->f_vnode); 714 715 /* release the file from devfs */ 716 finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops); 717 return (ENXIO); 718 } 719 720 #define LINUX_IOCTL_MIN_PTR 0x10000UL 721 #define LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + IOCPARM_MAX) 722 723 static inline int 724 linux_remap_address(void **uaddr, size_t len) 725 { 726 uintptr_t uaddr_val = (uintptr_t)(*uaddr); 727 728 if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR && 729 uaddr_val < LINUX_IOCTL_MAX_PTR)) { 730 struct task_struct *pts = current; 731 if (pts == NULL) { 732 *uaddr = NULL; 733 return (1); 734 } 735 736 /* compute data offset */ 737 uaddr_val -= LINUX_IOCTL_MIN_PTR; 738 739 /* check that length is within bounds */ 740 if ((len > IOCPARM_MAX) || 741 (uaddr_val + len) > pts->bsd_ioctl_len) { 742 *uaddr = NULL; 743 return (1); 744 } 745 746 /* re-add kernel buffer address */ 747 uaddr_val += (uintptr_t)pts->bsd_ioctl_data; 748 749 /* update address location */ 750 *uaddr = (void *)uaddr_val; 751 return (1); 752 } 753 return (0); 754 } 755 756 int 757 linux_copyin(const void *uaddr, void *kaddr, size_t len) 758 { 759 if (linux_remap_address(__DECONST(void **, &uaddr), len)) { 760 if (uaddr == NULL) 761 return (-EFAULT); 762 memcpy(kaddr, uaddr, len); 763 return (0); 764 } 765 return (-copyin(uaddr, kaddr, len)); 766 } 767 768 int 769 linux_copyout(const void *kaddr, void *uaddr, size_t len) 770 { 771 if (linux_remap_address(&uaddr, len)) { 772 if (uaddr == NULL) 773 return (-EFAULT); 774 memcpy(uaddr, kaddr, len); 775 return (0); 776 } 777 return (-copyout(kaddr, uaddr, len)); 778 } 779 780 size_t 781 linux_clear_user(void *_uaddr, size_t _len) 782 { 783 uint8_t *uaddr = _uaddr; 784 size_t len = _len; 785 786 /* make sure uaddr is aligned before going into the fast loop */ 787 while (((uintptr_t)uaddr & 7) != 0 && len > 7) { 788 if (subyte(uaddr, 0)) 789 return (_len); 790 uaddr++; 791 len--; 792 } 793 794 /* zero 8 bytes at a time */ 795 
	while (len > 7) {
#ifdef __LP64__
		if (suword64(uaddr, 0))
			return (_len);
#else
		if (suword32(uaddr, 0))
			return (_len);
		if (suword32(uaddr + 4, 0))
			return (_len);
#endif
		uaddr += 8;
		len -= 8;
	}

	/* zero fill end, if any */
	while (len > 0) {
		if (subyte(uaddr, 0))
			return (_len);
		uaddr++;
		len--;
	}
	return (0);
}

int
linux_access_ok(int rw, const void *uaddr, size_t len)
{
	uintptr_t saddr;
	uintptr_t eaddr;

	/* get start and end address */
	saddr = (uintptr_t)uaddr;
	eaddr = (uintptr_t)uaddr + len;

	/* verify addresses are valid for userspace */
	return ((saddr == eaddr) ||
	    (eaddr > saddr && eaddr <= VM_MAXUSER_ADDRESS));
}

/*
 * This function should return either EINTR or ERESTART depending on
 * the signal type sent to this thread:
 */
static int
linux_get_error(struct task_struct *task, int error)
{
	/* check for signal type interrupt code */
	if (error == EINTR || error == ERESTARTSYS || error == ERESTART) {
		error = -linux_schedule_get_interrupt_value(task);
		if (error == 0)
			error = EINTR;
	}
	return (error);
}

static int
linux_file_ioctl_sub(struct file *fp, struct linux_file *filp,
    u_long cmd, caddr_t data, struct thread *td)
{
	struct task_struct *task = current;
	unsigned size;
	int error;

	size = IOCPARM_LEN(cmd);
	/* refer to logic in sys_ioctl() */
	if (size > 0) {
		/*
		 * Setup hint for linux_copyin() and linux_copyout().
		 *
		 * Background: Linux code expects a user-space address
		 * while FreeBSD supplies a kernel-space address.
		 */
		task->bsd_ioctl_data = data;
		task->bsd_ioctl_len = size;
		data = (void *)LINUX_IOCTL_MIN_PTR;
	} else {
		/* fetch user-space pointer */
		data = *(void **)data;
	}
#if defined(__amd64__)
	if (td->td_proc->p_elf_machine == EM_386) {
		/* try the compat IOCTL handler first */
		if (filp->f_op->compat_ioctl != NULL)
			error = -OPW(fp, td, filp->f_op->compat_ioctl(filp, cmd, (u_long)data));
		else
			error = ENOTTY;

		/* fallback to the regular IOCTL handler, if any */
		if (error == ENOTTY && filp->f_op->unlocked_ioctl != NULL)
			error = -OPW(fp, td, filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data));
	} else
#endif
	if (filp->f_op->unlocked_ioctl != NULL)
		error = -OPW(fp, td, filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data));
	else
		error = ENOTTY;
	if (size > 0) {
		task->bsd_ioctl_data = NULL;
		task->bsd_ioctl_len = 0;
	}

	if (error == EWOULDBLOCK) {
		/* update kqfilter status, if any */
		linux_file_kqfilter_poll(filp,
		    LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE);
	} else {
		error = linux_get_error(task, error);
	}
	return (error);
}

#define	LINUX_POLL_TABLE_NORMAL ((poll_table *)1)

/*
 * This function atomically updates the poll wakeup state and returns
 * the previous state at the time of update.
 */
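/*
 * The "pstate" argument is a state transition table indexed by the
 * current state; entries which map a state onto itself are no-ops.
 */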
911 */ 912 static uint8_t 913 linux_poll_wakeup_state(atomic_t *v, const uint8_t *pstate) 914 { 915 int c, old; 916 917 c = v->counter; 918 919 while ((old = atomic_cmpxchg(v, c, pstate[c])) != c) 920 c = old; 921 922 return (c); 923 } 924 925 926 static int 927 linux_poll_wakeup_callback(wait_queue_t *wq, unsigned int wq_state, int flags, void *key) 928 { 929 static const uint8_t state[LINUX_FWQ_STATE_MAX] = { 930 [LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_INIT, /* NOP */ 931 [LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_NOT_READY, /* NOP */ 932 [LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_READY, 933 [LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_READY, /* NOP */ 934 }; 935 struct linux_file *filp = container_of(wq, struct linux_file, f_wait_queue.wq); 936 937 switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) { 938 case LINUX_FWQ_STATE_QUEUED: 939 linux_poll_wakeup(filp); 940 return (1); 941 default: 942 return (0); 943 } 944 } 945 946 void 947 linux_poll_wait(struct linux_file *filp, wait_queue_head_t *wqh, poll_table *p) 948 { 949 static const uint8_t state[LINUX_FWQ_STATE_MAX] = { 950 [LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_NOT_READY, 951 [LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_NOT_READY, /* NOP */ 952 [LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_QUEUED, /* NOP */ 953 [LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_QUEUED, 954 }; 955 956 /* check if we are called inside the select system call */ 957 if (p == LINUX_POLL_TABLE_NORMAL) 958 selrecord(curthread, &filp->f_selinfo); 959 960 switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) { 961 case LINUX_FWQ_STATE_INIT: 962 /* NOTE: file handles can only belong to one wait-queue */ 963 filp->f_wait_queue.wqh = wqh; 964 filp->f_wait_queue.wq.func = &linux_poll_wakeup_callback; 965 add_wait_queue(wqh, &filp->f_wait_queue.wq); 966 atomic_set(&filp->f_wait_queue.state, LINUX_FWQ_STATE_QUEUED); 967 break; 968 default: 969 break; 970 } 971 } 972 973 static void 974 linux_poll_wait_dequeue(struct linux_file *filp) 975 { 976 static const uint8_t state[LINUX_FWQ_STATE_MAX] = { 977 [LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_INIT, /* NOP */ 978 [LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_INIT, 979 [LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_INIT, 980 [LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_INIT, 981 }; 982 983 seldrain(&filp->f_selinfo); 984 985 switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) { 986 case LINUX_FWQ_STATE_NOT_READY: 987 case LINUX_FWQ_STATE_QUEUED: 988 case LINUX_FWQ_STATE_READY: 989 remove_wait_queue(filp->f_wait_queue.wqh, &filp->f_wait_queue.wq); 990 break; 991 default: 992 break; 993 } 994 } 995 996 void 997 linux_poll_wakeup(struct linux_file *filp) 998 { 999 /* this function should be NULL-safe */ 1000 if (filp == NULL) 1001 return; 1002 1003 selwakeup(&filp->f_selinfo); 1004 1005 spin_lock(&filp->f_kqlock); 1006 filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ | 1007 LINUX_KQ_FLAG_NEED_WRITE; 1008 1009 /* make sure the "knote" gets woken up */ 1010 KNOTE_LOCKED(&filp->f_selinfo.si_note, 1); 1011 spin_unlock(&filp->f_kqlock); 1012 } 1013 1014 static void 1015 linux_file_kqfilter_detach(struct knote *kn) 1016 { 1017 struct linux_file *filp = kn->kn_hook; 1018 1019 spin_lock(&filp->f_kqlock); 1020 knlist_remove(&filp->f_selinfo.si_note, kn, 1); 1021 spin_unlock(&filp->f_kqlock); 1022 } 1023 1024 static int 1025 linux_file_kqfilter_read_event(struct knote *kn, long hint) 1026 { 1027 struct linux_file *filp = kn->kn_hook; 1028 1029 mtx_assert(&filp->f_kqlock.m, MA_OWNED); 1030 1031 return 
	return ((filp->f_kqflags & LINUX_KQ_FLAG_NEED_READ) ? 1 : 0);
}

static int
linux_file_kqfilter_write_event(struct knote *kn, long hint)
{
	struct linux_file *filp = kn->kn_hook;

	mtx_assert(&filp->f_kqlock.m, MA_OWNED);

	return ((filp->f_kqflags & LINUX_KQ_FLAG_NEED_WRITE) ? 1 : 0);
}

static struct filterops linux_dev_kqfiltops_read = {
	.f_isfd = 1,
	.f_detach = linux_file_kqfilter_detach,
	.f_event = linux_file_kqfilter_read_event,
};

static struct filterops linux_dev_kqfiltops_write = {
	.f_isfd = 1,
	.f_detach = linux_file_kqfilter_detach,
	.f_event = linux_file_kqfilter_write_event,
};

static void
linux_file_kqfilter_poll(struct linux_file *filp, int kqflags)
{
	int temp;

	if (filp->f_kqflags & kqflags) {
		struct thread *td = curthread;

		/* get the latest polling state */
		temp = OPW(filp->_file, td, filp->f_op->poll(filp, NULL));

		spin_lock(&filp->f_kqlock);
		/* clear kqflags */
		filp->f_kqflags &= ~(LINUX_KQ_FLAG_NEED_READ |
		    LINUX_KQ_FLAG_NEED_WRITE);
		/* update kqflags */
		if (temp & (POLLIN | POLLOUT)) {
			if (temp & POLLIN)
				filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ;
			if (temp & POLLOUT)
				filp->f_kqflags |= LINUX_KQ_FLAG_NEED_WRITE;

			/* make sure the "knote" gets woken up */
			KNOTE_LOCKED(&filp->f_selinfo.si_note, 0);
		}
		spin_unlock(&filp->f_kqlock);
	}
}

static int
linux_file_kqfilter(struct file *file, struct knote *kn)
{
	struct linux_file *filp;
	struct thread *td;
	int error;

	td = curthread;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	if (filp->f_op->poll == NULL)
		return (EINVAL);

	spin_lock(&filp->f_kqlock);
	switch (kn->kn_filter) {
	case EVFILT_READ:
		filp->f_kqflags |= LINUX_KQ_FLAG_HAS_READ;
		kn->kn_fop = &linux_dev_kqfiltops_read;
		kn->kn_hook = filp;
		knlist_add(&filp->f_selinfo.si_note, kn, 1);
		error = 0;
		break;
	case EVFILT_WRITE:
		filp->f_kqflags |= LINUX_KQ_FLAG_HAS_WRITE;
		kn->kn_fop = &linux_dev_kqfiltops_write;
		kn->kn_hook = filp;
		knlist_add(&filp->f_selinfo.si_note, kn, 1);
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}
	spin_unlock(&filp->f_kqlock);

	if (error == 0) {
		linux_set_current(td);

		/* update kqfilter status, if any */
		linux_file_kqfilter_poll(filp,
		    LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE);
	}
	return (error);
}

static int
linux_file_mmap_single(struct file *fp, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot,
    struct thread *td)
{
	struct task_struct *task;
	struct vm_area_struct *vmap;
	struct mm_struct *mm;
	struct linux_file *filp;
	vm_memattr_t attr;
	int error;

	filp = (struct linux_file *)fp->f_data;
	filp->f_flags = fp->f_flag;

	if (filp->f_op->mmap == NULL)
		return (EOPNOTSUPP);

	linux_set_current(td);

	/*
	 * The same VM object might be shared by multiple processes
	 * and the mm_struct is usually freed when a process exits.
	 *
	 * The atomic reference below makes sure the mm_struct is
	 * available as long as the vmap is in the linux_vma_head.
	 */
1156 */ 1157 task = current; 1158 mm = task->mm; 1159 if (atomic_inc_not_zero(&mm->mm_users) == 0) 1160 return (EINVAL); 1161 1162 vmap = kzalloc(sizeof(*vmap), GFP_KERNEL); 1163 vmap->vm_start = 0; 1164 vmap->vm_end = size; 1165 vmap->vm_pgoff = *offset / PAGE_SIZE; 1166 vmap->vm_pfn = 0; 1167 vmap->vm_flags = vmap->vm_page_prot = (nprot & VM_PROT_ALL); 1168 vmap->vm_ops = NULL; 1169 vmap->vm_file = get_file(filp); 1170 vmap->vm_mm = mm; 1171 1172 if (unlikely(down_write_killable(&vmap->vm_mm->mmap_sem))) { 1173 error = linux_get_error(task, EINTR); 1174 } else { 1175 error = -OPW(fp, td, filp->f_op->mmap(filp, vmap)); 1176 error = linux_get_error(task, error); 1177 up_write(&vmap->vm_mm->mmap_sem); 1178 } 1179 1180 if (error != 0) { 1181 linux_cdev_handle_free(vmap); 1182 return (error); 1183 } 1184 1185 attr = pgprot2cachemode(vmap->vm_page_prot); 1186 1187 if (vmap->vm_ops != NULL) { 1188 struct vm_area_struct *ptr; 1189 void *vm_private_data; 1190 bool vm_no_fault; 1191 1192 if (vmap->vm_ops->open == NULL || 1193 vmap->vm_ops->close == NULL || 1194 vmap->vm_private_data == NULL) { 1195 /* free allocated VM area struct */ 1196 linux_cdev_handle_free(vmap); 1197 return (EINVAL); 1198 } 1199 1200 vm_private_data = vmap->vm_private_data; 1201 1202 rw_wlock(&linux_vma_lock); 1203 TAILQ_FOREACH(ptr, &linux_vma_head, vm_entry) { 1204 if (ptr->vm_private_data == vm_private_data) 1205 break; 1206 } 1207 /* check if there is an existing VM area struct */ 1208 if (ptr != NULL) { 1209 /* check if the VM area structure is invalid */ 1210 if (ptr->vm_ops == NULL || 1211 ptr->vm_ops->open == NULL || 1212 ptr->vm_ops->close == NULL) { 1213 error = ESTALE; 1214 vm_no_fault = 1; 1215 } else { 1216 error = EEXIST; 1217 vm_no_fault = (ptr->vm_ops->fault == NULL); 1218 } 1219 } else { 1220 /* insert VM area structure into list */ 1221 TAILQ_INSERT_TAIL(&linux_vma_head, vmap, vm_entry); 1222 error = 0; 1223 vm_no_fault = (vmap->vm_ops->fault == NULL); 1224 } 1225 rw_wunlock(&linux_vma_lock); 1226 1227 if (error != 0) { 1228 /* free allocated VM area struct */ 1229 linux_cdev_handle_free(vmap); 1230 /* check for stale VM area struct */ 1231 if (error != EEXIST) 1232 return (error); 1233 } 1234 1235 /* check if there is no fault handler */ 1236 if (vm_no_fault) { 1237 *object = cdev_pager_allocate(vm_private_data, OBJT_DEVICE, 1238 &linux_cdev_pager_ops[1], size, nprot, *offset, 1239 td->td_ucred); 1240 } else { 1241 *object = cdev_pager_allocate(vm_private_data, OBJT_MGTDEVICE, 1242 &linux_cdev_pager_ops[0], size, nprot, *offset, 1243 td->td_ucred); 1244 } 1245 1246 /* check if allocating the VM object failed */ 1247 if (*object == NULL) { 1248 if (error == 0) { 1249 /* remove VM area struct from list */ 1250 linux_cdev_handle_remove(vmap); 1251 /* free allocated VM area struct */ 1252 linux_cdev_handle_free(vmap); 1253 } 1254 return (EINVAL); 1255 } 1256 } else { 1257 struct sglist *sg; 1258 1259 sg = sglist_alloc(1, M_WAITOK); 1260 sglist_append_phys(sg, 1261 (vm_paddr_t)vmap->vm_pfn << PAGE_SHIFT, vmap->vm_len); 1262 1263 *object = vm_pager_allocate(OBJT_SG, sg, vmap->vm_len, 1264 nprot, 0, td->td_ucred); 1265 1266 linux_cdev_handle_free(vmap); 1267 1268 if (*object == NULL) { 1269 sglist_free(sg); 1270 return (EINVAL); 1271 } 1272 } 1273 1274 if (attr != VM_MEMATTR_DEFAULT) { 1275 VM_OBJECT_WLOCK(*object); 1276 vm_object_set_memattr(*object, attr); 1277 VM_OBJECT_WUNLOCK(*object); 1278 } 1279 *offset = 0; 1280 return (0); 1281 } 1282 1283 struct cdevsw linuxcdevsw = { 1284 .d_version = D_VERSION, 1285 
struct cdevsw linuxcdevsw = {
	.d_version = D_VERSION,
	.d_fdopen = linux_dev_fdopen,
	.d_name = "lkpidev",
};

static int
linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct linux_file *filp;
	ssize_t bytes;
	int error;

	error = 0;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
		return (EINVAL);
	linux_set_current(td);
	if (filp->f_op->read) {
		bytes = OPW(file, td, filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset));
		if (bytes >= 0) {
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else {
			error = linux_get_error(current, -bytes);
		}
	} else
		error = ENXIO;

	/* update kqfilter status, if any */
	linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ);

	return (error);
}

static int
linux_file_write(struct file *file, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct linux_file *filp;
	ssize_t bytes;
	int error;

	error = 0;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
		return (EINVAL);
	linux_set_current(td);
	if (filp->f_op->write) {
		bytes = OPW(file, td, filp->f_op->write(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset));
		if (bytes >= 0) {
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else {
			error = linux_get_error(current, -bytes);
		}
	} else
		error = ENXIO;

	/* update kqfilter status, if any */
	linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_WRITE);

	return (error);
}

static int
linux_file_poll(struct file *file, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct linux_file *filp;
	int revents;

	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	if (filp->f_op->poll != NULL)
		revents = OPW(file, td, filp->f_op->poll(filp, LINUX_POLL_TABLE_NORMAL)) & events;
	else
		revents = 0;

	return (revents);
}

static int
linux_file_close(struct file *file, struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file *)file->f_data;

	KASSERT(file_count(filp) == 0, ("File refcount(%d) is not zero", file_count(filp)));

	filp->f_flags = file->f_flag;
	linux_set_current(td);
	linux_poll_wait_dequeue(filp);
	error = -OPW(file, td, filp->f_op->release(filp->f_vnode, filp));
	funsetown(&filp->f_sigio);
	if (filp->f_vnode != NULL)
		vdrop(filp->f_vnode);
	kfree(filp);

	return (error);
}

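/*
 * Generic FIO* ioctls (non-blocking, async and ownership control) are
 * handled natively here; everything else is forwarded to the Linux
 * unlocked_ioctl/compat_ioctl handlers via linux_file_ioctl_sub().
 */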
static int
linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred,
    struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file *)fp->f_data;
	filp->f_flags = fp->f_flag;
	error = 0;

	linux_set_current(td);
	switch (cmd) {
	case FIONBIO:
		break;
	case FIOASYNC:
		if (filp->f_op->fasync == NULL)
			break;
		error = -OPW(fp, td, filp->f_op->fasync(0, filp, fp->f_flag & FASYNC));
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &filp->f_sigio);
		if (error == 0) {
			if (filp->f_op->fasync == NULL)
				break;
			error = -OPW(fp, td, filp->f_op->fasync(0, filp,
			    fp->f_flag & FASYNC));
		}
		break;
	case FIOGETOWN:
		*(int *)data = fgetown(&filp->f_sigio);
		break;
	default:
		error = linux_file_ioctl_sub(fp, filp, cmd, data, td);
		break;
	}
	return (error);
}

static int
linux_file_mmap_sub(struct thread *td, vm_size_t objsize, vm_prot_t prot,
    vm_prot_t *maxprotp, int *flagsp, struct file *fp,
    vm_ooffset_t *foff, vm_object_t *objp)
{
	/*
	 * Character devices do not provide private mappings
	 * of any kind:
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & VM_PROT_WRITE) != 0)
		return (EACCES);
	if ((*flagsp & (MAP_PRIVATE | MAP_COPY)) != 0)
		return (EINVAL);

	return (linux_file_mmap_single(fp, foff, objsize, objp, (int)prot, td));
}

static int
linux_file_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
    vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
    struct thread *td)
{
	struct linux_file *filp;
	struct mount *mp;
	struct vnode *vp;
	vm_object_t object;
	vm_prot_t maxprot;
	int error;

	filp = (struct linux_file *)fp->f_data;

	vp = filp->f_vnode;
	if (vp == NULL)
		return (EOPNOTSUPP);

	/*
	 * Ensure that file and memory protections are
	 * compatible.
	 */
	mp = vp->v_mount;
	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) {
		maxprot = VM_PROT_NONE;
		if ((prot & VM_PROT_EXECUTE) != 0)
			return (EACCES);
	} else
		maxprot = VM_PROT_EXECUTE;
	if ((fp->f_flag & FREAD) != 0)
		maxprot |= VM_PROT_READ;
	else if ((prot & VM_PROT_READ) != 0)
		return (EACCES);

	/*
	 * If we are sharing potential changes via MAP_SHARED and we
	 * are trying to get write permission although we opened it
	 * without asking for it, bail out.
	 *
	 * Note that most character devices always share mappings.
	 *
	 * Rely on linux_file_mmap_sub() to fail invalid MAP_PRIVATE
	 * requests rather than doing it here.
	 */
1503 */ 1504 if ((flags & MAP_SHARED) != 0) { 1505 if ((fp->f_flag & FWRITE) != 0) 1506 maxprot |= VM_PROT_WRITE; 1507 else if ((prot & VM_PROT_WRITE) != 0) 1508 return (EACCES); 1509 } 1510 maxprot &= cap_maxprot; 1511 1512 error = linux_file_mmap_sub(td, size, prot, &maxprot, &flags, fp, &foff, 1513 &object); 1514 if (error != 0) 1515 return (error); 1516 1517 error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, 1518 foff, FALSE, td); 1519 if (error != 0) 1520 vm_object_deallocate(object); 1521 return (error); 1522 } 1523 1524 static int 1525 linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, 1526 struct thread *td) 1527 { 1528 struct linux_file *filp; 1529 struct vnode *vp; 1530 int error; 1531 1532 filp = (struct linux_file *)fp->f_data; 1533 if (filp->f_vnode == NULL) 1534 return (EOPNOTSUPP); 1535 1536 vp = filp->f_vnode; 1537 1538 vn_lock(vp, LK_SHARED | LK_RETRY); 1539 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 1540 VOP_UNLOCK(vp, 0); 1541 1542 return (error); 1543 } 1544 1545 static int 1546 linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif, 1547 struct filedesc *fdp) 1548 { 1549 1550 return (0); 1551 } 1552 1553 unsigned int 1554 linux_iminor(struct inode *inode) 1555 { 1556 struct linux_cdev *ldev; 1557 1558 if (inode == NULL || inode->v_rdev == NULL || 1559 inode->v_rdev->si_devsw != &linuxcdevsw) 1560 return (-1U); 1561 ldev = inode->v_rdev->si_drv1; 1562 if (ldev == NULL) 1563 return (-1U); 1564 1565 return (minor(ldev->dev)); 1566 } 1567 1568 struct fileops linuxfileops = { 1569 .fo_read = linux_file_read, 1570 .fo_write = linux_file_write, 1571 .fo_truncate = invfo_truncate, 1572 .fo_kqfilter = linux_file_kqfilter, 1573 .fo_stat = linux_file_stat, 1574 .fo_fill_kinfo = linux_file_fill_kinfo, 1575 .fo_poll = linux_file_poll, 1576 .fo_close = linux_file_close, 1577 .fo_ioctl = linux_file_ioctl, 1578 .fo_mmap = linux_file_mmap, 1579 .fo_chmod = invfo_chmod, 1580 .fo_chown = invfo_chown, 1581 .fo_sendfile = invfo_sendfile, 1582 .fo_flags = DFLAG_PASSABLE, 1583 }; 1584 1585 /* 1586 * Hash of vmmap addresses. This is infrequently accessed and does not 1587 * need to be particularly large. This is done because we must store the 1588 * caller's idea of the map size to properly unmap. 
1589 */ 1590 struct vmmap { 1591 LIST_ENTRY(vmmap) vm_next; 1592 void *vm_addr; 1593 unsigned long vm_size; 1594 }; 1595 1596 struct vmmaphd { 1597 struct vmmap *lh_first; 1598 }; 1599 #define VMMAP_HASH_SIZE 64 1600 #define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) 1601 #define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK 1602 static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; 1603 static struct mtx vmmaplock; 1604 1605 static void 1606 vmmap_add(void *addr, unsigned long size) 1607 { 1608 struct vmmap *vmmap; 1609 1610 vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); 1611 mtx_lock(&vmmaplock); 1612 vmmap->vm_size = size; 1613 vmmap->vm_addr = addr; 1614 LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); 1615 mtx_unlock(&vmmaplock); 1616 } 1617 1618 static struct vmmap * 1619 vmmap_remove(void *addr) 1620 { 1621 struct vmmap *vmmap; 1622 1623 mtx_lock(&vmmaplock); 1624 LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) 1625 if (vmmap->vm_addr == addr) 1626 break; 1627 if (vmmap) 1628 LIST_REMOVE(vmmap, vm_next); 1629 mtx_unlock(&vmmaplock); 1630 1631 return (vmmap); 1632 } 1633 1634 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__) 1635 void * 1636 _ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) 1637 { 1638 void *addr; 1639 1640 addr = pmap_mapdev_attr(phys_addr, size, attr); 1641 if (addr == NULL) 1642 return (NULL); 1643 vmmap_add(addr, size); 1644 1645 return (addr); 1646 } 1647 #endif 1648 1649 void 1650 iounmap(void *addr) 1651 { 1652 struct vmmap *vmmap; 1653 1654 vmmap = vmmap_remove(addr); 1655 if (vmmap == NULL) 1656 return; 1657 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__) 1658 pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); 1659 #endif 1660 kfree(vmmap); 1661 } 1662 1663 1664 void * 1665 vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) 1666 { 1667 vm_offset_t off; 1668 size_t size; 1669 1670 size = count * PAGE_SIZE; 1671 off = kva_alloc(size); 1672 if (off == 0) 1673 return (NULL); 1674 vmmap_add((void *)off, size); 1675 pmap_qenter(off, pages, count); 1676 1677 return ((void *)off); 1678 } 1679 1680 void 1681 vunmap(void *addr) 1682 { 1683 struct vmmap *vmmap; 1684 1685 vmmap = vmmap_remove(addr); 1686 if (vmmap == NULL) 1687 return; 1688 pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); 1689 kva_free((vm_offset_t)addr, vmmap->vm_size); 1690 kfree(vmmap); 1691 } 1692 1693 char * 1694 kvasprintf(gfp_t gfp, const char *fmt, va_list ap) 1695 { 1696 unsigned int len; 1697 char *p; 1698 va_list aq; 1699 1700 va_copy(aq, ap); 1701 len = vsnprintf(NULL, 0, fmt, aq); 1702 va_end(aq); 1703 1704 p = kmalloc(len + 1, gfp); 1705 if (p != NULL) 1706 vsnprintf(p, len + 1, fmt, ap); 1707 1708 return (p); 1709 } 1710 1711 char * 1712 kasprintf(gfp_t gfp, const char *fmt, ...) 
1713 { 1714 va_list ap; 1715 char *p; 1716 1717 va_start(ap, fmt); 1718 p = kvasprintf(gfp, fmt, ap); 1719 va_end(ap); 1720 1721 return (p); 1722 } 1723 1724 static void 1725 linux_timer_callback_wrapper(void *context) 1726 { 1727 struct timer_list *timer; 1728 1729 linux_set_current(curthread); 1730 1731 timer = context; 1732 timer->function(timer->data); 1733 } 1734 1735 void 1736 mod_timer(struct timer_list *timer, int expires) 1737 { 1738 1739 timer->expires = expires; 1740 callout_reset(&timer->callout, 1741 linux_timer_jiffies_until(expires), 1742 &linux_timer_callback_wrapper, timer); 1743 } 1744 1745 void 1746 add_timer(struct timer_list *timer) 1747 { 1748 1749 callout_reset(&timer->callout, 1750 linux_timer_jiffies_until(timer->expires), 1751 &linux_timer_callback_wrapper, timer); 1752 } 1753 1754 void 1755 add_timer_on(struct timer_list *timer, int cpu) 1756 { 1757 1758 callout_reset_on(&timer->callout, 1759 linux_timer_jiffies_until(timer->expires), 1760 &linux_timer_callback_wrapper, timer, cpu); 1761 } 1762 1763 static void 1764 linux_timer_init(void *arg) 1765 { 1766 1767 /* 1768 * Compute an internal HZ value which can divide 2**32 to 1769 * avoid timer rounding problems when the tick value wraps 1770 * around 2**32: 1771 */ 1772 linux_timer_hz_mask = 1; 1773 while (linux_timer_hz_mask < (unsigned long)hz) 1774 linux_timer_hz_mask *= 2; 1775 linux_timer_hz_mask--; 1776 } 1777 SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL); 1778 1779 void 1780 linux_complete_common(struct completion *c, int all) 1781 { 1782 int wakeup_swapper; 1783 1784 sleepq_lock(c); 1785 if (all) { 1786 c->done = UINT_MAX; 1787 wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); 1788 } else { 1789 if (c->done != UINT_MAX) 1790 c->done++; 1791 wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); 1792 } 1793 sleepq_release(c); 1794 if (wakeup_swapper) 1795 kick_proc0(); 1796 } 1797 1798 /* 1799 * Indefinite wait for done != 0 with or without signals. 1800 */ 1801 int 1802 linux_wait_for_common(struct completion *c, int flags) 1803 { 1804 struct task_struct *task; 1805 int error; 1806 1807 if (SCHEDULER_STOPPED()) 1808 return (0); 1809 1810 task = current; 1811 1812 if (flags != 0) 1813 flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; 1814 else 1815 flags = SLEEPQ_SLEEP; 1816 error = 0; 1817 for (;;) { 1818 sleepq_lock(c); 1819 if (c->done) 1820 break; 1821 sleepq_add(c, NULL, "completion", flags, 0); 1822 if (flags & SLEEPQ_INTERRUPTIBLE) { 1823 DROP_GIANT(); 1824 error = -sleepq_wait_sig(c, 0); 1825 PICKUP_GIANT(); 1826 if (error != 0) { 1827 linux_schedule_save_interrupt_value(task, error); 1828 error = -ERESTARTSYS; 1829 goto intr; 1830 } 1831 } else { 1832 DROP_GIANT(); 1833 sleepq_wait(c, 0); 1834 PICKUP_GIANT(); 1835 } 1836 } 1837 if (c->done != UINT_MAX) 1838 c->done--; 1839 sleepq_release(c); 1840 1841 intr: 1842 return (error); 1843 } 1844 1845 /* 1846 * Time limited wait for done != 0 with or without signals. 
1847 */ 1848 int 1849 linux_wait_for_timeout_common(struct completion *c, int timeout, int flags) 1850 { 1851 struct task_struct *task; 1852 int end = jiffies + timeout; 1853 int error; 1854 1855 if (SCHEDULER_STOPPED()) 1856 return (0); 1857 1858 task = current; 1859 1860 if (flags != 0) 1861 flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; 1862 else 1863 flags = SLEEPQ_SLEEP; 1864 1865 for (;;) { 1866 sleepq_lock(c); 1867 if (c->done) 1868 break; 1869 sleepq_add(c, NULL, "completion", flags, 0); 1870 sleepq_set_timeout(c, linux_timer_jiffies_until(end)); 1871 1872 DROP_GIANT(); 1873 if (flags & SLEEPQ_INTERRUPTIBLE) 1874 error = -sleepq_timedwait_sig(c, 0); 1875 else 1876 error = -sleepq_timedwait(c, 0); 1877 PICKUP_GIANT(); 1878 1879 if (error != 0) { 1880 /* check for timeout */ 1881 if (error == -EWOULDBLOCK) { 1882 error = 0; /* timeout */ 1883 } else { 1884 /* signal happened */ 1885 linux_schedule_save_interrupt_value(task, error); 1886 error = -ERESTARTSYS; 1887 } 1888 goto done; 1889 } 1890 } 1891 if (c->done != UINT_MAX) 1892 c->done--; 1893 sleepq_release(c); 1894 1895 /* return how many jiffies are left */ 1896 error = linux_timer_jiffies_until(end); 1897 done: 1898 return (error); 1899 } 1900 1901 int 1902 linux_try_wait_for_completion(struct completion *c) 1903 { 1904 int isdone; 1905 1906 sleepq_lock(c); 1907 isdone = (c->done != 0); 1908 if (c->done != 0 && c->done != UINT_MAX) 1909 c->done--; 1910 sleepq_release(c); 1911 return (isdone); 1912 } 1913 1914 int 1915 linux_completion_done(struct completion *c) 1916 { 1917 int isdone; 1918 1919 sleepq_lock(c); 1920 isdone = (c->done != 0); 1921 sleepq_release(c); 1922 return (isdone); 1923 } 1924 1925 static void 1926 linux_cdev_release(struct kobject *kobj) 1927 { 1928 struct linux_cdev *cdev; 1929 struct kobject *parent; 1930 1931 cdev = container_of(kobj, struct linux_cdev, kobj); 1932 parent = kobj->parent; 1933 if (cdev->cdev) 1934 destroy_dev(cdev->cdev); 1935 kfree(cdev); 1936 kobject_put(parent); 1937 } 1938 1939 static void 1940 linux_cdev_static_release(struct kobject *kobj) 1941 { 1942 struct linux_cdev *cdev; 1943 struct kobject *parent; 1944 1945 cdev = container_of(kobj, struct linux_cdev, kobj); 1946 parent = kobj->parent; 1947 if (cdev->cdev) 1948 destroy_dev(cdev->cdev); 1949 kobject_put(parent); 1950 } 1951 1952 const struct kobj_type linux_cdev_ktype = { 1953 .release = linux_cdev_release, 1954 }; 1955 1956 const struct kobj_type linux_cdev_static_ktype = { 1957 .release = linux_cdev_static_release, 1958 }; 1959 1960 static void 1961 linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate) 1962 { 1963 struct notifier_block *nb; 1964 1965 nb = arg; 1966 if (linkstate == LINK_STATE_UP) 1967 nb->notifier_call(nb, NETDEV_UP, ifp); 1968 else 1969 nb->notifier_call(nb, NETDEV_DOWN, ifp); 1970 } 1971 1972 static void 1973 linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp) 1974 { 1975 struct notifier_block *nb; 1976 1977 nb = arg; 1978 nb->notifier_call(nb, NETDEV_REGISTER, ifp); 1979 } 1980 1981 static void 1982 linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp) 1983 { 1984 struct notifier_block *nb; 1985 1986 nb = arg; 1987 nb->notifier_call(nb, NETDEV_UNREGISTER, ifp); 1988 } 1989 1990 static void 1991 linux_handle_iflladdr_event(void *arg, struct ifnet *ifp) 1992 { 1993 struct notifier_block *nb; 1994 1995 nb = arg; 1996 nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp); 1997 } 1998 1999 static void 2000 linux_handle_ifaddr_event(void *arg, struct ifnet *ifp) 2001 { 2002 
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp);
}

int
register_netdevice_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER(
	    ifnet_link_event, linux_handle_ifnet_link_event, nb, 0);
	nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0);
	nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0);
	nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER(
	    iflladdr_event, linux_handle_iflladdr_event, nb, 0);

	return (0);
}

int
register_inetaddr_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER(
	    ifaddr_event, linux_handle_ifaddr_event, nb, 0);
	return (0);
}

int
unregister_netdevice_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifnet_link_event,
	    nb->tags[NETDEV_UP]);
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
	    nb->tags[NETDEV_REGISTER]);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    nb->tags[NETDEV_UNREGISTER]);
	EVENTHANDLER_DEREGISTER(iflladdr_event,
	    nb->tags[NETDEV_CHANGEADDR]);

	return (0);
}

int
unregister_inetaddr_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifaddr_event,
	    nb->tags[NETDEV_CHANGEIFADDR]);

	return (0);
}

struct list_sort_thunk {
	int (*cmp)(void *, struct list_head *, struct list_head *);
	void *priv;
};

static inline int
linux_le_cmp(void *priv, const void *d1, const void *d2)
{
	struct list_head *le1, *le2;
	struct list_sort_thunk *thunk;

	thunk = priv;
	le1 = *(__DECONST(struct list_head **, d1));
	le2 = *(__DECONST(struct list_head **, d2));
	return ((thunk->cmp)(thunk->priv, le1, le2));
}

void
list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv,
    struct list_head *a, struct list_head *b))
{
	struct list_sort_thunk thunk;
	struct list_head **ar, *le;
	size_t count, i;

	count = 0;
	list_for_each(le, head)
		count++;
	ar = malloc(sizeof(struct list_head *) * count, M_KMALLOC, M_WAITOK);
	i = 0;
	list_for_each(le, head)
		ar[i++] = le;
	thunk.cmp = cmp;
	thunk.priv = priv;
	qsort_r(ar, count, sizeof(struct list_head *), &thunk, linux_le_cmp);
	INIT_LIST_HEAD(head);
	for (i = 0; i < count; i++)
		list_add_tail(ar[i], head);
	free(ar, M_KMALLOC);
}

void
linux_irq_handler(void *ent)
{
	struct irq_ent *irqe;

	linux_set_current(curthread);

	irqe = ent;
	irqe->handler(irqe->irq, irqe->arg);
}

#if defined(__i386__) || defined(__amd64__)
int
linux_wbinvd_on_all_cpus(void)
{

	pmap_invalidate_cache();
	return (0);
}
#endif

int
linux_on_each_cpu(void callback(void *), void *data)
{

	smp_rendezvous(smp_no_rendezvous_barrier, callback,
	    smp_no_rendezvous_barrier, data);
	return (0);
}

int
linux_in_atomic(void)
{

	return ((curthread->td_pflags & TDP_NOFAULTING) != 0);
}

struct linux_cdev *
linux_find_cdev(const char *name, unsigned major, unsigned minor)
{
	dev_t dev = MKDEV(major, minor);
	struct cdev *cdev;

	dev_lock();
	LIST_FOREACH(cdev, &linuxcdevsw.d_devs, si_list) {
		struct linux_cdev *ldev = cdev->si_drv1;
		if (ldev->dev == dev &&
		    strcmp(kobject_name(&ldev->kobj), name) == 0) {
			break;
		}
	}
	dev_unlock();

	return (cdev != NULL ? cdev->si_drv1 : NULL);
}

int
__register_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add(cdev, makedev(major, i), 1);
		if (ret != 0)
			break;
	}
	return (ret);
}

int
__register_chrdev_p(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops, uid_t uid,
    gid_t gid, int mode)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add_ext(cdev, makedev(major, i), uid, gid, mode);
		if (ret != 0)
			break;
	}
	return (ret);
}

void
__unregister_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name)
{
	struct linux_cdev *cdevp;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdevp = linux_find_cdev(name, major, i);
		if (cdevp != NULL)
			cdev_del(cdevp);
	}
}

void
linux_dump_stack(void)
{
#ifdef STACK
	struct stack st;

	stack_zero(&st);
	stack_save(&st);
	stack_print(&st);
#endif
}

#if defined(__i386__) || defined(__amd64__)
bool linux_cpu_has_clflush;
#endif

static void
linux_compat_init(void *arg)
{
	struct sysctl_oid *rootoid;
	int i;

#if defined(__i386__) || defined(__amd64__)
	linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH);
#endif
	rw_init(&linux_vma_lock, "lkpi-vma-lock");

	rootoid = SYSCTL_ADD_ROOT_NODE(NULL,
	    OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
	kobject_init(&linux_class_root, &linux_class_ktype);
	kobject_set_name(&linux_class_root, "class");
	linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid),
	    OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class");
	kobject_init(&linux_root_device.kobj, &linux_dev_ktype);
	kobject_set_name(&linux_root_device.kobj, "device");
	linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL,
	    SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL,
	    "device");
	linux_root_device.bsddev = root_bus;
	linux_class_misc.name = "misc";
	class_register(&linux_class_misc);
	INIT_LIST_HEAD(&pci_drivers);
	INIT_LIST_HEAD(&pci_devices);
	spin_lock_init(&pci_lock);
	mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF);
	for (i = 0; i < VMMAP_HASH_SIZE; i++)
		LIST_INIT(&vmmaphead[i]);
}
SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL);

static void
linux_compat_uninit(void *arg)
{
	linux_kobject_kfree_name(&linux_class_root);
	linux_kobject_kfree_name(&linux_root_device.kobj);
	linux_kobject_kfree_name(&linux_class_misc.kobj);

	mtx_destroy(&vmmaplock);
	spin_lock_destroy(&pci_lock);
	rw_destroy(&linux_vma_lock);
}
SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);

/*
 * NOTE: Linux frequently uses "unsigned long" for pointer to integer
 * conversion and vice versa, where in FreeBSD "uintptr_t" would be
 * used. Assert these types have the same size, else some parts of the
 * LinuxKPI may not work like expected:
 */
CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t));