xref: /freebsd/sys/compat/linuxkpi/common/src/linux_compat.c (revision 43a251d687a16a671f578ad3edfd73879e27e23b)
1 /*-
2  * Copyright (c) 2010 Isilon Systems, Inc.
3  * Copyright (c) 2010 iX Systems, Inc.
4  * Copyright (c) 2010 Panasas, Inc.
5  * Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice unmodified, this list of conditions, and the following
13  *    disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/kernel.h>
37 #include <sys/sysctl.h>
38 #include <sys/proc.h>
39 #include <sys/sglist.h>
40 #include <sys/sleepqueue.h>
41 #include <sys/lock.h>
42 #include <sys/mutex.h>
43 #include <sys/bus.h>
44 #include <sys/fcntl.h>
45 #include <sys/file.h>
46 #include <sys/filio.h>
47 #include <sys/rwlock.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_object.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_pager.h>
54 
55 #include <machine/stdarg.h>
56 
57 #if defined(__i386__) || defined(__amd64__)
58 #include <machine/md_var.h>
59 #endif
60 
61 #include <linux/kobject.h>
62 #include <linux/device.h>
63 #include <linux/slab.h>
64 #include <linux/module.h>
65 #include <linux/moduleparam.h>
66 #include <linux/cdev.h>
67 #include <linux/file.h>
68 #include <linux/sysfs.h>
69 #include <linux/mm.h>
70 #include <linux/io.h>
71 #include <linux/vmalloc.h>
72 #include <linux/netdevice.h>
73 #include <linux/timer.h>
74 #include <linux/interrupt.h>
75 #include <linux/uaccess.h>
76 #include <linux/kernel.h>
77 #include <linux/list.h>
78 #include <linux/compat.h>
79 #include <linux/poll.h>
80 #include <linux/smp.h>
81 
82 #if defined(__i386__) || defined(__amd64__)
83 #include <asm/smp.h>
84 #endif
85 
SYSCTL_NODE(_compat, OID_AUTO, linuxkpi, CTLFLAG_RW, 0, "LinuxKPI parameters");

MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat");

#include <linux/rbtree.h>
/* Undo Linux compat changes. */
#undef RB_ROOT
#undef file
#undef cdev
#define	RB_ROOT(head)	(head)->rbh_root

static struct vm_area_struct *linux_cdev_handle_find(void *handle);

/* Root objects of the emulated Linux device model. */
struct kobject linux_class_root;
struct device linux_root_device;
struct class linux_class_misc;
/* Registries of LinuxKPI PCI drivers and devices, protected by pci_lock. */
struct list_head pci_drivers;
struct list_head pci_devices;
spinlock_t pci_lock;

/* Mask used to round timer expiry values to the callout granularity. */
unsigned long linux_timer_hz_mask;

/*
 * Comparison callback required by RB_GENERATE() below.  LinuxKPI
 * rbtree consumers supply their own ordering and never reach this
 * function; getting here indicates a bug, hence the panic.
 */
int
panic_cmp(struct rb_node *one, struct rb_node *two)
{
	panic("no cmp");
}

RB_GENERATE(linux_root, rb_node, __entry, panic_cmp);
115 
116 int
117 kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args)
118 {
119 	va_list tmp_va;
120 	int len;
121 	char *old;
122 	char *name;
123 	char dummy;
124 
125 	old = kobj->name;
126 
127 	if (old && fmt == NULL)
128 		return (0);
129 
130 	/* compute length of string */
131 	va_copy(tmp_va, args);
132 	len = vsnprintf(&dummy, 0, fmt, tmp_va);
133 	va_end(tmp_va);
134 
135 	/* account for zero termination */
136 	len++;
137 
138 	/* check for error */
139 	if (len < 1)
140 		return (-EINVAL);
141 
142 	/* allocate memory for string */
143 	name = kzalloc(len, GFP_KERNEL);
144 	if (name == NULL)
145 		return (-ENOMEM);
146 	vsnprintf(name, len, fmt, args);
147 	kobj->name = name;
148 
149 	/* free old string */
150 	kfree(old);
151 
152 	/* filter new string */
153 	for (; *name != '\0'; name++)
154 		if (*name == '/')
155 			*name = '!';
156 	return (0);
157 }
158 
159 int
160 kobject_set_name(struct kobject *kobj, const char *fmt, ...)
161 {
162 	va_list args;
163 	int error;
164 
165 	va_start(args, fmt);
166 	error = kobject_set_name_vargs(kobj, fmt, args);
167 	va_end(args);
168 
169 	return (error);
170 }
171 
172 static int
173 kobject_add_complete(struct kobject *kobj, struct kobject *parent)
174 {
175 	const struct kobj_type *t;
176 	int error;
177 
178 	kobj->parent = parent;
179 	error = sysfs_create_dir(kobj);
180 	if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) {
181 		struct attribute **attr;
182 		t = kobj->ktype;
183 
184 		for (attr = t->default_attrs; *attr != NULL; attr++) {
185 			error = sysfs_create_file(kobj, *attr);
186 			if (error)
187 				break;
188 		}
189 		if (error)
190 			sysfs_remove_dir(kobj);
191 
192 	}
193 	return (error);
194 }
195 
196 int
197 kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...)
198 {
199 	va_list args;
200 	int error;
201 
202 	va_start(args, fmt);
203 	error = kobject_set_name_vargs(kobj, fmt, args);
204 	va_end(args);
205 	if (error)
206 		return (error);
207 
208 	return kobject_add_complete(kobj, parent);
209 }
210 
/*
 * Final kref release callback for a kobject: remove its sysfs
 * directory, run the ktype release method and free the name string.
 * The name pointer is saved first because the release method may
 * free the kobject structure itself.
 */
void
linux_kobject_release(struct kref *kref)
{
	struct kobject *kobj;
	char *name;

	kobj = container_of(kref, struct kobject, kref);
	sysfs_remove_dir(kobj);
	name = kobj->name;
	if (kobj->ktype && kobj->ktype->release)
		kobj->ktype->release(kobj);
	kfree(name);
}

/* Release method that simply frees the containing kobject. */
static void
linux_kobject_kfree(struct kobject *kobj)
{
	kfree(kobj);
}

/* Release method that frees only the kobject's name string. */
static void
linux_kobject_kfree_name(struct kobject *kobj)
{
	if (kobj) {
		kfree(kobj->name);
	}
}

/* ktype for kobjects which were allocated with kmalloc()/kzalloc(). */
const struct kobj_type linux_kfree_type = {
	.release = linux_kobject_kfree
};
242 
/* Device release method installed by device_create(): free the device. */
static void
linux_device_release(struct device *dev)
{
	pr_debug("linux_device_release: %s\n", dev_name(dev));
	kfree(dev);
}
249 
250 static ssize_t
251 linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf)
252 {
253 	struct class_attribute *dattr;
254 	ssize_t error;
255 
256 	dattr = container_of(attr, struct class_attribute, attr);
257 	error = -EIO;
258 	if (dattr->show)
259 		error = dattr->show(container_of(kobj, struct class, kobj),
260 		    dattr, buf);
261 	return (error);
262 }
263 
264 static ssize_t
265 linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf,
266     size_t count)
267 {
268 	struct class_attribute *dattr;
269 	ssize_t error;
270 
271 	dattr = container_of(attr, struct class_attribute, attr);
272 	error = -EIO;
273 	if (dattr->store)
274 		error = dattr->store(container_of(kobj, struct class, kobj),
275 		    dattr, buf, count);
276 	return (error);
277 }
278 
/* kobject release method for struct class: run the class release hook. */
static void
linux_class_release(struct kobject *kobj)
{
	struct class *class;

	class = container_of(kobj, struct class, kobj);
	if (class->class_release)
		class->class_release(class);
}

/* sysfs attribute dispatch table for class kobjects. */
static const struct sysfs_ops linux_class_sysfs = {
	.show  = linux_class_show,
	.store = linux_class_store,
};

/* ktype backing every struct class's embedded kobject. */
const struct kobj_type linux_class_ktype = {
	.release = linux_class_release,
	.sysfs_ops = &linux_class_sysfs
};
298 
/*
 * kobject release method for struct device.  The device's own
 * release method takes precedence over the class-wide one, matching
 * Linux semantics.
 */
static void
linux_dev_release(struct kobject *kobj)
{
	struct device *dev;

	dev = container_of(kobj, struct device, kobj);
	/* This is the precedence defined by linux. */
	if (dev->release)
		dev->release(dev);
	else if (dev->class && dev->class->dev_release)
		dev->class->dev_release(dev);
}
311 
312 static ssize_t
313 linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf)
314 {
315 	struct device_attribute *dattr;
316 	ssize_t error;
317 
318 	dattr = container_of(attr, struct device_attribute, attr);
319 	error = -EIO;
320 	if (dattr->show)
321 		error = dattr->show(container_of(kobj, struct device, kobj),
322 		    dattr, buf);
323 	return (error);
324 }
325 
326 static ssize_t
327 linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf,
328     size_t count)
329 {
330 	struct device_attribute *dattr;
331 	ssize_t error;
332 
333 	dattr = container_of(attr, struct device_attribute, attr);
334 	error = -EIO;
335 	if (dattr->store)
336 		error = dattr->store(container_of(kobj, struct device, kobj),
337 		    dattr, buf, count);
338 	return (error);
339 }
340 
/* sysfs attribute dispatch table for device kobjects. */
static const struct sysfs_ops linux_dev_sysfs = {
	.show  = linux_dev_show,
	.store = linux_dev_store,
};

/* ktype backing every struct device's embedded kobject. */
const struct kobj_type linux_dev_ktype = {
	.release = linux_dev_release,
	.sysfs_ops = &linux_dev_sysfs
};
350 
351 struct device *
352 device_create(struct class *class, struct device *parent, dev_t devt,
353     void *drvdata, const char *fmt, ...)
354 {
355 	struct device *dev;
356 	va_list args;
357 
358 	dev = kzalloc(sizeof(*dev), M_WAITOK);
359 	dev->parent = parent;
360 	dev->class = class;
361 	dev->devt = devt;
362 	dev->driver_data = drvdata;
363 	dev->release = linux_device_release;
364 	va_start(args, fmt);
365 	kobject_set_name_vargs(&dev->kobj, fmt, args);
366 	va_end(args);
367 	device_register(dev);
368 
369 	return (dev);
370 }
371 
/*
 * Initialize a kobject, set its name from the format arguments and
 * register it below the given parent in sysfs.
 * Returns 0 on success or a negative errno value.
 */
int
kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype,
    struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	kobject_init(kobj, ktype);
	kobj->ktype = ktype;
	kobj->parent = parent;
	kobj->name = NULL;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);
	return kobject_add_complete(kobj, parent);
}
391 
/*
 * devfs cdevpriv destructor, run when the last reference to an open
 * character device descriptor goes away: invoke the Linux release
 * method, drop the vnode hold taken at open time and free the shadow
 * linux_file structure.
 */
static void
linux_file_dtor(void *cdp)
{
	struct linux_file *filp;

	linux_set_current(curthread);
	filp = cdp;
	filp->f_op->release(filp->f_vnode, filp);
	vdrop(filp->f_vnode);
	kfree(filp);
}

/*
 * Free a linux_file.  When the file is backed by a FreeBSD struct
 * file, dropping the final file reference runs the close path which
 * frees the structure; otherwise free it directly.
 */
void
linux_file_free(struct linux_file *filp)
{
	if (filp->_file == NULL) {
		kfree(filp);
	} else {
		/*
		 * The close method of the character device or file
		 * will free the linux_file structure:
		 */
		_fdrop(filp->_file, curthread);
	}
}
417 
/*
 * OBJT_MGTDEVICE pager populate callback: service a page fault on a
 * device mmap by invoking the driver's vm_ops->fault() method.
 * Called with the VM object write-locked; the lock is dropped around
 * the driver callback and re-acquired before returning.  Returns a
 * VM_PAGER_* status and, on success, the range of busied pages via
 * *first and *last.
 */
static int
linux_cdev_pager_populate(vm_object_t vm_obj, vm_pindex_t pidx, int fault_type,
    vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last)
{
	struct vm_area_struct *vmap;
	struct vm_fault vmf;
	int err;

	linux_set_current(curthread);

	/* get VM area structure */
	vmap = linux_cdev_handle_find(vm_obj->handle);
	MPASS(vmap != NULL);
	MPASS(vmap->vm_private_data == vm_obj->handle);

	/* fill out VM fault structure */
	vmf.virtual_address = (void *)((uintptr_t)pidx << PAGE_SHIFT);
	vmf.flags = (fault_type & VM_PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
	vmf.pgoff = 0;
	vmf.page = NULL;

	VM_OBJECT_WUNLOCK(vm_obj);

	down_write(&vmap->vm_mm->mmap_sem);
	if (unlikely(vmap->vm_ops == NULL)) {
		err = VM_FAULT_SIGBUS;
	} else {
		vmap->vm_pfn_count = 0;
		vmap->vm_pfn_pcount = &vmap->vm_pfn_count;
		vmap->vm_obj = vm_obj;

		err = vmap->vm_ops->fault(vmap, &vmf);

		/* retry while the driver reports NOPAGE without inserting pages */
		while (vmap->vm_pfn_count == 0 && err == VM_FAULT_NOPAGE) {
			kern_yield(PRI_USER);
			err = vmap->vm_ops->fault(vmap, &vmf);
		}
	}

	/* translate return code */
	switch (err) {
	case VM_FAULT_OOM:
		err = VM_PAGER_AGAIN;
		break;
	case VM_FAULT_SIGBUS:
		err = VM_PAGER_BAD;
		break;
	case VM_FAULT_NOPAGE:
		/*
		 * By contract the fault handler will return having
		 * busied all the pages itself. If pidx is already
		 * found in the object, it will simply xbusy the first
		 * page and return with vm_pfn_count set to 1.
		 */
		*first = vmap->vm_pfn_first;
		*last = *first + vmap->vm_pfn_count - 1;
		err = VM_PAGER_OK;
		break;
	default:
		err = VM_PAGER_ERROR;
		break;
	}
	up_write(&vmap->vm_mm->mmap_sem);
	VM_OBJECT_WLOCK(vm_obj);
	return (err);
}
484 
/* Lock and registry of all active device-mmap VM areas. */
static struct rwlock linux_vma_lock;
static TAILQ_HEAD(, vm_area_struct) linux_vma_head =
    TAILQ_HEAD_INITIALIZER(linux_vma_head);

/*
 * Free a vm_area_struct and drop the file and mm_struct references
 * it holds.
 */
static void
linux_cdev_handle_free(struct vm_area_struct *vmap)
{
	/* Drop reference on vm_file */
	if (vmap->vm_file != NULL)
		fput(vmap->vm_file);

	/* Drop reference on mm_struct */
	mmput(vmap->vm_mm);

	kfree(vmap);
}
501 
502 static struct vm_area_struct *
503 linux_cdev_handle_insert(void *handle, struct vm_area_struct *vmap)
504 {
505 	struct vm_area_struct *ptr;
506 
507 	rw_wlock(&linux_vma_lock);
508 	TAILQ_FOREACH(ptr, &linux_vma_head, vm_entry) {
509 		if (ptr->vm_private_data == handle) {
510 			rw_wunlock(&linux_vma_lock);
511 			linux_cdev_handle_free(vmap);
512 			return (NULL);
513 		}
514 	}
515 	TAILQ_INSERT_TAIL(&linux_vma_head, vmap, vm_entry);
516 	rw_wunlock(&linux_vma_lock);
517 	return (vmap);
518 }
519 
/* Unlink a VM area from the handle registry. */
static void
linux_cdev_handle_remove(struct vm_area_struct *vmap)
{
	rw_wlock(&linux_vma_lock);
	TAILQ_REMOVE(&linux_vma_head, vmap, vm_entry);
	rw_wunlock(&linux_vma_lock);
}
527 
528 static struct vm_area_struct *
529 linux_cdev_handle_find(void *handle)
530 {
531 	struct vm_area_struct *vmap;
532 
533 	rw_rlock(&linux_vma_lock);
534 	TAILQ_FOREACH(vmap, &linux_vma_head, vm_entry) {
535 		if (vmap->vm_private_data == handle)
536 			break;
537 	}
538 	rw_runlock(&linux_vma_lock);
539 	return (vmap);
540 }
541 
/*
 * Pager constructor for device-mmap objects.  The VM area is already
 * registered by linux_dev_mmap_single(); only the color is set here.
 */
static int
linux_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
		      vm_ooffset_t foff, struct ucred *cred, u_short *color)
{

	MPASS(linux_cdev_handle_find(handle) != NULL);
	*color = 0;
	return (0);
}
551 
/*
 * Pager destructor for device-mmap objects: deregister the VM area,
 * run the driver's close method and free the VM area.
 */
static void
linux_cdev_pager_dtor(void *handle)
{
	const struct vm_operations_struct *vm_ops;
	struct vm_area_struct *vmap;

	vmap = linux_cdev_handle_find(handle);
	MPASS(vmap != NULL);

	/*
	 * Remove handle before calling close operation to prevent
	 * other threads from reusing the handle pointer.
	 */
	linux_cdev_handle_remove(vmap);

	down_write(&vmap->vm_mm->mmap_sem);
	vm_ops = vmap->vm_ops;
	if (likely(vm_ops != NULL))
		vm_ops->close(vmap);
	up_write(&vmap->vm_mm->mmap_sem);

	linux_cdev_handle_free(vmap);
}

/* Pager method table for Linux device-mmap objects. */
static struct cdev_pager_ops linux_cdev_pager_ops = {
	.cdev_pg_populate	= linux_cdev_pager_populate,
	.cdev_pg_ctor	= linux_cdev_pager_ctor,
	.cdev_pg_dtor	= linux_cdev_pager_dtor
};
581 
/*
 * cdevsw open method: allocate a shadow linux_file, invoke the Linux
 * open method and attach the result as devfs cdevpriv so the other
 * cdevsw methods can find it.  Returns 0 or a positive errno value.
 */
static int
linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	int error;

	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (ENODEV);
	filp = kzalloc(sizeof(*filp), GFP_KERNEL);
	filp->f_dentry = &filp->f_dentry_store;
	filp->f_op = ldev->ops;
	filp->f_flags = file->f_flag;
	/* hold the vnode so the Linux methods may reference it safely */
	vhold(file->f_vnode);
	filp->f_vnode = file->f_vnode;
	linux_set_current(td);
	filp->_file = file;

	if (filp->f_op->open) {
		/* Linux methods return negative errno values */
		error = -filp->f_op->open(file->f_vnode, filp);
		if (error) {
			vdrop(filp->f_vnode);
			kfree(filp);
			goto done;
		}
	}
	error = devfs_set_cdevpriv(filp, linux_file_dtor);
	if (error) {
		/* undo the successful open */
		filp->f_op->release(file->f_vnode, filp);
		vdrop(filp->f_vnode);
		kfree(filp);
	}
done:
	return (error);
}
620 
/*
 * cdevsw close method: detach the per-descriptor cdevpriv; the real
 * cleanup runs in linux_file_dtor() once the last reference to the
 * descriptor goes away.
 */
static int
linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct linux_file *filp;
	struct file *file;
	int error;

	file = td->td_fpop;
	if (dev->si_drv1 == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	devfs_clear_cdevpriv();

	return (0);
}
638 
/*
 * Pseudo user-space address window used to hand the kernel-resident
 * ioctl data buffer to Linux code that expects a user-space pointer;
 * see linux_dev_ioctl().
 */
#define	LINUX_IOCTL_MIN_PTR 0x10000UL
#define	LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + IOCPARM_MAX)

/*
 * If *uaddr falls inside the pseudo ioctl window, rewrite it to point
 * into the current thread's kernel ioctl buffer and return 1; on a
 * bounds violation *uaddr is set to NULL instead.  Addresses outside
 * the window are left untouched and 0 is returned.
 */
static inline int
linux_remap_address(void **uaddr, size_t len)
{
	uintptr_t uaddr_val = (uintptr_t)(*uaddr);

	if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR &&
	    uaddr_val < LINUX_IOCTL_MAX_PTR)) {
		struct task_struct *pts = current;
		if (pts == NULL) {
			*uaddr = NULL;
			return (1);
		}

		/* compute data offset */
		uaddr_val -= LINUX_IOCTL_MIN_PTR;

		/* check that length is within bounds */
		if ((len > IOCPARM_MAX) ||
		    (uaddr_val + len) > pts->bsd_ioctl_len) {
			*uaddr = NULL;
			return (1);
		}

		/* re-add kernel buffer address */
		uaddr_val += (uintptr_t)pts->bsd_ioctl_data;

		/* update address location */
		*uaddr = (void *)uaddr_val;
		return (1);
	}
	return (0);
}
674 
675 int
676 linux_copyin(const void *uaddr, void *kaddr, size_t len)
677 {
678 	if (linux_remap_address(__DECONST(void **, &uaddr), len)) {
679 		if (uaddr == NULL)
680 			return (-EFAULT);
681 		memcpy(kaddr, uaddr, len);
682 		return (0);
683 	}
684 	return (-copyin(uaddr, kaddr, len));
685 }
686 
687 int
688 linux_copyout(const void *kaddr, void *uaddr, size_t len)
689 {
690 	if (linux_remap_address(&uaddr, len)) {
691 		if (uaddr == NULL)
692 			return (-EFAULT);
693 		memcpy(uaddr, kaddr, len);
694 		return (0);
695 	}
696 	return (-copyout(kaddr, uaddr, len));
697 }
698 
/*
 * Zero len bytes of user-space memory.  Returns 0 on success or the
 * original length on fault, mirroring Linux clear_user() semantics.
 */
size_t
linux_clear_user(void *_uaddr, size_t _len)
{
	uint8_t *uaddr = _uaddr;
	size_t len = _len;

	/* make sure uaddr is aligned before going into the fast loop */
	while (((uintptr_t)uaddr & 7) != 0 && len > 7) {
		if (subyte(uaddr, 0))
			return (_len);
		uaddr++;
		len--;
	}

	/* zero 8 bytes at a time */
	while (len > 7) {
#ifdef __LP64__
		if (suword64(uaddr, 0))
			return (_len);
#else
		if (suword32(uaddr, 0))
			return (_len);
		if (suword32(uaddr + 4, 0))
			return (_len);
#endif
		uaddr += 8;
		len -= 8;
	}

	/* zero fill end, if any */
	while (len > 0) {
		if (subyte(uaddr, 0))
			return (_len);
		uaddr++;
		len--;
	}
	return (0);
}
737 
738 int
739 linux_access_ok(int rw, const void *uaddr, size_t len)
740 {
741 	uintptr_t saddr;
742 	uintptr_t eaddr;
743 
744 	/* get start and end address */
745 	saddr = (uintptr_t)uaddr;
746 	eaddr = (uintptr_t)uaddr + len;
747 
748 	/* verify addresses are valid for userspace */
749 	return ((saddr == eaddr) ||
750 	    (eaddr > saddr && eaddr <= VM_MAXUSER_ADDRESS));
751 }
752 
/*
 * cdevsw ioctl method.  For ioctls carrying inline data the kernel
 * buffer is published via bsd_ioctl_data/bsd_ioctl_len so that
 * linux_copyin()/linux_copyout() can remap the pseudo user-space
 * pointer handed to the Linux driver; see linux_remap_address().
 */
static int
linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct linux_file *filp;
	struct file *file;
	unsigned size;
	int error;

	file = td->td_fpop;
	if (dev->si_drv1 == NULL)
		return (ENXIO);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;

	linux_set_current(td);
	size = IOCPARM_LEN(cmd);
	/* refer to logic in sys_ioctl() */
	if (size > 0) {
		/*
		 * Setup hint for linux_copyin() and linux_copyout().
		 *
		 * Background: Linux code expects a user-space address
		 * while FreeBSD supplies a kernel-space address.
		 */
		current->bsd_ioctl_data = data;
		current->bsd_ioctl_len = size;
		data = (void *)LINUX_IOCTL_MIN_PTR;
	} else {
		/* fetch user-space pointer */
		data = *(void **)data;
	}
	if (filp->f_op->unlocked_ioctl)
		error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data);
	else
		error = ENOTTY;
	if (size > 0) {
		/* clear the remap hint again */
		current->bsd_ioctl_data = NULL;
		current->bsd_ioctl_len = 0;
	}

	return (error);
}
797 
/*
 * cdevsw read method: forward to the Linux read method and advance
 * the uio by the number of bytes transferred.  Only single-vector
 * uios are supported.
 */
static int
linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct linux_file *filp;
	struct thread *td;
	struct file *file;
	ssize_t bytes;
	int error;

	td = curthread;
	file = td->td_fpop;
	if (dev->si_drv1 == NULL)
		return (ENXIO);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->read) {
		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			/* consume the transferred bytes from the uio */
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}
833 
/*
 * cdevsw write method: forward to the Linux write method and advance
 * the uio by the number of bytes transferred.  Only single-vector
 * uios are supported.
 */
static int
linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct linux_file *filp;
	struct thread *td;
	struct file *file;
	ssize_t bytes;
	int error;

	td = curthread;
	file = td->td_fpop;
	if (dev->si_drv1 == NULL)
		return (ENXIO);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->write) {
		bytes = filp->f_op->write(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			/* consume the transferred bytes from the uio */
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}
869 
/*
 * cdevsw poll method: forward to the Linux poll method.  On any
 * lookup failure all requested read/write/hangup events are reported
 * so callers do not block on a dead device.
 */
static int
linux_dev_poll(struct cdev *dev, int events, struct thread *td)
{
	struct linux_file *filp;
	struct file *file;
	int revents;

	if (dev->si_drv1 == NULL)
		goto error;
	if (devfs_get_cdevpriv((void **)&filp) != 0)
		goto error;

	file = td->td_fpop;
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	if (filp->f_op->poll)
		revents = filp->f_op->poll(filp, NULL) & events;
	else
		revents = 0;

	return (revents);
error:
	return (events & (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
}
894 
/*
 * cdevsw d_mmap_single method: build a vm_area_struct describing the
 * requested range, call the driver's mmap method and wrap the result
 * in either a managed-device pager object (the driver installed
 * vm_ops) or an sglist-backed object for a direct physical mapping.
 */
static int
linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot)
{
	struct vm_area_struct *vmap;
	struct mm_struct *mm;
	struct linux_file *filp;
	struct thread *td;
	struct file *file;
	vm_memattr_t attr;
	int error;

	td = curthread;
	file = td->td_fpop;
	if (dev->si_drv1 == NULL)
		return (ENODEV);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;

	if (filp->f_op->mmap == NULL)
		return (ENODEV);

	linux_set_current(td);

	/*
	 * The same VM object might be shared by multiple processes
	 * and the mm_struct is usually freed when a process exits.
	 *
	 * The atomic reference below makes sure the mm_struct is
	 * available as long as the vmap is in the linux_vma_head.
	 */
	mm = current->mm;
	if (atomic_inc_not_zero(&mm->mm_users) == 0)
		return (EINVAL);

	vmap = kzalloc(sizeof(*vmap), GFP_KERNEL);
	vmap->vm_start = 0;
	vmap->vm_end = size;
	vmap->vm_pgoff = *offset / PAGE_SIZE;
	vmap->vm_pfn = 0;
	vmap->vm_flags = vmap->vm_page_prot = nprot;
	vmap->vm_ops = NULL;
	vmap->vm_file = get_file(filp);
	vmap->vm_mm = mm;

	if (unlikely(down_write_killable(&vmap->vm_mm->mmap_sem))) {
		error = EINTR;
	} else {
		/* Linux methods return negative errno values */
		error = -filp->f_op->mmap(filp, vmap);
		up_write(&vmap->vm_mm->mmap_sem);
	}

	if (error != 0) {
		linux_cdev_handle_free(vmap);
		return (error);
	}

	attr = pgprot2cachemode(vmap->vm_page_prot);

	if (vmap->vm_ops != NULL) {
		void *vm_private_data;

		/* the fault/open/close methods and the handle are mandatory */
		if (vmap->vm_ops->fault == NULL ||
		    vmap->vm_ops->open == NULL ||
		    vmap->vm_ops->close == NULL ||
		    vmap->vm_private_data == NULL) {
			linux_cdev_handle_free(vmap);
			return (EINVAL);
		}

		vm_private_data = vmap->vm_private_data;

		/*
		 * NOTE(review): linux_cdev_handle_insert() returns NULL
		 * when the handle is already registered; the error path
		 * below would then pass NULL to
		 * linux_cdev_handle_remove()/free() - confirm this case
		 * cannot occur here.
		 */
		vmap = linux_cdev_handle_insert(vm_private_data, vmap);

		*object = cdev_pager_allocate(vm_private_data, OBJT_MGTDEVICE,
		    &linux_cdev_pager_ops, size, nprot, *offset, curthread->td_ucred);

		if (*object == NULL) {
			linux_cdev_handle_remove(vmap);
			linux_cdev_handle_free(vmap);
			return (EINVAL);
		}
	} else {
		struct sglist *sg;

		/*
		 * No vm_ops: presumably the driver's mmap filled in
		 * vm_pfn/vm_len via remap_pfn_range() - confirm.
		 */
		sg = sglist_alloc(1, M_WAITOK);
		sglist_append_phys(sg, (vm_paddr_t)vmap->vm_pfn << PAGE_SHIFT, vmap->vm_len);

		*object = vm_pager_allocate(OBJT_SG, sg, vmap->vm_len,
		    nprot, 0, curthread->td_ucred);

		linux_cdev_handle_free(vmap);

		if (*object == NULL) {
			sglist_free(sg);
			return (EINVAL);
		}
	}

	if (attr != VM_MEMATTR_DEFAULT) {
		VM_OBJECT_WLOCK(*object);
		vm_object_set_memattr(*object, attr);
		VM_OBJECT_WUNLOCK(*object);
	}
	*offset = 0;
	return (0);
}
1003 
/* cdevsw for all character devices created through the LinuxKPI. */
struct cdevsw linuxcdevsw = {
	.d_version = D_VERSION,
	.d_flags = D_TRACKCLOSE,
	.d_open = linux_dev_open,
	.d_close = linux_dev_close,
	.d_read = linux_dev_read,
	.d_write = linux_dev_write,
	.d_ioctl = linux_dev_ioctl,
	.d_mmap_single = linux_dev_mmap_single,
	.d_poll = linux_dev_poll,
	.d_name = "lkpidev",
};
1016 
/*
 * fileops read method: forward to the Linux read method and advance
 * the uio by the number of bytes transferred.  Only single-vector
 * uios are supported.
 */
static int
linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct linux_file *filp;
	ssize_t bytes;
	int error;

	error = 0;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->read) {
		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			/* consume the transferred bytes from the uio */
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}
1047 
1048 static int
1049 linux_file_poll(struct file *file, int events, struct ucred *active_cred,
1050     struct thread *td)
1051 {
1052 	struct linux_file *filp;
1053 	int revents;
1054 
1055 	filp = (struct linux_file *)file->f_data;
1056 	filp->f_flags = file->f_flag;
1057 	linux_set_current(td);
1058 	if (filp->f_op->poll)
1059 		revents = filp->f_op->poll(filp, NULL) & events;
1060 	else
1061 		revents = 0;
1062 
1063 	return (revents);
1064 }
1065 
/*
 * fileops close method: invoke the Linux release method, clear SIGIO
 * ownership and free the shadow linux_file structure.
 */
static int
linux_file_close(struct file *file, struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	/* Linux methods return negative errno values */
	error = -filp->f_op->release(NULL, filp);
	funsetown(&filp->f_sigio);
	kfree(filp);

	return (error);
}
1081 
1082 static int
1083 linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred,
1084     struct thread *td)
1085 {
1086 	struct linux_file *filp;
1087 	int error;
1088 
1089 	filp = (struct linux_file *)fp->f_data;
1090 	filp->f_flags = fp->f_flag;
1091 	error = 0;
1092 
1093 	linux_set_current(td);
1094 	switch (cmd) {
1095 	case FIONBIO:
1096 		break;
1097 	case FIOASYNC:
1098 		if (filp->f_op->fasync == NULL)
1099 			break;
1100 		error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC);
1101 		break;
1102 	case FIOSETOWN:
1103 		error = fsetown(*(int *)data, &filp->f_sigio);
1104 		if (error == 0)
1105 			error = filp->f_op->fasync(0, filp,
1106 			    fp->f_flag & FASYNC);
1107 		break;
1108 	case FIOGETOWN:
1109 		*(int *)data = fgetown(&filp->f_sigio);
1110 		break;
1111 	default:
1112 		error = ENOTTY;
1113 		break;
1114 	}
1115 	return (error);
1116 }
1117 
/* fileops stat method: not supported for LinuxKPI files. */
static int
linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{

	return (EOPNOTSUPP);
}

/* fileops fill_kinfo method: nothing extra to report. */
static int
linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{

	return (0);
}
1133 
/* fileops for Linux files which are not backed by a devfs node. */
struct fileops linuxfileops = {
	.fo_read = linux_file_read,
	.fo_write = invfo_rdwr,
	.fo_truncate = invfo_truncate,
	.fo_kqfilter = invfo_kqfilter,
	.fo_stat = linux_file_stat,
	.fo_fill_kinfo = linux_file_fill_kinfo,
	.fo_poll = linux_file_poll,
	.fo_close = linux_file_close,
	.fo_ioctl = linux_file_ioctl,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
};
1148 
1149 /*
1150  * Hash of vmmap addresses.  This is infrequently accessed and does not
1151  * need to be particularly large.  This is done because we must store the
1152  * caller's idea of the map size to properly unmap.
1153  */
1154 struct vmmap {
1155 	LIST_ENTRY(vmmap)	vm_next;
1156 	void 			*vm_addr;
1157 	unsigned long		vm_size;
1158 };
1159 
1160 struct vmmaphd {
1161 	struct vmmap *lh_first;
1162 };
1163 #define	VMMAP_HASH_SIZE	64
1164 #define	VMMAP_HASH_MASK	(VMMAP_HASH_SIZE - 1)
1165 #define	VM_HASH(addr)	((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK
1166 static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE];
1167 static struct mtx vmmaplock;
1168 
1169 static void
1170 vmmap_add(void *addr, unsigned long size)
1171 {
1172 	struct vmmap *vmmap;
1173 
1174 	vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL);
1175 	mtx_lock(&vmmaplock);
1176 	vmmap->vm_size = size;
1177 	vmmap->vm_addr = addr;
1178 	LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next);
1179 	mtx_unlock(&vmmaplock);
1180 }
1181 
1182 static struct vmmap *
1183 vmmap_remove(void *addr)
1184 {
1185 	struct vmmap *vmmap;
1186 
1187 	mtx_lock(&vmmaplock);
1188 	LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next)
1189 		if (vmmap->vm_addr == addr)
1190 			break;
1191 	if (vmmap)
1192 		LIST_REMOVE(vmmap, vm_next);
1193 	mtx_unlock(&vmmaplock);
1194 
1195 	return (vmmap);
1196 }
1197 
#if defined(__i386__) || defined(__amd64__)
/*
 * Map a physical address range into kernel virtual address space with
 * the requested caching attribute, recording the mapping so iounmap()
 * can later undo it.  Returns the KVA or NULL on failure.
 */
void *
_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr)
{
	void *addr;

	addr = pmap_mapdev_attr(phys_addr, size, attr);
	if (addr == NULL)
		return (NULL);
	/* Remember the size; iounmap() only receives the address. */
	vmmap_add(addr, size);

	return (addr);
}
#endif
1212 
/*
 * Undo a mapping created by _ioremap_attr().  Addresses that were never
 * recorded are silently ignored.  On platforms without ioremap support
 * only the bookkeeping entry is released.
 */
void
iounmap(void *addr)
{
	struct vmmap *vmmap;

	vmmap = vmmap_remove(addr);
	if (vmmap == NULL)
		return;
#if defined(__i386__) || defined(__amd64__)
	pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size);
#endif
	kfree(vmmap);
}
1226 
1227 
/*
 * Map "count" pages into a contiguous kernel virtual address range.
 * The "flags" and "prot" arguments are accepted for Linux source
 * compatibility but are ignored by this implementation.  Returns the
 * KVA or NULL when no address space is available.
 */
void *
vmap(struct page **pages, unsigned int count, unsigned long flags, int prot)
{
	vm_offset_t off;
	size_t size;

	size = count * PAGE_SIZE;
	off = kva_alloc(size);
	if (off == 0)
		return (NULL);
	/* Record the size for vunmap(), then wire the pages into the KVA. */
	vmmap_add((void *)off, size);
	pmap_qenter(off, pages, count);

	return ((void *)off);
}
1243 
1244 void
1245 vunmap(void *addr)
1246 {
1247 	struct vmmap *vmmap;
1248 
1249 	vmmap = vmmap_remove(addr);
1250 	if (vmmap == NULL)
1251 		return;
1252 	pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE);
1253 	kva_free((vm_offset_t)addr, vmmap->vm_size);
1254 	kfree(vmmap);
1255 }
1256 
1257 char *
1258 kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
1259 {
1260 	unsigned int len;
1261 	char *p;
1262 	va_list aq;
1263 
1264 	va_copy(aq, ap);
1265 	len = vsnprintf(NULL, 0, fmt, aq);
1266 	va_end(aq);
1267 
1268 	p = kmalloc(len + 1, gfp);
1269 	if (p != NULL)
1270 		vsnprintf(p, len + 1, fmt, ap);
1271 
1272 	return (p);
1273 }
1274 
1275 char *
1276 kasprintf(gfp_t gfp, const char *fmt, ...)
1277 {
1278 	va_list ap;
1279 	char *p;
1280 
1281 	va_start(ap, fmt);
1282 	p = kvasprintf(gfp, fmt, ap);
1283 	va_end(ap);
1284 
1285 	return (p);
1286 }
1287 
/*
 * Callout trampoline: establish a Linux "current" task context for the
 * callout thread, then invoke the Linux timer function with its cookie.
 */
static void
linux_timer_callback_wrapper(void *context)
{
	struct timer_list *timer;

	linux_set_current(curthread);

	timer = context;
	timer->function(timer->data);
}
1298 
/*
 * (Re-)arm the timer to fire at absolute time "expires" (in jiffies),
 * replacing any previously scheduled expiry.
 */
void
mod_timer(struct timer_list *timer, unsigned long expires)
{

	timer->expires = expires;
	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(expires),
	    &linux_timer_callback_wrapper, timer);
}
1308 
/*
 * Arm the timer using the expiry value already stored in
 * timer->expires by the caller.
 */
void
add_timer(struct timer_list *timer)
{

	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(timer->expires),
	    &linux_timer_callback_wrapper, timer);
}
1317 
/*
 * Like add_timer(), but bind the callout to a specific CPU so the
 * timer function runs there.
 */
void
add_timer_on(struct timer_list *timer, int cpu)
{

	callout_reset_on(&timer->timer_callout,
	    linux_timer_jiffies_until(timer->expires),
	    &linux_timer_callback_wrapper, timer, cpu);
}
1326 
1327 static void
1328 linux_timer_init(void *arg)
1329 {
1330 
1331 	/*
1332 	 * Compute an internal HZ value which can divide 2**32 to
1333 	 * avoid timer rounding problems when the tick value wraps
1334 	 * around 2**32:
1335 	 */
1336 	linux_timer_hz_mask = 1;
1337 	while (linux_timer_hz_mask < (unsigned long)hz)
1338 		linux_timer_hz_mask *= 2;
1339 	linux_timer_hz_mask--;
1340 }
1341 SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL);
1342 
/*
 * Post a completion: increment the done counter under the sleep queue
 * lock and wake either one waiter ("all" == 0) or every waiter.  The
 * sleep queue chain lock on "c" serializes against the wait side.
 */
void
linux_complete_common(struct completion *c, int all)
{
	int wakeup_swapper;

	sleepq_lock(c);
	c->done++;
	if (all)
		wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
	else
		wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(c);
	/* A swapped-out waiter was made runnable; poke the swapper. */
	if (wakeup_swapper)
		kick_proc0();
}
1358 
1359 /*
1360  * Indefinite wait for done != 0 with or without signals.
1361  */
1362 long
1363 linux_wait_for_common(struct completion *c, int flags)
1364 {
1365 	long error;
1366 
1367 	if (SCHEDULER_STOPPED())
1368 		return (0);
1369 
1370 	DROP_GIANT();
1371 
1372 	if (flags != 0)
1373 		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
1374 	else
1375 		flags = SLEEPQ_SLEEP;
1376 	error = 0;
1377 	for (;;) {
1378 		sleepq_lock(c);
1379 		if (c->done)
1380 			break;
1381 		sleepq_add(c, NULL, "completion", flags, 0);
1382 		if (flags & SLEEPQ_INTERRUPTIBLE) {
1383 			if (sleepq_wait_sig(c, 0) != 0) {
1384 				error = -ERESTARTSYS;
1385 				goto intr;
1386 			}
1387 		} else
1388 			sleepq_wait(c, 0);
1389 	}
1390 	c->done--;
1391 	sleepq_release(c);
1392 
1393 intr:
1394 	PICKUP_GIANT();
1395 
1396 	return (error);
1397 }
1398 
1399 /*
1400  * Time limited wait for done != 0 with or without signals.
1401  */
1402 long
1403 linux_wait_for_timeout_common(struct completion *c, long timeout, int flags)
1404 {
1405 	long end = jiffies + timeout, error;
1406 	int ret;
1407 
1408 	if (SCHEDULER_STOPPED())
1409 		return (0);
1410 
1411 	DROP_GIANT();
1412 
1413 	if (flags != 0)
1414 		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
1415 	else
1416 		flags = SLEEPQ_SLEEP;
1417 
1418 	error = 0;
1419 	ret = 0;
1420 	for (;;) {
1421 		sleepq_lock(c);
1422 		if (c->done)
1423 			break;
1424 		sleepq_add(c, NULL, "completion", flags, 0);
1425 		sleepq_set_timeout(c, linux_timer_jiffies_until(end));
1426 		if (flags & SLEEPQ_INTERRUPTIBLE)
1427 			ret = sleepq_timedwait_sig(c, 0);
1428 		else
1429 			ret = sleepq_timedwait(c, 0);
1430 		if (ret != 0) {
1431 			/* check for timeout or signal */
1432 			if (ret == EWOULDBLOCK)
1433 				error = 0;
1434 			else
1435 				error = -ERESTARTSYS;
1436 			goto intr;
1437 		}
1438 	}
1439 	c->done--;
1440 	sleepq_release(c);
1441 
1442 intr:
1443 	PICKUP_GIANT();
1444 
1445 	/* return how many jiffies are left */
1446 	return (ret != 0 ? error : linux_timer_jiffies_until(end));
1447 }
1448 
1449 int
1450 linux_try_wait_for_completion(struct completion *c)
1451 {
1452 	int isdone;
1453 
1454 	isdone = 1;
1455 	sleepq_lock(c);
1456 	if (c->done)
1457 		c->done--;
1458 	else
1459 		isdone = 0;
1460 	sleepq_release(c);
1461 	return (isdone);
1462 }
1463 
1464 int
1465 linux_completion_done(struct completion *c)
1466 {
1467 	int isdone;
1468 
1469 	isdone = 1;
1470 	sleepq_lock(c);
1471 	if (c->done == 0)
1472 		isdone = 0;
1473 	sleepq_release(c);
1474 	return (isdone);
1475 }
1476 
/*
 * Kobject release for dynamically allocated linux_cdev structures:
 * destroy the backing FreeBSD cdev (if one was created), free the
 * structure, and drop a reference on the parent kobject.
 * NOTE(review): presumably the parent reference was taken when this
 * cdev was added under it -- verify against the cdev_add path.
 */
static void
linux_cdev_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kfree(cdev);
	kobject_put(parent);
}
1490 
/*
 * Kobject release for statically allocated (embedded) linux_cdev
 * structures.  Identical to linux_cdev_release() except the structure
 * itself is not freed, since its storage is owned elsewhere.
 */
static void
linux_cdev_static_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kobject_put(parent);
}
1503 
/* Kobject type for linux_cdev structures allocated dynamically. */
const struct kobj_type linux_cdev_ktype = {
	.release = linux_cdev_release,
};

/* Kobject type for linux_cdev structures embedded in other storage. */
const struct kobj_type linux_cdev_static_ktype = {
	.release = linux_cdev_static_release,
};
1511 
1512 static void
1513 linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate)
1514 {
1515 	struct notifier_block *nb;
1516 
1517 	nb = arg;
1518 	if (linkstate == LINK_STATE_UP)
1519 		nb->notifier_call(nb, NETDEV_UP, ifp);
1520 	else
1521 		nb->notifier_call(nb, NETDEV_DOWN, ifp);
1522 }
1523 
1524 static void
1525 linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp)
1526 {
1527 	struct notifier_block *nb;
1528 
1529 	nb = arg;
1530 	nb->notifier_call(nb, NETDEV_REGISTER, ifp);
1531 }
1532 
1533 static void
1534 linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp)
1535 {
1536 	struct notifier_block *nb;
1537 
1538 	nb = arg;
1539 	nb->notifier_call(nb, NETDEV_UNREGISTER, ifp);
1540 }
1541 
1542 static void
1543 linux_handle_iflladdr_event(void *arg, struct ifnet *ifp)
1544 {
1545 	struct notifier_block *nb;
1546 
1547 	nb = arg;
1548 	nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp);
1549 }
1550 
1551 static void
1552 linux_handle_ifaddr_event(void *arg, struct ifnet *ifp)
1553 {
1554 	struct notifier_block *nb;
1555 
1556 	nb = arg;
1557 	nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp);
1558 }
1559 
/*
 * Hook a Linux netdevice notifier block up to the corresponding
 * FreeBSD eventhandlers.  The returned tags are stored per event so
 * unregister_netdevice_notifier() can deregister them.  Note that
 * both NETDEV_UP and NETDEV_DOWN notifications are delivered by the
 * single link-event handler registered under the NETDEV_UP tag.
 * Always returns 0.
 */
int
register_netdevice_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER(
	    ifnet_link_event, linux_handle_ifnet_link_event, nb, 0);
	nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0);
	nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0);
	nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER(
	    iflladdr_event, linux_handle_iflladdr_event, nb, 0);

	return (0);
}
1575 
1576 int
1577 register_inetaddr_notifier(struct notifier_block *nb)
1578 {
1579 
1580         nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER(
1581             ifaddr_event, linux_handle_ifaddr_event, nb, 0);
1582         return (0);
1583 }
1584 
1585 int
1586 unregister_netdevice_notifier(struct notifier_block *nb)
1587 {
1588 
1589         EVENTHANDLER_DEREGISTER(ifnet_link_event,
1590 	    nb->tags[NETDEV_UP]);
1591         EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
1592 	    nb->tags[NETDEV_REGISTER]);
1593         EVENTHANDLER_DEREGISTER(ifnet_departure_event,
1594 	    nb->tags[NETDEV_UNREGISTER]);
1595         EVENTHANDLER_DEREGISTER(iflladdr_event,
1596 	    nb->tags[NETDEV_CHANGEADDR]);
1597 
1598 	return (0);
1599 }
1600 
1601 int
1602 unregister_inetaddr_notifier(struct notifier_block *nb)
1603 {
1604 
1605         EVENTHANDLER_DEREGISTER(ifaddr_event,
1606             nb->tags[NETDEV_CHANGEIFADDR]);
1607 
1608         return (0);
1609 }
1610 
/*
 * Carries the caller's comparator and private cookie through
 * qsort_r() into linux_le_cmp().
 */
struct list_sort_thunk {
	int (*cmp)(void *, struct list_head *, struct list_head *);
	void *priv;
};
1615 
/*
 * qsort_r() comparison adapter: "d1"/"d2" point at elements of the
 * list_head pointer array built by list_sort(), while "priv" carries
 * the thunk holding the caller's comparator and cookie.
 */
static inline int
linux_le_cmp(void *priv, const void *d1, const void *d2)
{
	struct list_head *le1, *le2;
	struct list_sort_thunk *thunk;

	thunk = priv;
	le1 = *(__DECONST(struct list_head **, d1));
	le2 = *(__DECONST(struct list_head **, d2));
	return ((thunk->cmp)(thunk->priv, le1, le2));
}
1627 
/*
 * Sort a Linux-style linked list with the caller-supplied comparator.
 * Implementation: copy all element pointers into a temporary array,
 * sort the array, then relink the list in sorted order.  Allocates
 * with M_WAITOK, so this may sleep and must not be called from a
 * non-sleepable context.  Note the FreeBSD kernel qsort_r() argument
 * order (thunk before the comparison function).
 */
void
list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv,
    struct list_head *a, struct list_head *b))
{
	struct list_sort_thunk thunk;
	struct list_head **ar, *le;
	size_t count, i;

	count = 0;
	list_for_each(le, head)
		count++;
	ar = malloc(sizeof(struct list_head *) * count, M_KMALLOC, M_WAITOK);
	i = 0;
	list_for_each(le, head)
		ar[i++] = le;
	thunk.cmp = cmp;
	thunk.priv = priv;
	qsort_r(ar, count, sizeof(struct list_head *), &thunk, linux_le_cmp);
	/* Rebuild the list from the sorted pointer array. */
	INIT_LIST_HEAD(head);
	for (i = 0; i < count; i++)
		list_add_tail(ar[i], head);
	free(ar, M_KMALLOC);
}
1651 
/*
 * Interrupt trampoline: establish a Linux "current" task context for
 * the interrupt thread, then call the Linux IRQ handler with its
 * registered number and argument.
 */
void
linux_irq_handler(void *ent)
{
	struct irq_ent *irqe;

	linux_set_current(curthread);

	irqe = ent;
	irqe->handler(irqe->irq, irqe->arg);
}
1662 
#if defined(__i386__) || defined(__amd64__)
/*
 * Write back and invalidate the caches on all CPUs, as Linux's
 * wbinvd_on_all_cpus() does.  Always returns 0.
 */
int
linux_wbinvd_on_all_cpus(void)
{

	pmap_invalidate_cache();
	return (0);
}
#endif
1672 
/*
 * Run "callback(data)" on every CPU via an SMP rendezvous and wait
 * for all of them to finish.  Always returns 0.
 */
int
linux_on_each_cpu(void callback(void *), void *data)
{

	smp_rendezvous(smp_no_rendezvous_barrier, callback,
	    smp_no_rendezvous_barrier, data);
	return (0);
}
1681 
/*
 * Look up a registered Linux character device by name and major/minor
 * number.  Returns the matching linux_cdev or NULL when none exists.
 */
struct linux_cdev *
linux_find_cdev(const char *name, unsigned major, unsigned minor)
{
	int unit = MKDEV(major, minor);
	struct cdev *cdev;

	dev_lock();
	LIST_FOREACH(cdev, &linuxcdevsw.d_devs, si_list) {
		/*
		 * NOTE(review): assumes si_drv1 is non-NULL for every
		 * device hanging off linuxcdevsw -- verify against the
		 * device creation path.
		 */
		struct linux_cdev *ldev = cdev->si_drv1;
		if (dev2unit(cdev) == unit &&
		    strcmp(kobject_name(&ldev->kobj), name) == 0) {
			break;
		}
	}
	dev_unlock();

	return (cdev != NULL ? cdev->si_drv1 : NULL);
}
1700 
/*
 * Register "count" character devices with minor numbers starting at
 * "baseminor" under the given major, all sharing one fops table.
 * Returns 0 on success or the first cdev_add() error.
 *
 * NOTE(review): on a mid-loop failure, devices added by earlier
 * iterations are not rolled back and the failing cdev allocation is
 * not released -- confirm whether callers tolerate this.
 */
int
__register_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add(cdev, makedev(major, i), 1);
		if (ret != 0)
			break;
	}
	return (ret);
}
1721 
/*
 * Like __register_chrdev(), but additionally sets the owner uid/gid
 * and permission mode on each created device node.
 *
 * NOTE(review): shares __register_chrdev()'s behavior of not rolling
 * back earlier additions when a later cdev_add_ext() fails -- verify.
 */
int
__register_chrdev_p(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops, uid_t uid,
    gid_t gid, int mode)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add_ext(cdev, makedev(major, i), uid, gid, mode);
		if (ret != 0)
			break;
	}
	return (ret);
}
1743 
/*
 * Tear down the devices created by __register_chrdev(): look each one
 * up by name and minor number and delete it.  Minors that are not
 * found are skipped silently.
 */
void
__unregister_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name)
{
	struct linux_cdev *cdevp;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdevp = linux_find_cdev(name, major, i);
		if (cdevp != NULL)
			cdev_del(cdevp);
	}
}
1757 
#if defined(__i386__) || defined(__amd64__)
/* Set at boot: true when the CPU advertises the CLFLUSH instruction. */
bool linux_cpu_has_clflush;
#endif

/*
 * One-time LinuxKPI bring-up: probe CPU features, create the sysctl
 * nodes backing the Linux "class"/"device" kobject hierarchy, register
 * the misc class, and initialize the PCI lists and vmmap hash.
 */
static void
linux_compat_init(void *arg)
{
	struct sysctl_oid *rootoid;
	int i;

#if defined(__i386__) || defined(__amd64__)
	linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH);
#endif
	rw_init(&linux_vma_lock, "lkpi-vma-lock");

	/* Mirror the Linux /sys hierarchy under the sysctl tree. */
	rootoid = SYSCTL_ADD_ROOT_NODE(NULL,
	    OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
	kobject_init(&linux_class_root, &linux_class_ktype);
	kobject_set_name(&linux_class_root, "class");
	linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid),
	    OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class");
	kobject_init(&linux_root_device.kobj, &linux_dev_ktype);
	kobject_set_name(&linux_root_device.kobj, "device");
	linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL,
	    SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL,
	    "device");
	linux_root_device.bsddev = root_bus;
	linux_class_misc.name = "misc";
	class_register(&linux_class_misc);
	INIT_LIST_HEAD(&pci_drivers);
	INIT_LIST_HEAD(&pci_devices);
	spin_lock_init(&pci_lock);
	/* Set up the ioremap/vmap bookkeeping hash. */
	mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF);
	for (i = 0; i < VMMAP_HASH_SIZE; i++)
		LIST_INIT(&vmmaphead[i]);
}
SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL);
1795 
/*
 * Module teardown: free the names allocated for the root kobjects and
 * destroy the VMA lock.
 */
static void
linux_compat_uninit(void *arg)
{
	linux_kobject_kfree_name(&linux_class_root);
	linux_kobject_kfree_name(&linux_root_device.kobj);
	linux_kobject_kfree_name(&linux_class_misc.kobj);

	rw_destroy(&linux_vma_lock);
}
SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);
1806 
1807 /*
1808  * NOTE: Linux frequently uses "unsigned long" for pointer to integer
1809  * conversion and vice versa, where in FreeBSD "uintptr_t" would be
1810  * used. Assert these types have the same size, else some parts of the
1811  * LinuxKPI may not work like expected:
1812  */
1813 CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t));
1814