xref: /freebsd/sys/compat/linuxkpi/common/src/linux_compat.c (revision 38f004fbb0f684e25399b3428b870cf006eb9cae)
1 /*-
2  * Copyright (c) 2010 Isilon Systems, Inc.
3  * Copyright (c) 2010 iX Systems, Inc.
4  * Copyright (c) 2010 Panasas, Inc.
5  * Copyright (c) 2013-2015 Mellanox Technologies, Ltd.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice unmodified, this list of conditions, and the following
13  *    disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/kernel.h>
37 #include <sys/sysctl.h>
38 #include <sys/proc.h>
39 #include <sys/sglist.h>
40 #include <sys/sleepqueue.h>
41 #include <sys/lock.h>
42 #include <sys/mutex.h>
43 #include <sys/bus.h>
44 #include <sys/fcntl.h>
45 #include <sys/file.h>
46 #include <sys/filio.h>
47 #include <sys/rwlock.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 
52 #include <machine/stdarg.h>
53 #include <machine/pmap.h>
54 
55 #include <linux/kobject.h>
56 #include <linux/device.h>
57 #include <linux/slab.h>
58 #include <linux/module.h>
59 #include <linux/cdev.h>
60 #include <linux/file.h>
61 #include <linux/sysfs.h>
62 #include <linux/mm.h>
63 #include <linux/io.h>
64 #include <linux/vmalloc.h>
65 #include <linux/netdevice.h>
66 #include <linux/timer.h>
67 #include <linux/workqueue.h>
68 
69 #include <vm/vm_pager.h>
70 
71 MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat");
72 
73 #include <linux/rbtree.h>
74 /* Undo Linux compat changes. */
75 #undef RB_ROOT
76 #undef file
77 #undef cdev
78 #define	RB_ROOT(head)	(head)->rbh_root
79 
/* Roots of the emulated Linux device model, PCI bookkeeping and timer state. */
struct kobject linux_class_root;	/* sysfs "class" root */
struct device linux_root_device;	/* parent of all LinuxKPI devices */
struct class linux_class_misc;		/* misc-device class */
struct list_head pci_drivers;		/* registered linux pci drivers */
struct list_head pci_devices;		/* attached linux pci devices */
struct net init_net;			/* default network namespace */
spinlock_t pci_lock;			/* protects the two pci lists */

/* Power-of-two-minus-one mask derived from hz; see linux_timer_init(). */
unsigned long linux_timer_hz_mask;
89 
/*
 * Comparison callback for the generated linux_root red-black tree.
 * The tree is apparently never searched through this path; reaching
 * this function is a programming error.  panic() does not return, so
 * the missing return statement is never executed.
 */
int
panic_cmp(struct rb_node *one, struct rb_node *two)
{
	panic("no cmp");
}

RB_GENERATE(linux_root, rb_node, __entry, panic_cmp);
97 
/*
 * Format and install a new name on a kobject, freeing any previous one.
 * Returns 0 on success, -EINVAL on a formatting failure or -ENOMEM when
 * the buffer cannot be allocated.  A NULL fmt with a name already set is
 * a no-op.
 *
 * NOTE(review): when fmt is NULL and no name has been set yet, control
 * falls through to vsnprintf() with a NULL format string — presumably
 * callers never do that; confirm.
 */
int
kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args)
{
	va_list tmp_va;
	int len;
	char *old;
	char *name;
	char dummy;

	old = kobj->name;

	if (old && fmt == NULL)
		return (0);

	/* compute length of string */
	va_copy(tmp_va, args);
	len = vsnprintf(&dummy, 0, fmt, tmp_va);
	va_end(tmp_va);

	/* account for zero termination */
	len++;

	/* check for error */
	if (len < 1)
		return (-EINVAL);

	/* allocate memory for string */
	name = kzalloc(len, GFP_KERNEL);
	if (name == NULL)
		return (-ENOMEM);
	vsnprintf(name, len, fmt, args);
	kobj->name = name;

	/* free old string */
	kfree(old);

	/* filter new string: '/' cannot appear in a sysfs node name */
	for (; *name != '\0'; name++)
		if (*name == '/')
			*name = '!';
	return (0);
}
140 
141 int
142 kobject_set_name(struct kobject *kobj, const char *fmt, ...)
143 {
144 	va_list args;
145 	int error;
146 
147 	va_start(args, fmt);
148 	error = kobject_set_name_vargs(kobj, fmt, args);
149 	va_end(args);
150 
151 	return (error);
152 }
153 
/*
 * Second half of kobject_add(): link the kobject under its parent and
 * create its sysfs directory plus any default attribute files declared
 * by the ktype.  If one attribute fails, the whole directory is removed
 * again so a partially-populated node is never left behind.
 * Returns 0 or a negative errno from the sysfs layer.
 */
static int
kobject_add_complete(struct kobject *kobj, struct kobject *parent)
{
	const struct kobj_type *t;
	int error;

	kobj->parent = parent;
	error = sysfs_create_dir(kobj);
	if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) {
		struct attribute **attr;
		t = kobj->ktype;

		/* default_attrs is a NULL-terminated array */
		for (attr = t->default_attrs; *attr != NULL; attr++) {
			error = sysfs_create_file(kobj, *attr);
			if (error)
				break;
		}
		/* unwind: drop the directory (and files) on failure */
		if (error)
			sysfs_remove_dir(kobj);

	}
	return (error);
}
177 
178 int
179 kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...)
180 {
181 	va_list args;
182 	int error;
183 
184 	va_start(args, fmt);
185 	error = kobject_set_name_vargs(kobj, fmt, args);
186 	va_end(args);
187 	if (error)
188 		return (error);
189 
190 	return kobject_add_complete(kobj, parent);
191 }
192 
/*
 * Final kref release callback for a kobject: remove its sysfs directory
 * and call the ktype release method.  The name pointer is saved first
 * because the release method may free the kobject itself (see
 * linux_kobject_kfree); the name buffer is freed last.
 */
void
linux_kobject_release(struct kref *kref)
{
	struct kobject *kobj;
	char *name;

	kobj = container_of(kref, struct kobject, kref);
	sysfs_remove_dir(kobj);
	name = kobj->name;
	if (kobj->ktype && kobj->ktype->release)
		kobj->ktype->release(kobj);
	kfree(name);
}
206 
/* ktype release method for kobjects embedded in kmalloc()ed memory. */
static void
linux_kobject_kfree(struct kobject *kobj)
{
	kfree(kobj);
}
212 
213 static void
214 linux_kobject_kfree_name(struct kobject *kobj)
215 {
216 	if (kobj) {
217 		kfree(kobj->name);
218 	}
219 }
220 
/* ktype whose release frees the kobject itself (heap-allocated kobjects). */
const struct kobj_type linux_kfree_type = {
	.release = linux_kobject_kfree
};
224 
/* Default release for devices created by device_create(): free the struct. */
static void
linux_device_release(struct device *dev)
{
	pr_debug("linux_device_release: %s\n", dev_name(dev));
	kfree(dev);
}
231 
232 static ssize_t
233 linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf)
234 {
235 	struct class_attribute *dattr;
236 	ssize_t error;
237 
238 	dattr = container_of(attr, struct class_attribute, attr);
239 	error = -EIO;
240 	if (dattr->show)
241 		error = dattr->show(container_of(kobj, struct class, kobj),
242 		    dattr, buf);
243 	return (error);
244 }
245 
246 static ssize_t
247 linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf,
248     size_t count)
249 {
250 	struct class_attribute *dattr;
251 	ssize_t error;
252 
253 	dattr = container_of(attr, struct class_attribute, attr);
254 	error = -EIO;
255 	if (dattr->store)
256 		error = dattr->store(container_of(kobj, struct class, kobj),
257 		    dattr, buf, count);
258 	return (error);
259 }
260 
261 static void
262 linux_class_release(struct kobject *kobj)
263 {
264 	struct class *class;
265 
266 	class = container_of(kobj, struct class, kobj);
267 	if (class->class_release)
268 		class->class_release(class);
269 }
270 
/* sysfs show/store dispatch table for class attributes. */
static const struct sysfs_ops linux_class_sysfs = {
	.show  = linux_class_show,
	.store = linux_class_store,
};

/* kobject type used for struct class objects. */
const struct kobj_type linux_class_ktype = {
	.release = linux_class_release,
	.sysfs_ops = &linux_class_sysfs
};
280 
281 static void
282 linux_dev_release(struct kobject *kobj)
283 {
284 	struct device *dev;
285 
286 	dev = container_of(kobj, struct device, kobj);
287 	/* This is the precedence defined by linux. */
288 	if (dev->release)
289 		dev->release(dev);
290 	else if (dev->class && dev->class->dev_release)
291 		dev->class->dev_release(dev);
292 }
293 
294 static ssize_t
295 linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf)
296 {
297 	struct device_attribute *dattr;
298 	ssize_t error;
299 
300 	dattr = container_of(attr, struct device_attribute, attr);
301 	error = -EIO;
302 	if (dattr->show)
303 		error = dattr->show(container_of(kobj, struct device, kobj),
304 		    dattr, buf);
305 	return (error);
306 }
307 
308 static ssize_t
309 linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf,
310     size_t count)
311 {
312 	struct device_attribute *dattr;
313 	ssize_t error;
314 
315 	dattr = container_of(attr, struct device_attribute, attr);
316 	error = -EIO;
317 	if (dattr->store)
318 		error = dattr->store(container_of(kobj, struct device, kobj),
319 		    dattr, buf, count);
320 	return (error);
321 }
322 
/* sysfs show/store dispatch table for device attributes. */
static const struct sysfs_ops linux_dev_sysfs = {
	.show  = linux_dev_show,
	.store = linux_dev_store,
};

/* kobject type used for struct device objects. */
const struct kobj_type linux_dev_ktype = {
	.release = linux_dev_release,
	.sysfs_ops = &linux_dev_sysfs
};
332 
333 struct device *
334 device_create(struct class *class, struct device *parent, dev_t devt,
335     void *drvdata, const char *fmt, ...)
336 {
337 	struct device *dev;
338 	va_list args;
339 
340 	dev = kzalloc(sizeof(*dev), M_WAITOK);
341 	dev->parent = parent;
342 	dev->class = class;
343 	dev->devt = devt;
344 	dev->driver_data = drvdata;
345 	dev->release = linux_device_release;
346 	va_start(args, fmt);
347 	kobject_set_name_vargs(&dev->kobj, fmt, args);
348 	va_end(args);
349 	device_register(dev);
350 
351 	return (dev);
352 }
353 
/*
 * Initialize a kobject, name it and register it under the parent in
 * one call.  Returns 0 or a negative errno.
 *
 * NOTE(review): the explicit ktype/parent/name assignments appear
 * redundant — kobject_init() presumably sets ktype and
 * kobject_add_complete() sets parent again — confirm before removing.
 */
int
kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype,
    struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	kobject_init(kobj, ktype);
	kobj->ktype = ktype;
	kobj->parent = parent;
	kobj->name = NULL;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);
	return kobject_add_complete(kobj, parent);
}
373 
/*
 * cdevpriv destructor, run when the last reference to the per-open
 * private data goes away: call the driver release method, drop the
 * vnode hold taken in linux_dev_open() and free the shadow file.
 *
 * NOTE(review): f_op->release is called unconditionally — this assumes
 * every registered linux cdev provides a release method; confirm.
 */
static void
linux_file_dtor(void *cdp)
{
	struct linux_file *filp;

	filp = cdp;
	filp->f_op->release(filp->f_vnode, filp);
	vdrop(filp->f_vnode);
	kfree(filp);
}
384 
385 static int
386 linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
387 {
388 	struct linux_cdev *ldev;
389 	struct linux_file *filp;
390 	struct file *file;
391 	int error;
392 
393 	file = curthread->td_fpop;
394 	ldev = dev->si_drv1;
395 	if (ldev == NULL)
396 		return (ENODEV);
397 	filp = kzalloc(sizeof(*filp), GFP_KERNEL);
398 	filp->f_dentry = &filp->f_dentry_store;
399 	filp->f_op = ldev->ops;
400 	filp->f_flags = file->f_flag;
401 	vhold(file->f_vnode);
402 	filp->f_vnode = file->f_vnode;
403 	if (filp->f_op->open) {
404 		error = -filp->f_op->open(file->f_vnode, filp);
405 		if (error) {
406 			kfree(filp);
407 			return (error);
408 		}
409 	}
410 	error = devfs_set_cdevpriv(filp, linux_file_dtor);
411 	if (error) {
412 		filp->f_op->release(file->f_vnode, filp);
413 		kfree(filp);
414 		return (error);
415 	}
416 
417 	return 0;
418 }
419 
420 static int
421 linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
422 {
423 	struct linux_cdev *ldev;
424 	struct linux_file *filp;
425 	struct file *file;
426 	int error;
427 
428 	file = curthread->td_fpop;
429 	ldev = dev->si_drv1;
430 	if (ldev == NULL)
431 		return (0);
432 	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
433 		return (error);
434 	filp->f_flags = file->f_flag;
435         devfs_clear_cdevpriv();
436 
437 
438 	return (0);
439 }
440 
/*
 * Character device ioctl: forward to the driver unlocked_ioctl method.
 * Returns a positive errno (driver return values are negated back).
 */
static int
linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	int error;

	file = curthread->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	/*
	 * Linux does not have a generic ioctl copyin/copyout layer.  All
	 * linux ioctls must be converted to void ioctls which pass a
	 * pointer to the address of the data.  We want the actual user
	 * address so we dereference here.
	 */
	data = *(void **)data;
	if (filp->f_op->unlocked_ioctl)
		error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data);
	else
		error = ENOTTY;

	return (error);
}
471 
/*
 * Character device read: forward a single-iovec uio to the driver read
 * method.  A non-negative driver return value is the byte count, used
 * to advance the iovec and resid; a negative value is a negated errno.
 * Multi-iovec requests are not supported and panic by design.
 */
static int
linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	ssize_t bytes;
	int error;

	file = curthread->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	if (uio->uio_iovcnt != 1)
		panic("linux_dev_read: uio %p iovcnt %d",
		    uio, uio->uio_iovcnt);
	if (filp->f_op->read) {
		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			/* consume 'bytes' from the iovec */
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}
506 
/*
 * Character device write: mirror image of linux_dev_read() — forward a
 * single-iovec uio to the driver write method and advance the iovec by
 * the number of bytes the driver accepted.
 */
static int
linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	ssize_t bytes;
	int error;

	file = curthread->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	if (uio->uio_iovcnt != 1)
		panic("linux_dev_write: uio %p iovcnt %d",
		    uio, uio->uio_iovcnt);
	if (filp->f_op->write) {
		bytes = filp->f_op->write(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			/* consume 'bytes' from the iovec */
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}
541 
542 static int
543 linux_dev_poll(struct cdev *dev, int events, struct thread *td)
544 {
545 	struct linux_cdev *ldev;
546 	struct linux_file *filp;
547 	struct file *file;
548 	int revents;
549 	int error;
550 
551 	file = curthread->td_fpop;
552 	ldev = dev->si_drv1;
553 	if (ldev == NULL)
554 		return (0);
555 	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
556 		return (error);
557 	filp->f_flags = file->f_flag;
558 	if (filp->f_op->poll)
559 		revents = filp->f_op->poll(filp, NULL) & events;
560 	else
561 		revents = 0;
562 
563 	return (revents);
564 }
565 
/*
 * Character device mmap: synthesize a temporary vm_area_struct, let the
 * driver mmap method fill it in, then wrap the resulting physical range
 * in an sglist-backed VM object for the FreeBSD VM system.
 *
 * NOTE(review): vma.vm_len is read below but never initialized here —
 * it is presumably set by the driver's mmap method (via
 * remap_pfn_range or similar), along with vm_pfn and vm_page_prot;
 * confirm, otherwise this reads uninitialized stack memory.
 */
static int
linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	struct vm_area_struct vma;
	int error;

	file = curthread->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (ENODEV);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	vma.vm_start = 0;
	vma.vm_end = size;
	vma.vm_pgoff = *offset / PAGE_SIZE;
	vma.vm_pfn = 0;
	vma.vm_page_prot = 0;
	if (filp->f_op->mmap) {
		error = -filp->f_op->mmap(filp, &vma);
		if (error == 0) {
			struct sglist *sg;

			sg = sglist_alloc(1, M_WAITOK);
			sglist_append_phys(sg,
			    (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT, vma.vm_len);
			*object = vm_pager_allocate(OBJT_SG, sg, vma.vm_len,
			    nprot, 0, curthread->td_ucred);
		        if (*object == NULL) {
				sglist_free(sg);
				return (EINVAL);
			}
			*offset = 0;
			/* honor a non-default caching attribute */
			if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) {
				VM_OBJECT_WLOCK(*object);
				vm_object_set_memattr(*object,
				    vma.vm_page_prot);
				VM_OBJECT_WUNLOCK(*object);
			}
		}
	} else
		error = ENODEV;

	return (error);
}
615 
/* cdevsw used for every character device registered through the LinuxKPI. */
struct cdevsw linuxcdevsw = {
	.d_version = D_VERSION,
	.d_flags = D_TRACKCLOSE,	/* every close reaches d_close */
	.d_open = linux_dev_open,
	.d_close = linux_dev_close,
	.d_read = linux_dev_read,
	.d_write = linux_dev_write,
	.d_ioctl = linux_dev_ioctl,
	.d_mmap_single = linux_dev_mmap_single,
	.d_poll = linux_dev_poll,
};
627 
/*
 * fileops read handler for LinuxKPI files: forward a single-iovec uio
 * to the driver read method; same advance/errno convention as
 * linux_dev_read().
 */
static int
linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct linux_file *filp;
	ssize_t bytes;
	int error;

	error = 0;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	if (uio->uio_iovcnt != 1)
		panic("linux_file_read: uio %p iovcnt %d",
		    uio, uio->uio_iovcnt);
	if (filp->f_op->read) {
		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			/* consume 'bytes' from the iovec */
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}
657 
658 static int
659 linux_file_poll(struct file *file, int events, struct ucred *active_cred,
660     struct thread *td)
661 {
662 	struct linux_file *filp;
663 	int revents;
664 
665 	filp = (struct linux_file *)file->f_data;
666 	filp->f_flags = file->f_flag;
667 	if (filp->f_op->poll)
668 		revents = filp->f_op->poll(filp, NULL) & events;
669 	else
670 		revents = 0;
671 
672 	return (0);
673 }
674 
/*
 * fileops close handler: call the driver release method, clear async
 * ownership and free the shadow file.  Note the vnode argument to
 * release is NULL here, unlike the cdev path in linux_file_dtor().
 */
static int
linux_file_close(struct file *file, struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	error = -filp->f_op->release(NULL, filp);
	funsetown(&filp->f_sigio);
	kfree(filp);

	return (error);
}
689 
690 static int
691 linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred,
692     struct thread *td)
693 {
694 	struct linux_file *filp;
695 	int error;
696 
697 	filp = (struct linux_file *)fp->f_data;
698 	filp->f_flags = fp->f_flag;
699 	error = 0;
700 
701 	switch (cmd) {
702 	case FIONBIO:
703 		break;
704 	case FIOASYNC:
705 		if (filp->f_op->fasync == NULL)
706 			break;
707 		error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC);
708 		break;
709 	case FIOSETOWN:
710 		error = fsetown(*(int *)data, &filp->f_sigio);
711 		if (error == 0)
712 			error = filp->f_op->fasync(0, filp,
713 			    fp->f_flag & FASYNC);
714 		break;
715 	case FIOGETOWN:
716 		*(int *)data = fgetown(&filp->f_sigio);
717 		break;
718 	default:
719 		error = ENOTTY;
720 		break;
721 	}
722 	return (error);
723 }
724 
/* fileops stat handler: not supported for LinuxKPI files. */
static int
linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{

	return (EOPNOTSUPP);
}
732 
/* fileops fill_kinfo handler: intentionally reports nothing. */
static int
linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{

	return (0);
}
740 
/*
 * fileops for LinuxKPI files; unsupported operations route to the
 * generic invfo_* stubs (write, truncate, kqueue, chmod, chown,
 * sendfile).
 */
struct fileops linuxfileops = {
	.fo_read = linux_file_read,
	.fo_write = invfo_rdwr,
	.fo_truncate = invfo_truncate,
	.fo_kqfilter = invfo_kqfilter,
	.fo_stat = linux_file_stat,
	.fo_fill_kinfo = linux_file_fill_kinfo,
	.fo_poll = linux_file_poll,
	.fo_close = linux_file_close,
	.fo_ioctl = linux_file_ioctl,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
};
755 
756 /*
757  * Hash of vmmap addresses.  This is infrequently accessed and does not
758  * need to be particularly large.  This is done because we must store the
759  * caller's idea of the map size to properly unmap.
760  */
/* One tracked mapping: address handed to the caller plus its size. */
struct vmmap {
	LIST_ENTRY(vmmap)	vm_next;
	void 			*vm_addr;
	unsigned long		vm_size;
};

/* One hash bucket: a singly-linked list head. */
struct vmmaphd {
	struct vmmap *lh_first;
};
#define	VMMAP_HASH_SIZE	64
#define	VMMAP_HASH_MASK	(VMMAP_HASH_SIZE - 1)
/* Hash by page number so nearby mappings spread across buckets. */
#define	VM_HASH(addr)	((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK
static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE];
static struct mtx vmmaplock;		/* protects vmmaphead */
775 
776 static void
777 vmmap_add(void *addr, unsigned long size)
778 {
779 	struct vmmap *vmmap;
780 
781 	vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL);
782 	mtx_lock(&vmmaplock);
783 	vmmap->vm_size = size;
784 	vmmap->vm_addr = addr;
785 	LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next);
786 	mtx_unlock(&vmmaplock);
787 }
788 
789 static struct vmmap *
790 vmmap_remove(void *addr)
791 {
792 	struct vmmap *vmmap;
793 
794 	mtx_lock(&vmmaplock);
795 	LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next)
796 		if (vmmap->vm_addr == addr)
797 			break;
798 	if (vmmap)
799 		LIST_REMOVE(vmmap, vm_next);
800 	mtx_unlock(&vmmaplock);
801 
802 	return (vmmap);
803 }
804 
805 #if defined(__i386__) || defined(__amd64__)
806 void *
807 _ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr)
808 {
809 	void *addr;
810 
811 	addr = pmap_mapdev_attr(phys_addr, size, attr);
812 	if (addr == NULL)
813 		return (NULL);
814 	vmmap_add(addr, size);
815 
816 	return (addr);
817 }
818 #endif
819 
/*
 * Undo an ioremap(): look up the recorded size and unmap the range.
 * Addresses that were never recorded are silently ignored.
 *
 * NOTE(review): on non-x86 platforms the vmmap entry is freed without
 * any pmap_unmapdev() call — consistent with _ioremap_attr() existing
 * only on x86, but confirm nothing else adds entries expecting an
 * unmap here.
 */
void
iounmap(void *addr)
{
	struct vmmap *vmmap;

	vmmap = vmmap_remove(addr);
	if (vmmap == NULL)
		return;
#if defined(__i386__) || defined(__amd64__)
	pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size);
#endif
	kfree(vmmap);
}
833 
834 
835 void *
836 vmap(struct page **pages, unsigned int count, unsigned long flags, int prot)
837 {
838 	vm_offset_t off;
839 	size_t size;
840 
841 	size = count * PAGE_SIZE;
842 	off = kva_alloc(size);
843 	if (off == 0)
844 		return (NULL);
845 	vmmap_add((void *)off, size);
846 	pmap_qenter(off, pages, count);
847 
848 	return ((void *)off);
849 }
850 
851 void
852 vunmap(void *addr)
853 {
854 	struct vmmap *vmmap;
855 
856 	vmmap = vmmap_remove(addr);
857 	if (vmmap == NULL)
858 		return;
859 	pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE);
860 	kva_free((vm_offset_t)addr, vmmap->vm_size);
861 	kfree(vmmap);
862 }
863 
864 char *
865 kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
866 {
867 	unsigned int len;
868 	char *p;
869 	va_list aq;
870 
871 	va_copy(aq, ap);
872 	len = vsnprintf(NULL, 0, fmt, aq);
873 	va_end(aq);
874 
875 	p = kmalloc(len + 1, gfp);
876 	if (p != NULL)
877 		vsnprintf(p, len + 1, fmt, ap);
878 
879 	return (p);
880 }
881 
882 char *
883 kasprintf(gfp_t gfp, const char *fmt, ...)
884 {
885 	va_list ap;
886 	char *p;
887 
888 	va_start(ap, fmt);
889 	p = kvasprintf(gfp, fmt, ap);
890 	va_end(ap);
891 
892 	return (p);
893 }
894 
/*
 * Convert an absolute jiffies deadline into a callout tick count,
 * clamped to a minimum of one tick so already-expired (or wrapped)
 * deadlines still fire promptly instead of never.
 */
static int
linux_timer_jiffies_until(unsigned long expires)
{
	int delta = expires - jiffies;
	/* guard against already expired values */
	if (delta < 1)
		delta = 1;
	return (delta);
}
904 
905 static void
906 linux_timer_callback_wrapper(void *context)
907 {
908 	struct timer_list *timer;
909 
910 	timer = context;
911 	timer->function(timer->data);
912 }
913 
914 void
915 mod_timer(struct timer_list *timer, unsigned long expires)
916 {
917 
918 	timer->expires = expires;
919 	callout_reset(&timer->timer_callout,
920 	    linux_timer_jiffies_until(expires),
921 	    &linux_timer_callback_wrapper, timer);
922 }
923 
924 void
925 add_timer(struct timer_list *timer)
926 {
927 
928 	callout_reset(&timer->timer_callout,
929 	    linux_timer_jiffies_until(timer->expires),
930 	    &linux_timer_callback_wrapper, timer);
931 }
932 
933 static void
934 linux_timer_init(void *arg)
935 {
936 
937 	/*
938 	 * Compute an internal HZ value which can divide 2**32 to
939 	 * avoid timer rounding problems when the tick value wraps
940 	 * around 2**32:
941 	 */
942 	linux_timer_hz_mask = 1;
943 	while (linux_timer_hz_mask < (unsigned long)hz)
944 		linux_timer_hz_mask *= 2;
945 	linux_timer_hz_mask--;
946 }
947 SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL);
948 
/*
 * Signal a completion: bump the done count under the sleepqueue lock
 * and wake either one waiter or all of them.  If a swapped-out thread
 * was made runnable, kick proc0 to swap it back in.
 */
void
linux_complete_common(struct completion *c, int all)
{
	int wakeup_swapper;

	sleepq_lock(c);
	c->done++;
	if (all)
		wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
	else
		wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(c);
	if (wakeup_swapper)
		kick_proc0();
}
964 
/*
 * Indefinite wait for done != 0 with or without signals.
 *
 * Any non-zero 'flags' means the wait is interruptible.  Returns 0
 * once the completion was consumed (done is decremented) or
 * -ERESTARTSYS when a signal interrupted an interruptible wait; in the
 * signal case the done count is left untouched.  sleepq_wait*() drop
 * the sleepqueue lock, so the loop re-acquires it every iteration.
 */
long
linux_wait_for_common(struct completion *c, int flags)
{

	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;
	for (;;) {
		sleepq_lock(c);
		if (c->done)
			break;
		sleepq_add(c, NULL, "completion", flags, 0);
		if (flags & SLEEPQ_INTERRUPTIBLE) {
			if (sleepq_wait_sig(c, 0) != 0)
				return (-ERESTARTSYS);
		} else
			sleepq_wait(c, 0);
	}
	/* still holding the sleepqueue lock from the break above */
	c->done--;
	sleepq_release(c);

	return (0);
}
992 
/*
 * Time limited wait for done != 0 with or without signals.
 *
 * Returns the (clamped, always >= 1) number of jiffies remaining when
 * the completion was consumed, 0 on timeout, or -ERESTARTSYS when a
 * signal interrupted an interruptible wait.  Any non-zero 'flags'
 * means interruptible.
 */
long
linux_wait_for_timeout_common(struct completion *c, long timeout, int flags)
{
	long end = jiffies + timeout;

	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;
	for (;;) {
		int ret;

		sleepq_lock(c);
		if (c->done)
			break;
		sleepq_add(c, NULL, "completion", flags, 0);
		sleepq_set_timeout(c, linux_timer_jiffies_until(end));
		if (flags & SLEEPQ_INTERRUPTIBLE)
			ret = sleepq_timedwait_sig(c, 0);
		else
			ret = sleepq_timedwait(c, 0);
		if (ret != 0) {
			/* check for timeout or signal */
			if (ret == EWOULDBLOCK)
				return (0);
			else
				return (-ERESTARTSYS);
		}
	}
	/* still holding the sleepqueue lock from the break above */
	c->done--;
	sleepq_release(c);

	/* return how many jiffies are left */
	return (linux_timer_jiffies_until(end));
}
1031 
1032 int
1033 linux_try_wait_for_completion(struct completion *c)
1034 {
1035 	int isdone;
1036 
1037 	isdone = 1;
1038 	sleepq_lock(c);
1039 	if (c->done)
1040 		c->done--;
1041 	else
1042 		isdone = 0;
1043 	sleepq_release(c);
1044 	return (isdone);
1045 }
1046 
1047 int
1048 linux_completion_done(struct completion *c)
1049 {
1050 	int isdone;
1051 
1052 	isdone = 1;
1053 	sleepq_lock(c);
1054 	if (c->done == 0)
1055 		isdone = 0;
1056 	sleepq_release(c);
1057 	return (isdone);
1058 }
1059 
1060 void
1061 linux_delayed_work_fn(void *arg)
1062 {
1063 	struct delayed_work *work;
1064 
1065 	work = arg;
1066 	taskqueue_enqueue(work->work.taskqueue, &work->work.work_task);
1067 }
1068 
1069 void
1070 linux_work_fn(void *context, int pending)
1071 {
1072 	struct work_struct *work;
1073 
1074 	work = context;
1075 	work->fn(work);
1076 }
1077 
/*
 * No-op task used as a barrier: enqueueing it and waiting for it to
 * run flushes previously queued work.  Intentionally empty.
 */
void
linux_flush_fn(void *context, int pending)
{
}
1082 
/*
 * Create a workqueue backed by a FreeBSD taskqueue with 'cpus' worker
 * threads.  Note the &wq->taskqueue self-reference: taskqueue_create()
 * is given a pointer to the field it will itself be stored in, which is
 * what taskqueue_thread_enqueue requires.
 */
struct workqueue_struct *
linux_create_workqueue_common(const char *name, int cpus)
{
	struct workqueue_struct *wq;

	wq = kmalloc(sizeof(*wq), M_WAITOK);
	wq->taskqueue = taskqueue_create(name, M_WAITOK,
	    taskqueue_thread_enqueue,  &wq->taskqueue);
	atomic_set(&wq->draining, 0);
	taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name);

	return (wq);
}
1096 
/* Tear down a workqueue: free the backing taskqueue, then the wrapper. */
void
destroy_workqueue(struct workqueue_struct *wq)
{
	taskqueue_free(wq->taskqueue);
	kfree(wq);
}
1103 
1104 static void
1105 linux_cdev_release(struct kobject *kobj)
1106 {
1107 	struct linux_cdev *cdev;
1108 	struct kobject *parent;
1109 
1110 	cdev = container_of(kobj, struct linux_cdev, kobj);
1111 	parent = kobj->parent;
1112 	if (cdev->cdev)
1113 		destroy_dev(cdev->cdev);
1114 	kfree(cdev);
1115 	kobject_put(parent);
1116 }
1117 
1118 static void
1119 linux_cdev_static_release(struct kobject *kobj)
1120 {
1121 	struct linux_cdev *cdev;
1122 	struct kobject *parent;
1123 
1124 	cdev = container_of(kobj, struct linux_cdev, kobj);
1125 	parent = kobj->parent;
1126 	if (cdev->cdev)
1127 		destroy_dev(cdev->cdev);
1128 	kobject_put(parent);
1129 }
1130 
/* ktype for heap-allocated linux cdevs (release frees the struct). */
const struct kobj_type linux_cdev_ktype = {
	.release = linux_cdev_release,
};

/* ktype for statically allocated linux cdevs (release does not free). */
const struct kobj_type linux_cdev_static_ktype = {
	.release = linux_cdev_static_release,
};
1138 
/*
 * One-time LinuxKPI bootstrap: build the sysfs-over-sysctl root nodes
 * ("sys/class", "sys/device"), register the misc class, and initialize
 * the PCI lists and the vmmap hash.  Runs at SI_SUB_DRIVERS, after
 * linux_timer_init (SI_ORDER_FIRST vs SECOND).
 */
static void
linux_compat_init(void *arg)
{
	struct sysctl_oid *rootoid;
	int i;

	rootoid = SYSCTL_ADD_ROOT_NODE(NULL,
	    OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
	kobject_init(&linux_class_root, &linux_class_ktype);
	kobject_set_name(&linux_class_root, "class");
	linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid),
	    OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class");
	kobject_init(&linux_root_device.kobj, &linux_dev_ktype);
	kobject_set_name(&linux_root_device.kobj, "device");
	linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL,
	    SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL,
	    "device");
	linux_root_device.bsddev = root_bus;
	linux_class_misc.name = "misc";
	class_register(&linux_class_misc);
	INIT_LIST_HEAD(&pci_drivers);
	INIT_LIST_HEAD(&pci_devices);
	spin_lock_init(&pci_lock);
	mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF);
	for (i = 0; i < VMMAP_HASH_SIZE; i++)
		LIST_INIT(&vmmaphead[i]);
}
SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL);
1167 
/* Module teardown: free the names allocated for the static root kobjects. */
static void
linux_compat_uninit(void *arg)
{
	linux_kobject_kfree_name(&linux_class_root);
	linux_kobject_kfree_name(&linux_root_device.kobj);
	linux_kobject_kfree_name(&linux_class_misc.kobj);
}
SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);
1176 
1177 /*
1178  * NOTE: Linux frequently uses "unsigned long" for pointer to integer
1179  * conversion and vice versa, where in FreeBSD "uintptr_t" would be
1180  * used. Assert these types have the same size, else some parts of the
1181  * LinuxKPI may not work like expected:
1182  */
1183 CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t));
1184