xref: /titanic_51/usr/src/uts/common/os/driver.c (revision 99ebb4ca412cb0a19d77a3899a87c055b9c30fa8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/t_lock.h>
31 #include <sys/param.h>
32 #include <sys/conf.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/buf.h>
36 #include <sys/cred.h>
37 #include <sys/user.h>
38 #include <sys/stat.h>
39 #include <sys/uio.h>
40 #include <sys/vnode.h>
41 #include <sys/fs/snode.h>
42 #include <sys/open.h>
43 #include <sys/kmem.h>
44 #include <sys/file.h>
45 #include <sys/debug.h>
46 #include <sys/tnf_probe.h>
47 
48 /* Don't #include <sys/ddi.h> - it #undef's getmajor() */
49 
50 #include <sys/sunddi.h>
51 #include <sys/sunndi.h>
52 #include <sys/sunpm.h>
53 #include <sys/ddi_impldefs.h>
54 #include <sys/ndi_impldefs.h>
55 #include <sys/esunddi.h>
56 #include <sys/autoconf.h>
57 #include <sys/modctl.h>
58 #include <sys/epm.h>
59 #include <sys/dacf.h>
60 #include <sys/sunmdi.h>
61 #include <sys/instance.h>
62 #include <sys/sdt.h>
63 
/*
 * Forward declarations for the file-local helpers that issue the
 * DDI_CTLOPS_ATTACH and DDI_CTLOPS_DETACH ctlops on behalf of devi_attach()
 * and devi_detach().
 */
static void i_attach_ctlop(dev_info_t *, ddi_attach_cmd_t, ddi_pre_post_t, int);
static void i_detach_ctlop(dev_info_t *, ddi_detach_cmd_t, ddi_pre_post_t, int);

/*
 * Decide what to do when a double dev_lclose is detected.  The value is
 * handed to cmn_err() as the severity level by dev_lclose(): DEBUG kernels
 * panic, non-DEBUG kernels only warn.  As a plain global it can be
 * overridden (e.g. "set dev_lclose_ce=3" in /etc/system to force a panic).
 */
#ifdef	DEBUG
int		dev_lclose_ce = CE_PANIC;
#else	/* DEBUG */
int		dev_lclose_ce = CE_WARN;
#endif	/* DEBUG */
73 
74 /*
75  * Configuration-related entry points for nexus and leaf drivers
76  */
77 int
78 devi_identify(dev_info_t *devi)
79 {
80 	struct dev_ops *ops;
81 	int (*fn)(dev_info_t *);
82 
83 	if ((ops = ddi_get_driver(devi)) == NULL ||
84 	    (fn = ops->devo_identify) == NULL)
85 		return (-1);
86 
87 	return ((*fn)(devi));
88 }
89 
90 int
91 devi_probe(dev_info_t *devi)
92 {
93 	int rv, probe_failed;
94 	pm_ppm_cookie_t ppm_cookie;
95 	struct dev_ops *ops;
96 	int (*fn)(dev_info_t *);
97 
98 	ops = ddi_get_driver(devi);
99 	ASSERT(ops);
100 
101 	pm_pre_probe(devi, &ppm_cookie);
102 
103 	/*
104 	 * probe(9E) in 2.0 implies that you can get
105 	 * away with not writing one of these .. so we
106 	 * pretend we're 'nulldev' if we don't find one (sigh).
107 	 */
108 	if ((fn = ops->devo_probe) == NULL)
109 		rv = DDI_PROBE_DONTCARE;
110 	else
111 		rv = (*fn)(devi);
112 
113 	switch (rv) {
114 	case DDI_PROBE_DONTCARE:
115 	case DDI_PROBE_SUCCESS:
116 		probe_failed = 0;
117 		break;
118 	default:
119 		probe_failed = 1;
120 		break;
121 	}
122 	pm_post_probe(&ppm_cookie, rv, probe_failed);
123 
124 	return (rv);
125 }
126 
127 
128 /*
129  * devi_attach()
130  * 	attach a device instance to the system if the driver supplies an
131  * 	attach(9E) entrypoint.
132  */
133 int
134 devi_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
135 {
136 	struct dev_ops *ops;
137 	int error;
138 	int (*fn)(dev_info_t *, ddi_attach_cmd_t);
139 	pm_ppm_cookie_t pc;
140 
141 	if ((error = mdi_pre_attach(devi, cmd)) != DDI_SUCCESS) {
142 		return (error);
143 	}
144 
145 	pm_pre_attach(devi, &pc, cmd);
146 
147 	if ((cmd == DDI_RESUME || cmd == DDI_PM_RESUME) &&
148 	    e_ddi_parental_suspend_resume(devi)) {
149 		error = e_ddi_resume(devi, cmd);
150 		goto done;
151 	}
152 	ops = ddi_get_driver(devi);
153 	ASSERT(ops);
154 	if ((fn = ops->devo_attach) == NULL) {
155 		error = DDI_FAILURE;
156 		goto done;
157 	}
158 
159 	/*
160 	 * Call the driver's attach(9e) entrypoint
161 	 */
162 	i_attach_ctlop(devi, cmd, DDI_PRE, 0);
163 	error = (*fn)(devi, cmd);
164 	i_attach_ctlop(devi, cmd, DDI_POST, error);
165 
166 done:
167 	pm_post_attach(&pc, error);
168 	mdi_post_attach(devi, cmd, error);
169 
170 	return (error);
171 }
172 
173 /*
174  * devi_detach()
175  * 	detach a device instance from the system if the driver supplies a
176  * 	detach(9E) entrypoint.
177  */
178 int
179 devi_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
180 {
181 	struct dev_ops *ops;
182 	int error;
183 	int (*fn)(dev_info_t *, ddi_detach_cmd_t);
184 	pm_ppm_cookie_t pc;
185 
186 	ASSERT(cmd == DDI_SUSPEND || cmd == DDI_PM_SUSPEND ||
187 	    cmd == DDI_DETACH);
188 
189 	if ((cmd == DDI_SUSPEND || cmd == DDI_PM_SUSPEND) &&
190 	    e_ddi_parental_suspend_resume(devi)) {
191 		return (e_ddi_suspend(devi, cmd));
192 	}
193 	ops = ddi_get_driver(devi);
194 	ASSERT(ops);
195 	if ((fn = ops->devo_detach) == NULL)
196 		return (DDI_FAILURE);
197 
198 	if ((error = mdi_pre_detach(devi, cmd)) != DDI_SUCCESS) {
199 		return (error);
200 	}
201 	i_detach_ctlop(devi, cmd, DDI_PRE, 0);
202 	pm_pre_detach(devi, cmd, &pc);
203 
204 	/*
205 	 * Call the driver's detach routine
206 	 */
207 	error = (*fn)(devi, cmd);
208 
209 	pm_post_detach(&pc, error);
210 	i_detach_ctlop(devi, cmd, DDI_POST, error);
211 	mdi_post_detach(devi, cmd, error);
212 
213 	return (error);
214 }
215 
216 static void
217 i_attach_ctlop(dev_info_t *devi, ddi_attach_cmd_t cmd, ddi_pre_post_t w,
218     int ret)
219 {
220 	int error;
221 	struct attachspec as;
222 	dev_info_t *pdip = ddi_get_parent(devi);
223 
224 	as.cmd = cmd;
225 	as.when = w;
226 	as.pdip = pdip;
227 	as.result = ret;
228 	(void) ddi_ctlops(devi, devi, DDI_CTLOPS_ATTACH, &as, &error);
229 }
230 
231 static void
232 i_detach_ctlop(dev_info_t *devi, ddi_detach_cmd_t cmd, ddi_pre_post_t w,
233     int ret)
234 {
235 	int error;
236 	struct detachspec ds;
237 	dev_info_t *pdip = ddi_get_parent(devi);
238 
239 	ds.cmd = cmd;
240 	ds.when = w;
241 	ds.pdip = pdip;
242 	ds.result = ret;
243 	(void) ddi_ctlops(devi, devi, DDI_CTLOPS_DETACH, &ds, &error);
244 }
245 
246 /*
247  * This entry point not defined by Solaris 2.0 DDI/DKI, so
248  * its inclusion here is somewhat moot.
249  */
250 int
251 devi_reset(dev_info_t *devi, ddi_reset_cmd_t cmd)
252 {
253 	struct dev_ops *ops;
254 	int (*fn)(dev_info_t *, ddi_reset_cmd_t);
255 
256 	if ((ops = ddi_get_driver(devi)) == NULL ||
257 	    (fn = ops->devo_reset) == NULL)
258 		return (DDI_FAILURE);
259 
260 	return ((*fn)(devi, cmd));
261 }
262 
263 /*
264  * Leaf driver entry points. The following [cb]dev_* functions are *not* part
265  * of the DDI, please use functions defined in <sys/sunldi.h> and driver_lyr.c.
266  */
267 int
268 dev_open(dev_t *devp, int flag, int type, struct cred *cred)
269 {
270 	struct cb_ops   *cb;
271 
272 	cb = devopsp[getmajor(*devp)]->devo_cb_ops;
273 	return ((*cb->cb_open)(devp, flag, type, cred));
274 }
275 
276 int
277 dev_close(dev_t dev, int flag, int type, struct cred *cred)
278 {
279 	struct cb_ops   *cb;
280 
281 	cb = (devopsp[getmajor(dev)])->devo_cb_ops;
282 	return ((*cb->cb_close)(dev, flag, type, cred));
283 }
284 
285 /*
286  * New Leaf driver open entry point.  We make a vnode and go through specfs
287  * in order to obtain open close exclusions guarantees.  Note that we drop
288  * OTYP_LYR if it was specified - we are going through specfs and it provides
289  * last close semantics (FKLYR is provided to open(9E)).  Also, since
290  * spec_open will drive attach via e_ddi_hold_devi_by_dev for a makespecvp
291  * vnode with no SDIP_SET on the common snode, the dev_lopen caller no longer
292  * needs to call ddi_hold_installed_driver.
293  */
294 int
295 dev_lopen(dev_t *devp, int flag, int otype, struct cred *cred)
296 {
297 	struct vnode	*vp;
298 	int		error;
299 	struct vnode	*cvp;
300 
301 	vp = makespecvp(*devp, (otype == OTYP_BLK) ? VBLK : VCHR);
302 	error = VOP_OPEN(&vp, flag | FKLYR, cred);
303 	if (error == 0) {
304 		/* Pick up the (possibly) new dev_t value. */
305 		*devp = vp->v_rdev;
306 
307 		/*
308 		 * Place extra hold on the common vnode, which contains the
309 		 * open count, so that it is not destroyed by the VN_RELE of
310 		 * the shadow makespecvp vnode below.
311 		 */
312 		cvp = STOV(VTOCS(vp));
313 		VN_HOLD(cvp);
314 	}
315 
316 	/* release the shadow makespecvp vnode. */
317 	VN_RELE(vp);
318 	return (error);
319 }
320 
321 /*
322  * Leaf driver close entry point.  We make a vnode and go through specfs in
323  * order to obtain open close exclusions guarantees.  Note that we drop
324  * OTYP_LYR if it was specified - we are going through specfs and it provides
325  * last close semantics (FLKYR is provided to close(9E)).
326  */
327 int
328 dev_lclose(dev_t dev, int flag, int otype, struct cred *cred)
329 {
330 	struct vnode	*vp;
331 	int		error;
332 	struct vnode	*cvp;
333 	char		*funcname;
334 	ulong_t		offset;
335 
336 	vp = makespecvp(dev, (otype == OTYP_BLK) ? VBLK : VCHR);
337 	error = VOP_CLOSE(vp, flag | FKLYR, 1, (offset_t)0, cred);
338 
339 	/*
340 	 * Release the extra dev_lopen hold on the common vnode. We inline a
341 	 * VN_RELE(cvp) call so that we can detect more dev_lclose calls than
342 	 * dev_lopen calls without panic. See vn_rele.  If our inline of
343 	 * vn_rele called VOP_INACTIVE(cvp, CRED()) we would panic on the
344 	 * "release the makespecvp vnode" VN_RELE(vp) that follows  - so
345 	 * instead we diagnose this situation.  Note that the driver has
346 	 * still seen a double close(9E), but that would have occurred with
347 	 * the old dev_close implementation too.
348 	 */
349 	cvp = STOV(VTOCS(vp));
350 	mutex_enter(&cvp->v_lock);
351 	switch (cvp->v_count) {
352 	default:
353 		cvp->v_count--;
354 		break;
355 
356 	case 0:
357 		VTOS(vp)->s_commonvp = NULL;	/* avoid panic */
358 		/*FALLTHROUGH*/
359 	case 1:
360 		/*
361 		 * The following message indicates a serious problem in the
362 		 * identified driver, the driver should be fixed. If obtaining
363 		 * a panic dump is needed to diagnose the driver problem then
364 		 * adding "set dev_lclose_ce=3" to /etc/system will cause a
365 		 * panic when this occurs.
366 		 */
367 		funcname = modgetsymname((uintptr_t)caller(), &offset);
368 		cmn_err(dev_lclose_ce, "dev_lclose: extra close of dev_t 0x%lx "
369 		    "from %s`%s()", dev, mod_containing_pc(caller()),
370 		    funcname ? funcname : "unknown...");
371 		break;
372 	}
373 	mutex_exit(&cvp->v_lock);
374 
375 	/* release the makespecvp vnode. */
376 	VN_RELE(vp);
377 	return (error);
378 }
379 
380 /*
381  * Returns -1 or the instance number of the given dev_t as
382  * interpreted by the device driver.  The code may load the driver
383  * but it does not attach any instances.
384  *
385  * Instance is supposed to be a int but drivers have assumed that
386  * the pointer was a pointer to "void *" instead of a pointer to
387  * "int *" so we now explicitly pass a pointer to "void *" and then
388  * cast the result to an int when returning the value.
389  */
390 int
391 dev_to_instance(dev_t dev)
392 {
393 	major_t		major = getmajor(dev);
394 	struct dev_ops	*ops;
395 	void		*vinstance;
396 	int		error;
397 
398 	/* verify that the major number is reasonable and driver is loaded */
399 	if ((major >= devcnt) ||
400 	    ((ops = mod_hold_dev_by_major(major)) == NULL))
401 		return (-1);
402 	ASSERT(CB_DRV_INSTALLED(ops));
403 
404 	/* verify that it supports the getinfo(9E) entry point */
405 	if (ops->devo_getinfo == NULL) {
406 		mod_rele_dev_by_major(major);
407 		return (-1);
408 	}
409 
410 	/* ask the driver to extract the instance number from the devt */
411 	error = (*ops->devo_getinfo)(NULL, DDI_INFO_DEVT2INSTANCE,
412 	    (void *)dev, &vinstance);
413 
414 	/* release the driver */
415 	mod_rele_dev_by_major(major);
416 
417 	if (error != DDI_SUCCESS)
418 		return (-1);
419 
420 	return ((int)(uintptr_t)vinstance);
421 }
422 
/*
 * bdev_strategy_tnf_probe()
 *	Fire the TNF "strategy" probe (keys "io blockio") for a buf that is
 *	about to be handed to a driver's strategy routine.  The probe records
 *	the buf's device, logical block number, byte count, the buf pointer
 *	itself and its flags.  Kept as a separate function so that
 *	bdev_strategy() can call it rather than expanding the probe inline.
 */
static void
bdev_strategy_tnf_probe(struct buf *bp)
{
	/* Kernel probe */
	TNF_PROBE_5(strategy, "io blockio", /* CSTYLED */,
		tnf_device,	device,		bp->b_edev,
		tnf_diskaddr,	block,		bp->b_lblkno,
		tnf_size,	size,		bp->b_bcount,
		tnf_opaque,	buf,		bp,
		tnf_bioflags,	flags,		bp->b_flags);
}
434 
435 int
436 bdev_strategy(struct buf *bp)
437 {
438 	struct dev_ops *ops;
439 
440 	ops = devopsp[getmajor(bp->b_edev)];
441 
442 	/*
443 	 * Before we hit the io:::start probe, we need to fill in the b_dip
444 	 * field of the buf structure.  This should be -- for the most part --
445 	 * incredibly cheap.  If you're in this code looking to bum cycles,
446 	 * there is almost certainly bigger game further down the I/O path...
447 	 */
448 	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
449 	    (void *)bp->b_edev, (void **)&bp->b_dip);
450 
451 	DTRACE_IO1(start, struct buf *, bp);
452 	bp->b_flags |= B_STARTED;
453 
454 	/*
455 	 * Call the TNF probe here instead of the inline code
456 	 * to force our compiler to use the tail call optimization.
457 	 */
458 	bdev_strategy_tnf_probe(bp);
459 
460 	return (ops->devo_cb_ops->cb_strategy(bp));
461 }
462 
463 int
464 bdev_print(dev_t dev, caddr_t str)
465 {
466 	struct cb_ops	*cb;
467 
468 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
469 	return ((*cb->cb_print)(dev, str));
470 }
471 
472 int
473 bdev_size(dev_t dev)
474 {
475 	return (e_ddi_getprop(dev, VBLK, "nblocks",
476 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1));
477 }
478 
479 /*
480  * Same for 64-bit Nblocks property
481  */
482 uint64_t
483 bdev_Size(dev_t dev)
484 {
485 	return (e_ddi_getprop_int64(dev, VBLK, "Nblocks",
486 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1));
487 }
488 
489 int
490 bdev_dump(dev_t dev, caddr_t addr, daddr_t blkno, int blkcnt)
491 {
492 	struct cb_ops	*cb;
493 
494 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
495 	return ((*cb->cb_dump)(dev, addr, blkno, blkcnt));
496 }
497 
498 int
499 cdev_read(dev_t dev, struct uio *uiop, struct cred *cred)
500 {
501 	struct cb_ops	*cb;
502 
503 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
504 	return ((*cb->cb_read)(dev, uiop, cred));
505 }
506 
507 int
508 cdev_write(dev_t dev, struct uio *uiop, struct cred *cred)
509 {
510 	struct cb_ops	*cb;
511 
512 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
513 	return ((*cb->cb_write)(dev, uiop, cred));
514 }
515 
516 int
517 cdev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, struct cred *cred,
518     int *rvalp)
519 {
520 	struct cb_ops	*cb;
521 
522 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
523 	return ((*cb->cb_ioctl)(dev, cmd, arg, mode, cred, rvalp));
524 }
525 
526 int
527 cdev_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
528 	size_t *maplen, uint_t mode)
529 {
530 	struct cb_ops	*cb;
531 
532 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
533 	return ((*cb->cb_devmap)(dev, dhp, off, len, maplen, mode));
534 }
535 
/*
 * cdev_mmap()
 *	Call the caller-supplied mapping function 'mapfunc' with the given
 *	device, offset and protection, and return its result.  Unlike the
 *	other cdev_* routines this one does not look up the driver's cb_ops.
 */
int
cdev_mmap(int (*mapfunc)(dev_t, off_t, int), dev_t dev, off_t off, int prot)
{
	return (mapfunc(dev, off, prot));
}
541 
542 int
543 cdev_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
544 	    uint_t prot, uint_t maxprot, uint_t flags, cred_t *credp)
545 {
546 	struct cb_ops	*cb;
547 
548 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
549 	return ((*cb->cb_segmap)(dev, off, as, addrp,
550 	    len, prot, maxprot, flags, credp));
551 }
552 
553 int
554 cdev_poll(dev_t dev, short events, int anyyet, short *reventsp,
555 	struct pollhead **pollhdrp)
556 {
557 	struct cb_ops	*cb;
558 
559 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
560 	return ((*cb->cb_chpoll)(dev, events, anyyet, reventsp, pollhdrp));
561 }
562 
563 /*
564  * A 'size' property can be provided by a VCHR device.
565  *
566  * Since it's defined as zero for STREAMS devices, so we avoid the
567  * overhead of looking it up.  Note also that we don't force an
568  * unused driver into memory simply to ask about it's size.  We also
569  * don't bother to ask it its size unless it's already been attached
570  * (the attach routine is the earliest place the property will be created)
571  *
572  * XXX	In an ideal world, we'd call this at VOP_GETATTR() time.
573  */
574 int
575 cdev_size(dev_t dev)
576 {
577 	major_t maj;
578 	struct devnames *dnp;
579 
580 	if ((maj = getmajor(dev)) >= devcnt)
581 		return (0);
582 
583 	dnp = &(devnamesp[maj]);
584 	LOCK_DEV_OPS(&dnp->dn_lock);
585 	if (devopsp[maj] && devopsp[maj]->devo_cb_ops &&
586 	    !devopsp[maj]->devo_cb_ops->cb_str) {
587 		UNLOCK_DEV_OPS(&dnp->dn_lock);
588 		return (e_ddi_getprop(dev, VCHR, "size",
589 		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, 0));
590 	}
591 	UNLOCK_DEV_OPS(&dnp->dn_lock);
592 	return (0);
593 }
594 
595 /*
596  * same for 64-bit Size property
597  */
598 uint64_t
599 cdev_Size(dev_t dev)
600 {
601 	major_t maj;
602 	struct devnames *dnp;
603 
604 	if ((maj = getmajor(dev)) >= devcnt)
605 		return (0);
606 
607 	dnp = &(devnamesp[maj]);
608 	LOCK_DEV_OPS(&dnp->dn_lock);
609 	if (devopsp[maj] && devopsp[maj]->devo_cb_ops &&
610 	    !devopsp[maj]->devo_cb_ops->cb_str) {
611 		UNLOCK_DEV_OPS(&dnp->dn_lock);
612 		return (e_ddi_getprop_int64(dev, VCHR, "Size",
613 		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, 0));
614 	}
615 	UNLOCK_DEV_OPS(&dnp->dn_lock);
616 	return (0);
617 }
618 
619 /*
620  * XXX	This routine is poorly named, because block devices can and do
621  *	have properties (see bdev_size() above).
622  *
623  * XXX	fix the comment in devops.h that claims that cb_prop_op
624  *	is character-only.
625  */
626 int
627 cdev_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
628     char *name, caddr_t valuep, int *lengthp)
629 {
630 	struct cb_ops	*cb;
631 
632 	if ((cb = devopsp[DEVI(dip)->devi_major]->devo_cb_ops) == NULL)
633 		return (DDI_PROP_NOT_FOUND);
634 
635 	return ((*cb->cb_prop_op)(dev, dip, prop_op, mod_flags,
636 	    name, valuep, lengthp));
637 }
638