xref: /freebsd/sys/geom/geom_dev.c (revision cddbc3b40812213ff00041f79174cac0be360a2a)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2002 Poul-Henning Kamp
5  * Copyright (c) 2002 Networks Associates Technology, Inc.
6  * All rights reserved.
7  *
8  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
9  * and NAI Labs, the Security Research Division of Network Associates, Inc.
10  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
11  * DARPA CHATS research program.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. The names of the authors may not be used to endorse or promote
22  *    products derived from this software without specific prior written
23  *    permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/malloc.h>
44 #include <sys/kernel.h>
45 #include <sys/conf.h>
46 #include <sys/ctype.h>
47 #include <sys/bio.h>
48 #include <sys/bus.h>
49 #include <sys/lock.h>
50 #include <sys/mutex.h>
51 #include <sys/proc.h>
52 #include <sys/errno.h>
53 #include <sys/time.h>
54 #include <sys/disk.h>
55 #include <sys/fcntl.h>
56 #include <sys/limits.h>
57 #include <sys/sysctl.h>
58 #include <geom/geom.h>
59 #include <geom/geom_int.h>
60 #include <machine/stdarg.h>
61 
62 struct g_dev_softc {
63 	struct mtx	 sc_mtx;
64 	struct cdev	*sc_dev;
65 	struct cdev	*sc_alias;
66 	int		 sc_open;
67 	u_int		 sc_active;
68 #define	SC_A_DESTROY	(1 << 31)
69 #define	SC_A_OPEN	(1 << 30)
70 #define	SC_A_ACTIVE	(SC_A_OPEN - 1)
71 };
72 
73 static d_open_t		g_dev_open;
74 static d_close_t	g_dev_close;
75 static d_strategy_t	g_dev_strategy;
76 static d_ioctl_t	g_dev_ioctl;
77 
78 static struct cdevsw g_dev_cdevsw = {
79 	.d_version =	D_VERSION,
80 	.d_open =	g_dev_open,
81 	.d_close =	g_dev_close,
82 	.d_read =	physread,
83 	.d_write =	physwrite,
84 	.d_ioctl =	g_dev_ioctl,
85 	.d_strategy =	g_dev_strategy,
86 	.d_name =	"g_dev",
87 	.d_flags =	D_DISK | D_TRACKCLOSE,
88 };
89 
90 static g_init_t g_dev_init;
91 static g_fini_t g_dev_fini;
92 static g_taste_t g_dev_taste;
93 static g_orphan_t g_dev_orphan;
94 static g_attrchanged_t g_dev_attrchanged;
95 static g_resize_t g_dev_resize;
96 
97 static struct g_class g_dev_class	= {
98 	.name = "DEV",
99 	.version = G_VERSION,
100 	.init = g_dev_init,
101 	.fini = g_dev_fini,
102 	.taste = g_dev_taste,
103 	.orphan = g_dev_orphan,
104 	.attrchanged = g_dev_attrchanged,
105 	.resize = g_dev_resize
106 };
107 
108 /*
109  * We target 262144 (8 x 32768) sectors by default as this significantly
110  * increases the throughput on commonly used SSD's with a marginal
111  * increase in non-interruptible request latency.
112  */
113 static uint64_t g_dev_del_max_sectors = 262144;
114 SYSCTL_DECL(_kern_geom);
115 SYSCTL_NODE(_kern_geom, OID_AUTO, dev, CTLFLAG_RW, 0, "GEOM_DEV stuff");
116 SYSCTL_QUAD(_kern_geom_dev, OID_AUTO, delete_max_sectors, CTLFLAG_RW,
117     &g_dev_del_max_sectors, 0, "Maximum number of sectors in a single "
118     "delete request sent to the provider. Larger requests are chunked "
119     "so they can be interrupted. (0 = disable chunking)");
120 
121 static char *dumpdev = NULL;
122 static void
123 g_dev_init(struct g_class *mp)
124 {
125 
126 	dumpdev = kern_getenv("dumpdev");
127 }
128 
129 static void
130 g_dev_fini(struct g_class *mp)
131 {
132 
133 	freeenv(dumpdev);
134 	dumpdev = NULL;
135 }
136 
137 static int
138 g_dev_setdumpdev(struct cdev *dev, struct diocskerneldump_arg *kda,
139     struct thread *td)
140 {
141 	struct g_kerneldump kd;
142 	struct g_consumer *cp;
143 	int error, len;
144 
145 	if (dev == NULL || kda == NULL)
146 		return (clear_dumper(td));
147 
148 	cp = dev->si_drv2;
149 	len = sizeof(kd);
150 	memset(&kd, 0, len);
151 	kd.offset = 0;
152 	kd.length = OFF_MAX;
153 	error = g_io_getattr("GEOM::kerneldump", cp, &len, &kd);
154 	if (error != 0)
155 		return (error);
156 
157 	error = set_dumper(&kd.di, devtoname(dev), td, kda->kda_compression,
158 	    kda->kda_encryption, kda->kda_key, kda->kda_encryptedkeysize,
159 	    kda->kda_encryptedkey);
160 	if (error == 0)
161 		dev->si_flags |= SI_DUMPDEV;
162 
163 	return (error);
164 }
165 
166 static int
167 init_dumpdev(struct cdev *dev)
168 {
169 	struct diocskerneldump_arg kda;
170 	struct g_consumer *cp;
171 	const char *devprefix = "/dev/", *devname;
172 	int error;
173 	size_t len;
174 
175 	bzero(&kda, sizeof(kda));
176 	kda.kda_enable = 1;
177 
178 	if (dumpdev == NULL)
179 		return (0);
180 
181 	len = strlen(devprefix);
182 	devname = devtoname(dev);
183 	if (strcmp(devname, dumpdev) != 0 &&
184 	   (strncmp(dumpdev, devprefix, len) != 0 ||
185 	    strcmp(devname, dumpdev + len) != 0))
186 		return (0);
187 
188 	cp = (struct g_consumer *)dev->si_drv2;
189 	error = g_access(cp, 1, 0, 0);
190 	if (error != 0)
191 		return (error);
192 
193 	error = g_dev_setdumpdev(dev, &kda, curthread);
194 	if (error == 0) {
195 		freeenv(dumpdev);
196 		dumpdev = NULL;
197 	}
198 
199 	(void)g_access(cp, -1, 0, 0);
200 
201 	return (error);
202 }
203 
204 static void
205 g_dev_destroy(void *arg, int flags __unused)
206 {
207 	struct g_consumer *cp;
208 	struct g_geom *gp;
209 	struct g_dev_softc *sc;
210 	char buf[SPECNAMELEN + 6];
211 
212 	g_topology_assert();
213 	cp = arg;
214 	gp = cp->geom;
215 	sc = cp->private;
216 	g_trace(G_T_TOPOLOGY, "g_dev_destroy(%p(%s))", cp, gp->name);
217 	snprintf(buf, sizeof(buf), "cdev=%s", gp->name);
218 	devctl_notify_f("GEOM", "DEV", "DESTROY", buf, M_WAITOK);
219 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
220 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
221 	g_detach(cp);
222 	g_destroy_consumer(cp);
223 	g_destroy_geom(gp);
224 	mtx_destroy(&sc->sc_mtx);
225 	g_free(sc);
226 }
227 
228 void
229 g_dev_print(void)
230 {
231 	struct g_geom *gp;
232 	char const *p = "";
233 
234 	LIST_FOREACH(gp, &g_dev_class.geom, geom) {
235 		printf("%s%s", p, gp->name);
236 		p = " ";
237 	}
238 	printf("\n");
239 }
240 
241 static void
242 g_dev_set_physpath(struct g_consumer *cp)
243 {
244 	struct g_dev_softc *sc;
245 	char *physpath;
246 	int error, physpath_len;
247 
248 	if (g_access(cp, 1, 0, 0) != 0)
249 		return;
250 
251 	sc = cp->private;
252 	physpath_len = MAXPATHLEN;
253 	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
254 	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
255 	g_access(cp, -1, 0, 0);
256 	if (error == 0 && strlen(physpath) != 0) {
257 		struct cdev *dev, *old_alias_dev;
258 		struct cdev **alias_devp;
259 
260 		dev = sc->sc_dev;
261 		old_alias_dev = sc->sc_alias;
262 		alias_devp = (struct cdev **)&sc->sc_alias;
263 		make_dev_physpath_alias(MAKEDEV_WAITOK, alias_devp, dev,
264 		    old_alias_dev, physpath);
265 	} else if (sc->sc_alias) {
266 		destroy_dev((struct cdev *)sc->sc_alias);
267 		sc->sc_alias = NULL;
268 	}
269 	g_free(physpath);
270 }
271 
272 static void
273 g_dev_set_media(struct g_consumer *cp)
274 {
275 	struct g_dev_softc *sc;
276 	struct cdev *dev;
277 	char buf[SPECNAMELEN + 6];
278 
279 	sc = cp->private;
280 	dev = sc->sc_dev;
281 	snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name);
282 	devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf, M_WAITOK);
283 	devctl_notify_f("GEOM", "DEV", "MEDIACHANGE", buf, M_WAITOK);
284 	dev = sc->sc_alias;
285 	if (dev != NULL) {
286 		snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name);
287 		devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf, M_WAITOK);
288 		devctl_notify_f("GEOM", "DEV", "MEDIACHANGE", buf, M_WAITOK);
289 	}
290 }
291 
/*
 * Attribute-change method: react to "GEOM::media" and "GEOM::physpath";
 * every other attribute is ignored.
 */
static void
g_dev_attrchanged(struct g_consumer *cp, const char *attr)
{

	if (strcmp(attr, "GEOM::media") == 0)
		g_dev_set_media(cp);
	else if (strcmp(attr, "GEOM::physpath") == 0)
		g_dev_set_physpath(cp);
}
306 
307 static void
308 g_dev_resize(struct g_consumer *cp)
309 {
310 	char buf[SPECNAMELEN + 6];
311 
312 	snprintf(buf, sizeof(buf), "cdev=%s", cp->provider->name);
313 	devctl_notify_f("GEOM", "DEV", "SIZECHANGE", buf, M_WAITOK);
314 }
315 
316 struct g_provider *
317 g_dev_getprovider(struct cdev *dev)
318 {
319 	struct g_consumer *cp;
320 
321 	g_topology_assert();
322 	if (dev == NULL)
323 		return (NULL);
324 	if (dev->si_devsw != &g_dev_cdevsw)
325 		return (NULL);
326 	cp = dev->si_drv2;
327 	return (cp->provider);
328 }
329 
330 static struct g_geom *
331 g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
332 {
333 	struct g_geom *gp;
334 	struct g_geom_alias *gap;
335 	struct g_consumer *cp;
336 	struct g_dev_softc *sc;
337 	int error;
338 	struct cdev *dev, *adev;
339 	char buf[SPECNAMELEN + 6];
340 
341 	g_trace(G_T_TOPOLOGY, "dev_taste(%s,%s)", mp->name, pp->name);
342 	g_topology_assert();
343 	gp = g_new_geomf(mp, "%s", pp->name);
344 	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
345 	mtx_init(&sc->sc_mtx, "g_dev", NULL, MTX_DEF);
346 	cp = g_new_consumer(gp);
347 	cp->private = sc;
348 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
349 	error = g_attach(cp, pp);
350 	KASSERT(error == 0,
351 	    ("g_dev_taste(%s) failed to g_attach, err=%d", pp->name, error));
352 	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev,
353 	    &g_dev_cdevsw, NULL, UID_ROOT, GID_OPERATOR, 0640, "%s", gp->name);
354 	if (error != 0) {
355 		printf("%s: make_dev_p() failed (gp->name=%s, error=%d)\n",
356 		    __func__, gp->name, error);
357 		g_detach(cp);
358 		g_destroy_consumer(cp);
359 		g_destroy_geom(gp);
360 		mtx_destroy(&sc->sc_mtx);
361 		g_free(sc);
362 		return (NULL);
363 	}
364 	dev->si_flags |= SI_UNMAPPED;
365 	sc->sc_dev = dev;
366 
367 	dev->si_iosize_max = MAXPHYS;
368 	dev->si_drv2 = cp;
369 	error = init_dumpdev(dev);
370 	if (error != 0)
371 		printf("%s: init_dumpdev() failed (gp->name=%s, error=%d)\n",
372 		    __func__, gp->name, error);
373 
374 	g_dev_attrchanged(cp, "GEOM::physpath");
375 	snprintf(buf, sizeof(buf), "cdev=%s", gp->name);
376 	devctl_notify_f("GEOM", "DEV", "CREATE", buf, M_WAITOK);
377 	/*
378 	 * Now add all the aliases for this drive
379 	 */
380 	LIST_FOREACH(gap, &pp->geom->aliases, ga_next) {
381 		error = make_dev_alias_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &adev, dev,
382 		    "%s", gap->ga_alias);
383 		if (error) {
384 			printf("%s: make_dev_alias_p() failed (name=%s, error=%d)\n",
385 			    __func__, gap->ga_alias, error);
386 			continue;
387 		}
388 		snprintf(buf, sizeof(buf), "cdev=%s", gap->ga_alias);
389 		devctl_notify_f("GEOM", "DEV", "CREATE", buf, M_WAITOK);
390 	}
391 
392 	return (gp);
393 }
394 
395 static int
396 g_dev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
397 {
398 	struct g_consumer *cp;
399 	struct g_dev_softc *sc;
400 	int error, r, w, e;
401 
402 	cp = dev->si_drv2;
403 	if (cp == NULL)
404 		return (ENXIO);		/* g_dev_taste() not done yet */
405 	g_trace(G_T_ACCESS, "g_dev_open(%s, %d, %d, %p)",
406 	    cp->geom->name, flags, fmt, td);
407 
408 	r = flags & FREAD ? 1 : 0;
409 	w = flags & FWRITE ? 1 : 0;
410 #ifdef notyet
411 	e = flags & O_EXCL ? 1 : 0;
412 #else
413 	e = 0;
414 #endif
415 
416 	/*
417 	 * This happens on attempt to open a device node with O_EXEC.
418 	 */
419 	if (r + w + e == 0)
420 		return (EINVAL);
421 
422 	if (w) {
423 		/*
424 		 * When running in very secure mode, do not allow
425 		 * opens for writing of any disks.
426 		 */
427 		error = securelevel_ge(td->td_ucred, 2);
428 		if (error)
429 			return (error);
430 	}
431 	g_topology_lock();
432 	error = g_access(cp, r, w, e);
433 	g_topology_unlock();
434 	if (error == 0) {
435 		sc = cp->private;
436 		mtx_lock(&sc->sc_mtx);
437 		if (sc->sc_open == 0 && (sc->sc_active & SC_A_ACTIVE) != 0)
438 			wakeup(&sc->sc_active);
439 		sc->sc_open += r + w + e;
440 		if (sc->sc_open == 0)
441 			atomic_clear_int(&sc->sc_active, SC_A_OPEN);
442 		else
443 			atomic_set_int(&sc->sc_active, SC_A_OPEN);
444 		mtx_unlock(&sc->sc_mtx);
445 	}
446 	return (error);
447 }
448 
449 static int
450 g_dev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
451 {
452 	struct g_consumer *cp;
453 	struct g_dev_softc *sc;
454 	int error, r, w, e;
455 
456 	cp = dev->si_drv2;
457 	if (cp == NULL)
458 		return (ENXIO);
459 	g_trace(G_T_ACCESS, "g_dev_close(%s, %d, %d, %p)",
460 	    cp->geom->name, flags, fmt, td);
461 
462 	r = flags & FREAD ? -1 : 0;
463 	w = flags & FWRITE ? -1 : 0;
464 #ifdef notyet
465 	e = flags & O_EXCL ? -1 : 0;
466 #else
467 	e = 0;
468 #endif
469 
470 	/*
471 	 * The vgonel(9) - caused by eg. forced unmount of devfs - calls
472 	 * VOP_CLOSE(9) on devfs vnode without any FREAD or FWRITE flags,
473 	 * which would result in zero deltas, which in turn would cause
474 	 * panic in g_access(9).
475 	 *
476 	 * Note that we cannot zero the counters (ie. do "r = cp->acr"
477 	 * etc) instead, because the consumer might be opened in another
478 	 * devfs instance.
479 	 */
480 	if (r + w + e == 0)
481 		return (EINVAL);
482 
483 	sc = cp->private;
484 	mtx_lock(&sc->sc_mtx);
485 	sc->sc_open += r + w + e;
486 	if (sc->sc_open == 0)
487 		atomic_clear_int(&sc->sc_active, SC_A_OPEN);
488 	else
489 		atomic_set_int(&sc->sc_active, SC_A_OPEN);
490 	while (sc->sc_open == 0 && (sc->sc_active & SC_A_ACTIVE) != 0)
491 		msleep(&sc->sc_active, &sc->sc_mtx, 0, "g_dev_close", hz / 10);
492 	mtx_unlock(&sc->sc_mtx);
493 	g_topology_lock();
494 	error = g_access(cp, r, w, e);
495 	g_topology_unlock();
496 	return (error);
497 }
498 
499 /*
500  * XXX: Until we have unmessed the ioctl situation, there is a race against
501  * XXX: a concurrent orphanization.  We cannot close it by holding topology
502  * XXX: since that would prevent us from doing our job, and stalling events
503  * XXX: will break (actually: stall) the BSD disklabel hacks.
504  */
505 static int
506 g_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
507 {
508 	struct g_consumer *cp;
509 	struct g_provider *pp;
510 	off_t offset, length, chunk, odd;
511 	int i, error;
512 
513 	cp = dev->si_drv2;
514 	pp = cp->provider;
515 
516 	error = 0;
517 	KASSERT(cp->acr || cp->acw,
518 	    ("Consumer with zero access count in g_dev_ioctl"));
519 
520 	i = IOCPARM_LEN(cmd);
521 	switch (cmd) {
522 	case DIOCGSECTORSIZE:
523 		*(u_int *)data = cp->provider->sectorsize;
524 		if (*(u_int *)data == 0)
525 			error = ENOENT;
526 		break;
527 	case DIOCGMEDIASIZE:
528 		*(off_t *)data = cp->provider->mediasize;
529 		if (*(off_t *)data == 0)
530 			error = ENOENT;
531 		break;
532 	case DIOCGFWSECTORS:
533 		error = g_io_getattr("GEOM::fwsectors", cp, &i, data);
534 		if (error == 0 && *(u_int *)data == 0)
535 			error = ENOENT;
536 		break;
537 	case DIOCGFWHEADS:
538 		error = g_io_getattr("GEOM::fwheads", cp, &i, data);
539 		if (error == 0 && *(u_int *)data == 0)
540 			error = ENOENT;
541 		break;
542 	case DIOCGFRONTSTUFF:
543 		error = g_io_getattr("GEOM::frontstuff", cp, &i, data);
544 		break;
545 #ifdef COMPAT_FREEBSD11
546 	case DIOCSKERNELDUMP_FREEBSD11:
547 	    {
548 		struct diocskerneldump_arg kda;
549 
550 		bzero(&kda, sizeof(kda));
551 		kda.kda_encryption = KERNELDUMP_ENC_NONE;
552 		kda.kda_enable = (uint8_t)*(u_int *)data;
553 		if (kda.kda_enable == 0)
554 			error = g_dev_setdumpdev(NULL, NULL, td);
555 		else
556 			error = g_dev_setdumpdev(dev, &kda, td);
557 		break;
558 	    }
559 #endif
560 	case DIOCSKERNELDUMP:
561 	    {
562 		struct diocskerneldump_arg *kda;
563 		uint8_t *encryptedkey;
564 
565 		kda = (struct diocskerneldump_arg *)data;
566 		if (kda->kda_enable == 0) {
567 			error = g_dev_setdumpdev(NULL, NULL, td);
568 			break;
569 		}
570 
571 		if (kda->kda_encryption != KERNELDUMP_ENC_NONE) {
572 			if (kda->kda_encryptedkeysize <= 0 ||
573 			    kda->kda_encryptedkeysize >
574 			    KERNELDUMP_ENCKEY_MAX_SIZE) {
575 				return (EINVAL);
576 			}
577 			encryptedkey = malloc(kda->kda_encryptedkeysize, M_TEMP,
578 			    M_WAITOK);
579 			error = copyin(kda->kda_encryptedkey, encryptedkey,
580 			    kda->kda_encryptedkeysize);
581 		} else {
582 			encryptedkey = NULL;
583 		}
584 		if (error == 0) {
585 			kda->kda_encryptedkey = encryptedkey;
586 			error = g_dev_setdumpdev(dev, kda, td);
587 		}
588 		if (encryptedkey != NULL) {
589 			explicit_bzero(encryptedkey, kda->kda_encryptedkeysize);
590 			free(encryptedkey, M_TEMP);
591 		}
592 		explicit_bzero(kda, sizeof(*kda));
593 		break;
594 	    }
595 	case DIOCGFLUSH:
596 		error = g_io_flush(cp);
597 		break;
598 	case DIOCGDELETE:
599 		offset = ((off_t *)data)[0];
600 		length = ((off_t *)data)[1];
601 		if ((offset % cp->provider->sectorsize) != 0 ||
602 		    (length % cp->provider->sectorsize) != 0 || length <= 0) {
603 			printf("%s: offset=%jd length=%jd\n", __func__, offset,
604 			    length);
605 			error = EINVAL;
606 			break;
607 		}
608 		if ((cp->provider->mediasize > 0) &&
609 		    (offset >= cp->provider->mediasize)) {
610 			/*
611 			 * Catch out-of-bounds requests here. The problem is
612 			 * that due to historical GEOM I/O implementation
613 			 * peculatities, g_delete_data() would always return
614 			 * success for requests starting just the next byte
615 			 * after providers media boundary. Condition check on
616 			 * non-zero media size, since that condition would
617 			 * (most likely) cause ENXIO instead.
618 			 */
619 			error = EIO;
620 			break;
621 		}
622 		while (length > 0) {
623 			chunk = length;
624 			if (g_dev_del_max_sectors != 0 && chunk >
625 			    g_dev_del_max_sectors * cp->provider->sectorsize) {
626 				chunk = g_dev_del_max_sectors *
627 				    cp->provider->sectorsize;
628 				if (cp->provider->stripesize > 0) {
629 					odd = (offset + chunk +
630 					    cp->provider->stripeoffset) %
631 					    cp->provider->stripesize;
632 					if (chunk > odd)
633 						chunk -= odd;
634 				}
635 			}
636 			error = g_delete_data(cp, offset, chunk);
637 			length -= chunk;
638 			offset += chunk;
639 			if (error)
640 				break;
641 			/*
642 			 * Since the request size can be large, the service
643 			 * time can be is likewise.  We make this ioctl
644 			 * interruptible by checking for signals for each bio.
645 			 */
646 			if (SIGPENDING(td))
647 				break;
648 		}
649 		break;
650 	case DIOCGIDENT:
651 		error = g_io_getattr("GEOM::ident", cp, &i, data);
652 		break;
653 	case DIOCGPROVIDERNAME:
654 		if (pp == NULL)
655 			return (ENOENT);
656 		strlcpy(data, pp->name, i);
657 		break;
658 	case DIOCGSTRIPESIZE:
659 		*(off_t *)data = cp->provider->stripesize;
660 		break;
661 	case DIOCGSTRIPEOFFSET:
662 		*(off_t *)data = cp->provider->stripeoffset;
663 		break;
664 	case DIOCGPHYSPATH:
665 		error = g_io_getattr("GEOM::physpath", cp, &i, data);
666 		if (error == 0 && *(char *)data == '\0')
667 			error = ENOENT;
668 		break;
669 	case DIOCGATTR: {
670 		struct diocgattr_arg *arg = (struct diocgattr_arg *)data;
671 
672 		if (arg->len > sizeof(arg->value)) {
673 			error = EINVAL;
674 			break;
675 		}
676 		error = g_io_getattr(arg->name, cp, &arg->len, &arg->value);
677 		break;
678 	}
679 	case DIOCZONECMD: {
680 		struct disk_zone_args *zone_args =(struct disk_zone_args *)data;
681 		struct disk_zone_rep_entry *new_entries, *old_entries;
682 		struct disk_zone_report *rep;
683 		size_t alloc_size;
684 
685 		old_entries = NULL;
686 		new_entries = NULL;
687 		rep = NULL;
688 		alloc_size = 0;
689 
690 		if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES) {
691 			rep = &zone_args->zone_params.report;
692 #define	MAXENTRIES	(MAXPHYS / sizeof(struct disk_zone_rep_entry))
693 			if (rep->entries_allocated > MAXENTRIES)
694 				rep->entries_allocated = MAXENTRIES;
695 			alloc_size = rep->entries_allocated *
696 			    sizeof(struct disk_zone_rep_entry);
697 			if (alloc_size != 0)
698 				new_entries = g_malloc(alloc_size,
699 				    M_WAITOK| M_ZERO);
700 			old_entries = rep->entries;
701 			rep->entries = new_entries;
702 		}
703 		error = g_io_zonecmd(zone_args, cp);
704 		if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES &&
705 		    alloc_size != 0 && error == 0)
706 			error = copyout(new_entries, old_entries, alloc_size);
707 		if (old_entries != NULL && rep != NULL)
708 			rep->entries = old_entries;
709 		if (new_entries != NULL)
710 			g_free(new_entries);
711 		break;
712 	}
713 	default:
714 		if (cp->provider->geom->ioctl != NULL) {
715 			error = cp->provider->geom->ioctl(cp->provider, cmd, data, fflag, td);
716 		} else {
717 			error = ENOIOCTL;
718 		}
719 	}
720 
721 	return (error);
722 }
723 
724 static void
725 g_dev_done(struct bio *bp2)
726 {
727 	struct g_consumer *cp;
728 	struct g_dev_softc *sc;
729 	struct bio *bp;
730 	int active;
731 
732 	cp = bp2->bio_from;
733 	sc = cp->private;
734 	bp = bp2->bio_parent;
735 	bp->bio_error = bp2->bio_error;
736 	bp->bio_completed = bp2->bio_completed;
737 	bp->bio_resid = bp->bio_length - bp2->bio_completed;
738 	if (bp2->bio_cmd == BIO_ZONE)
739 		bcopy(&bp2->bio_zone, &bp->bio_zone, sizeof(bp->bio_zone));
740 
741 	if (bp2->bio_error != 0) {
742 		g_trace(G_T_BIO, "g_dev_done(%p) had error %d",
743 		    bp2, bp2->bio_error);
744 		bp->bio_flags |= BIO_ERROR;
745 	} else {
746 		g_trace(G_T_BIO, "g_dev_done(%p/%p) resid %ld completed %jd",
747 		    bp2, bp, bp2->bio_resid, (intmax_t)bp2->bio_completed);
748 	}
749 	g_destroy_bio(bp2);
750 	active = atomic_fetchadd_int(&sc->sc_active, -1) - 1;
751 	if ((active & SC_A_ACTIVE) == 0) {
752 		if ((active & SC_A_OPEN) == 0)
753 			wakeup(&sc->sc_active);
754 		if (active & SC_A_DESTROY)
755 			g_post_event(g_dev_destroy, cp, M_NOWAIT, NULL);
756 	}
757 	biodone(bp);
758 }
759 
760 static void
761 g_dev_strategy(struct bio *bp)
762 {
763 	struct g_consumer *cp;
764 	struct bio *bp2;
765 	struct cdev *dev;
766 	struct g_dev_softc *sc;
767 
768 	KASSERT(bp->bio_cmd == BIO_READ ||
769 	        bp->bio_cmd == BIO_WRITE ||
770 	        bp->bio_cmd == BIO_DELETE ||
771 		bp->bio_cmd == BIO_FLUSH ||
772 		bp->bio_cmd == BIO_ZONE,
773 		("Wrong bio_cmd bio=%p cmd=%d", bp, bp->bio_cmd));
774 	dev = bp->bio_dev;
775 	cp = dev->si_drv2;
776 	sc = cp->private;
777 	KASSERT(cp->acr || cp->acw,
778 	    ("Consumer with zero access count in g_dev_strategy"));
779 	biotrack(bp, __func__);
780 #ifdef INVARIANTS
781 	if ((bp->bio_offset % cp->provider->sectorsize) != 0 ||
782 	    (bp->bio_bcount % cp->provider->sectorsize) != 0) {
783 		bp->bio_resid = bp->bio_bcount;
784 		biofinish(bp, NULL, EINVAL);
785 		return;
786 	}
787 #endif
788 	KASSERT(sc->sc_open > 0, ("Closed device in g_dev_strategy"));
789 	atomic_add_int(&sc->sc_active, 1);
790 
791 	for (;;) {
792 		/*
793 		 * XXX: This is not an ideal solution, but I believe it to
794 		 * XXX: deadlock safely, all things considered.
795 		 */
796 		bp2 = g_clone_bio(bp);
797 		if (bp2 != NULL)
798 			break;
799 		pause("gdstrat", hz / 10);
800 	}
801 	KASSERT(bp2 != NULL, ("XXX: ENOMEM in a bad place"));
802 	bp2->bio_done = g_dev_done;
803 	g_trace(G_T_BIO,
804 	    "g_dev_strategy(%p/%p) offset %jd length %jd data %p cmd %d",
805 	    bp, bp2, (intmax_t)bp->bio_offset, (intmax_t)bp2->bio_length,
806 	    bp2->bio_data, bp2->bio_cmd);
807 	g_io_request(bp2, cp);
808 	KASSERT(cp->acr || cp->acw,
809 	    ("g_dev_strategy raced with g_dev_close and lost"));
810 
811 }
812 
813 /*
814  * g_dev_callback()
815  *
816  * Called by devfs when asynchronous device destruction is completed.
817  * - Mark that we have no attached device any more.
818  * - If there are no outstanding requests, schedule geom destruction.
819  *   Otherwise destruction will be scheduled later by g_dev_done().
820  */
821 
822 static void
823 g_dev_callback(void *arg)
824 {
825 	struct g_consumer *cp;
826 	struct g_dev_softc *sc;
827 	int active;
828 
829 	cp = arg;
830 	sc = cp->private;
831 	g_trace(G_T_TOPOLOGY, "g_dev_callback(%p(%s))", cp, cp->geom->name);
832 
833 	sc->sc_dev = NULL;
834 	sc->sc_alias = NULL;
835 	active = atomic_fetchadd_int(&sc->sc_active, SC_A_DESTROY);
836 	if ((active & SC_A_ACTIVE) == 0)
837 		g_post_event(g_dev_destroy, cp, M_WAITOK, NULL);
838 }
839 
840 /*
841  * g_dev_orphan()
842  *
843  * Called from below when the provider orphaned us.
844  * - Clear any dump settings.
845  * - Request asynchronous device destruction to prevent any more requests
846  *   from coming in.  The provider is already marked with an error, so
847  *   anything which comes in the interim will be returned immediately.
848  */
849 
850 static void
851 g_dev_orphan(struct g_consumer *cp)
852 {
853 	struct cdev *dev;
854 	struct g_dev_softc *sc;
855 
856 	g_topology_assert();
857 	sc = cp->private;
858 	dev = sc->sc_dev;
859 	g_trace(G_T_TOPOLOGY, "g_dev_orphan(%p(%s))", cp, cp->geom->name);
860 
861 	/* Reset any dump-area set on this device */
862 	if (dev->si_flags & SI_DUMPDEV)
863 		(void)clear_dumper(curthread);
864 
865 	/* Destroy the struct cdev *so we get no more requests */
866 	destroy_dev_sched_cb(dev, g_dev_callback, cp);
867 }
868 
/* Declare the DEV class to GEOM under the module name g_dev. */
DECLARE_GEOM_CLASS(g_dev_class, g_dev);
870