xref: /freebsd/sys/geom/eli/g_eli.c (revision b37d1deb045d7bc7877fb1d9afdb39d43130dcc4)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2005-2019 Pawel Jakub Dawidek <pawel@dawidek.net>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/cons.h>
35 #include <sys/kenv.h>
36 #include <sys/kernel.h>
37 #include <sys/linker.h>
38 #include <sys/module.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/bio.h>
42 #include <sys/sbuf.h>
43 #include <sys/sysctl.h>
44 #include <sys/malloc.h>
45 #include <sys/eventhandler.h>
46 #include <sys/kthread.h>
47 #include <sys/proc.h>
48 #include <sys/sched.h>
49 #include <sys/smp.h>
50 #include <sys/uio.h>
51 #include <sys/vnode.h>
52 
53 #include <machine/vmparam.h>
54 
55 #include <vm/uma.h>
56 #include <vm/vm.h>
57 #include <vm/swap_pager.h>
58 
59 #include <geom/geom.h>
60 #include <geom/geom_dbg.h>
61 #include <geom/eli/g_eli.h>
62 #include <geom/eli/pkcs5v2.h>
63 
64 #include <crypto/intake.h>
65 
/* Announce the "geom_eli" feature together with its description. */
FEATURE(geom_eli, "GEOM crypto module");

/* Allocation type behind M_ELI, used for GELI allocations in this file. */
MALLOC_DEFINE(M_ELI, "eli_data", "GEOM_ELI Data");

/*
 * Knobs living under the kern.geom.eli sysctl node.  Every variable is
 * declared right in front of the sysctl that exports it.
 */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_ELI stuff");
static int g_eli_version = G_ELI_VERSION;
SYSCTL_INT(_kern_geom_eli, OID_AUTO, version, CTLFLAG_RD, &g_eli_version, 0,
    "GELI version");
int g_eli_debug = 0;
SYSCTL_INT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RWTUN, &g_eli_debug, 0,
    "Debug level");
static u_int g_eli_tries = 3;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RWTUN, &g_eli_tries, 0,
    "Number of tries for entering the passphrase");
/* The prompt does not echo the passphrase by default (GETS_NOECHO). */
static u_int g_eli_visible_passphrase = GETS_NOECHO;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RWTUN,
    &g_eli_visible_passphrase, 0,
    "Visibility of passphrase prompt (0 = invisible, 1 = visible, 2 = asterisk)");
u_int g_eli_overwrites = G_ELI_OVERWRITES;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RWTUN, &g_eli_overwrites,
    0, "Number of times on-disk keys should be overwritten when destroying them");
/* 0 makes g_eli_create() use mp_ncpus as the thread count. */
static u_int g_eli_threads = 0;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RWTUN, &g_eli_threads, 0,
    "Number of threads doing crypto work");
u_int g_eli_batch = 0;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RWTUN, &g_eli_batch, 0,
    "Use crypto operations batching");
/*
 * minbufs goes through a handler, sysctl_g_eli_minbufs(), because a new
 * value may have to be applied to an already existing UMA zone.
 */
static u_int g_eli_minbufs = 16;
static int sysctl_g_eli_minbufs(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern_geom_eli, OID_AUTO, minbufs, CTLTYPE_UINT | CTLFLAG_RW |
    CTLFLAG_MPSAFE, NULL, 0, sysctl_g_eli_minbufs, "IU",
    "Number of GELI bufs reserved for swap transactions");
/*
 * Selects M_WAITOK instead of M_NOWAIT for the malloc(9) fallback in
 * g_eli_alloc_data().
 */
static bool g_eli_blocking_malloc = false;
SYSCTL_BOOL(_kern_geom_eli, OID_AUTO, blocking_malloc, CTLFLAG_RWTUN,
    &g_eli_blocking_malloc, 0, "Use blocking malloc calls for GELI buffers");
static bool g_eli_unmapped_io = true;
SYSCTL_BOOL(_kern_geom_eli, OID_AUTO, unmapped_io, CTLFLAG_RDTUN,
    &g_eli_unmapped_io, 0, "Enable support for unmapped I/O");
106 
/*
 * State of the shared "GELI buffers" UMA zone.
 */
static struct sx g_eli_umalock;	/* Controls changes to UMA zone. */
SX_SYSINIT(g_eli_umalock, &g_eli_umalock, "GELI UMA");
/* The zone itself; created by g_eli_init_uma(), NULL while it does not exist. */
static uma_zone_t g_eli_uma = NULL;
/* Item size of the zone, computed in g_eli_init_uma(). */
static int g_eli_alloc_sz;
/* Buffers taken from the zone by g_eli_alloc_data() and not yet freed. */
static volatile int g_eli_umaoutstanding;
/* Raised by g_eli_init_uma() and lowered by g_eli_fini_uma(). */
static volatile int g_eli_devs;
113 
114 /*
115  * Control the number of reserved entries in the GELI zone.
116  * If the GELI zone has already been allocated, update the zone. Otherwise,
117  * simply update the variable for use the next time the zone is created.
118  */
119 static int
120 sysctl_g_eli_minbufs(SYSCTL_HANDLER_ARGS)
121 {
122 	int error;
123 	u_int new;
124 
125 	new = g_eli_minbufs;
126 	error = sysctl_handle_int(oidp, &new, 0, req);
127 	if (error != 0 || req->newptr == NULL)
128 		return (error);
129 	sx_xlock(&g_eli_umalock);
130 	if (g_eli_uma != NULL) {
131 		if (new != g_eli_minbufs)
132 			uma_zone_reserve(g_eli_uma, new);
133 		if (new > g_eli_minbufs)
134 			uma_prealloc(g_eli_uma, new - g_eli_minbufs);
135 	}
136 	if (new != g_eli_minbufs)
137 		g_eli_minbufs = new;
138 	sx_xunlock(&g_eli_umalock);
139 	return (0);
140 }
141 
/*
 * Passphrase cached during boot, in order to be more user-friendly if
 * there are multiple providers using the same passphrase.
 *
 * The buffer is filled by fetch_loader_passphrase() and wiped by
 * zero_boot_passcache().
 */
static char cached_passphrase[256];
/* Exported read-only through sysctl; can be set with the loader tunable. */
static u_int g_eli_boot_passcache = 1;
TUNABLE_INT("kern.geom.eli.boot_passcache", &g_eli_boot_passcache);
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, boot_passcache, CTLFLAG_RD,
    &g_eli_boot_passcache, 0,
    "Passphrases are cached during boot process for possible reuse");
152 static void
153 fetch_loader_passphrase(void * dummy)
154 {
155 	char * env_passphrase;
156 
157 	KASSERT(dynamic_kenv, ("need dynamic kenv"));
158 
159 	if ((env_passphrase = kern_getenv("kern.geom.eli.passphrase")) != NULL) {
160 		/* Extract passphrase from the environment. */
161 		strlcpy(cached_passphrase, env_passphrase,
162 		    sizeof(cached_passphrase));
163 		freeenv(env_passphrase);
164 
165 		/* Wipe the passphrase from the environment. */
166 		kern_unsetenv("kern.geom.eli.passphrase");
167 	}
168 }
169 SYSINIT(geli_fetch_loader_passphrase, SI_SUB_KMEM + 1, SI_ORDER_ANY,
170     fetch_loader_passphrase, NULL);
171 
172 static void
173 zero_boot_passcache(void)
174 {
175 
176         explicit_bzero(cached_passphrase, sizeof(cached_passphrase));
177 }
178 
179 static void
180 zero_geli_intake_keys(void)
181 {
182         struct keybuf *keybuf;
183         int i;
184 
185         if ((keybuf = get_keybuf()) != NULL) {
186                 /* Scan the key buffer, clear all GELI keys. */
187                 for (i = 0; i < keybuf->kb_nents; i++) {
188                          if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) {
189                                  explicit_bzero(keybuf->kb_ents[i].ke_data,
190                                      sizeof(keybuf->kb_ents[i].ke_data));
191                                  keybuf->kb_ents[i].ke_type = KEYBUF_TYPE_NONE;
192                          }
193                 }
194         }
195 }
196 
197 static void
198 zero_intake_passcache(void *dummy)
199 {
200         zero_boot_passcache();
201         zero_geli_intake_keys();
202 }
203 EVENTHANDLER_DEFINE(mountroot, zero_intake_passcache, NULL, 0);
204 
/*
 * Event handler tag, NULL until set.
 * NOTE(review): presumably registered and deregistered in g_eli_init() /
 * g_eli_fini(), which are not part of this section of the file - confirm.
 */
static eventhandler_tag g_eli_pre_sync = NULL;

/* Defined further down; needed earlier by g_eli_resize(). */
static int g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp,
    off_t offset, struct g_eli_metadata *md);

/* Class methods referenced by g_eli_class below. */
static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static void g_eli_init(struct g_class *mp);
static void g_eli_fini(struct g_class *mp);

static g_taste_t g_eli_taste;
static g_dumpconf_t g_eli_dumpconf;

/*
 * GEOM class descriptor: names the class and wires up its control-request,
 * taste, destroy, init and fini methods.
 */
struct g_class g_eli_class = {
	.name = G_ELI_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_eli_config,
	.taste = g_eli_taste,
	.destroy_geom = g_eli_destroy_geom,
	.init = g_eli_init,
	.fini = g_eli_fini
};
227 
228 /*
229  * Code paths:
230  * BIO_READ:
231  *	g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
232  * BIO_WRITE:
233  *	g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
234  */
235 
236 /*
237  * EAGAIN from crypto(9) means, that we were probably balanced to another crypto
238  * accelerator or something like this.
239  * The function updates the SID and rerun the operation.
240  */
241 int
242 g_eli_crypto_rerun(struct cryptop *crp)
243 {
244 	struct g_eli_softc *sc;
245 	struct g_eli_worker *wr;
246 	struct bio *bp;
247 	int error;
248 
249 	bp = (struct bio *)crp->crp_opaque;
250 	sc = bp->bio_to->geom->softc;
251 	LIST_FOREACH(wr, &sc->sc_workers, w_next) {
252 		if (wr->w_number == G_ELI_WORKER(bp->bio_pflags))
253 			break;
254 	}
255 	KASSERT(wr != NULL, ("Invalid worker (%u).",
256 	    G_ELI_WORKER(bp->bio_pflags)));
257 	G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %p -> %p).",
258 	    bp->bio_cmd == BIO_READ ? "READ" : "WRITE", wr->w_sid,
259 	    crp->crp_session);
260 	wr->w_sid = crp->crp_session;
261 	crp->crp_etype = 0;
262 	error = crypto_dispatch(crp);
263 	if (error == 0)
264 		return (0);
265 	G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error);
266 	crp->crp_etype = error;
267 	return (error);
268 }
269 
270 static void
271 g_eli_getattr_done(struct bio *bp)
272 {
273 	if (bp->bio_error == 0 &&
274 	    !strcmp(bp->bio_attribute, "GEOM::physpath")) {
275 		strlcat(bp->bio_data, "/eli", bp->bio_length);
276 	}
277 	g_std_done(bp);
278 }
279 
280 /*
281  * The function is called afer reading encrypted data from the provider.
282  *
283  * g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
284  */
285 void
286 g_eli_read_done(struct bio *bp)
287 {
288 	struct g_eli_softc *sc;
289 	struct bio *pbp;
290 
291 	G_ELI_LOGREQ(2, bp, "Request done.");
292 	pbp = bp->bio_parent;
293 	if (pbp->bio_error == 0 && bp->bio_error != 0)
294 		pbp->bio_error = bp->bio_error;
295 	g_destroy_bio(bp);
296 	/*
297 	 * Do we have all sectors already?
298 	 */
299 	pbp->bio_inbed++;
300 	if (pbp->bio_inbed < pbp->bio_children)
301 		return;
302 	sc = pbp->bio_to->geom->softc;
303 	if (pbp->bio_error != 0) {
304 		G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__,
305 		    pbp->bio_error);
306 		pbp->bio_completed = 0;
307 		g_eli_free_data(pbp);
308 		g_io_deliver(pbp, pbp->bio_error);
309 		if (sc != NULL)
310 			atomic_subtract_int(&sc->sc_inflight, 1);
311 		return;
312 	}
313 	mtx_lock(&sc->sc_queue_mtx);
314 	bioq_insert_tail(&sc->sc_queue, pbp);
315 	mtx_unlock(&sc->sc_queue_mtx);
316 	wakeup(sc);
317 }
318 
319 /*
320  * The function is called after we encrypt and write data.
321  *
322  * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver
323  */
324 void
325 g_eli_write_done(struct bio *bp)
326 {
327 	struct g_eli_softc *sc;
328 	struct bio *pbp;
329 
330 	G_ELI_LOGREQ(2, bp, "Request done.");
331 	pbp = bp->bio_parent;
332 	if (pbp->bio_error == 0 && bp->bio_error != 0)
333 		pbp->bio_error = bp->bio_error;
334 	g_destroy_bio(bp);
335 	/*
336 	 * Do we have all sectors already?
337 	 */
338 	pbp->bio_inbed++;
339 	if (pbp->bio_inbed < pbp->bio_children)
340 		return;
341 	sc = pbp->bio_to->geom->softc;
342 	g_eli_free_data(pbp);
343 	if (pbp->bio_error != 0) {
344 		G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__,
345 		    pbp->bio_error);
346 		pbp->bio_completed = 0;
347 	} else
348 		pbp->bio_completed = pbp->bio_length;
349 
350 	/*
351 	 * Write is finished, send it up.
352 	 */
353 	g_io_deliver(pbp, pbp->bio_error);
354 	if (sc != NULL)
355 		atomic_subtract_int(&sc->sc_inflight, 1);
356 }
357 
358 /*
359  * This function should never be called, but GEOM made as it set ->orphan()
360  * method for every geom.
361  */
362 static void
363 g_eli_orphan_spoil_assert(struct g_consumer *cp)
364 {
365 
366 	panic("Function %s() called for %s.", __func__, cp->geom->name);
367 }
368 
369 static void
370 g_eli_orphan(struct g_consumer *cp)
371 {
372 	struct g_eli_softc *sc;
373 
374 	g_topology_assert();
375 	sc = cp->geom->softc;
376 	if (sc == NULL)
377 		return;
378 	g_eli_destroy(sc, TRUE);
379 }
380 
381 static void
382 g_eli_resize(struct g_consumer *cp)
383 {
384 	struct g_eli_softc *sc;
385 	struct g_provider *epp, *pp;
386 	off_t oldsize;
387 
388 	g_topology_assert();
389 	sc = cp->geom->softc;
390 	if (sc == NULL)
391 		return;
392 
393 	if ((sc->sc_flags & G_ELI_FLAG_AUTORESIZE) == 0) {
394 		G_ELI_DEBUG(0, "Autoresize is turned off, old size: %jd.",
395 		    (intmax_t)sc->sc_provsize);
396 		return;
397 	}
398 
399 	pp = cp->provider;
400 
401 	if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0) {
402 		struct g_eli_metadata md;
403 		u_char *sector;
404 		int error;
405 
406 		sector = NULL;
407 
408 		error = g_eli_read_metadata_offset(cp->geom->class, pp,
409 		    sc->sc_provsize - pp->sectorsize, &md);
410 		if (error != 0) {
411 			G_ELI_DEBUG(0, "Cannot read metadata from %s (error=%d).",
412 			    pp->name, error);
413 			goto iofail;
414 		}
415 
416 		md.md_provsize = pp->mediasize;
417 
418 		sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO);
419 		eli_metadata_encode(&md, sector);
420 		error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector,
421 		    pp->sectorsize);
422 		if (error != 0) {
423 			G_ELI_DEBUG(0, "Cannot store metadata on %s (error=%d).",
424 			    pp->name, error);
425 			goto iofail;
426 		}
427 		explicit_bzero(sector, pp->sectorsize);
428 		error = g_write_data(cp, sc->sc_provsize - pp->sectorsize,
429 		    sector, pp->sectorsize);
430 		if (error != 0) {
431 			G_ELI_DEBUG(0, "Cannot clear old metadata from %s (error=%d).",
432 			    pp->name, error);
433 			goto iofail;
434 		}
435 iofail:
436 		explicit_bzero(&md, sizeof(md));
437 		zfree(sector, M_ELI);
438 	}
439 
440 	oldsize = sc->sc_mediasize;
441 	sc->sc_mediasize = eli_mediasize(sc, pp->mediasize, pp->sectorsize);
442 	g_eli_key_resize(sc);
443 	sc->sc_provsize = pp->mediasize;
444 
445 	epp = LIST_FIRST(&sc->sc_geom->provider);
446 	g_resize_provider(epp, sc->sc_mediasize);
447 	G_ELI_DEBUG(0, "Device %s size changed from %jd to %jd.", epp->name,
448 	    (intmax_t)oldsize, (intmax_t)sc->sc_mediasize);
449 }
450 
451 /*
452  * BIO_READ:
453  *	G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
454  * BIO_WRITE:
455  *	G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
456  */
457 static void
458 g_eli_start(struct bio *bp)
459 {
460 	struct g_eli_softc *sc;
461 	struct g_consumer *cp;
462 	struct bio *cbp;
463 
464 	sc = bp->bio_to->geom->softc;
465 	KASSERT(sc != NULL,
466 	    ("Provider's error should be set (error=%d)(device=%s).",
467 	    bp->bio_to->error, bp->bio_to->name));
468 	G_ELI_LOGREQ(2, bp, "Request received.");
469 
470 	switch (bp->bio_cmd) {
471 	case BIO_READ:
472 	case BIO_WRITE:
473 	case BIO_GETATTR:
474 	case BIO_FLUSH:
475 	case BIO_ZONE:
476 	case BIO_SPEEDUP:
477 		break;
478 	case BIO_DELETE:
479 		/*
480 		 * If the user hasn't set the NODELETE flag, we just pass
481 		 * it down the stack and let the layers beneath us do (or
482 		 * not) whatever they do with it.  If they have, we
483 		 * reject it.  A possible extension would be an
484 		 * additional flag to take it as a hint to shred the data
485 		 * with [multiple?] overwrites.
486 		 */
487 		if (!(sc->sc_flags & G_ELI_FLAG_NODELETE))
488 			break;
489 	default:
490 		g_io_deliver(bp, EOPNOTSUPP);
491 		return;
492 	}
493 	cbp = g_clone_bio(bp);
494 	if (cbp == NULL) {
495 		g_io_deliver(bp, ENOMEM);
496 		return;
497 	}
498 	bp->bio_driver1 = cbp;
499 	bp->bio_pflags = 0;
500 	G_ELI_SET_NEW_BIO(bp->bio_pflags);
501 	switch (bp->bio_cmd) {
502 	case BIO_READ:
503 		if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) {
504 			g_eli_crypto_read(sc, bp, 0);
505 			break;
506 		}
507 		/* FALLTHROUGH */
508 	case BIO_WRITE:
509 		mtx_lock(&sc->sc_queue_mtx);
510 		bioq_insert_tail(&sc->sc_queue, bp);
511 		mtx_unlock(&sc->sc_queue_mtx);
512 		wakeup(sc);
513 		break;
514 	case BIO_GETATTR:
515 	case BIO_FLUSH:
516 	case BIO_DELETE:
517 	case BIO_SPEEDUP:
518 	case BIO_ZONE:
519 		if (bp->bio_cmd == BIO_GETATTR)
520 			cbp->bio_done = g_eli_getattr_done;
521 		else
522 			cbp->bio_done = g_std_done;
523 		cp = LIST_FIRST(&sc->sc_geom->consumer);
524 		cbp->bio_to = cp->provider;
525 		G_ELI_LOGREQ(2, cbp, "Sending request.");
526 		g_io_request(cbp, cp);
527 		break;
528 	}
529 }
530 
531 static int
532 g_eli_newsession(struct g_eli_worker *wr)
533 {
534 	struct g_eli_softc *sc;
535 	struct crypto_session_params csp;
536 	uint32_t caps;
537 	int error, new_crypto;
538 	void *key;
539 
540 	sc = wr->w_softc;
541 
542 	memset(&csp, 0, sizeof(csp));
543 	csp.csp_mode = CSP_MODE_CIPHER;
544 	csp.csp_cipher_alg = sc->sc_ealgo;
545 	csp.csp_ivlen = g_eli_ivlen(sc->sc_ealgo);
546 	csp.csp_cipher_klen = sc->sc_ekeylen / 8;
547 	if (sc->sc_ealgo == CRYPTO_AES_XTS)
548 		csp.csp_cipher_klen <<= 1;
549 	if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) {
550 		key = g_eli_key_hold(sc, 0,
551 		    LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize);
552 		csp.csp_cipher_key = key;
553 	} else {
554 		key = NULL;
555 		csp.csp_cipher_key = sc->sc_ekey;
556 	}
557 	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
558 		csp.csp_mode = CSP_MODE_ETA;
559 		csp.csp_auth_alg = sc->sc_aalgo;
560 		csp.csp_auth_klen = G_ELI_AUTH_SECKEYLEN;
561 	}
562 
563 	switch (sc->sc_crypto) {
564 	case G_ELI_CRYPTO_SW_ACCEL:
565 	case G_ELI_CRYPTO_SW:
566 		error = crypto_newsession(&wr->w_sid, &csp,
567 		    CRYPTOCAP_F_SOFTWARE);
568 		break;
569 	case G_ELI_CRYPTO_HW:
570 		error = crypto_newsession(&wr->w_sid, &csp,
571 		    CRYPTOCAP_F_HARDWARE);
572 		break;
573 	case G_ELI_CRYPTO_UNKNOWN:
574 		error = crypto_newsession(&wr->w_sid, &csp,
575 		    CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE);
576 		if (error == 0) {
577 			caps = crypto_ses2caps(wr->w_sid);
578 			if (caps & CRYPTOCAP_F_HARDWARE)
579 				new_crypto = G_ELI_CRYPTO_HW;
580 			else if (caps & CRYPTOCAP_F_ACCEL_SOFTWARE)
581 				new_crypto = G_ELI_CRYPTO_SW_ACCEL;
582 			else
583 				new_crypto = G_ELI_CRYPTO_SW;
584 			mtx_lock(&sc->sc_queue_mtx);
585 			if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN)
586 				sc->sc_crypto = new_crypto;
587 			mtx_unlock(&sc->sc_queue_mtx);
588 		}
589 		break;
590 	default:
591 		panic("%s: invalid condition", __func__);
592 	}
593 
594 	if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) {
595 		if (error)
596 			g_eli_key_drop(sc, key);
597 		else
598 			wr->w_first_key = key;
599 	}
600 
601 	return (error);
602 }
603 
604 static void
605 g_eli_freesession(struct g_eli_worker *wr)
606 {
607 	struct g_eli_softc *sc;
608 
609 	crypto_freesession(wr->w_sid);
610 	if (wr->w_first_key != NULL) {
611 		sc = wr->w_softc;
612 		g_eli_key_drop(sc, wr->w_first_key);
613 		wr->w_first_key = NULL;
614 	}
615 }
616 
617 static void
618 g_eli_cancel(struct g_eli_softc *sc)
619 {
620 	struct bio *bp;
621 
622 	mtx_assert(&sc->sc_queue_mtx, MA_OWNED);
623 
624 	while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) {
625 		KASSERT(G_ELI_IS_NEW_BIO(bp->bio_pflags),
626 		    ("Not new bio when canceling (bp=%p).", bp));
627 		g_io_deliver(bp, ENXIO);
628 	}
629 }
630 
631 static struct bio *
632 g_eli_takefirst(struct g_eli_softc *sc)
633 {
634 	struct bio *bp;
635 
636 	mtx_assert(&sc->sc_queue_mtx, MA_OWNED);
637 
638 	if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND))
639 		return (bioq_takefirst(&sc->sc_queue));
640 	/*
641 	 * Device suspended, so we skip new I/O requests.
642 	 */
643 	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
644 		if (!G_ELI_IS_NEW_BIO(bp->bio_pflags))
645 			break;
646 	}
647 	if (bp != NULL)
648 		bioq_remove(&sc->sc_queue, bp);
649 	return (bp);
650 }
651 
/*
 * This is the main function for kernel worker thread when we don't have
 * hardware acceleration and we have to do cryptography in software.
 * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM
 * threads with crypto work.
 *
 * arg is the struct g_eli_worker describing this thread.  The thread loops
 * forever taking requests off sc_queue and only leaves, through
 * kproc_exit(), once the queue yields nothing and G_ELI_FLAG_DESTROY is set.
 */
static void
g_eli_worker(void *arg)
{
	struct g_eli_softc *sc;
	struct g_eli_worker *wr;
	struct bio *bp;
	int error __diagused;

	wr = arg;
	sc = wr->w_softc;
#ifdef EARLY_AP_STARTUP
	MPASS(!sc->sc_cpubind || smp_started);
#elif defined(SMP)
	/* Before sched_bind() to a CPU, wait for all CPUs to go on-line. */
	if (sc->sc_cpubind) {
		while (!smp_started)
			tsleep(wr, 0, "geli:smp", hz / 4);
	}
#endif
	/* Set the thread priority and, if requested, bind it to a CPU. */
	thread_lock(curthread);
	sched_prio(curthread, PUSER);
	if (sc->sc_cpubind)
		sched_bind(curthread, wr->w_number % mp_ncpus);
	thread_unlock(curthread);

	G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm);

	for (;;) {
		mtx_lock(&sc->sc_queue_mtx);
again:
		/* Reached with sc_queue_mtx held, also via "goto again". */
		bp = g_eli_takefirst(sc);
		if (bp == NULL) {
			if (sc->sc_flags & G_ELI_FLAG_DESTROY) {
				/*
				 * The device is going away: fail what is left
				 * on the queue, unlink and free this worker,
				 * notify sleepers on sc_workers and exit.
				 */
				g_eli_cancel(sc);
				LIST_REMOVE(wr, w_next);
				g_eli_freesession(wr);
				free(wr, M_ELI);
				G_ELI_DEBUG(1, "Thread %s exiting.",
				    curthread->td_proc->p_comm);
				wakeup(&sc->sc_workers);
				mtx_unlock(&sc->sc_queue_mtx);
				kproc_exit(0);
			}
			while (sc->sc_flags & G_ELI_FLAG_SUSPEND) {
				if (sc->sc_inflight > 0) {
					G_ELI_DEBUG(0, "inflight=%d",
					    sc->sc_inflight);
					/*
					 * We still have inflight BIOs, so
					 * sleep and retry.
					 */
					msleep(sc, &sc->sc_queue_mtx, PRIBIO,
					    "geli:inf", hz / 5);
					goto again;
				}
				/*
				 * Suspend requested, mark the worker as
				 * suspended and go to sleep.
				 */
				if (wr->w_active) {
					g_eli_freesession(wr);
					wr->w_active = FALSE;
				}
				wakeup(&sc->sc_workers);
				msleep(sc, &sc->sc_queue_mtx, PRIBIO,
				    "geli:suspend", 0);
				/*
				 * Woken up: if the suspend flag is gone, open
				 * a fresh session before taking requests again.
				 */
				if (!wr->w_active &&
				    !(sc->sc_flags & G_ELI_FLAG_SUSPEND)) {
					error = g_eli_newsession(wr);
					KASSERT(error == 0,
					    ("g_eli_newsession() failed on resume (error=%d)",
					    error));
					wr->w_active = TRUE;
				}
				goto again;
			}
			/*
			 * Nothing to do.  PDROP leaves the mutex released
			 * after the sleep; it is taken again at the top of
			 * the loop.
			 */
			msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0);
			continue;
		}
		/* Count new requests as inflight before dropping the lock. */
		if (G_ELI_IS_NEW_BIO(bp->bio_pflags))
			atomic_add_int(&sc->sc_inflight, 1);
		mtx_unlock(&sc->sc_queue_mtx);
		if (G_ELI_IS_NEW_BIO(bp->bio_pflags)) {
			/*
			 * NOTE(review): presumably this resets the private
			 * flags to worker 0, clearing the "new" marker -
			 * confirm against the macro definition.
			 */
			G_ELI_SETWORKER(bp->bio_pflags, 0);
			if (sc->sc_flags & G_ELI_FLAG_AUTH) {
				if (bp->bio_cmd == BIO_READ)
					g_eli_auth_read(sc, bp);
				else
					g_eli_auth_run(wr, bp);
			} else {
				if (bp->bio_cmd == BIO_READ)
					g_eli_crypto_read(sc, bp, 1);
				else
					g_eli_crypto_run(wr, bp);
			}
		} else {
			/*
			 * A request that is not new (e.g. one queued by
			 * g_eli_read_done()) only needs the crypto step.
			 */
			if (sc->sc_flags & G_ELI_FLAG_AUTH)
				g_eli_auth_run(wr, bp);
			else
				g_eli_crypto_run(wr, bp);
		}
	}
}
761 
762 static int
763 g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp,
764     off_t offset, struct g_eli_metadata *md)
765 {
766 	struct g_geom *gp;
767 	struct g_consumer *cp;
768 	u_char *buf = NULL;
769 	int error;
770 
771 	g_topology_assert();
772 
773 	gp = g_new_geomf(mp, "eli:taste");
774 	gp->start = g_eli_start;
775 	gp->access = g_std_access;
776 	/*
777 	 * g_eli_read_metadata() is always called from the event thread.
778 	 * Our geom is created and destroyed in the same event, so there
779 	 * could be no orphan nor spoil event in the meantime.
780 	 */
781 	gp->orphan = g_eli_orphan_spoil_assert;
782 	gp->spoiled = g_eli_orphan_spoil_assert;
783 	cp = g_new_consumer(gp);
784 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
785 	error = g_attach(cp, pp);
786 	if (error != 0)
787 		goto end;
788 	error = g_access(cp, 1, 0, 0);
789 	if (error != 0)
790 		goto end;
791 	g_topology_unlock();
792 	buf = g_read_data(cp, offset, pp->sectorsize, &error);
793 	g_topology_lock();
794 	if (buf == NULL)
795 		goto end;
796 	error = eli_metadata_decode(buf, md);
797 	if (error != 0)
798 		goto end;
799 	/* Metadata was read and decoded successfully. */
800 end:
801 	g_free(buf);
802 	if (cp->provider != NULL) {
803 		if (cp->acr == 1)
804 			g_access(cp, -1, 0, 0);
805 		g_detach(cp);
806 	}
807 	g_destroy_consumer(cp);
808 	g_destroy_geom(gp);
809 	return (error);
810 }
811 
812 int
813 g_eli_read_metadata(struct g_class *mp, struct g_provider *pp,
814     struct g_eli_metadata *md)
815 {
816 
817 	return (g_eli_read_metadata_offset(mp, pp,
818 	    pp->mediasize - pp->sectorsize, md));
819 }
820 
821 /*
822  * The function is called when we had last close on provider and user requested
823  * to close it when this situation occur.
824  */
825 static void
826 g_eli_last_close(void *arg, int flags __unused)
827 {
828 	struct g_geom *gp;
829 	char gpname[64];
830 	int error __diagused;
831 
832 	g_topology_assert();
833 	gp = arg;
834 	strlcpy(gpname, gp->name, sizeof(gpname));
835 	error = g_eli_destroy(gp->softc, TRUE);
836 	KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).",
837 	    gpname, error));
838 	G_ELI_DEBUG(0, "Detached %s on last close.", gpname);
839 }
840 
841 int
842 g_eli_access(struct g_provider *pp, int dr, int dw, int de)
843 {
844 	struct g_eli_softc *sc;
845 	struct g_geom *gp;
846 
847 	gp = pp->geom;
848 	sc = gp->softc;
849 
850 	if (dw > 0) {
851 		if (sc->sc_flags & G_ELI_FLAG_RO) {
852 			/* Deny write attempts. */
853 			return (EROFS);
854 		}
855 		/* Someone is opening us for write, we need to remember that. */
856 		sc->sc_flags |= G_ELI_FLAG_WOPEN;
857 		return (0);
858 	}
859 	/* Is this the last close? */
860 	if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0)
861 		return (0);
862 
863 	/*
864 	 * Automatically detach on last close if requested.
865 	 */
866 	if ((sc->sc_flags & G_ELI_FLAG_RW_DETACH) ||
867 	    (sc->sc_flags & G_ELI_FLAG_WOPEN)) {
868 		g_post_event(g_eli_last_close, gp, M_WAITOK, NULL);
869 	}
870 	return (0);
871 }
872 
/*
 * Report whether the given CPU is a member of hlt_cpus_mask.  Kernels
 * built without SMP always answer 0.
 */
static int
g_eli_cpu_is_disabled(int cpu)
{
#ifndef SMP
	return (0);
#else
	return (CPU_ISSET(cpu, &hlt_cpus_mask));
#endif
}
882 
883 static void
884 g_eli_init_uma(void)
885 {
886 
887 	atomic_add_int(&g_eli_devs, 1);
888 	sx_xlock(&g_eli_umalock);
889 	if (g_eli_uma == NULL) {
890 		/*
891 		 * Calculate the maximum-sized swap buffer we are
892 		 * likely to see.
893 		 */
894 		g_eli_alloc_sz = roundup2((PAGE_SIZE + sizeof(int) +
895                     G_ELI_AUTH_SECKEYLEN) * nsw_cluster_max +
896                     sizeof(uintptr_t), PAGE_SIZE);
897 
898 		g_eli_uma = uma_zcreate("GELI buffers", g_eli_alloc_sz,
899 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
900 
901 		/* Reserve and pre-allocate pages, as appropriate. */
902 		uma_zone_reserve(g_eli_uma, g_eli_minbufs);
903 		uma_prealloc(g_eli_uma, g_eli_minbufs);
904 	}
905 	sx_xunlock(&g_eli_umalock);
906 }
907 
908 /*
909  * Try to destroy the UMA pool. This will do nothing if there are existing
910  * GELI devices or existing UMA allocations.
911  */
912 static void
913 g_eli_destroy_uma(void)
914 {
915 	uma_zone_t oldzone;
916 
917 	sx_xlock(&g_eli_umalock);
918 	/* Ensure we really should be destroying this. */
919 	if (atomic_load_int(&g_eli_devs) == 0 &&
920 	    atomic_load_int(&g_eli_umaoutstanding) == 0) {
921 		oldzone = g_eli_uma;
922 		g_eli_uma = NULL;
923 	} else
924 		oldzone = NULL;
925 	sx_xunlock(&g_eli_umalock);
926 
927 	if (oldzone != NULL)
928 		uma_zdestroy(oldzone);
929 }
930 
931 static void
932 g_eli_fini_uma(void)
933 {
934 
935 	/*
936 	 * If this is the last outstanding GELI device, try to
937 	 * destroy the UMA pool.
938 	 */
939 	if (atomic_fetchadd_int(&g_eli_devs, -1) == 1)
940 		g_eli_destroy_uma();
941 }
942 
943 /*
944  * Allocate a data buffer. If the size fits within our swap-sized buffers,
945  * try to allocate a swap-sized buffer from the UMA pool. Otherwise, fall
946  * back to using malloc.
947  *
948  * Swap-related requests are special: they can only use the UMA pool, they
949  * use M_USE_RESERVE to let them dip farther into system resources, and
950  * they always use M_NOWAIT to prevent swap operations from deadlocking.
951  */
952 bool
953 g_eli_alloc_data(struct bio *bp, int sz)
954 {
955 
956 	KASSERT(sz <= g_eli_alloc_sz || (bp->bio_flags & BIO_SWAP) == 0,
957 	    ("BIO_SWAP request for %d bytes exceeds the precalculated buffer"
958 	    " size (%d)", sz, g_eli_alloc_sz));
959 	if (sz <= g_eli_alloc_sz) {
960 		bp->bio_driver2 = uma_zalloc(g_eli_uma, M_NOWAIT |
961 		    ((bp->bio_flags & BIO_SWAP) != 0 ? M_USE_RESERVE : 0));
962 		if (bp->bio_driver2 != NULL) {
963 			bp->bio_pflags |= G_ELI_UMA_ALLOC;
964 			atomic_add_int(&g_eli_umaoutstanding, 1);
965 		}
966 		if (bp->bio_driver2 != NULL || (bp->bio_flags & BIO_SWAP) != 0)
967 			return (bp->bio_driver2 != NULL);
968 	}
969 	bp->bio_pflags &= ~(G_ELI_UMA_ALLOC);
970 	bp->bio_driver2 = malloc(sz, M_ELI, g_eli_blocking_malloc ? M_WAITOK :
971 	    M_NOWAIT);
972 	return (bp->bio_driver2 != NULL);
973 }
974 
975 /*
976  * Free a buffer from bp->bio_driver2 which was allocated with
977  * g_eli_alloc_data(). This function makes sure that the memory is freed
978  * to the correct place.
979  *
980  * Additionally, if this function frees the last outstanding UMA request
981  * and there are no open GELI devices, this will destroy the UMA pool.
982  */
983 void
984 g_eli_free_data(struct bio *bp)
985 {
986 
987 	/*
988 	 * Mimic the free(9) behavior of allowing a NULL pointer to be
989 	 * freed.
990 	 */
991 	if (bp->bio_driver2 == NULL)
992 		return;
993 
994 	if ((bp->bio_pflags & G_ELI_UMA_ALLOC) != 0) {
995 		uma_zfree(g_eli_uma, bp->bio_driver2);
996 		if (atomic_fetchadd_int(&g_eli_umaoutstanding, -1) == 1 &&
997 		    atomic_load_int(&g_eli_devs) == 0)
998 			g_eli_destroy_uma();
999 	} else
1000 		free(bp->bio_driver2, M_ELI);
1001 	bp->bio_driver2 = NULL;
1002 }
1003 
1004 struct g_geom *
1005 g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
1006     const struct g_eli_metadata *md, const u_char *mkey, int nkey)
1007 {
1008 	struct g_eli_softc *sc;
1009 	struct g_eli_worker *wr;
1010 	struct g_geom *gp;
1011 	struct g_provider *pp;
1012 	struct g_consumer *cp;
1013 	struct g_geom_alias *gap;
1014 	u_int i, threads;
1015 	int dcw, error;
1016 
1017 	G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX);
1018 	KASSERT(eli_metadata_crypto_supported(md),
1019 	    ("%s: unsupported crypto for %s", __func__, bpp->name));
1020 
1021 	gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX);
1022 	sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO);
1023 	gp->start = g_eli_start;
1024 	/*
1025 	 * Spoiling can happen even though we have the provider open
1026 	 * exclusively, e.g. through media change events.
1027 	 */
1028 	gp->spoiled = g_eli_orphan;
1029 	gp->orphan = g_eli_orphan;
1030 	gp->resize = g_eli_resize;
1031 	gp->dumpconf = g_eli_dumpconf;
1032 	/*
1033 	 * If detach-on-last-close feature is not enabled and we don't operate
1034 	 * on read-only provider, we can simply use g_std_access().
1035 	 */
1036 	if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO))
1037 		gp->access = g_eli_access;
1038 	else
1039 		gp->access = g_std_access;
1040 
1041 	eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize);
1042 	sc->sc_nkey = nkey;
1043 
1044 	gp->softc = sc;
1045 	sc->sc_geom = gp;
1046 
1047 	bioq_init(&sc->sc_queue);
1048 	mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF);
1049 	mtx_init(&sc->sc_ekeys_lock, "geli:ekeys", NULL, MTX_DEF);
1050 
1051 	pp = NULL;
1052 	cp = g_new_consumer(gp);
1053 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
1054 	error = g_attach(cp, bpp);
1055 	if (error != 0) {
1056 		if (req != NULL) {
1057 			gctl_error(req, "Cannot attach to %s (error=%d).",
1058 			    bpp->name, error);
1059 		} else {
1060 			G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).",
1061 			    bpp->name, error);
1062 		}
1063 		goto failed;
1064 	}
1065 	/*
1066 	 * Keep provider open all the time, so we can run critical tasks,
1067 	 * like Master Keys deletion, without wondering if we can open
1068 	 * provider or not.
1069 	 * We don't open provider for writing only when user requested read-only
1070 	 * access.
1071 	 */
1072 	dcw = (sc->sc_flags & G_ELI_FLAG_RO) ? 0 : 1;
1073 	error = g_access(cp, 1, dcw, 1);
1074 	if (error != 0) {
1075 		if (req != NULL) {
1076 			gctl_error(req, "Cannot access %s (error=%d).",
1077 			    bpp->name, error);
1078 		} else {
1079 			G_ELI_DEBUG(1, "Cannot access %s (error=%d).",
1080 			    bpp->name, error);
1081 		}
1082 		goto failed;
1083 	}
1084 
1085 	/*
1086 	 * Remember the keys in our softc structure.
1087 	 */
1088 	g_eli_mkey_propagate(sc, mkey);
1089 
1090 	LIST_INIT(&sc->sc_workers);
1091 
1092 	threads = g_eli_threads;
1093 	if (threads == 0)
1094 		threads = mp_ncpus;
1095 	sc->sc_cpubind = (mp_ncpus > 1 && threads == mp_ncpus);
1096 	g_eli_init_uma();
1097 	for (i = 0; i < threads; i++) {
1098 		if (g_eli_cpu_is_disabled(i)) {
1099 			G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.",
1100 			    bpp->name, i);
1101 			continue;
1102 		}
1103 		wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO);
1104 		wr->w_softc = sc;
1105 		wr->w_number = i;
1106 		wr->w_active = TRUE;
1107 
1108 		error = g_eli_newsession(wr);
1109 		if (error != 0) {
1110 			free(wr, M_ELI);
1111 			if (req != NULL) {
1112 				gctl_error(req, "Cannot set up crypto session "
1113 				    "for %s (error=%d).", bpp->name, error);
1114 			} else {
1115 				G_ELI_DEBUG(1, "Cannot set up crypto session "
1116 				    "for %s (error=%d).", bpp->name, error);
1117 			}
1118 			goto failed;
1119 		}
1120 
1121 		error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0,
1122 		    "g_eli[%u] %s", i, bpp->name);
1123 		if (error != 0) {
1124 			g_eli_freesession(wr);
1125 			free(wr, M_ELI);
1126 			if (req != NULL) {
1127 				gctl_error(req, "Cannot create kernel thread "
1128 				    "for %s (error=%d).", bpp->name, error);
1129 			} else {
1130 				G_ELI_DEBUG(1, "Cannot create kernel thread "
1131 				    "for %s (error=%d).", bpp->name, error);
1132 			}
1133 			goto failed;
1134 		}
1135 		LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next);
1136 	}
1137 
1138 	/*
1139 	 * Create decrypted provider.
1140 	 */
1141 	pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX);
1142 	pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
1143 	if (g_eli_unmapped_io && CRYPTO_HAS_VMPAGE) {
1144 		/*
1145 		 * On DMAP architectures we can use unmapped I/O.  But don't
1146 		 * use it with data integrity verification.  That code hasn't
1147 		 * been written yet.
1148 		 */
1149 		 if ((sc->sc_flags & G_ELI_FLAG_AUTH) == 0)
1150 			pp->flags |= G_PF_ACCEPT_UNMAPPED;
1151 	}
1152 	pp->mediasize = sc->sc_mediasize;
1153 	pp->sectorsize = sc->sc_sectorsize;
1154 	LIST_FOREACH(gap, &bpp->aliases, ga_next)
1155 		g_provider_add_alias(pp, "%s%s", gap->ga_alias, G_ELI_SUFFIX);
1156 
1157 	g_error_provider(pp, 0);
1158 
1159 	G_ELI_DEBUG(0, "Device %s created.", pp->name);
1160 	G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo),
1161 	    sc->sc_ekeylen);
1162 	if (sc->sc_flags & G_ELI_FLAG_AUTH)
1163 		G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo));
1164 	G_ELI_DEBUG(0, "    Crypto: %s",
1165 	    sc->sc_crypto == G_ELI_CRYPTO_SW_ACCEL ? "accelerated software" :
1166 	    sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware");
1167 	return (gp);
1168 failed:
1169 	mtx_lock(&sc->sc_queue_mtx);
1170 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
1171 	wakeup(sc);
1172 	/*
1173 	 * Wait for kernel threads self destruction.
1174 	 */
1175 	while (!LIST_EMPTY(&sc->sc_workers)) {
1176 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
1177 		    "geli:destroy", 0);
1178 	}
1179 	mtx_destroy(&sc->sc_queue_mtx);
1180 	if (cp->provider != NULL) {
1181 		if (cp->acr == 1)
1182 			g_access(cp, -1, -dcw, -1);
1183 		g_detach(cp);
1184 	}
1185 	g_destroy_consumer(cp);
1186 	g_destroy_geom(gp);
1187 	g_eli_key_destroy(sc);
1188 	g_eli_fini_uma();
1189 	zfree(sc, M_ELI);
1190 	return (NULL);
1191 }
1192 
1193 int
1194 g_eli_destroy(struct g_eli_softc *sc, boolean_t force)
1195 {
1196 	struct g_geom *gp;
1197 	struct g_provider *pp;
1198 
1199 	g_topology_assert();
1200 
1201 	if (sc == NULL)
1202 		return (ENXIO);
1203 
1204 	gp = sc->sc_geom;
1205 	pp = LIST_FIRST(&gp->provider);
1206 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
1207 		if (force) {
1208 			G_ELI_DEBUG(1, "Device %s is still open, so it "
1209 			    "cannot be definitely removed.", pp->name);
1210 			sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
1211 			gp->access = g_eli_access;
1212 			g_wither_provider(pp, ENXIO);
1213 			return (EBUSY);
1214 		} else {
1215 			G_ELI_DEBUG(1,
1216 			    "Device %s is still open (r%dw%de%d).", pp->name,
1217 			    pp->acr, pp->acw, pp->ace);
1218 			return (EBUSY);
1219 		}
1220 	}
1221 
1222 	mtx_lock(&sc->sc_queue_mtx);
1223 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
1224 	wakeup(sc);
1225 	while (!LIST_EMPTY(&sc->sc_workers)) {
1226 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
1227 		    "geli:destroy", 0);
1228 	}
1229 	mtx_destroy(&sc->sc_queue_mtx);
1230 	gp->softc = NULL;
1231 	g_eli_key_destroy(sc);
1232 	g_eli_fini_uma();
1233 	zfree(sc, M_ELI);
1234 
1235 	G_ELI_DEBUG(0, "Device %s destroyed.", gp->name);
1236 	g_wither_geom_close(gp, ENXIO);
1237 
1238 	return (0);
1239 }
1240 
1241 static int
1242 g_eli_destroy_geom(struct gctl_req *req __unused,
1243     struct g_class *mp __unused, struct g_geom *gp)
1244 {
1245 	struct g_eli_softc *sc;
1246 
1247 	sc = gp->softc;
1248 	return (g_eli_destroy(sc, FALSE));
1249 }
1250 
1251 static int
1252 g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider)
1253 {
1254 	u_char *keyfile, *data;
1255 	char *file, name[64];
1256 	size_t size;
1257 	int i;
1258 
1259 	for (i = 0; ; i++) {
1260 		snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i);
1261 		keyfile = preload_search_by_type(name);
1262 		if (keyfile == NULL && i == 0) {
1263 			/*
1264 			 * If there is only one keyfile, allow simpler name.
1265 			 */
1266 			snprintf(name, sizeof(name), "%s:geli_keyfile", provider);
1267 			keyfile = preload_search_by_type(name);
1268 		}
1269 		if (keyfile == NULL)
1270 			return (i);	/* Return number of loaded keyfiles. */
1271 		data = preload_fetch_addr(keyfile);
1272 		if (data == NULL) {
1273 			G_ELI_DEBUG(0, "Cannot find key file data for %s.",
1274 			    name);
1275 			return (0);
1276 		}
1277 		size = preload_fetch_size(keyfile);
1278 		if (size == 0) {
1279 			G_ELI_DEBUG(0, "Cannot find key file size for %s.",
1280 			    name);
1281 			return (0);
1282 		}
1283 		file = preload_search_info(keyfile, MODINFO_NAME);
1284 		if (file == NULL) {
1285 			G_ELI_DEBUG(0, "Cannot find key file name for %s.",
1286 			    name);
1287 			return (0);
1288 		}
1289 		G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", file,
1290 		    provider, name);
1291 		g_eli_crypto_hmac_update(ctx, data, size);
1292 	}
1293 }
1294 
1295 static void
1296 g_eli_keyfiles_clear(const char *provider)
1297 {
1298 	u_char *keyfile, *data;
1299 	char name[64];
1300 	size_t size;
1301 	int i;
1302 
1303 	for (i = 0; ; i++) {
1304 		snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i);
1305 		keyfile = preload_search_by_type(name);
1306 		if (keyfile == NULL)
1307 			return;
1308 		data = preload_fetch_addr(keyfile);
1309 		size = preload_fetch_size(keyfile);
1310 		if (data != NULL && size != 0)
1311 			explicit_bzero(data, size);
1312 	}
1313 }
1314 
1315 /*
1316  * Tasting is only made on boot.
1317  * We detect providers which should be attached before root is mounted.
1318  */
1319 static struct g_geom *
1320 g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
1321 {
1322 	struct g_eli_metadata md;
1323 	struct g_geom *gp;
1324 	struct hmac_ctx ctx;
1325 	char passphrase[256];
1326 	u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN];
1327 	u_int i, nkey, nkeyfiles, tries, showpass;
1328 	int error;
1329         struct keybuf *keybuf;
1330 
1331 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
1332 	g_topology_assert();
1333 
1334 	if (root_mounted() || g_eli_tries == 0)
1335 		return (NULL);
1336 
1337 	G_ELI_DEBUG(3, "Tasting %s.", pp->name);
1338 
1339 	error = g_eli_read_metadata(mp, pp, &md);
1340 	if (error != 0)
1341 		return (NULL);
1342 	gp = NULL;
1343 
1344 	if (strcmp(md.md_magic, G_ELI_MAGIC) != 0)
1345 		return (NULL);
1346 	if (md.md_version > G_ELI_VERSION) {
1347 		printf("geom_eli.ko module is too old to handle %s.\n",
1348 		    pp->name);
1349 		return (NULL);
1350 	}
1351 	if (md.md_provsize != pp->mediasize)
1352 		return (NULL);
1353 	/* Should we attach it on boot? */
1354 	if (!(md.md_flags & G_ELI_FLAG_BOOT) &&
1355 	    !(md.md_flags & G_ELI_FLAG_GELIBOOT))
1356 		return (NULL);
1357 	if (md.md_keys == 0x00) {
1358 		G_ELI_DEBUG(0, "No valid keys on %s.", pp->name);
1359 		return (NULL);
1360 	}
1361 	if (!eli_metadata_crypto_supported(&md)) {
1362 		G_ELI_DEBUG(0, "%s uses invalid or unsupported algorithms\n",
1363 		    pp->name);
1364 		return (NULL);
1365 	}
1366 	if (md.md_iterations == -1) {
1367 		/* If there is no passphrase, we try only once. */
1368 		tries = 1;
1369 	} else {
1370 		/* Ask for the passphrase no more than g_eli_tries times. */
1371 		tries = g_eli_tries;
1372 	}
1373 
1374         if ((keybuf = get_keybuf()) != NULL) {
1375                 /* Scan the key buffer, try all GELI keys. */
1376                 for (i = 0; i < keybuf->kb_nents; i++) {
1377                          if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) {
1378                                  memcpy(key, keybuf->kb_ents[i].ke_data,
1379                                      sizeof(key));
1380 
1381                                  if (g_eli_mkey_decrypt_any(&md, key,
1382                                      mkey, &nkey) == 0 ) {
1383                                          explicit_bzero(key, sizeof(key));
1384                                          goto have_key;
1385                                  }
1386                          }
1387                 }
1388         }
1389 
1390         for (i = 0; i <= tries; i++) {
1391                 g_eli_crypto_hmac_init(&ctx, NULL, 0);
1392 
1393                 /*
1394                  * Load all key files.
1395                  */
1396                 nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name);
1397 
1398                 if (nkeyfiles == 0 && md.md_iterations == -1) {
1399                         /*
1400                          * No key files and no passphrase, something is
1401                          * definitely wrong here.
1402                          * geli(8) doesn't allow for such situation, so assume
1403                          * that there was really no passphrase and in that case
1404                          * key files are no properly defined in loader.conf.
1405                          */
1406                         G_ELI_DEBUG(0,
1407                             "Found no key files in loader.conf for %s.",
1408                             pp->name);
1409                         return (NULL);
1410                 }
1411 
1412                 /* Ask for the passphrase if defined. */
1413                 if (md.md_iterations >= 0) {
1414                         /* Try first with cached passphrase. */
1415                         if (i == 0) {
1416                                 if (!g_eli_boot_passcache)
1417                                         continue;
1418                                 memcpy(passphrase, cached_passphrase,
1419                                     sizeof(passphrase));
1420                         } else {
1421                                 printf("Enter passphrase for %s: ", pp->name);
1422 				showpass = g_eli_visible_passphrase;
1423 				if ((md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS) != 0)
1424 					showpass = GETS_ECHOPASS;
1425                                 cngets(passphrase, sizeof(passphrase),
1426 				    showpass);
1427                                 memcpy(cached_passphrase, passphrase,
1428                                     sizeof(passphrase));
1429                         }
1430                 }
1431 
1432                 /*
1433                  * Prepare Derived-Key from the user passphrase.
1434                  */
1435                 if (md.md_iterations == 0) {
1436                         g_eli_crypto_hmac_update(&ctx, md.md_salt,
1437                             sizeof(md.md_salt));
1438                         g_eli_crypto_hmac_update(&ctx, passphrase,
1439                             strlen(passphrase));
1440                         explicit_bzero(passphrase, sizeof(passphrase));
1441                 } else if (md.md_iterations > 0) {
1442                         u_char dkey[G_ELI_USERKEYLEN];
1443 
1444                         pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt,
1445                             sizeof(md.md_salt), passphrase, md.md_iterations);
1446                         explicit_bzero(passphrase, sizeof(passphrase));
1447                         g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey));
1448                         explicit_bzero(dkey, sizeof(dkey));
1449                 }
1450 
1451                 g_eli_crypto_hmac_final(&ctx, key, 0);
1452 
1453                 /*
1454                  * Decrypt Master-Key.
1455                  */
1456                 error = g_eli_mkey_decrypt_any(&md, key, mkey, &nkey);
1457                 explicit_bzero(key, sizeof(key));
1458                 if (error == -1) {
1459                         if (i == tries) {
1460                                 G_ELI_DEBUG(0,
1461                                     "Wrong key for %s. No tries left.",
1462                                     pp->name);
1463                                 g_eli_keyfiles_clear(pp->name);
1464                                 return (NULL);
1465                         }
1466                         if (i > 0) {
1467                                 G_ELI_DEBUG(0,
1468                                     "Wrong key for %s. Tries left: %u.",
1469                                     pp->name, tries - i);
1470                         }
1471                         /* Try again. */
1472                         continue;
1473                 } else if (error > 0) {
1474                         G_ELI_DEBUG(0,
1475                             "Cannot decrypt Master Key for %s (error=%d).",
1476                             pp->name, error);
1477                         g_eli_keyfiles_clear(pp->name);
1478                         return (NULL);
1479                 }
1480                 g_eli_keyfiles_clear(pp->name);
1481                 G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name);
1482                 break;
1483         }
1484 have_key:
1485 
1486 	/*
1487 	 * We have correct key, let's attach provider.
1488 	 */
1489 	gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey);
1490 	explicit_bzero(mkey, sizeof(mkey));
1491 	explicit_bzero(&md, sizeof(md));
1492 	if (gp == NULL) {
1493 		G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name,
1494 		    G_ELI_SUFFIX);
1495 		return (NULL);
1496 	}
1497 	return (gp);
1498 }
1499 
1500 static void
1501 g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1502     struct g_consumer *cp, struct g_provider *pp)
1503 {
1504 	struct g_eli_softc *sc;
1505 
1506 	g_topology_assert();
1507 	sc = gp->softc;
1508 	if (sc == NULL)
1509 		return;
1510 	if (pp != NULL || cp != NULL)
1511 		return;	/* Nothing here. */
1512 
1513 	sbuf_printf(sb, "%s<KeysTotal>%ju</KeysTotal>\n", indent,
1514 	    (uintmax_t)sc->sc_ekeys_total);
1515 	sbuf_printf(sb, "%s<KeysAllocated>%ju</KeysAllocated>\n", indent,
1516 	    (uintmax_t)sc->sc_ekeys_allocated);
1517 	sbuf_printf(sb, "%s<Flags>", indent);
1518 	if (sc->sc_flags == 0)
1519 		sbuf_cat(sb, "NONE");
1520 	else {
1521 		int first = 1;
1522 
1523 #define ADD_FLAG(flag, name)	do {					\
1524 	if (sc->sc_flags & (flag)) {					\
1525 		if (!first)						\
1526 			sbuf_cat(sb, ", ");				\
1527 		else							\
1528 			first = 0;					\
1529 		sbuf_cat(sb, name);					\
1530 	}								\
1531 } while (0)
1532 		ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND");
1533 		ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY");
1534 		ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER");
1535 		ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME");
1536 		ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT");
1537 		ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH");
1538 		ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH");
1539 		ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH");
1540 		ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN");
1541 		ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY");
1542 		ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY");
1543 		ADD_FLAG(G_ELI_FLAG_NODELETE, "NODELETE");
1544 		ADD_FLAG(G_ELI_FLAG_GELIBOOT, "GELIBOOT");
1545 		ADD_FLAG(G_ELI_FLAG_GELIDISPLAYPASS, "GELIDISPLAYPASS");
1546 		ADD_FLAG(G_ELI_FLAG_AUTORESIZE, "AUTORESIZE");
1547 #undef  ADD_FLAG
1548 	}
1549 	sbuf_cat(sb, "</Flags>\n");
1550 
1551 	if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) {
1552 		sbuf_printf(sb, "%s<UsedKey>%u</UsedKey>\n", indent,
1553 		    sc->sc_nkey);
1554 	}
1555 	sbuf_printf(sb, "%s<Version>%u</Version>\n", indent, sc->sc_version);
1556 	sbuf_printf(sb, "%s<Crypto>", indent);
1557 	switch (sc->sc_crypto) {
1558 	case G_ELI_CRYPTO_HW:
1559 		sbuf_cat(sb, "hardware");
1560 		break;
1561 	case G_ELI_CRYPTO_SW:
1562 		sbuf_cat(sb, "software");
1563 		break;
1564 	case G_ELI_CRYPTO_SW_ACCEL:
1565 		sbuf_cat(sb, "accelerated software");
1566 		break;
1567 	default:
1568 		sbuf_cat(sb, "UNKNOWN");
1569 		break;
1570 	}
1571 	sbuf_cat(sb, "</Crypto>\n");
1572 	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
1573 		sbuf_printf(sb,
1574 		    "%s<AuthenticationAlgorithm>%s</AuthenticationAlgorithm>\n",
1575 		    indent, g_eli_algo2str(sc->sc_aalgo));
1576 	}
1577 	sbuf_printf(sb, "%s<KeyLength>%u</KeyLength>\n", indent,
1578 	    sc->sc_ekeylen);
1579 	sbuf_printf(sb, "%s<EncryptionAlgorithm>%s</EncryptionAlgorithm>\n",
1580 	    indent, g_eli_algo2str(sc->sc_ealgo));
1581 	sbuf_printf(sb, "%s<State>%s</State>\n", indent,
1582 	    (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE");
1583 }
1584 
1585 static void
1586 g_eli_shutdown_pre_sync(void *arg, int howto)
1587 {
1588 	struct g_class *mp;
1589 	struct g_geom *gp, *gp2;
1590 	struct g_provider *pp;
1591 	struct g_eli_softc *sc;
1592 
1593 	mp = arg;
1594 	g_topology_lock();
1595 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
1596 		sc = gp->softc;
1597 		if (sc == NULL)
1598 			continue;
1599 		pp = LIST_FIRST(&gp->provider);
1600 		KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name));
1601 		if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0 ||
1602 		    SCHEDULER_STOPPED())
1603 		{
1604 			sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
1605 			gp->access = g_eli_access;
1606 		} else {
1607 			(void) g_eli_destroy(sc, TRUE);
1608 		}
1609 	}
1610 	g_topology_unlock();
1611 }
1612 
1613 static void
1614 g_eli_init(struct g_class *mp)
1615 {
1616 
1617 	g_eli_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
1618 	    g_eli_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
1619 	if (g_eli_pre_sync == NULL)
1620 		G_ELI_DEBUG(0, "Warning! Cannot register shutdown event.");
1621 }
1622 
1623 static void
1624 g_eli_fini(struct g_class *mp)
1625 {
1626 
1627 	if (g_eli_pre_sync != NULL)
1628 		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_eli_pre_sync);
1629 }
1630 
/*
 * Module glue: declare the GEOM class, its dependency on the crypto
 * module and the version of this module.
 */
DECLARE_GEOM_CLASS(g_eli_class, g_eli);
MODULE_DEPEND(g_eli, crypto, 1, 1, 1);
MODULE_VERSION(geom_eli, 0);
1634