xref: /freebsd/sys/geom/eli/g_eli.c (revision 0d4ad64077bcddcff5a170ee97273db95b9cab55)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2005-2019 Pawel Jakub Dawidek <pawel@dawidek.net>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/cons.h>
32 #include <sys/kenv.h>
33 #include <sys/kernel.h>
34 #include <sys/linker.h>
35 #include <sys/module.h>
36 #include <sys/lock.h>
37 #include <sys/mutex.h>
38 #include <sys/bio.h>
39 #include <sys/sbuf.h>
40 #include <sys/sysctl.h>
41 #include <sys/malloc.h>
42 #include <sys/eventhandler.h>
43 #include <sys/kthread.h>
44 #include <sys/proc.h>
45 #include <sys/sched.h>
46 #include <sys/smp.h>
47 #include <sys/uio.h>
48 #include <sys/vnode.h>
49 
50 #include <machine/vmparam.h>
51 
52 #include <vm/uma.h>
53 #include <vm/vm.h>
54 #include <vm/swap_pager.h>
55 
56 #include <geom/geom.h>
57 #include <geom/geom_dbg.h>
58 #include <geom/eli/g_eli.h>
59 #include <geom/eli/pkcs5v2.h>
60 
61 #include <crypto/intake.h>
62 
FEATURE(geom_eli, "GEOM crypto module");

/* malloc(9) type used for the M_ELI allocations made in this file. */
MALLOC_DEFINE(M_ELI, "eli_data", "GEOM_ELI Data");

/* Knobs exported under the kern.geom.eli sysctl node. */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_ELI stuff");
static int g_eli_version = G_ELI_VERSION;
SYSCTL_INT(_kern_geom_eli, OID_AUTO, version, CTLFLAG_RD, &g_eli_version, 0,
    "GELI version");
int g_eli_debug = 0;
SYSCTL_INT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RWTUN, &g_eli_debug, 0,
    "Debug level");
static u_int g_eli_tries = 3;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RWTUN, &g_eli_tries, 0,
    "Number of tries for entering the passphrase");
static u_int g_eli_visible_passphrase = GETS_NOECHO;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RWTUN,
    &g_eli_visible_passphrase, 0,
    "Visibility of passphrase prompt (0 = invisible, 1 = visible, 2 = asterisk)");
u_int g_eli_overwrites = G_ELI_OVERWRITES;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RWTUN, &g_eli_overwrites,
    0, "Number of times on-disk keys should be overwritten when destroying them");
static u_int g_eli_threads = 0;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RWTUN, &g_eli_threads, 0,
    "Number of threads doing crypto work");
u_int g_eli_batch = 0;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RWTUN, &g_eli_batch, 0,
    "Use crypto operations batching");
/*
 * minbufs goes through a handler (sysctl_g_eli_minbufs) rather than a plain
 * SYSCTL_UINT so that a new value can be applied to an existing UMA zone.
 */
static u_int g_eli_minbufs = 16;
static int sysctl_g_eli_minbufs(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern_geom_eli, OID_AUTO, minbufs, CTLTYPE_UINT | CTLFLAG_RW |
    CTLFLAG_MPSAFE, NULL, 0, sysctl_g_eli_minbufs, "IU",
    "Number of GELI bufs reserved for swap transactions");
static bool g_eli_blocking_malloc = false;
SYSCTL_BOOL(_kern_geom_eli, OID_AUTO, blocking_malloc, CTLFLAG_RWTUN,
    &g_eli_blocking_malloc, 0, "Use blocking malloc calls for GELI buffers");
static bool g_eli_unmapped_io = true;
SYSCTL_BOOL(_kern_geom_eli, OID_AUTO, unmapped_io, CTLFLAG_RDTUN,
    &g_eli_unmapped_io, 0, "Enable support for unmapped I/O");
/* Item size of the UMA zone; computed in g_eli_init_uma(). */
static int g_eli_alloc_sz;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, use_uma_bytes, CTLFLAG_RD,
    &g_eli_alloc_sz, 0, "Use uma(9) for allocations of this size or smaller.");

static struct sx g_eli_umalock;	/* Controls changes to UMA zone. */
SX_SYSINIT(g_eli_umalock, &g_eli_umalock, "GELI UMA");
/* Zone for GELI data buffers; NULL until g_eli_init_uma() creates it. */
static uma_zone_t g_eli_uma = NULL;
/* Number of buffers currently allocated from g_eli_uma. */
static volatile int g_eli_umaoutstanding;
/* Incremented by g_eli_init_uma(), decremented by g_eli_fini_uma(). */
static volatile int g_eli_devs;
112 
113 /*
114  * Control the number of reserved entries in the GELI zone.
115  * If the GELI zone has already been allocated, update the zone. Otherwise,
116  * simply update the variable for use the next time the zone is created.
117  */
118 static int
119 sysctl_g_eli_minbufs(SYSCTL_HANDLER_ARGS)
120 {
121 	int error;
122 	u_int new;
123 
124 	new = g_eli_minbufs;
125 	error = sysctl_handle_int(oidp, &new, 0, req);
126 	if (error != 0 || req->newptr == NULL)
127 		return (error);
128 	sx_xlock(&g_eli_umalock);
129 	if (g_eli_uma != NULL) {
130 		if (new != g_eli_minbufs)
131 			uma_zone_reserve(g_eli_uma, new);
132 		if (new > g_eli_minbufs)
133 			uma_prealloc(g_eli_uma, new - g_eli_minbufs);
134 	}
135 	if (new != g_eli_minbufs)
136 		g_eli_minbufs = new;
137 	sx_xunlock(&g_eli_umalock);
138 	return (0);
139 }
140 
/*
 * Passphrase cached during boot, in order to be more user-friendly if
 * there are multiple providers using the same passphrase.
 *
 * The buffer is filled from the kernel environment by
 * fetch_loader_passphrase() and wiped by zero_boot_passcache().
 */
static char cached_passphrase[256];
/* Exported read-only via sysctl; settable only as a tunable. */
static u_int g_eli_boot_passcache = 1;
TUNABLE_INT("kern.geom.eli.boot_passcache", &g_eli_boot_passcache);
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, boot_passcache, CTLFLAG_RD,
    &g_eli_boot_passcache, 0,
    "Passphrases are cached during boot process for possible reuse");
151 static void
152 fetch_loader_passphrase(void * dummy)
153 {
154 	char * env_passphrase;
155 
156 	KASSERT(dynamic_kenv, ("need dynamic kenv"));
157 
158 	if ((env_passphrase = kern_getenv("kern.geom.eli.passphrase")) != NULL) {
159 		/* Extract passphrase from the environment. */
160 		strlcpy(cached_passphrase, env_passphrase,
161 		    sizeof(cached_passphrase));
162 		freeenv(env_passphrase);
163 
164 		/* Wipe the passphrase from the environment. */
165 		kern_unsetenv("kern.geom.eli.passphrase");
166 	}
167 }
168 SYSINIT(geli_fetch_loader_passphrase, SI_SUB_KMEM + 1, SI_ORDER_ANY,
169     fetch_loader_passphrase, NULL);
170 
171 static void
172 zero_boot_passcache(void)
173 {
174 
175         explicit_bzero(cached_passphrase, sizeof(cached_passphrase));
176 }
177 
178 static void
179 zero_geli_intake_keys(void)
180 {
181         struct keybuf *keybuf;
182         int i;
183 
184         if ((keybuf = get_keybuf()) != NULL) {
185                 /* Scan the key buffer, clear all GELI keys. */
186                 for (i = 0; i < keybuf->kb_nents; i++) {
187                          if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) {
188                                  explicit_bzero(keybuf->kb_ents[i].ke_data,
189                                      sizeof(keybuf->kb_ents[i].ke_data));
190                                  keybuf->kb_ents[i].ke_type = KEYBUF_TYPE_NONE;
191                          }
192                 }
193         }
194 }
195 
196 static void
197 zero_intake_passcache(void *dummy)
198 {
199         zero_boot_passcache();
200         zero_geli_intake_keys();
201 }
202 EVENTHANDLER_DEFINE(mountroot, zero_intake_passcache, NULL, 0);
203 
static eventhandler_tag g_eli_pre_sync = NULL;

/* Forward declarations for functions used before their definitions. */
static int g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp,
    off_t offset, struct g_eli_metadata *md);

static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static void g_eli_init(struct g_class *mp);
static void g_eli_fini(struct g_class *mp);

static g_taste_t g_eli_taste;
static g_dumpconf_t g_eli_dumpconf;

/*
 * GEOM class descriptor: names the class and wires up its control-request,
 * taste, destroy_geom, init and fini entry points.
 */
struct g_class g_eli_class = {
	.name = G_ELI_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_eli_config,
	.taste = g_eli_taste,
	.destroy_geom = g_eli_destroy_geom,
	.init = g_eli_init,
	.fini = g_eli_fini
};
226 
227 /*
228  * Code paths:
229  * BIO_READ:
230  *	g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
231  * BIO_WRITE:
232  *	g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
233  */
234 
235 /*
236  * EAGAIN from crypto(9) means, that we were probably balanced to another crypto
237  * accelerator or something like this.
238  * The function updates the SID and rerun the operation.
239  */
240 int
241 g_eli_crypto_rerun(struct cryptop *crp)
242 {
243 	struct g_eli_softc *sc;
244 	struct g_eli_worker *wr;
245 	struct bio *bp;
246 	int error;
247 
248 	bp = (struct bio *)crp->crp_opaque;
249 	sc = bp->bio_to->geom->softc;
250 	LIST_FOREACH(wr, &sc->sc_workers, w_next) {
251 		if (wr->w_number == G_ELI_WORKER(bp->bio_pflags))
252 			break;
253 	}
254 	KASSERT(wr != NULL, ("Invalid worker (%u).",
255 	    G_ELI_WORKER(bp->bio_pflags)));
256 	G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %p -> %p).",
257 	    bp->bio_cmd == BIO_READ ? "READ" : "WRITE", wr->w_sid,
258 	    crp->crp_session);
259 	wr->w_sid = crp->crp_session;
260 	crp->crp_etype = 0;
261 	error = crypto_dispatch(crp);
262 	if (error == 0)
263 		return (0);
264 	G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error);
265 	crp->crp_etype = error;
266 	return (error);
267 }
268 
269 static void
270 g_eli_getattr_done(struct bio *bp)
271 {
272 	if (bp->bio_error == 0 &&
273 	    !strcmp(bp->bio_attribute, "GEOM::physpath")) {
274 		strlcat(bp->bio_data, "/eli", bp->bio_length);
275 	}
276 	g_std_done(bp);
277 }
278 
279 /*
280  * The function is called afer reading encrypted data from the provider.
281  *
282  * g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
283  */
284 void
285 g_eli_read_done(struct bio *bp)
286 {
287 	struct g_eli_softc *sc;
288 	struct bio *pbp;
289 
290 	G_ELI_LOGREQ(2, bp, "Request done.");
291 	pbp = bp->bio_parent;
292 	if (pbp->bio_error == 0 && bp->bio_error != 0)
293 		pbp->bio_error = bp->bio_error;
294 	g_destroy_bio(bp);
295 	/*
296 	 * Do we have all sectors already?
297 	 */
298 	pbp->bio_inbed++;
299 	if (pbp->bio_inbed < pbp->bio_children)
300 		return;
301 	sc = pbp->bio_to->geom->softc;
302 	if (pbp->bio_error != 0) {
303 		G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__,
304 		    pbp->bio_error);
305 		pbp->bio_completed = 0;
306 		g_eli_free_data(pbp);
307 		g_io_deliver(pbp, pbp->bio_error);
308 		if (sc != NULL)
309 			atomic_subtract_int(&sc->sc_inflight, 1);
310 		return;
311 	}
312 	mtx_lock(&sc->sc_queue_mtx);
313 	bioq_insert_tail(&sc->sc_queue, pbp);
314 	mtx_unlock(&sc->sc_queue_mtx);
315 	wakeup(sc);
316 }
317 
318 /*
319  * The function is called after we encrypt and write data.
320  *
321  * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver
322  */
323 void
324 g_eli_write_done(struct bio *bp)
325 {
326 	struct g_eli_softc *sc;
327 	struct bio *pbp;
328 
329 	G_ELI_LOGREQ(2, bp, "Request done.");
330 	pbp = bp->bio_parent;
331 	if (pbp->bio_error == 0 && bp->bio_error != 0)
332 		pbp->bio_error = bp->bio_error;
333 	g_destroy_bio(bp);
334 	/*
335 	 * Do we have all sectors already?
336 	 */
337 	pbp->bio_inbed++;
338 	if (pbp->bio_inbed < pbp->bio_children)
339 		return;
340 	sc = pbp->bio_to->geom->softc;
341 	g_eli_free_data(pbp);
342 	if (pbp->bio_error != 0) {
343 		G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__,
344 		    pbp->bio_error);
345 		pbp->bio_completed = 0;
346 	} else
347 		pbp->bio_completed = pbp->bio_length;
348 
349 	/*
350 	 * Write is finished, send it up.
351 	 */
352 	g_io_deliver(pbp, pbp->bio_error);
353 	if (sc != NULL)
354 		atomic_subtract_int(&sc->sc_inflight, 1);
355 }
356 
/*
 * This function should never be called; it exists only because the
 * temporary geom needs ->orphan() and ->spoiled() methods.  It panics,
 * naming the geom it was called for.
 */
static void
g_eli_orphan_spoil_assert(struct g_consumer *cp)
{

	panic("Function %s() called for %s.", __func__, cp->geom->name);
}
367 
368 static void
369 g_eli_orphan(struct g_consumer *cp)
370 {
371 	struct g_eli_softc *sc;
372 
373 	g_topology_assert();
374 	sc = cp->geom->softc;
375 	if (sc == NULL)
376 		return;
377 	g_eli_destroy(sc, TRUE);
378 }
379 
/*
 * Resize callback: react to a size change of the underlying provider.
 *
 * Does nothing if the geom has no softc or G_ELI_FLAG_AUTORESIZE is not
 * set.  Unless the device is one-time (G_ELI_FLAG_ONETIME), the metadata
 * is read from the last sector of the old provider size, written with the
 * new provider size to the last sector of the new size, and the old copy
 * is overwritten with zeroes.  A failure in that sequence is logged and
 * the remaining metadata steps are skipped, but the in-memory sizes and
 * the GELI provider are still updated afterwards.
 */
static void
g_eli_resize(struct g_consumer *cp)
{
	struct g_eli_softc *sc;
	struct g_provider *epp, *pp;
	off_t oldsize;

	g_topology_assert();
	sc = cp->geom->softc;
	if (sc == NULL)
		return;

	if ((sc->sc_flags & G_ELI_FLAG_AUTORESIZE) == 0) {
		G_ELI_DEBUG(0, "Autoresize is turned off, old size: %jd.",
		    (intmax_t)sc->sc_provsize);
		return;
	}

	pp = cp->provider;

	if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0) {
		struct g_eli_metadata md;
		u_char *sector;
		int error;

		/* Keep NULL so the cleanup at iofail is safe on early exit. */
		sector = NULL;

		/* sc_provsize still holds the provider size before the resize. */
		error = g_eli_read_metadata_offset(cp->geom->class, pp,
		    sc->sc_provsize - pp->sectorsize, &md);
		if (error != 0) {
			G_ELI_DEBUG(0, "Cannot read metadata from %s (error=%d).",
			    pp->name, error);
			goto iofail;
		}

		md.md_provsize = pp->mediasize;

		/* Store the updated metadata in the new last sector. */
		sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO);
		eli_metadata_encode(&md, sector);
		error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector,
		    pp->sectorsize);
		if (error != 0) {
			G_ELI_DEBUG(0, "Cannot store metadata on %s (error=%d).",
			    pp->name, error);
			goto iofail;
		}
		/* Overwrite the old metadata sector with zeroes. */
		explicit_bzero(sector, pp->sectorsize);
		error = g_write_data(cp, sc->sc_provsize - pp->sectorsize,
		    sector, pp->sectorsize);
		if (error != 0) {
			G_ELI_DEBUG(0, "Cannot clear old metadata from %s (error=%d).",
			    pp->name, error);
			goto iofail;
		}
iofail:
		/* Reached on success too: scrub metadata copies from memory. */
		explicit_bzero(&md, sizeof(md));
		zfree(sector, M_ELI);
	}

	/* Update the in-memory sizes and resize the GELI provider. */
	oldsize = sc->sc_mediasize;
	sc->sc_mediasize = eli_mediasize(sc, pp->mediasize, pp->sectorsize);
	g_eli_key_resize(sc);
	sc->sc_provsize = pp->mediasize;

	epp = LIST_FIRST(&sc->sc_geom->provider);
	g_resize_provider(epp, sc->sc_mediasize);
	G_ELI_DEBUG(0, "Device %s size changed from %jd to %jd.", epp->name,
	    (intmax_t)oldsize, (intmax_t)sc->sc_mediasize);
}
449 
/*
 * BIO_READ:
 *	G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
 * BIO_WRITE:
 *	G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
 *
 * Start routine of the geom.  Unsupported commands, and BIO_DELETE when
 * G_ELI_FLAG_NODELETE is set, are completed with EOPNOTSUPP.  Reads on a
 * device without G_ELI_FLAG_AUTH go straight to g_eli_crypto_read(); reads
 * with authentication and all writes are queued for the worker threads.
 * Every other supported command is passed down to the first consumer via
 * the cloned bio.
 */
static void
g_eli_start(struct bio *bp)
{
	struct g_eli_softc *sc;
	struct g_consumer *cp;
	struct bio *cbp;

	sc = bp->bio_to->geom->softc;
	KASSERT(sc != NULL,
	    ("Provider's error should be set (error=%d)(device=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_ELI_LOGREQ(2, bp, "Request received.");

	/* First pass: reject anything we do not support. */
	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_GETATTR:
	case BIO_FLUSH:
	case BIO_ZONE:
	case BIO_SPEEDUP:
		break;
	case BIO_DELETE:
		/*
		 * If the user hasn't set the NODELETE flag, we just pass
		 * it down the stack and let the layers beneath us do (or
		 * not) whatever they do with it.  If they have, we
		 * reject it.  A possible extension would be an
		 * additional flag to take it as a hint to shred the data
		 * with [multiple?] overwrites.
		 */
		if (!(sc->sc_flags & G_ELI_FLAG_NODELETE))
			break;
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	/* Remember the clone and mark the request as not yet processed. */
	bp->bio_driver1 = cbp;
	bp->bio_pflags = 0;
	G_ELI_SET_NEW_BIO(bp->bio_pflags);
	/* Second pass: route the request. */
	switch (bp->bio_cmd) {
	case BIO_READ:
		if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) {
			g_eli_crypto_read(sc, bp, 0);
			break;
		}
		/* FALLTHROUGH */
	case BIO_WRITE:
		/* Queue the request and wake up the worker threads. */
		mtx_lock(&sc->sc_queue_mtx);
		bioq_insert_tail(&sc->sc_queue, bp);
		mtx_unlock(&sc->sc_queue_mtx);
		wakeup(sc);
		break;
	case BIO_GETATTR:
	case BIO_FLUSH:
	case BIO_DELETE:
	case BIO_SPEEDUP:
	case BIO_ZONE:
		/* No crypto involved: forward the clone to our consumer. */
		if (bp->bio_cmd == BIO_GETATTR)
			cbp->bio_done = g_eli_getattr_done;
		else
			cbp->bio_done = g_std_done;
		cp = LIST_FIRST(&sc->sc_geom->consumer);
		cbp->bio_to = cp->provider;
		G_ELI_LOGREQ(2, cbp, "Sending request.");
		g_io_request(cbp, cp);
		break;
	}
}
529 
/*
 * Create a crypto(9) session for the given worker and store it in
 * wr->w_sid.
 *
 * The session parameters are taken from the softc (cipher, key length,
 * and authentication algorithm when G_ELI_FLAG_AUTH is set).  The driver
 * class requested depends on sc->sc_crypto; when it is still
 * G_ELI_CRYPTO_UNKNOWN, both hardware and software are allowed and the
 * result is recorded in the softc.
 *
 * Returns the error from crypto_newsession().
 */
static int
g_eli_newsession(struct g_eli_worker *wr)
{
	struct g_eli_softc *sc;
	struct crypto_session_params csp;
	uint32_t caps;
	int error, new_crypto;
	void *key;

	sc = wr->w_softc;

	memset(&csp, 0, sizeof(csp));
	csp.csp_mode = CSP_MODE_CIPHER;
	csp.csp_cipher_alg = sc->sc_ealgo;
	csp.csp_ivlen = g_eli_ivlen(sc->sc_ealgo);
	csp.csp_cipher_klen = sc->sc_ekeylen / 8;
	/* Double the key length for AES-XTS. */
	if (sc->sc_ealgo == CRYPTO_AES_XTS)
		csp.csp_cipher_klen <<= 1;
	if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) {
		/* Hold a reference on the key for offset 0. */
		key = g_eli_key_hold(sc, 0,
		    LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize);
		csp.csp_cipher_key = key;
	} else {
		key = NULL;
		csp.csp_cipher_key = sc->sc_ekey;
	}
	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
		csp.csp_mode = CSP_MODE_ETA;
		csp.csp_auth_alg = sc->sc_aalgo;
		csp.csp_auth_klen = G_ELI_AUTH_SECKEYLEN;
	}

	switch (sc->sc_crypto) {
	case G_ELI_CRYPTO_SW_ACCEL:
	case G_ELI_CRYPTO_SW:
		error = crypto_newsession(&wr->w_sid, &csp,
		    CRYPTOCAP_F_SOFTWARE);
		break;
	case G_ELI_CRYPTO_HW:
		error = crypto_newsession(&wr->w_sid, &csp,
		    CRYPTOCAP_F_HARDWARE);
		break;
	case G_ELI_CRYPTO_UNKNOWN:
		error = crypto_newsession(&wr->w_sid, &csp,
		    CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE);
		if (error == 0) {
			/* Classify the driver that was actually selected. */
			caps = crypto_ses2caps(wr->w_sid);
			if (caps & CRYPTOCAP_F_HARDWARE)
				new_crypto = G_ELI_CRYPTO_HW;
			else if (caps & CRYPTOCAP_F_ACCEL_SOFTWARE)
				new_crypto = G_ELI_CRYPTO_SW_ACCEL;
			else
				new_crypto = G_ELI_CRYPTO_SW;
			/* Record it only if nobody else did so meanwhile. */
			mtx_lock(&sc->sc_queue_mtx);
			if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN)
				sc->sc_crypto = new_crypto;
			mtx_unlock(&sc->sc_queue_mtx);
		}
		break;
	default:
		panic("%s: invalid condition", __func__);
	}

	if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) {
		/*
		 * On failure release the held key; on success keep the
		 * reference until g_eli_freesession() drops it.
		 */
		if (error)
			g_eli_key_drop(sc, key);
		else
			wr->w_first_key = key;
	}

	return (error);
}
602 
603 static void
604 g_eli_freesession(struct g_eli_worker *wr)
605 {
606 	struct g_eli_softc *sc;
607 
608 	crypto_freesession(wr->w_sid);
609 	if (wr->w_first_key != NULL) {
610 		sc = wr->w_softc;
611 		g_eli_key_drop(sc, wr->w_first_key);
612 		wr->w_first_key = NULL;
613 	}
614 }
615 
616 static void
617 g_eli_cancel(struct g_eli_softc *sc)
618 {
619 	struct bio *bp;
620 
621 	mtx_assert(&sc->sc_queue_mtx, MA_OWNED);
622 
623 	while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) {
624 		KASSERT(G_ELI_IS_NEW_BIO(bp->bio_pflags),
625 		    ("Not new bio when canceling (bp=%p).", bp));
626 		g_io_deliver(bp, ENXIO);
627 	}
628 }
629 
630 static struct bio *
631 g_eli_takefirst(struct g_eli_softc *sc)
632 {
633 	struct bio *bp;
634 
635 	mtx_assert(&sc->sc_queue_mtx, MA_OWNED);
636 
637 	if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND))
638 		return (bioq_takefirst(&sc->sc_queue));
639 	/*
640 	 * Device suspended, so we skip new I/O requests.
641 	 */
642 	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
643 		if (!G_ELI_IS_NEW_BIO(bp->bio_pflags))
644 			break;
645 	}
646 	if (bp != NULL)
647 		bioq_remove(&sc->sc_queue, bp);
648 	return (bp);
649 }
650 
/*
 * This is the main function for kernel worker thread when we don't have
 * hardware acceleration and we have to do cryptography in software.
 * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM
 * threads with crypto work.
 *
 * The thread loops forever taking requests off the softc queue.  With an
 * empty queue it either exits (G_ELI_FLAG_DESTROY), parks itself while the
 * device is suspended (G_ELI_FLAG_SUSPEND), or sleeps until woken.
 */
static void
g_eli_worker(void *arg)
{
	struct g_eli_softc *sc;
	struct g_eli_worker *wr;
	struct bio *bp;
	int error __diagused;

	wr = arg;
	sc = wr->w_softc;
#ifdef EARLY_AP_STARTUP
	MPASS(!sc->sc_cpubind || smp_started);
#elif defined(SMP)
	/* Before sched_bind() to a CPU, wait for all CPUs to go on-line. */
	if (sc->sc_cpubind) {
		while (!smp_started)
			tsleep(wr, 0, "geli:smp", hz / 4);
	}
#endif
	thread_lock(curthread);
	sched_prio(curthread, PUSER);
	if (sc->sc_cpubind)
		sched_bind(curthread, wr->w_number % mp_ncpus);
	thread_unlock(curthread);

	G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm);

	for (;;) {
		mtx_lock(&sc->sc_queue_mtx);
again:
		/* Reached with sc_queue_mtx held. */
		bp = g_eli_takefirst(sc);
		if (bp == NULL) {
			if (sc->sc_flags & G_ELI_FLAG_DESTROY) {
				/*
				 * Device is going away: fail what is left on
				 * the queue, release this worker and exit.
				 */
				g_eli_cancel(sc);
				LIST_REMOVE(wr, w_next);
				g_eli_freesession(wr);
				free(wr, M_ELI);
				G_ELI_DEBUG(1, "Thread %s exiting.",
				    curthread->td_proc->p_comm);
				wakeup(&sc->sc_workers);
				mtx_unlock(&sc->sc_queue_mtx);
				kproc_exit(0);
			}
			while (sc->sc_flags & G_ELI_FLAG_SUSPEND) {
				if (sc->sc_inflight > 0) {
					G_ELI_DEBUG(0, "inflight=%d",
					    sc->sc_inflight);
					/*
					 * We still have inflight BIOs, so
					 * sleep and retry.
					 */
					msleep(sc, &sc->sc_queue_mtx, PRIBIO,
					    "geli:inf", hz / 5);
					goto again;
				}
				/*
				 * Suspend requested, mark the worker as
				 * suspended and go to sleep.
				 */
				if (wr->w_active) {
					g_eli_freesession(wr);
					wr->w_active = FALSE;
				}
				wakeup(&sc->sc_workers);
				msleep(sc, &sc->sc_queue_mtx, PRIBIO,
				    "geli:suspend", 0);
				/*
				 * Woken up with the suspend flag cleared:
				 * create a fresh session before resuming.
				 */
				if (!wr->w_active &&
				    !(sc->sc_flags & G_ELI_FLAG_SUSPEND)) {
					error = g_eli_newsession(wr);
					KASSERT(error == 0,
					    ("g_eli_newsession() failed on resume (error=%d)",
					    error));
					wr->w_active = TRUE;
				}
				goto again;
			}
			/*
			 * Nothing to do.  PDROP leaves the mutex unlocked on
			 * return; the top of the loop takes it again.
			 */
			msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0);
			continue;
		}
		/* Count a new request as inflight before dropping the lock. */
		if (G_ELI_IS_NEW_BIO(bp->bio_pflags))
			atomic_add_int(&sc->sc_inflight, 1);
		mtx_unlock(&sc->sc_queue_mtx);
		if (G_ELI_IS_NEW_BIO(bp->bio_pflags)) {
			G_ELI_SETWORKER(bp->bio_pflags, 0);
			if (sc->sc_flags & G_ELI_FLAG_AUTH) {
				if (bp->bio_cmd == BIO_READ)
					g_eli_auth_read(sc, bp);
				else
					g_eli_auth_run(wr, bp);
			} else {
				if (bp->bio_cmd == BIO_READ)
					g_eli_crypto_read(sc, bp, 1);
				else
					g_eli_crypto_run(wr, bp);
			}
		} else {
			/* Request queued again after its read completed. */
			if (sc->sc_flags & G_ELI_FLAG_AUTH)
				g_eli_auth_run(wr, bp);
			else
				g_eli_crypto_run(wr, bp);
		}
	}
}
760 
/*
 * Read one sector at the given offset from provider pp and decode it as
 * GELI metadata into *md.
 *
 * A temporary geom and consumer are created for the read and destroyed
 * again before returning.  The topology lock must be held on entry; it is
 * dropped around the actual read.
 *
 * Returns 0 on success, otherwise the error from g_attach(), g_access(),
 * g_read_data() or eli_metadata_decode().
 */
static int
g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp,
    off_t offset, struct g_eli_metadata *md)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	u_char *buf = NULL;
	int error;

	g_topology_assert();

	gp = g_new_geomf(mp, "eli:taste");
	gp->start = g_eli_start;
	gp->access = g_std_access;
	/*
	 * g_eli_read_metadata() is always called from the event thread.
	 * Our geom is created and destroyed in the same event, so there
	 * could be no orphan nor spoil event in the meantime.
	 */
	gp->orphan = g_eli_orphan_spoil_assert;
	gp->spoiled = g_eli_orphan_spoil_assert;
	cp = g_new_consumer(gp);
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0)
		goto end;
	error = g_access(cp, 1, 0, 0);
	if (error != 0)
		goto end;
	/* Drop the topology lock for the duration of the read. */
	g_topology_unlock();
	buf = g_read_data(cp, offset, pp->sectorsize, &error);
	g_topology_lock();
	if (buf == NULL)
		goto end;
	error = eli_metadata_decode(buf, md);
	if (error != 0)
		goto end;
	/* Metadata was read and decoded successfully. */
end:
	/* Common cleanup; buf is still NULL if the read never happened. */
	g_free(buf);
	if (cp->provider != NULL) {
		/* Undo the read access only if it was actually granted. */
		if (cp->acr == 1)
			g_access(cp, -1, 0, 0);
		g_detach(cp);
	}
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	return (error);
}
810 
811 int
812 g_eli_read_metadata(struct g_class *mp, struct g_provider *pp,
813     struct g_eli_metadata *md)
814 {
815 
816 	return (g_eli_read_metadata_offset(mp, pp,
817 	    pp->mediasize - pp->sectorsize, md));
818 }
819 
820 /*
821  * The function is called when we had last close on provider and user requested
822  * to close it when this situation occur.
823  */
824 static void
825 g_eli_last_close(void *arg, int flags __unused)
826 {
827 	struct g_geom *gp;
828 	char gpname[64];
829 	int error __diagused;
830 
831 	g_topology_assert();
832 	gp = arg;
833 	strlcpy(gpname, gp->name, sizeof(gpname));
834 	error = g_eli_destroy(gp->softc, TRUE);
835 	KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).",
836 	    gpname, error));
837 	G_ELI_DEBUG(0, "Detached %s on last close.", gpname);
838 }
839 
840 int
841 g_eli_access(struct g_provider *pp, int dr, int dw, int de)
842 {
843 	struct g_eli_softc *sc;
844 	struct g_geom *gp;
845 
846 	gp = pp->geom;
847 	sc = gp->softc;
848 
849 	if (dw > 0) {
850 		if (sc->sc_flags & G_ELI_FLAG_RO) {
851 			/* Deny write attempts. */
852 			return (EROFS);
853 		}
854 		/* Someone is opening us for write, we need to remember that. */
855 		sc->sc_flags |= G_ELI_FLAG_WOPEN;
856 		return (0);
857 	}
858 	/* Is this the last close? */
859 	if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0)
860 		return (0);
861 
862 	/*
863 	 * Automatically detach on last close if requested.
864 	 */
865 	if ((sc->sc_flags & G_ELI_FLAG_RW_DETACH) ||
866 	    (sc->sc_flags & G_ELI_FLAG_WOPEN)) {
867 		g_post_event(g_eli_last_close, gp, M_WAITOK, NULL);
868 	}
869 	return (0);
870 }
871 
/*
 * Report whether the given CPU is set in hlt_cpus_mask.  Always returns 0
 * on kernels built without SMP.
 */
static int
g_eli_cpu_is_disabled(int cpu)
{
	int disabled;

#ifdef SMP
	disabled = CPU_ISSET(cpu, &hlt_cpus_mask);
#else
	disabled = 0;
#endif
	return (disabled);
}
881 
882 static void
883 g_eli_init_uma(void)
884 {
885 
886 	atomic_add_int(&g_eli_devs, 1);
887 	sx_xlock(&g_eli_umalock);
888 	if (g_eli_uma == NULL) {
889 		/*
890 		 * Calculate the maximum-sized swap buffer we are
891 		 * likely to see.
892 		 */
893 		g_eli_alloc_sz = roundup2((PAGE_SIZE + sizeof(int) +
894                     G_ELI_AUTH_SECKEYLEN) * nsw_cluster_max +
895                     sizeof(uintptr_t), PAGE_SIZE);
896 
897 		g_eli_uma = uma_zcreate("GELI buffers", g_eli_alloc_sz,
898 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
899 
900 		/* Reserve and pre-allocate pages, as appropriate. */
901 		uma_zone_reserve(g_eli_uma, g_eli_minbufs);
902 		uma_prealloc(g_eli_uma, g_eli_minbufs);
903 	}
904 	sx_xunlock(&g_eli_umalock);
905 }
906 
907 /*
908  * Try to destroy the UMA pool. This will do nothing if there are existing
909  * GELI devices or existing UMA allocations.
910  */
911 static void
912 g_eli_destroy_uma(void)
913 {
914 	uma_zone_t oldzone;
915 
916 	sx_xlock(&g_eli_umalock);
917 	/* Ensure we really should be destroying this. */
918 	if (atomic_load_int(&g_eli_devs) == 0 &&
919 	    atomic_load_int(&g_eli_umaoutstanding) == 0) {
920 		oldzone = g_eli_uma;
921 		g_eli_uma = NULL;
922 	} else
923 		oldzone = NULL;
924 	sx_xunlock(&g_eli_umalock);
925 
926 	if (oldzone != NULL)
927 		uma_zdestroy(oldzone);
928 }
929 
930 static void
931 g_eli_fini_uma(void)
932 {
933 
934 	/*
935 	 * If this is the last outstanding GELI device, try to
936 	 * destroy the UMA pool.
937 	 */
938 	if (atomic_fetchadd_int(&g_eli_devs, -1) == 1)
939 		g_eli_destroy_uma();
940 }
941 
942 /*
943  * Allocate a data buffer. If the size fits within our swap-sized buffers,
944  * try to allocate a swap-sized buffer from the UMA pool. Otherwise, fall
945  * back to using malloc.
946  *
947  * Swap-related requests are special: they can only use the UMA pool, they
948  * use M_USE_RESERVE to let them dip farther into system resources, and
949  * they always use M_NOWAIT to prevent swap operations from deadlocking.
950  */
951 bool
952 g_eli_alloc_data(struct bio *bp, int sz)
953 {
954 
955 	KASSERT(sz <= g_eli_alloc_sz || (bp->bio_flags & BIO_SWAP) == 0,
956 	    ("BIO_SWAP request for %d bytes exceeds the precalculated buffer"
957 	    " size (%d)", sz, g_eli_alloc_sz));
958 	if (sz <= g_eli_alloc_sz) {
959 		bp->bio_driver2 = uma_zalloc(g_eli_uma, M_NOWAIT |
960 		    ((bp->bio_flags & BIO_SWAP) != 0 ? M_USE_RESERVE : 0));
961 		if (bp->bio_driver2 != NULL) {
962 			bp->bio_pflags |= G_ELI_UMA_ALLOC;
963 			atomic_add_int(&g_eli_umaoutstanding, 1);
964 		}
965 		if (bp->bio_driver2 != NULL || (bp->bio_flags & BIO_SWAP) != 0)
966 			return (bp->bio_driver2 != NULL);
967 	}
968 	bp->bio_pflags &= ~(G_ELI_UMA_ALLOC);
969 	bp->bio_driver2 = malloc(sz, M_ELI, g_eli_blocking_malloc ? M_WAITOK :
970 	    M_NOWAIT);
971 	return (bp->bio_driver2 != NULL);
972 }
973 
974 /*
975  * Free a buffer from bp->bio_driver2 which was allocated with
976  * g_eli_alloc_data(). This function makes sure that the memory is freed
977  * to the correct place.
978  *
979  * Additionally, if this function frees the last outstanding UMA request
980  * and there are no open GELI devices, this will destroy the UMA pool.
981  */
982 void
983 g_eli_free_data(struct bio *bp)
984 {
985 
986 	/*
987 	 * Mimic the free(9) behavior of allowing a NULL pointer to be
988 	 * freed.
989 	 */
990 	if (bp->bio_driver2 == NULL)
991 		return;
992 
993 	if ((bp->bio_pflags & G_ELI_UMA_ALLOC) != 0) {
994 		uma_zfree(g_eli_uma, bp->bio_driver2);
995 		if (atomic_fetchadd_int(&g_eli_umaoutstanding, -1) == 1 &&
996 		    atomic_load_int(&g_eli_devs) == 0)
997 			g_eli_destroy_uma();
998 	} else
999 		free(bp->bio_driver2, M_ELI);
1000 	bp->bio_driver2 = NULL;
1001 }
1002 
1003 struct g_geom *
1004 g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
1005     const struct g_eli_metadata *md, const u_char *mkey, int nkey)
1006 {
1007 	struct g_eli_softc *sc;
1008 	struct g_eli_worker *wr;
1009 	struct g_geom *gp;
1010 	struct g_provider *pp;
1011 	struct g_consumer *cp;
1012 	struct g_geom_alias *gap;
1013 	u_int i, threads;
1014 	int dcw, error;
1015 
1016 	G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX);
1017 	KASSERT(eli_metadata_crypto_supported(md),
1018 	    ("%s: unsupported crypto for %s", __func__, bpp->name));
1019 
1020 	gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX);
1021 	sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO);
1022 	gp->start = g_eli_start;
1023 	/*
1024 	 * Spoiling can happen even though we have the provider open
1025 	 * exclusively, e.g. through media change events.
1026 	 */
1027 	gp->spoiled = g_eli_orphan;
1028 	gp->orphan = g_eli_orphan;
1029 	gp->resize = g_eli_resize;
1030 	gp->dumpconf = g_eli_dumpconf;
1031 	/*
1032 	 * If detach-on-last-close feature is not enabled and we don't operate
1033 	 * on read-only provider, we can simply use g_std_access().
1034 	 */
1035 	if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO))
1036 		gp->access = g_eli_access;
1037 	else
1038 		gp->access = g_std_access;
1039 
1040 	eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize);
1041 	sc->sc_nkey = nkey;
1042 
1043 	gp->softc = sc;
1044 	sc->sc_geom = gp;
1045 
1046 	bioq_init(&sc->sc_queue);
1047 	mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF);
1048 	mtx_init(&sc->sc_ekeys_lock, "geli:ekeys", NULL, MTX_DEF);
1049 
1050 	pp = NULL;
1051 	cp = g_new_consumer(gp);
1052 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
1053 	error = g_attach(cp, bpp);
1054 	if (error != 0) {
1055 		if (req != NULL) {
1056 			gctl_error(req, "Cannot attach to %s (error=%d).",
1057 			    bpp->name, error);
1058 		} else {
1059 			G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).",
1060 			    bpp->name, error);
1061 		}
1062 		goto failed;
1063 	}
1064 	/*
1065 	 * Keep provider open all the time, so we can run critical tasks,
1066 	 * like Master Keys deletion, without wondering if we can open
1067 	 * provider or not.
1068 	 * We don't open provider for writing only when user requested read-only
1069 	 * access.
1070 	 */
1071 	dcw = (sc->sc_flags & G_ELI_FLAG_RO) ? 0 : 1;
1072 	error = g_access(cp, 1, dcw, 1);
1073 	if (error != 0) {
1074 		if (req != NULL) {
1075 			gctl_error(req, "Cannot access %s (error=%d).",
1076 			    bpp->name, error);
1077 		} else {
1078 			G_ELI_DEBUG(1, "Cannot access %s (error=%d).",
1079 			    bpp->name, error);
1080 		}
1081 		goto failed;
1082 	}
1083 
1084 	/*
1085 	 * Remember the keys in our softc structure.
1086 	 */
1087 	g_eli_mkey_propagate(sc, mkey);
1088 
1089 	LIST_INIT(&sc->sc_workers);
1090 
1091 	threads = g_eli_threads;
1092 	if (threads == 0)
1093 		threads = mp_ncpus;
1094 	sc->sc_cpubind = (mp_ncpus > 1 && threads == mp_ncpus);
1095 	g_eli_init_uma();
1096 	for (i = 0; i < threads; i++) {
1097 		if (g_eli_cpu_is_disabled(i)) {
1098 			G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.",
1099 			    bpp->name, i);
1100 			continue;
1101 		}
1102 		wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO);
1103 		wr->w_softc = sc;
1104 		wr->w_number = i;
1105 		wr->w_active = TRUE;
1106 
1107 		error = g_eli_newsession(wr);
1108 		if (error != 0) {
1109 			free(wr, M_ELI);
1110 			if (req != NULL) {
1111 				gctl_error(req, "Cannot set up crypto session "
1112 				    "for %s (error=%d).", bpp->name, error);
1113 			} else {
1114 				G_ELI_DEBUG(1, "Cannot set up crypto session "
1115 				    "for %s (error=%d).", bpp->name, error);
1116 			}
1117 			goto failed;
1118 		}
1119 
1120 		error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0,
1121 		    "g_eli[%u] %s", i, bpp->name);
1122 		if (error != 0) {
1123 			g_eli_freesession(wr);
1124 			free(wr, M_ELI);
1125 			if (req != NULL) {
1126 				gctl_error(req, "Cannot create kernel thread "
1127 				    "for %s (error=%d).", bpp->name, error);
1128 			} else {
1129 				G_ELI_DEBUG(1, "Cannot create kernel thread "
1130 				    "for %s (error=%d).", bpp->name, error);
1131 			}
1132 			goto failed;
1133 		}
1134 		LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next);
1135 	}
1136 
1137 	/*
1138 	 * Create decrypted provider.
1139 	 */
1140 	pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX);
1141 	pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
1142 	if (g_eli_unmapped_io && CRYPTO_HAS_VMPAGE) {
1143 		/*
1144 		 * On DMAP architectures we can use unmapped I/O.  But don't
1145 		 * use it with data integrity verification.  That code hasn't
1146 		 * been written yet.
1147 		 */
1148 		 if ((sc->sc_flags & G_ELI_FLAG_AUTH) == 0)
1149 			pp->flags |= G_PF_ACCEPT_UNMAPPED;
1150 	}
1151 	pp->mediasize = sc->sc_mediasize;
1152 	pp->sectorsize = sc->sc_sectorsize;
1153 	LIST_FOREACH(gap, &bpp->aliases, ga_next)
1154 		g_provider_add_alias(pp, "%s%s", gap->ga_alias, G_ELI_SUFFIX);
1155 
1156 	g_error_provider(pp, 0);
1157 
1158 	G_ELI_DEBUG(0, "Device %s created.", pp->name);
1159 	G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo),
1160 	    sc->sc_ekeylen);
1161 	if (sc->sc_flags & G_ELI_FLAG_AUTH)
1162 		G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo));
1163 	G_ELI_DEBUG(0, "    Crypto: %s",
1164 	    sc->sc_crypto == G_ELI_CRYPTO_SW_ACCEL ? "accelerated software" :
1165 	    sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware");
1166 	return (gp);
1167 failed:
1168 	mtx_lock(&sc->sc_queue_mtx);
1169 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
1170 	wakeup(sc);
1171 	/*
1172 	 * Wait for kernel threads self destruction.
1173 	 */
1174 	while (!LIST_EMPTY(&sc->sc_workers)) {
1175 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
1176 		    "geli:destroy", 0);
1177 	}
1178 	mtx_destroy(&sc->sc_queue_mtx);
1179 	if (cp->provider != NULL) {
1180 		if (cp->acr == 1)
1181 			g_access(cp, -1, -dcw, -1);
1182 		g_detach(cp);
1183 	}
1184 	g_destroy_consumer(cp);
1185 	g_destroy_geom(gp);
1186 	g_eli_key_destroy(sc);
1187 	g_eli_fini_uma();
1188 	zfree(sc, M_ELI);
1189 	return (NULL);
1190 }
1191 
1192 int
1193 g_eli_destroy(struct g_eli_softc *sc, boolean_t force)
1194 {
1195 	struct g_geom *gp;
1196 	struct g_provider *pp;
1197 
1198 	g_topology_assert();
1199 
1200 	if (sc == NULL)
1201 		return (ENXIO);
1202 
1203 	gp = sc->sc_geom;
1204 	pp = LIST_FIRST(&gp->provider);
1205 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
1206 		if (force) {
1207 			G_ELI_DEBUG(1, "Device %s is still open, so it "
1208 			    "cannot be definitely removed.", pp->name);
1209 			sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
1210 			gp->access = g_eli_access;
1211 			g_wither_provider(pp, ENXIO);
1212 			return (EBUSY);
1213 		} else {
1214 			G_ELI_DEBUG(1,
1215 			    "Device %s is still open (r%dw%de%d).", pp->name,
1216 			    pp->acr, pp->acw, pp->ace);
1217 			return (EBUSY);
1218 		}
1219 	}
1220 
1221 	mtx_lock(&sc->sc_queue_mtx);
1222 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
1223 	wakeup(sc);
1224 	while (!LIST_EMPTY(&sc->sc_workers)) {
1225 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
1226 		    "geli:destroy", 0);
1227 	}
1228 	mtx_destroy(&sc->sc_queue_mtx);
1229 	gp->softc = NULL;
1230 	g_eli_key_destroy(sc);
1231 	g_eli_fini_uma();
1232 	zfree(sc, M_ELI);
1233 
1234 	G_ELI_DEBUG(0, "Device %s destroyed.", gp->name);
1235 	g_wither_geom_close(gp, ENXIO);
1236 
1237 	return (0);
1238 }
1239 
1240 static int
1241 g_eli_destroy_geom(struct gctl_req *req __unused,
1242     struct g_class *mp __unused, struct g_geom *gp)
1243 {
1244 	struct g_eli_softc *sc;
1245 
1246 	sc = gp->softc;
1247 	return (g_eli_destroy(sc, FALSE));
1248 }
1249 
1250 static int
1251 g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider)
1252 {
1253 	u_char *keyfile, *data;
1254 	char *file, name[64];
1255 	size_t size;
1256 	int i;
1257 
1258 	for (i = 0; ; i++) {
1259 		snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i);
1260 		keyfile = preload_search_by_type(name);
1261 		if (keyfile == NULL && i == 0) {
1262 			/*
1263 			 * If there is only one keyfile, allow simpler name.
1264 			 */
1265 			snprintf(name, sizeof(name), "%s:geli_keyfile", provider);
1266 			keyfile = preload_search_by_type(name);
1267 		}
1268 		if (keyfile == NULL)
1269 			return (i);	/* Return number of loaded keyfiles. */
1270 		data = preload_fetch_addr(keyfile);
1271 		if (data == NULL) {
1272 			G_ELI_DEBUG(0, "Cannot find key file data for %s.",
1273 			    name);
1274 			return (0);
1275 		}
1276 		size = preload_fetch_size(keyfile);
1277 		if (size == 0) {
1278 			G_ELI_DEBUG(0, "Cannot find key file size for %s.",
1279 			    name);
1280 			return (0);
1281 		}
1282 		file = preload_search_info(keyfile, MODINFO_NAME);
1283 		if (file == NULL) {
1284 			G_ELI_DEBUG(0, "Cannot find key file name for %s.",
1285 			    name);
1286 			return (0);
1287 		}
1288 		G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", file,
1289 		    provider, name);
1290 		g_eli_crypto_hmac_update(ctx, data, size);
1291 	}
1292 }
1293 
1294 static void
1295 g_eli_keyfiles_clear(const char *provider)
1296 {
1297 	u_char *keyfile, *data;
1298 	char name[64];
1299 	size_t size;
1300 	int i;
1301 
1302 	for (i = 0; ; i++) {
1303 		snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i);
1304 		keyfile = preload_search_by_type(name);
1305 		if (keyfile == NULL)
1306 			return;
1307 		data = preload_fetch_addr(keyfile);
1308 		size = preload_fetch_size(keyfile);
1309 		if (data != NULL && size != 0)
1310 			explicit_bzero(data, size);
1311 	}
1312 }
1313 
1314 /*
1315  * Tasting is only made on boot.
1316  * We detect providers which should be attached before root is mounted.
1317  */
1318 static struct g_geom *
1319 g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
1320 {
1321 	struct g_eli_metadata md;
1322 	struct g_geom *gp;
1323 	struct hmac_ctx ctx;
1324 	char passphrase[256];
1325 	u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN];
1326 	u_int i, nkey, nkeyfiles, tries, showpass;
1327 	int error;
1328         struct keybuf *keybuf;
1329 
1330 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
1331 	g_topology_assert();
1332 
1333 	if (root_mounted() || g_eli_tries == 0)
1334 		return (NULL);
1335 
1336 	G_ELI_DEBUG(3, "Tasting %s.", pp->name);
1337 
1338 	error = g_eli_read_metadata(mp, pp, &md);
1339 	if (error != 0)
1340 		return (NULL);
1341 	gp = NULL;
1342 
1343 	if (strcmp(md.md_magic, G_ELI_MAGIC) != 0)
1344 		return (NULL);
1345 	if (md.md_version > G_ELI_VERSION) {
1346 		printf("geom_eli.ko module is too old to handle %s.\n",
1347 		    pp->name);
1348 		return (NULL);
1349 	}
1350 	if (md.md_provsize != pp->mediasize)
1351 		return (NULL);
1352 	/* Should we attach it on boot? */
1353 	if (!(md.md_flags & G_ELI_FLAG_BOOT) &&
1354 	    !(md.md_flags & G_ELI_FLAG_GELIBOOT))
1355 		return (NULL);
1356 	if (md.md_keys == 0x00) {
1357 		G_ELI_DEBUG(0, "No valid keys on %s.", pp->name);
1358 		return (NULL);
1359 	}
1360 	if (!eli_metadata_crypto_supported(&md)) {
1361 		G_ELI_DEBUG(0, "%s uses invalid or unsupported algorithms\n",
1362 		    pp->name);
1363 		return (NULL);
1364 	}
1365 	if (md.md_iterations == -1) {
1366 		/* If there is no passphrase, we try only once. */
1367 		tries = 1;
1368 	} else {
1369 		/* Ask for the passphrase no more than g_eli_tries times. */
1370 		tries = g_eli_tries;
1371 	}
1372 
1373         if ((keybuf = get_keybuf()) != NULL) {
1374                 /* Scan the key buffer, try all GELI keys. */
1375                 for (i = 0; i < keybuf->kb_nents; i++) {
1376                          if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) {
1377                                  memcpy(key, keybuf->kb_ents[i].ke_data,
1378                                      sizeof(key));
1379 
1380                                  if (g_eli_mkey_decrypt_any(&md, key,
1381                                      mkey, &nkey) == 0 ) {
1382                                          explicit_bzero(key, sizeof(key));
1383                                          goto have_key;
1384                                  }
1385                          }
1386                 }
1387         }
1388 
1389         for (i = 0; i <= tries; i++) {
1390                 g_eli_crypto_hmac_init(&ctx, NULL, 0);
1391 
1392                 /*
1393                  * Load all key files.
1394                  */
1395                 nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name);
1396 
1397                 if (nkeyfiles == 0 && md.md_iterations == -1) {
1398                         /*
1399                          * No key files and no passphrase, something is
1400                          * definitely wrong here.
1401                          * geli(8) doesn't allow for such situation, so assume
1402                          * that there was really no passphrase and in that case
1403                          * key files are no properly defined in loader.conf.
1404                          */
1405                         G_ELI_DEBUG(0,
1406                             "Found no key files in loader.conf for %s.",
1407                             pp->name);
1408                         return (NULL);
1409                 }
1410 
1411                 /* Ask for the passphrase if defined. */
1412                 if (md.md_iterations >= 0) {
1413                         /* Try first with cached passphrase. */
1414                         if (i == 0) {
1415                                 if (!g_eli_boot_passcache)
1416                                         continue;
1417                                 memcpy(passphrase, cached_passphrase,
1418                                     sizeof(passphrase));
1419                         } else {
1420                                 printf("Enter passphrase for %s: ", pp->name);
1421 				showpass = g_eli_visible_passphrase;
1422 				if ((md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS) != 0)
1423 					showpass = GETS_ECHOPASS;
1424                                 cngets(passphrase, sizeof(passphrase),
1425 				    showpass);
1426                                 memcpy(cached_passphrase, passphrase,
1427                                     sizeof(passphrase));
1428                         }
1429                 }
1430 
1431                 /*
1432                  * Prepare Derived-Key from the user passphrase.
1433                  */
1434                 if (md.md_iterations == 0) {
1435                         g_eli_crypto_hmac_update(&ctx, md.md_salt,
1436                             sizeof(md.md_salt));
1437                         g_eli_crypto_hmac_update(&ctx, passphrase,
1438                             strlen(passphrase));
1439                         explicit_bzero(passphrase, sizeof(passphrase));
1440                 } else if (md.md_iterations > 0) {
1441                         u_char dkey[G_ELI_USERKEYLEN];
1442 
1443                         pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt,
1444                             sizeof(md.md_salt), passphrase, md.md_iterations);
1445                         explicit_bzero(passphrase, sizeof(passphrase));
1446                         g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey));
1447                         explicit_bzero(dkey, sizeof(dkey));
1448                 }
1449 
1450                 g_eli_crypto_hmac_final(&ctx, key, 0);
1451 
1452                 /*
1453                  * Decrypt Master-Key.
1454                  */
1455                 error = g_eli_mkey_decrypt_any(&md, key, mkey, &nkey);
1456                 explicit_bzero(key, sizeof(key));
1457                 if (error == -1) {
1458                         if (i == tries) {
1459                                 G_ELI_DEBUG(0,
1460                                     "Wrong key for %s. No tries left.",
1461                                     pp->name);
1462                                 g_eli_keyfiles_clear(pp->name);
1463                                 return (NULL);
1464                         }
1465                         if (i > 0) {
1466                                 G_ELI_DEBUG(0,
1467                                     "Wrong key for %s. Tries left: %u.",
1468                                     pp->name, tries - i);
1469                         }
1470                         /* Try again. */
1471                         continue;
1472                 } else if (error > 0) {
1473                         G_ELI_DEBUG(0,
1474                             "Cannot decrypt Master Key for %s (error=%d).",
1475                             pp->name, error);
1476                         g_eli_keyfiles_clear(pp->name);
1477                         return (NULL);
1478                 }
1479                 g_eli_keyfiles_clear(pp->name);
1480                 G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name);
1481                 break;
1482         }
1483 have_key:
1484 
1485 	/*
1486 	 * We have correct key, let's attach provider.
1487 	 */
1488 	gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey);
1489 	explicit_bzero(mkey, sizeof(mkey));
1490 	explicit_bzero(&md, sizeof(md));
1491 	if (gp == NULL) {
1492 		G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name,
1493 		    G_ELI_SUFFIX);
1494 		return (NULL);
1495 	}
1496 	return (gp);
1497 }
1498 
1499 static void
1500 g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1501     struct g_consumer *cp, struct g_provider *pp)
1502 {
1503 	struct g_eli_softc *sc;
1504 
1505 	g_topology_assert();
1506 	sc = gp->softc;
1507 	if (sc == NULL)
1508 		return;
1509 	if (pp != NULL || cp != NULL)
1510 		return;	/* Nothing here. */
1511 
1512 	sbuf_printf(sb, "%s<KeysTotal>%ju</KeysTotal>\n", indent,
1513 	    (uintmax_t)sc->sc_ekeys_total);
1514 	sbuf_printf(sb, "%s<KeysAllocated>%ju</KeysAllocated>\n", indent,
1515 	    (uintmax_t)sc->sc_ekeys_allocated);
1516 	sbuf_printf(sb, "%s<Flags>", indent);
1517 	if (sc->sc_flags == 0)
1518 		sbuf_cat(sb, "NONE");
1519 	else {
1520 		int first = 1;
1521 
1522 #define ADD_FLAG(flag, name)	do {					\
1523 	if (sc->sc_flags & (flag)) {					\
1524 		if (!first)						\
1525 			sbuf_cat(sb, ", ");				\
1526 		else							\
1527 			first = 0;					\
1528 		sbuf_cat(sb, name);					\
1529 	}								\
1530 } while (0)
1531 		ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND");
1532 		ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY");
1533 		ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER");
1534 		ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME");
1535 		ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT");
1536 		ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH");
1537 		ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH");
1538 		ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH");
1539 		ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN");
1540 		ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY");
1541 		ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY");
1542 		ADD_FLAG(G_ELI_FLAG_NODELETE, "NODELETE");
1543 		ADD_FLAG(G_ELI_FLAG_GELIBOOT, "GELIBOOT");
1544 		ADD_FLAG(G_ELI_FLAG_GELIDISPLAYPASS, "GELIDISPLAYPASS");
1545 		ADD_FLAG(G_ELI_FLAG_AUTORESIZE, "AUTORESIZE");
1546 #undef  ADD_FLAG
1547 	}
1548 	sbuf_cat(sb, "</Flags>\n");
1549 
1550 	if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) {
1551 		sbuf_printf(sb, "%s<UsedKey>%u</UsedKey>\n", indent,
1552 		    sc->sc_nkey);
1553 	}
1554 	sbuf_printf(sb, "%s<Version>%u</Version>\n", indent, sc->sc_version);
1555 	sbuf_printf(sb, "%s<Crypto>", indent);
1556 	switch (sc->sc_crypto) {
1557 	case G_ELI_CRYPTO_HW:
1558 		sbuf_cat(sb, "hardware");
1559 		break;
1560 	case G_ELI_CRYPTO_SW:
1561 		sbuf_cat(sb, "software");
1562 		break;
1563 	case G_ELI_CRYPTO_SW_ACCEL:
1564 		sbuf_cat(sb, "accelerated software");
1565 		break;
1566 	default:
1567 		sbuf_cat(sb, "UNKNOWN");
1568 		break;
1569 	}
1570 	sbuf_cat(sb, "</Crypto>\n");
1571 	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
1572 		sbuf_printf(sb,
1573 		    "%s<AuthenticationAlgorithm>%s</AuthenticationAlgorithm>\n",
1574 		    indent, g_eli_algo2str(sc->sc_aalgo));
1575 	}
1576 	sbuf_printf(sb, "%s<KeyLength>%u</KeyLength>\n", indent,
1577 	    sc->sc_ekeylen);
1578 	sbuf_printf(sb, "%s<EncryptionAlgorithm>%s</EncryptionAlgorithm>\n",
1579 	    indent, g_eli_algo2str(sc->sc_ealgo));
1580 	sbuf_printf(sb, "%s<State>%s</State>\n", indent,
1581 	    (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE");
1582 }
1583 
1584 static void
1585 g_eli_shutdown_pre_sync(void *arg, int howto)
1586 {
1587 	struct g_class *mp;
1588 	struct g_geom *gp, *gp2;
1589 	struct g_provider *pp;
1590 	struct g_eli_softc *sc;
1591 
1592 	mp = arg;
1593 	g_topology_lock();
1594 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
1595 		sc = gp->softc;
1596 		if (sc == NULL)
1597 			continue;
1598 		pp = LIST_FIRST(&gp->provider);
1599 		KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name));
1600 		if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0 ||
1601 		    SCHEDULER_STOPPED())
1602 		{
1603 			sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
1604 			gp->access = g_eli_access;
1605 		} else {
1606 			(void) g_eli_destroy(sc, TRUE);
1607 		}
1608 	}
1609 	g_topology_unlock();
1610 }
1611 
1612 static void
1613 g_eli_init(struct g_class *mp)
1614 {
1615 
1616 	g_eli_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
1617 	    g_eli_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
1618 	if (g_eli_pre_sync == NULL)
1619 		G_ELI_DEBUG(0, "Warning! Cannot register shutdown event.");
1620 }
1621 
1622 static void
1623 g_eli_fini(struct g_class *mp)
1624 {
1625 
1626 	if (g_eli_pre_sync != NULL)
1627 		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_eli_pre_sync);
1628 }
1629 
1630 DECLARE_GEOM_CLASS(g_eli_class, g_eli);
1631 MODULE_DEPEND(g_eli, crypto, 1, 1, 1);
1632 MODULE_VERSION(geom_eli, 0);
1633