xref: /freebsd/sys/geom/eli/g_eli.c (revision 8311bc5f17dec348749f763b82dfe2737bc53cd7)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2005-2019 Pawel Jakub Dawidek <pawel@dawidek.net>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/cons.h>
32 #include <sys/kenv.h>
33 #include <sys/kernel.h>
34 #include <sys/linker.h>
35 #include <sys/module.h>
36 #include <sys/lock.h>
37 #include <sys/mutex.h>
38 #include <sys/bio.h>
39 #include <sys/sbuf.h>
40 #include <sys/sysctl.h>
41 #include <sys/malloc.h>
42 #include <sys/eventhandler.h>
43 #include <sys/kthread.h>
44 #include <sys/proc.h>
45 #include <sys/sched.h>
46 #include <sys/smp.h>
47 #include <sys/uio.h>
48 #include <sys/vnode.h>
49 
50 #include <machine/vmparam.h>
51 
52 #include <vm/uma.h>
53 #include <vm/vm.h>
54 #include <vm/swap_pager.h>
55 
56 #include <geom/geom.h>
57 #include <geom/geom_dbg.h>
58 #include <geom/eli/g_eli.h>
59 #include <geom/eli/pkcs5v2.h>
60 
61 #include <crypto/intake.h>
62 
/* Module feature string and the malloc type used for GELI allocations. */
63 FEATURE(geom_eli, "GEOM crypto module");
64 
65 MALLOC_DEFINE(M_ELI, "eli_data", "GEOM_ELI Data");
66 
/*
 * The kern.geom.eli sysctl subtree.  Each knob below is a variable paired
 * with the SYSCTL_* declaration exporting it; the description string of
 * every declaration states what the knob controls.
 */
67 SYSCTL_DECL(_kern_geom);
68 SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
69     "GEOM_ELI stuff");
70 static int g_eli_version = G_ELI_VERSION;
71 SYSCTL_INT(_kern_geom_eli, OID_AUTO, version, CTLFLAG_RD, &g_eli_version, 0,
72     "GELI version");
73 int g_eli_debug = 0;
74 SYSCTL_INT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RWTUN, &g_eli_debug, 0,
75     "Debug level");
76 static u_int g_eli_tries = 3;
77 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RWTUN, &g_eli_tries, 0,
78     "Number of tries for entering the passphrase");
79 static u_int g_eli_visible_passphrase = GETS_NOECHO;
80 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RWTUN,
81     &g_eli_visible_passphrase, 0,
82     "Visibility of passphrase prompt (0 = invisible, 1 = visible, 2 = asterisk)");
83 u_int g_eli_overwrites = G_ELI_OVERWRITES;
84 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RWTUN, &g_eli_overwrites,
85     0, "Number of times on-disk keys should be overwritten when destroying them");
/* A value of 0 makes g_eli_create() start one worker per CPU (mp_ncpus). */
86 static u_int g_eli_threads = 0;
87 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RWTUN, &g_eli_threads, 0,
88     "Number of threads doing crypto work");
89 u_int g_eli_batch = 0;
90 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RWTUN, &g_eli_batch, 0,
91     "Use crypto operations batching");
/*
 * minbufs goes through a handler instead of a plain SYSCTL_UINT because a
 * change has to be applied to the UMA zone as well (sysctl_g_eli_minbufs()).
 */
92 static u_int g_eli_minbufs = 16;
93 static int sysctl_g_eli_minbufs(SYSCTL_HANDLER_ARGS);
94 SYSCTL_PROC(_kern_geom_eli, OID_AUTO, minbufs, CTLTYPE_UINT | CTLFLAG_RW |
95     CTLFLAG_MPSAFE, NULL, 0, sysctl_g_eli_minbufs, "IU",
96     "Number of GELI bufs reserved for swap transactions");
/* Selects M_WAITOK over M_NOWAIT for the malloc(9) path of g_eli_alloc_data(). */
97 static bool g_eli_blocking_malloc = false;
98 SYSCTL_BOOL(_kern_geom_eli, OID_AUTO, blocking_malloc, CTLFLAG_RWTUN,
99     &g_eli_blocking_malloc, 0, "Use blocking malloc calls for GELI buffers");
100 static bool g_eli_unmapped_io = true;
101 SYSCTL_BOOL(_kern_geom_eli, OID_AUTO, unmapped_io, CTLFLAG_RDTUN,
102     &g_eli_unmapped_io, 0, "Enable support for unmapped I/O");
103 
103 
/*
 * State of the shared "GELI buffers" UMA zone.  The zone is created by
 * g_eli_init_uma() and torn down by g_eli_destroy_uma() once no devices
 * and no outstanding zone allocations remain.
 */
104 static struct sx g_eli_umalock;	/* Controls changes to UMA zone. */
105 SX_SYSINIT(g_eli_umalock, &g_eli_umalock, "GELI UMA");
/* The zone itself; NULL whenever it does not currently exist. */
106 static uma_zone_t g_eli_uma = NULL;
/* Item size of the zone, computed in g_eli_init_uma(). */
107 static int g_eli_alloc_sz;
/* Zone items handed out by g_eli_alloc_data() and not yet freed. */
108 static volatile int g_eli_umaoutstanding;
/* Count maintained by g_eli_init_uma() (+1) and g_eli_fini_uma() (-1). */
109 static volatile int g_eli_devs;
110 
111 /*
112  * Control the number of reserved entries in the GELI zone.
113  * If the GELI zone has already been allocated, update the zone. Otherwise,
114  * simply update the variable for use the next time the zone is created.
115  */
116 static int
117 sysctl_g_eli_minbufs(SYSCTL_HANDLER_ARGS)
118 {
119 	int error;
120 	u_int new;
121 
122 	new = g_eli_minbufs;
123 	error = sysctl_handle_int(oidp, &new, 0, req);
124 	if (error != 0 || req->newptr == NULL)
125 		return (error);
126 	sx_xlock(&g_eli_umalock);
127 	if (g_eli_uma != NULL) {
128 		if (new != g_eli_minbufs)
129 			uma_zone_reserve(g_eli_uma, new);
130 		if (new > g_eli_minbufs)
131 			uma_prealloc(g_eli_uma, new - g_eli_minbufs);
132 	}
133 	if (new != g_eli_minbufs)
134 		g_eli_minbufs = new;
135 	sx_xunlock(&g_eli_umalock);
136 	return (0);
137 }
138 
139 /*
140  * Passphrase cached during boot, in order to be more user-friendly if
141  * there are multiple providers using the same passphrase.
142  */
143 static char cached_passphrase[256];
144 static u_int g_eli_boot_passcache = 1;
145 TUNABLE_INT("kern.geom.eli.boot_passcache", &g_eli_boot_passcache);
146 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, boot_passcache, CTLFLAG_RD,
147     &g_eli_boot_passcache, 0,
148     "Passphrases are cached during boot process for possible reuse");
149 static void
150 fetch_loader_passphrase(void * dummy)
151 {
152 	char * env_passphrase;
153 
154 	KASSERT(dynamic_kenv, ("need dynamic kenv"));
155 
156 	if ((env_passphrase = kern_getenv("kern.geom.eli.passphrase")) != NULL) {
157 		/* Extract passphrase from the environment. */
158 		strlcpy(cached_passphrase, env_passphrase,
159 		    sizeof(cached_passphrase));
160 		freeenv(env_passphrase);
161 
162 		/* Wipe the passphrase from the environment. */
163 		kern_unsetenv("kern.geom.eli.passphrase");
164 	}
165 }
166 SYSINIT(geli_fetch_loader_passphrase, SI_SUB_KMEM + 1, SI_ORDER_ANY,
167     fetch_loader_passphrase, NULL);
168 
169 static void
170 zero_boot_passcache(void)
171 {
172 
173         explicit_bzero(cached_passphrase, sizeof(cached_passphrase));
174 }
175 
176 static void
177 zero_geli_intake_keys(void)
178 {
179         struct keybuf *keybuf;
180         int i;
181 
182         if ((keybuf = get_keybuf()) != NULL) {
183                 /* Scan the key buffer, clear all GELI keys. */
184                 for (i = 0; i < keybuf->kb_nents; i++) {
185                          if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) {
186                                  explicit_bzero(keybuf->kb_ents[i].ke_data,
187                                      sizeof(keybuf->kb_ents[i].ke_data));
188                                  keybuf->kb_ents[i].ke_type = KEYBUF_TYPE_NONE;
189                          }
190                 }
191         }
192 }
193 
194 static void
195 zero_intake_passcache(void *dummy)
196 {
197         zero_boot_passcache();
198         zero_geli_intake_keys();
199 }
200 EVENTHANDLER_DEFINE(mountroot, zero_intake_passcache, NULL, 0);
201 
/*
 * Event handler tag, initially NULL.
 * NOTE(review): presumably set when a pre-sync shutdown handler is
 * registered in g_eli_init() -- that code is not visible here, confirm.
 */
202 static eventhandler_tag g_eli_pre_sync = NULL;
203 
/* Forward declaration: g_eli_resize() calls this before its definition. */
204 static int g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp,
205     off_t offset, struct g_eli_metadata *md);
206 
/* Class methods referenced by g_eli_class below. */
207 static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp,
208     struct g_geom *gp);
209 static void g_eli_init(struct g_class *mp);
210 static void g_eli_fini(struct g_class *mp);
211 
212 static g_taste_t g_eli_taste;
213 static g_dumpconf_t g_eli_dumpconf;
214 
/*
 * The GEOM class descriptor for GELI: it names the class and wires up its
 * control-request, taste, destroy, init and fini methods.
 */
215 struct g_class g_eli_class = {
216 	.name = G_ELI_CLASS_NAME,
217 	.version = G_VERSION,
218 	.ctlreq = g_eli_config,
219 	.taste = g_eli_taste,
220 	.destroy_geom = g_eli_destroy_geom,
221 	.init = g_eli_init,
222 	.fini = g_eli_fini
223 };
224 
225 /*
226  * Code paths:
227  * BIO_READ:
228  *	g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
229  * BIO_WRITE:
230  *	g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
231  */
232 
233 /*
234  * EAGAIN from crypto(9) means, that we were probably balanced to another crypto
235  * accelerator or something like this.
236  * The function updates the SID and rerun the operation.
237  */
238 int
239 g_eli_crypto_rerun(struct cryptop *crp)
240 {
241 	struct g_eli_softc *sc;
242 	struct g_eli_worker *wr;
243 	struct bio *bp;
244 	int error;
245 
246 	bp = (struct bio *)crp->crp_opaque;
247 	sc = bp->bio_to->geom->softc;
248 	LIST_FOREACH(wr, &sc->sc_workers, w_next) {
249 		if (wr->w_number == G_ELI_WORKER(bp->bio_pflags))
250 			break;
251 	}
252 	KASSERT(wr != NULL, ("Invalid worker (%u).",
253 	    G_ELI_WORKER(bp->bio_pflags)));
254 	G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %p -> %p).",
255 	    bp->bio_cmd == BIO_READ ? "READ" : "WRITE", wr->w_sid,
256 	    crp->crp_session);
257 	wr->w_sid = crp->crp_session;
258 	crp->crp_etype = 0;
259 	error = crypto_dispatch(crp);
260 	if (error == 0)
261 		return (0);
262 	G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error);
263 	crp->crp_etype = error;
264 	return (error);
265 }
266 
267 static void
268 g_eli_getattr_done(struct bio *bp)
269 {
270 	if (bp->bio_error == 0 &&
271 	    !strcmp(bp->bio_attribute, "GEOM::physpath")) {
272 		strlcat(bp->bio_data, "/eli", bp->bio_length);
273 	}
274 	g_std_done(bp);
275 }
276 
277 /*
278  * The function is called afer reading encrypted data from the provider.
279  *
280  * g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
281  */
282 void
283 g_eli_read_done(struct bio *bp)
284 {
285 	struct g_eli_softc *sc;
286 	struct bio *pbp;
287 
288 	G_ELI_LOGREQ(2, bp, "Request done.");
289 	pbp = bp->bio_parent;
290 	if (pbp->bio_error == 0 && bp->bio_error != 0)
291 		pbp->bio_error = bp->bio_error;
292 	g_destroy_bio(bp);
293 	/*
294 	 * Do we have all sectors already?
295 	 */
296 	pbp->bio_inbed++;
297 	if (pbp->bio_inbed < pbp->bio_children)
298 		return;
299 	sc = pbp->bio_to->geom->softc;
300 	if (pbp->bio_error != 0) {
301 		G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__,
302 		    pbp->bio_error);
303 		pbp->bio_completed = 0;
304 		g_eli_free_data(pbp);
305 		g_io_deliver(pbp, pbp->bio_error);
306 		if (sc != NULL)
307 			atomic_subtract_int(&sc->sc_inflight, 1);
308 		return;
309 	}
310 	mtx_lock(&sc->sc_queue_mtx);
311 	bioq_insert_tail(&sc->sc_queue, pbp);
312 	mtx_unlock(&sc->sc_queue_mtx);
313 	wakeup(sc);
314 }
315 
316 /*
317  * The function is called after we encrypt and write data.
318  *
319  * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver
320  */
321 void
322 g_eli_write_done(struct bio *bp)
323 {
324 	struct g_eli_softc *sc;
325 	struct bio *pbp;
326 
327 	G_ELI_LOGREQ(2, bp, "Request done.");
328 	pbp = bp->bio_parent;
329 	if (pbp->bio_error == 0 && bp->bio_error != 0)
330 		pbp->bio_error = bp->bio_error;
331 	g_destroy_bio(bp);
332 	/*
333 	 * Do we have all sectors already?
334 	 */
335 	pbp->bio_inbed++;
336 	if (pbp->bio_inbed < pbp->bio_children)
337 		return;
338 	sc = pbp->bio_to->geom->softc;
339 	g_eli_free_data(pbp);
340 	if (pbp->bio_error != 0) {
341 		G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__,
342 		    pbp->bio_error);
343 		pbp->bio_completed = 0;
344 	} else
345 		pbp->bio_completed = pbp->bio_length;
346 
347 	/*
348 	 * Write is finished, send it up.
349 	 */
350 	g_io_deliver(pbp, pbp->bio_error);
351 	if (sc != NULL)
352 		atomic_subtract_int(&sc->sc_inflight, 1);
353 }
354 
355 /*
356  * This function should never be called, but GEOM made as it set ->orphan()
357  * method for every geom.
358  */
359 static void
360 g_eli_orphan_spoil_assert(struct g_consumer *cp)
361 {
362 
363 	panic("Function %s() called for %s.", __func__, cp->geom->name);
364 }
365 
366 static void
367 g_eli_orphan(struct g_consumer *cp)
368 {
369 	struct g_eli_softc *sc;
370 
371 	g_topology_assert();
372 	sc = cp->geom->softc;
373 	if (sc == NULL)
374 		return;
375 	g_eli_destroy(sc, TRUE);
376 }
377 
378 static void
379 g_eli_resize(struct g_consumer *cp)
380 {
381 	struct g_eli_softc *sc;
382 	struct g_provider *epp, *pp;
383 	off_t oldsize;
384 
385 	g_topology_assert();
386 	sc = cp->geom->softc;
387 	if (sc == NULL)
388 		return;
389 
390 	if ((sc->sc_flags & G_ELI_FLAG_AUTORESIZE) == 0) {
391 		G_ELI_DEBUG(0, "Autoresize is turned off, old size: %jd.",
392 		    (intmax_t)sc->sc_provsize);
393 		return;
394 	}
395 
396 	pp = cp->provider;
397 
398 	if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0) {
399 		struct g_eli_metadata md;
400 		u_char *sector;
401 		int error;
402 
403 		sector = NULL;
404 
405 		error = g_eli_read_metadata_offset(cp->geom->class, pp,
406 		    sc->sc_provsize - pp->sectorsize, &md);
407 		if (error != 0) {
408 			G_ELI_DEBUG(0, "Cannot read metadata from %s (error=%d).",
409 			    pp->name, error);
410 			goto iofail;
411 		}
412 
413 		md.md_provsize = pp->mediasize;
414 
415 		sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO);
416 		eli_metadata_encode(&md, sector);
417 		error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector,
418 		    pp->sectorsize);
419 		if (error != 0) {
420 			G_ELI_DEBUG(0, "Cannot store metadata on %s (error=%d).",
421 			    pp->name, error);
422 			goto iofail;
423 		}
424 		explicit_bzero(sector, pp->sectorsize);
425 		error = g_write_data(cp, sc->sc_provsize - pp->sectorsize,
426 		    sector, pp->sectorsize);
427 		if (error != 0) {
428 			G_ELI_DEBUG(0, "Cannot clear old metadata from %s (error=%d).",
429 			    pp->name, error);
430 			goto iofail;
431 		}
432 iofail:
433 		explicit_bzero(&md, sizeof(md));
434 		zfree(sector, M_ELI);
435 	}
436 
437 	oldsize = sc->sc_mediasize;
438 	sc->sc_mediasize = eli_mediasize(sc, pp->mediasize, pp->sectorsize);
439 	g_eli_key_resize(sc);
440 	sc->sc_provsize = pp->mediasize;
441 
442 	epp = LIST_FIRST(&sc->sc_geom->provider);
443 	g_resize_provider(epp, sc->sc_mediasize);
444 	G_ELI_DEBUG(0, "Device %s size changed from %jd to %jd.", epp->name,
445 	    (intmax_t)oldsize, (intmax_t)sc->sc_mediasize);
446 }
447 
448 /*
449  * BIO_READ:
450  *	G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
451  * BIO_WRITE:
452  *	G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
453  */
454 static void
455 g_eli_start(struct bio *bp)
456 {
457 	struct g_eli_softc *sc;
458 	struct g_consumer *cp;
459 	struct bio *cbp;
460 
461 	sc = bp->bio_to->geom->softc;
462 	KASSERT(sc != NULL,
463 	    ("Provider's error should be set (error=%d)(device=%s).",
464 	    bp->bio_to->error, bp->bio_to->name));
465 	G_ELI_LOGREQ(2, bp, "Request received.");
466 
467 	switch (bp->bio_cmd) {
468 	case BIO_READ:
469 	case BIO_WRITE:
470 	case BIO_GETATTR:
471 	case BIO_FLUSH:
472 	case BIO_ZONE:
473 	case BIO_SPEEDUP:
474 		break;
475 	case BIO_DELETE:
476 		/*
477 		 * If the user hasn't set the NODELETE flag, we just pass
478 		 * it down the stack and let the layers beneath us do (or
479 		 * not) whatever they do with it.  If they have, we
480 		 * reject it.  A possible extension would be an
481 		 * additional flag to take it as a hint to shred the data
482 		 * with [multiple?] overwrites.
483 		 */
484 		if (!(sc->sc_flags & G_ELI_FLAG_NODELETE))
485 			break;
486 	default:
487 		g_io_deliver(bp, EOPNOTSUPP);
488 		return;
489 	}
490 	cbp = g_clone_bio(bp);
491 	if (cbp == NULL) {
492 		g_io_deliver(bp, ENOMEM);
493 		return;
494 	}
495 	bp->bio_driver1 = cbp;
496 	bp->bio_pflags = 0;
497 	G_ELI_SET_NEW_BIO(bp->bio_pflags);
498 	switch (bp->bio_cmd) {
499 	case BIO_READ:
500 		if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) {
501 			g_eli_crypto_read(sc, bp, 0);
502 			break;
503 		}
504 		/* FALLTHROUGH */
505 	case BIO_WRITE:
506 		mtx_lock(&sc->sc_queue_mtx);
507 		bioq_insert_tail(&sc->sc_queue, bp);
508 		mtx_unlock(&sc->sc_queue_mtx);
509 		wakeup(sc);
510 		break;
511 	case BIO_GETATTR:
512 	case BIO_FLUSH:
513 	case BIO_DELETE:
514 	case BIO_SPEEDUP:
515 	case BIO_ZONE:
516 		if (bp->bio_cmd == BIO_GETATTR)
517 			cbp->bio_done = g_eli_getattr_done;
518 		else
519 			cbp->bio_done = g_std_done;
520 		cp = LIST_FIRST(&sc->sc_geom->consumer);
521 		cbp->bio_to = cp->provider;
522 		G_ELI_LOGREQ(2, cbp, "Sending request.");
523 		g_io_request(cbp, cp);
524 		break;
525 	}
526 }
527 
528 static int
529 g_eli_newsession(struct g_eli_worker *wr)
530 {
531 	struct g_eli_softc *sc;
532 	struct crypto_session_params csp;
533 	uint32_t caps;
534 	int error, new_crypto;
535 	void *key;
536 
537 	sc = wr->w_softc;
538 
539 	memset(&csp, 0, sizeof(csp));
540 	csp.csp_mode = CSP_MODE_CIPHER;
541 	csp.csp_cipher_alg = sc->sc_ealgo;
542 	csp.csp_ivlen = g_eli_ivlen(sc->sc_ealgo);
543 	csp.csp_cipher_klen = sc->sc_ekeylen / 8;
544 	if (sc->sc_ealgo == CRYPTO_AES_XTS)
545 		csp.csp_cipher_klen <<= 1;
546 	if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) {
547 		key = g_eli_key_hold(sc, 0,
548 		    LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize);
549 		csp.csp_cipher_key = key;
550 	} else {
551 		key = NULL;
552 		csp.csp_cipher_key = sc->sc_ekey;
553 	}
554 	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
555 		csp.csp_mode = CSP_MODE_ETA;
556 		csp.csp_auth_alg = sc->sc_aalgo;
557 		csp.csp_auth_klen = G_ELI_AUTH_SECKEYLEN;
558 	}
559 
560 	switch (sc->sc_crypto) {
561 	case G_ELI_CRYPTO_SW_ACCEL:
562 	case G_ELI_CRYPTO_SW:
563 		error = crypto_newsession(&wr->w_sid, &csp,
564 		    CRYPTOCAP_F_SOFTWARE);
565 		break;
566 	case G_ELI_CRYPTO_HW:
567 		error = crypto_newsession(&wr->w_sid, &csp,
568 		    CRYPTOCAP_F_HARDWARE);
569 		break;
570 	case G_ELI_CRYPTO_UNKNOWN:
571 		error = crypto_newsession(&wr->w_sid, &csp,
572 		    CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE);
573 		if (error == 0) {
574 			caps = crypto_ses2caps(wr->w_sid);
575 			if (caps & CRYPTOCAP_F_HARDWARE)
576 				new_crypto = G_ELI_CRYPTO_HW;
577 			else if (caps & CRYPTOCAP_F_ACCEL_SOFTWARE)
578 				new_crypto = G_ELI_CRYPTO_SW_ACCEL;
579 			else
580 				new_crypto = G_ELI_CRYPTO_SW;
581 			mtx_lock(&sc->sc_queue_mtx);
582 			if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN)
583 				sc->sc_crypto = new_crypto;
584 			mtx_unlock(&sc->sc_queue_mtx);
585 		}
586 		break;
587 	default:
588 		panic("%s: invalid condition", __func__);
589 	}
590 
591 	if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) {
592 		if (error)
593 			g_eli_key_drop(sc, key);
594 		else
595 			wr->w_first_key = key;
596 	}
597 
598 	return (error);
599 }
600 
601 static void
602 g_eli_freesession(struct g_eli_worker *wr)
603 {
604 	struct g_eli_softc *sc;
605 
606 	crypto_freesession(wr->w_sid);
607 	if (wr->w_first_key != NULL) {
608 		sc = wr->w_softc;
609 		g_eli_key_drop(sc, wr->w_first_key);
610 		wr->w_first_key = NULL;
611 	}
612 }
613 
614 static void
615 g_eli_cancel(struct g_eli_softc *sc)
616 {
617 	struct bio *bp;
618 
619 	mtx_assert(&sc->sc_queue_mtx, MA_OWNED);
620 
621 	while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) {
622 		KASSERT(G_ELI_IS_NEW_BIO(bp->bio_pflags),
623 		    ("Not new bio when canceling (bp=%p).", bp));
624 		g_io_deliver(bp, ENXIO);
625 	}
626 }
627 
628 static struct bio *
629 g_eli_takefirst(struct g_eli_softc *sc)
630 {
631 	struct bio *bp;
632 
633 	mtx_assert(&sc->sc_queue_mtx, MA_OWNED);
634 
635 	if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND))
636 		return (bioq_takefirst(&sc->sc_queue));
637 	/*
638 	 * Device suspended, so we skip new I/O requests.
639 	 */
640 	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
641 		if (!G_ELI_IS_NEW_BIO(bp->bio_pflags))
642 			break;
643 	}
644 	if (bp != NULL)
645 		bioq_remove(&sc->sc_queue, bp);
646 	return (bp);
647 }
648 
649 /*
650  * This is the main function for kernel worker thread when we don't have
651  * hardware acceleration and we have to do cryptography in software.
652  * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM
653  * threads with crypto work.
 *
 * arg is the struct g_eli_worker describing this thread.  The thread
 * loops forever taking requests from the softc queue and running them;
 * it only leaves through kproc_exit() once the device is being destroyed
 * and the queue has nothing left for it.
654  */
655 static void
656 g_eli_worker(void *arg)
657 {
658 	struct g_eli_softc *sc;
659 	struct g_eli_worker *wr;
660 	struct bio *bp;
661 	int error __diagused;
662 
663 	wr = arg;
664 	sc = wr->w_softc;
665 #ifdef EARLY_AP_STARTUP
666 	MPASS(!sc->sc_cpubind || smp_started);
667 #elif defined(SMP)
668 	/* Before sched_bind() to a CPU, wait for all CPUs to go on-line. */
669 	if (sc->sc_cpubind) {
670 		while (!smp_started)
671 			tsleep(wr, 0, "geli:smp", hz / 4);
672 	}
673 #endif
	/* Set our priority and, if requested, bind to "our" CPU. */
674 	thread_lock(curthread);
675 	sched_prio(curthread, PUSER);
676 	if (sc->sc_cpubind)
677 		sched_bind(curthread, wr->w_number % mp_ncpus);
678 	thread_unlock(curthread);
679 
680 	G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm);
681 
682 	for (;;) {
683 		mtx_lock(&sc->sc_queue_mtx);
684 again:
		/* Reached with sc_queue_mtx held, also via "goto again". */
685 		bp = g_eli_takefirst(sc);
686 		if (bp == NULL) {
687 			if (sc->sc_flags & G_ELI_FLAG_DESTROY) {
				/*
				 * The device is going away: fail whatever is
				 * still queued, unlink and free this worker,
				 * notify sleepers on sc_workers and exit.
				 */
688 				g_eli_cancel(sc);
689 				LIST_REMOVE(wr, w_next);
690 				g_eli_freesession(wr);
691 				free(wr, M_ELI);
692 				G_ELI_DEBUG(1, "Thread %s exiting.",
693 				    curthread->td_proc->p_comm);
694 				wakeup(&sc->sc_workers);
695 				mtx_unlock(&sc->sc_queue_mtx);
696 				kproc_exit(0);
697 			}
698 			while (sc->sc_flags & G_ELI_FLAG_SUSPEND) {
699 				if (sc->sc_inflight > 0) {
700 					G_ELI_DEBUG(0, "inflight=%d",
701 					    sc->sc_inflight);
702 					/*
703 					 * We still have inflight BIOs, so
704 					 * sleep and retry.
705 					 */
706 					msleep(sc, &sc->sc_queue_mtx, PRIBIO,
707 					    "geli:inf", hz / 5);
708 					goto again;
709 				}
710 				/*
711 				 * Suspend requested, mark the worker as
712 				 * suspended and go to sleep.
713 				 */
714 				if (wr->w_active) {
715 					g_eli_freesession(wr);
716 					wr->w_active = FALSE;
717 				}
718 				wakeup(&sc->sc_workers);
719 				msleep(sc, &sc->sc_queue_mtx, PRIBIO,
720 				    "geli:suspend", 0);
				/*
				 * If the suspend flag is gone after the sleep,
				 * open a new crypto session before resuming.
				 */
721 				if (!wr->w_active &&
722 				    !(sc->sc_flags & G_ELI_FLAG_SUSPEND)) {
723 					error = g_eli_newsession(wr);
724 					KASSERT(error == 0,
725 					    ("g_eli_newsession() failed on resume (error=%d)",
726 					    error));
727 					wr->w_active = TRUE;
728 				}
729 				goto again;
730 			}
			/*
			 * Nothing to do: sleep until woken up.  PDROP makes
			 * msleep() return without the mutex held, so the top
			 * of the loop takes it again.
			 */
731 			msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0);
732 			continue;
733 		}
		/*
		 * A request seen for the first time is counted as in flight
		 * while the queue mutex is still held.
		 */
734 		if (G_ELI_IS_NEW_BIO(bp->bio_pflags))
735 			atomic_add_int(&sc->sc_inflight, 1);
736 		mtx_unlock(&sc->sc_queue_mtx);
		/*
		 * New requests have their worker field reset and start with
		 * the read step (BIO_READ) or the crypto step (otherwise);
		 * requests that are not new go straight to the crypto step.
		 * Devices with G_ELI_FLAG_AUTH use the g_eli_auth_*()
		 * variants.
		 */
737 		if (G_ELI_IS_NEW_BIO(bp->bio_pflags)) {
738 			G_ELI_SETWORKER(bp->bio_pflags, 0);
739 			if (sc->sc_flags & G_ELI_FLAG_AUTH) {
740 				if (bp->bio_cmd == BIO_READ)
741 					g_eli_auth_read(sc, bp);
742 				else
743 					g_eli_auth_run(wr, bp);
744 			} else {
745 				if (bp->bio_cmd == BIO_READ)
746 					g_eli_crypto_read(sc, bp, 1);
747 				else
748 					g_eli_crypto_run(wr, bp);
749 			}
750 		} else {
751 			if (sc->sc_flags & G_ELI_FLAG_AUTH)
752 				g_eli_auth_run(wr, bp);
753 			else
754 				g_eli_crypto_run(wr, bp);
755 		}
756 	}
757 }
758 
759 static int
760 g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp,
761     off_t offset, struct g_eli_metadata *md)
762 {
763 	struct g_geom *gp;
764 	struct g_consumer *cp;
765 	u_char *buf = NULL;
766 	int error;
767 
768 	g_topology_assert();
769 
770 	gp = g_new_geomf(mp, "eli:taste");
771 	gp->start = g_eli_start;
772 	gp->access = g_std_access;
773 	/*
774 	 * g_eli_read_metadata() is always called from the event thread.
775 	 * Our geom is created and destroyed in the same event, so there
776 	 * could be no orphan nor spoil event in the meantime.
777 	 */
778 	gp->orphan = g_eli_orphan_spoil_assert;
779 	gp->spoiled = g_eli_orphan_spoil_assert;
780 	cp = g_new_consumer(gp);
781 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
782 	error = g_attach(cp, pp);
783 	if (error != 0)
784 		goto end;
785 	error = g_access(cp, 1, 0, 0);
786 	if (error != 0)
787 		goto end;
788 	g_topology_unlock();
789 	buf = g_read_data(cp, offset, pp->sectorsize, &error);
790 	g_topology_lock();
791 	if (buf == NULL)
792 		goto end;
793 	error = eli_metadata_decode(buf, md);
794 	if (error != 0)
795 		goto end;
796 	/* Metadata was read and decoded successfully. */
797 end:
798 	g_free(buf);
799 	if (cp->provider != NULL) {
800 		if (cp->acr == 1)
801 			g_access(cp, -1, 0, 0);
802 		g_detach(cp);
803 	}
804 	g_destroy_consumer(cp);
805 	g_destroy_geom(gp);
806 	return (error);
807 }
808 
809 int
810 g_eli_read_metadata(struct g_class *mp, struct g_provider *pp,
811     struct g_eli_metadata *md)
812 {
813 
814 	return (g_eli_read_metadata_offset(mp, pp,
815 	    pp->mediasize - pp->sectorsize, md));
816 }
817 
818 /*
819  * The function is called when we had last close on provider and user requested
820  * to close it when this situation occur.
821  */
822 static void
823 g_eli_last_close(void *arg, int flags __unused)
824 {
825 	struct g_geom *gp;
826 	char gpname[64];
827 	int error __diagused;
828 
829 	g_topology_assert();
830 	gp = arg;
831 	strlcpy(gpname, gp->name, sizeof(gpname));
832 	error = g_eli_destroy(gp->softc, TRUE);
833 	KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).",
834 	    gpname, error));
835 	G_ELI_DEBUG(0, "Detached %s on last close.", gpname);
836 }
837 
838 int
839 g_eli_access(struct g_provider *pp, int dr, int dw, int de)
840 {
841 	struct g_eli_softc *sc;
842 	struct g_geom *gp;
843 
844 	gp = pp->geom;
845 	sc = gp->softc;
846 
847 	if (dw > 0) {
848 		if (sc->sc_flags & G_ELI_FLAG_RO) {
849 			/* Deny write attempts. */
850 			return (EROFS);
851 		}
852 		/* Someone is opening us for write, we need to remember that. */
853 		sc->sc_flags |= G_ELI_FLAG_WOPEN;
854 		return (0);
855 	}
856 	/* Is this the last close? */
857 	if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0)
858 		return (0);
859 
860 	/*
861 	 * Automatically detach on last close if requested.
862 	 */
863 	if ((sc->sc_flags & G_ELI_FLAG_RW_DETACH) ||
864 	    (sc->sc_flags & G_ELI_FLAG_WOPEN)) {
865 		g_post_event(g_eli_last_close, gp, M_WAITOK, NULL);
866 	}
867 	return (0);
868 }
869 
/*
 * Report whether the given CPU is a member of hlt_cpus_mask.
 * Kernels built without SMP have no such mask, so the answer there is
 * always 0.
 */
static int
g_eli_cpu_is_disabled(int cpu)
{
	int disabled;

#ifdef SMP
	disabled = CPU_ISSET(cpu, &hlt_cpus_mask);
#else
	disabled = 0;
#endif
	return (disabled);
}
879 
880 static void
881 g_eli_init_uma(void)
882 {
883 
884 	atomic_add_int(&g_eli_devs, 1);
885 	sx_xlock(&g_eli_umalock);
886 	if (g_eli_uma == NULL) {
887 		/*
888 		 * Calculate the maximum-sized swap buffer we are
889 		 * likely to see.
890 		 */
891 		g_eli_alloc_sz = roundup2((PAGE_SIZE + sizeof(int) +
892                     G_ELI_AUTH_SECKEYLEN) * nsw_cluster_max +
893                     sizeof(uintptr_t), PAGE_SIZE);
894 
895 		g_eli_uma = uma_zcreate("GELI buffers", g_eli_alloc_sz,
896 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
897 
898 		/* Reserve and pre-allocate pages, as appropriate. */
899 		uma_zone_reserve(g_eli_uma, g_eli_minbufs);
900 		uma_prealloc(g_eli_uma, g_eli_minbufs);
901 	}
902 	sx_xunlock(&g_eli_umalock);
903 }
904 
905 /*
906  * Try to destroy the UMA pool. This will do nothing if there are existing
907  * GELI devices or existing UMA allocations.
908  */
909 static void
910 g_eli_destroy_uma(void)
911 {
912 	uma_zone_t oldzone;
913 
914 	sx_xlock(&g_eli_umalock);
915 	/* Ensure we really should be destroying this. */
916 	if (atomic_load_int(&g_eli_devs) == 0 &&
917 	    atomic_load_int(&g_eli_umaoutstanding) == 0) {
918 		oldzone = g_eli_uma;
919 		g_eli_uma = NULL;
920 	} else
921 		oldzone = NULL;
922 	sx_xunlock(&g_eli_umalock);
923 
924 	if (oldzone != NULL)
925 		uma_zdestroy(oldzone);
926 }
927 
928 static void
929 g_eli_fini_uma(void)
930 {
931 
932 	/*
933 	 * If this is the last outstanding GELI device, try to
934 	 * destroy the UMA pool.
935 	 */
936 	if (atomic_fetchadd_int(&g_eli_devs, -1) == 1)
937 		g_eli_destroy_uma();
938 }
939 
940 /*
941  * Allocate a data buffer. If the size fits within our swap-sized buffers,
942  * try to allocate a swap-sized buffer from the UMA pool. Otherwise, fall
943  * back to using malloc.
944  *
945  * Swap-related requests are special: they can only use the UMA pool, they
946  * use M_USE_RESERVE to let them dip farther into system resources, and
947  * they always use M_NOWAIT to prevent swap operations from deadlocking.
948  */
949 bool
950 g_eli_alloc_data(struct bio *bp, int sz)
951 {
952 
953 	KASSERT(sz <= g_eli_alloc_sz || (bp->bio_flags & BIO_SWAP) == 0,
954 	    ("BIO_SWAP request for %d bytes exceeds the precalculated buffer"
955 	    " size (%d)", sz, g_eli_alloc_sz));
956 	if (sz <= g_eli_alloc_sz) {
957 		bp->bio_driver2 = uma_zalloc(g_eli_uma, M_NOWAIT |
958 		    ((bp->bio_flags & BIO_SWAP) != 0 ? M_USE_RESERVE : 0));
959 		if (bp->bio_driver2 != NULL) {
960 			bp->bio_pflags |= G_ELI_UMA_ALLOC;
961 			atomic_add_int(&g_eli_umaoutstanding, 1);
962 		}
963 		if (bp->bio_driver2 != NULL || (bp->bio_flags & BIO_SWAP) != 0)
964 			return (bp->bio_driver2 != NULL);
965 	}
966 	bp->bio_pflags &= ~(G_ELI_UMA_ALLOC);
967 	bp->bio_driver2 = malloc(sz, M_ELI, g_eli_blocking_malloc ? M_WAITOK :
968 	    M_NOWAIT);
969 	return (bp->bio_driver2 != NULL);
970 }
971 
972 /*
973  * Free a buffer from bp->bio_driver2 which was allocated with
974  * g_eli_alloc_data(). This function makes sure that the memory is freed
975  * to the correct place.
976  *
977  * Additionally, if this function frees the last outstanding UMA request
978  * and there are no open GELI devices, this will destroy the UMA pool.
979  */
980 void
981 g_eli_free_data(struct bio *bp)
982 {
983 
984 	/*
985 	 * Mimic the free(9) behavior of allowing a NULL pointer to be
986 	 * freed.
987 	 */
988 	if (bp->bio_driver2 == NULL)
989 		return;
990 
991 	if ((bp->bio_pflags & G_ELI_UMA_ALLOC) != 0) {
992 		uma_zfree(g_eli_uma, bp->bio_driver2);
993 		if (atomic_fetchadd_int(&g_eli_umaoutstanding, -1) == 1 &&
994 		    atomic_load_int(&g_eli_devs) == 0)
995 			g_eli_destroy_uma();
996 	} else
997 		free(bp->bio_driver2, M_ELI);
998 	bp->bio_driver2 = NULL;
999 }
1000 
1001 struct g_geom *
1002 g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
1003     const struct g_eli_metadata *md, const u_char *mkey, int nkey)
1004 {
1005 	struct g_eli_softc *sc;
1006 	struct g_eli_worker *wr;
1007 	struct g_geom *gp;
1008 	struct g_provider *pp;
1009 	struct g_consumer *cp;
1010 	struct g_geom_alias *gap;
1011 	u_int i, threads;
1012 	int dcw, error;
1013 
1014 	G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX);
1015 	KASSERT(eli_metadata_crypto_supported(md),
1016 	    ("%s: unsupported crypto for %s", __func__, bpp->name));
1017 
1018 	gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX);
1019 	sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO);
1020 	gp->start = g_eli_start;
1021 	/*
1022 	 * Spoiling can happen even though we have the provider open
1023 	 * exclusively, e.g. through media change events.
1024 	 */
1025 	gp->spoiled = g_eli_orphan;
1026 	gp->orphan = g_eli_orphan;
1027 	gp->resize = g_eli_resize;
1028 	gp->dumpconf = g_eli_dumpconf;
1029 	/*
1030 	 * If detach-on-last-close feature is not enabled and we don't operate
1031 	 * on read-only provider, we can simply use g_std_access().
1032 	 */
1033 	if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO))
1034 		gp->access = g_eli_access;
1035 	else
1036 		gp->access = g_std_access;
1037 
1038 	eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize);
1039 	sc->sc_nkey = nkey;
1040 
1041 	gp->softc = sc;
1042 	sc->sc_geom = gp;
1043 
1044 	bioq_init(&sc->sc_queue);
1045 	mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF);
1046 	mtx_init(&sc->sc_ekeys_lock, "geli:ekeys", NULL, MTX_DEF);
1047 
1048 	pp = NULL;
1049 	cp = g_new_consumer(gp);
1050 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
1051 	error = g_attach(cp, bpp);
1052 	if (error != 0) {
1053 		if (req != NULL) {
1054 			gctl_error(req, "Cannot attach to %s (error=%d).",
1055 			    bpp->name, error);
1056 		} else {
1057 			G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).",
1058 			    bpp->name, error);
1059 		}
1060 		goto failed;
1061 	}
1062 	/*
1063 	 * Keep provider open all the time, so we can run critical tasks,
1064 	 * like Master Keys deletion, without wondering if we can open
1065 	 * provider or not.
1066 	 * We don't open provider for writing only when user requested read-only
1067 	 * access.
1068 	 */
1069 	dcw = (sc->sc_flags & G_ELI_FLAG_RO) ? 0 : 1;
1070 	error = g_access(cp, 1, dcw, 1);
1071 	if (error != 0) {
1072 		if (req != NULL) {
1073 			gctl_error(req, "Cannot access %s (error=%d).",
1074 			    bpp->name, error);
1075 		} else {
1076 			G_ELI_DEBUG(1, "Cannot access %s (error=%d).",
1077 			    bpp->name, error);
1078 		}
1079 		goto failed;
1080 	}
1081 
1082 	/*
1083 	 * Remember the keys in our softc structure.
1084 	 */
1085 	g_eli_mkey_propagate(sc, mkey);
1086 
1087 	LIST_INIT(&sc->sc_workers);
1088 
1089 	threads = g_eli_threads;
1090 	if (threads == 0)
1091 		threads = mp_ncpus;
1092 	sc->sc_cpubind = (mp_ncpus > 1 && threads == mp_ncpus);
1093 	g_eli_init_uma();
1094 	for (i = 0; i < threads; i++) {
1095 		if (g_eli_cpu_is_disabled(i)) {
1096 			G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.",
1097 			    bpp->name, i);
1098 			continue;
1099 		}
1100 		wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO);
1101 		wr->w_softc = sc;
1102 		wr->w_number = i;
1103 		wr->w_active = TRUE;
1104 
1105 		error = g_eli_newsession(wr);
1106 		if (error != 0) {
1107 			free(wr, M_ELI);
1108 			if (req != NULL) {
1109 				gctl_error(req, "Cannot set up crypto session "
1110 				    "for %s (error=%d).", bpp->name, error);
1111 			} else {
1112 				G_ELI_DEBUG(1, "Cannot set up crypto session "
1113 				    "for %s (error=%d).", bpp->name, error);
1114 			}
1115 			goto failed;
1116 		}
1117 
1118 		error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0,
1119 		    "g_eli[%u] %s", i, bpp->name);
1120 		if (error != 0) {
1121 			g_eli_freesession(wr);
1122 			free(wr, M_ELI);
1123 			if (req != NULL) {
1124 				gctl_error(req, "Cannot create kernel thread "
1125 				    "for %s (error=%d).", bpp->name, error);
1126 			} else {
1127 				G_ELI_DEBUG(1, "Cannot create kernel thread "
1128 				    "for %s (error=%d).", bpp->name, error);
1129 			}
1130 			goto failed;
1131 		}
1132 		LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next);
1133 	}
1134 
1135 	/*
1136 	 * Create decrypted provider.
1137 	 */
1138 	pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX);
1139 	pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
1140 	if (g_eli_unmapped_io && CRYPTO_HAS_VMPAGE) {
1141 		/*
1142 		 * On DMAP architectures we can use unmapped I/O.  But don't
1143 		 * use it with data integrity verification.  That code hasn't
1144 		 * been written yet.
1145 		 */
1146 		 if ((sc->sc_flags & G_ELI_FLAG_AUTH) == 0)
1147 			pp->flags |= G_PF_ACCEPT_UNMAPPED;
1148 	}
1149 	pp->mediasize = sc->sc_mediasize;
1150 	pp->sectorsize = sc->sc_sectorsize;
1151 	LIST_FOREACH(gap, &bpp->aliases, ga_next)
1152 		g_provider_add_alias(pp, "%s%s", gap->ga_alias, G_ELI_SUFFIX);
1153 
1154 	g_error_provider(pp, 0);
1155 
1156 	G_ELI_DEBUG(0, "Device %s created.", pp->name);
1157 	G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo),
1158 	    sc->sc_ekeylen);
1159 	if (sc->sc_flags & G_ELI_FLAG_AUTH)
1160 		G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo));
1161 	G_ELI_DEBUG(0, "    Crypto: %s",
1162 	    sc->sc_crypto == G_ELI_CRYPTO_SW_ACCEL ? "accelerated software" :
1163 	    sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware");
1164 	return (gp);
1165 failed:
1166 	mtx_lock(&sc->sc_queue_mtx);
1167 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
1168 	wakeup(sc);
1169 	/*
1170 	 * Wait for kernel threads self destruction.
1171 	 */
1172 	while (!LIST_EMPTY(&sc->sc_workers)) {
1173 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
1174 		    "geli:destroy", 0);
1175 	}
1176 	mtx_destroy(&sc->sc_queue_mtx);
1177 	if (cp->provider != NULL) {
1178 		if (cp->acr == 1)
1179 			g_access(cp, -1, -dcw, -1);
1180 		g_detach(cp);
1181 	}
1182 	g_destroy_consumer(cp);
1183 	g_destroy_geom(gp);
1184 	g_eli_key_destroy(sc);
1185 	g_eli_fini_uma();
1186 	zfree(sc, M_ELI);
1187 	return (NULL);
1188 }
1189 
1190 int
1191 g_eli_destroy(struct g_eli_softc *sc, boolean_t force)
1192 {
1193 	struct g_geom *gp;
1194 	struct g_provider *pp;
1195 
1196 	g_topology_assert();
1197 
1198 	if (sc == NULL)
1199 		return (ENXIO);
1200 
1201 	gp = sc->sc_geom;
1202 	pp = LIST_FIRST(&gp->provider);
1203 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
1204 		if (force) {
1205 			G_ELI_DEBUG(1, "Device %s is still open, so it "
1206 			    "cannot be definitely removed.", pp->name);
1207 			sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
1208 			gp->access = g_eli_access;
1209 			g_wither_provider(pp, ENXIO);
1210 			return (EBUSY);
1211 		} else {
1212 			G_ELI_DEBUG(1,
1213 			    "Device %s is still open (r%dw%de%d).", pp->name,
1214 			    pp->acr, pp->acw, pp->ace);
1215 			return (EBUSY);
1216 		}
1217 	}
1218 
1219 	mtx_lock(&sc->sc_queue_mtx);
1220 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
1221 	wakeup(sc);
1222 	while (!LIST_EMPTY(&sc->sc_workers)) {
1223 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
1224 		    "geli:destroy", 0);
1225 	}
1226 	mtx_destroy(&sc->sc_queue_mtx);
1227 	gp->softc = NULL;
1228 	g_eli_key_destroy(sc);
1229 	g_eli_fini_uma();
1230 	zfree(sc, M_ELI);
1231 
1232 	G_ELI_DEBUG(0, "Device %s destroyed.", gp->name);
1233 	g_wither_geom_close(gp, ENXIO);
1234 
1235 	return (0);
1236 }
1237 
1238 static int
1239 g_eli_destroy_geom(struct gctl_req *req __unused,
1240     struct g_class *mp __unused, struct g_geom *gp)
1241 {
1242 	struct g_eli_softc *sc;
1243 
1244 	sc = gp->softc;
1245 	return (g_eli_destroy(sc, FALSE));
1246 }
1247 
1248 static int
1249 g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider)
1250 {
1251 	u_char *keyfile, *data;
1252 	char *file, name[64];
1253 	size_t size;
1254 	int i;
1255 
1256 	for (i = 0; ; i++) {
1257 		snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i);
1258 		keyfile = preload_search_by_type(name);
1259 		if (keyfile == NULL && i == 0) {
1260 			/*
1261 			 * If there is only one keyfile, allow simpler name.
1262 			 */
1263 			snprintf(name, sizeof(name), "%s:geli_keyfile", provider);
1264 			keyfile = preload_search_by_type(name);
1265 		}
1266 		if (keyfile == NULL)
1267 			return (i);	/* Return number of loaded keyfiles. */
1268 		data = preload_fetch_addr(keyfile);
1269 		if (data == NULL) {
1270 			G_ELI_DEBUG(0, "Cannot find key file data for %s.",
1271 			    name);
1272 			return (0);
1273 		}
1274 		size = preload_fetch_size(keyfile);
1275 		if (size == 0) {
1276 			G_ELI_DEBUG(0, "Cannot find key file size for %s.",
1277 			    name);
1278 			return (0);
1279 		}
1280 		file = preload_search_info(keyfile, MODINFO_NAME);
1281 		if (file == NULL) {
1282 			G_ELI_DEBUG(0, "Cannot find key file name for %s.",
1283 			    name);
1284 			return (0);
1285 		}
1286 		G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", file,
1287 		    provider, name);
1288 		g_eli_crypto_hmac_update(ctx, data, size);
1289 	}
1290 }
1291 
1292 static void
1293 g_eli_keyfiles_clear(const char *provider)
1294 {
1295 	u_char *keyfile, *data;
1296 	char name[64];
1297 	size_t size;
1298 	int i;
1299 
1300 	for (i = 0; ; i++) {
1301 		snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i);
1302 		keyfile = preload_search_by_type(name);
1303 		if (keyfile == NULL)
1304 			return;
1305 		data = preload_fetch_addr(keyfile);
1306 		size = preload_fetch_size(keyfile);
1307 		if (data != NULL && size != 0)
1308 			explicit_bzero(data, size);
1309 	}
1310 }
1311 
1312 /*
1313  * Tasting is only made on boot.
1314  * We detect providers which should be attached before root is mounted.
1315  */
1316 static struct g_geom *
1317 g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
1318 {
1319 	struct g_eli_metadata md;
1320 	struct g_geom *gp;
1321 	struct hmac_ctx ctx;
1322 	char passphrase[256];
1323 	u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN];
1324 	u_int i, nkey, nkeyfiles, tries, showpass;
1325 	int error;
1326         struct keybuf *keybuf;
1327 
1328 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
1329 	g_topology_assert();
1330 
1331 	if (root_mounted() || g_eli_tries == 0)
1332 		return (NULL);
1333 
1334 	G_ELI_DEBUG(3, "Tasting %s.", pp->name);
1335 
1336 	error = g_eli_read_metadata(mp, pp, &md);
1337 	if (error != 0)
1338 		return (NULL);
1339 	gp = NULL;
1340 
1341 	if (strcmp(md.md_magic, G_ELI_MAGIC) != 0)
1342 		return (NULL);
1343 	if (md.md_version > G_ELI_VERSION) {
1344 		printf("geom_eli.ko module is too old to handle %s.\n",
1345 		    pp->name);
1346 		return (NULL);
1347 	}
1348 	if (md.md_provsize != pp->mediasize)
1349 		return (NULL);
1350 	/* Should we attach it on boot? */
1351 	if (!(md.md_flags & G_ELI_FLAG_BOOT) &&
1352 	    !(md.md_flags & G_ELI_FLAG_GELIBOOT))
1353 		return (NULL);
1354 	if (md.md_keys == 0x00) {
1355 		G_ELI_DEBUG(0, "No valid keys on %s.", pp->name);
1356 		return (NULL);
1357 	}
1358 	if (!eli_metadata_crypto_supported(&md)) {
1359 		G_ELI_DEBUG(0, "%s uses invalid or unsupported algorithms\n",
1360 		    pp->name);
1361 		return (NULL);
1362 	}
1363 	if (md.md_iterations == -1) {
1364 		/* If there is no passphrase, we try only once. */
1365 		tries = 1;
1366 	} else {
1367 		/* Ask for the passphrase no more than g_eli_tries times. */
1368 		tries = g_eli_tries;
1369 	}
1370 
1371         if ((keybuf = get_keybuf()) != NULL) {
1372                 /* Scan the key buffer, try all GELI keys. */
1373                 for (i = 0; i < keybuf->kb_nents; i++) {
1374                          if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) {
1375                                  memcpy(key, keybuf->kb_ents[i].ke_data,
1376                                      sizeof(key));
1377 
1378                                  if (g_eli_mkey_decrypt_any(&md, key,
1379                                      mkey, &nkey) == 0 ) {
1380                                          explicit_bzero(key, sizeof(key));
1381                                          goto have_key;
1382                                  }
1383                          }
1384                 }
1385         }
1386 
1387         for (i = 0; i <= tries; i++) {
1388                 g_eli_crypto_hmac_init(&ctx, NULL, 0);
1389 
1390                 /*
1391                  * Load all key files.
1392                  */
1393                 nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name);
1394 
1395                 if (nkeyfiles == 0 && md.md_iterations == -1) {
1396                         /*
1397                          * No key files and no passphrase, something is
1398                          * definitely wrong here.
1399                          * geli(8) doesn't allow for such situation, so assume
1400                          * that there was really no passphrase and in that case
1401                          * key files are no properly defined in loader.conf.
1402                          */
1403                         G_ELI_DEBUG(0,
1404                             "Found no key files in loader.conf for %s.",
1405                             pp->name);
1406                         return (NULL);
1407                 }
1408 
1409                 /* Ask for the passphrase if defined. */
1410                 if (md.md_iterations >= 0) {
1411                         /* Try first with cached passphrase. */
1412                         if (i == 0) {
1413                                 if (!g_eli_boot_passcache)
1414                                         continue;
1415                                 memcpy(passphrase, cached_passphrase,
1416                                     sizeof(passphrase));
1417                         } else {
1418                                 printf("Enter passphrase for %s: ", pp->name);
1419 				showpass = g_eli_visible_passphrase;
1420 				if ((md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS) != 0)
1421 					showpass = GETS_ECHOPASS;
1422                                 cngets(passphrase, sizeof(passphrase),
1423 				    showpass);
1424                                 memcpy(cached_passphrase, passphrase,
1425                                     sizeof(passphrase));
1426                         }
1427                 }
1428 
1429                 /*
1430                  * Prepare Derived-Key from the user passphrase.
1431                  */
1432                 if (md.md_iterations == 0) {
1433                         g_eli_crypto_hmac_update(&ctx, md.md_salt,
1434                             sizeof(md.md_salt));
1435                         g_eli_crypto_hmac_update(&ctx, passphrase,
1436                             strlen(passphrase));
1437                         explicit_bzero(passphrase, sizeof(passphrase));
1438                 } else if (md.md_iterations > 0) {
1439                         u_char dkey[G_ELI_USERKEYLEN];
1440 
1441                         pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt,
1442                             sizeof(md.md_salt), passphrase, md.md_iterations);
1443                         explicit_bzero(passphrase, sizeof(passphrase));
1444                         g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey));
1445                         explicit_bzero(dkey, sizeof(dkey));
1446                 }
1447 
1448                 g_eli_crypto_hmac_final(&ctx, key, 0);
1449 
1450                 /*
1451                  * Decrypt Master-Key.
1452                  */
1453                 error = g_eli_mkey_decrypt_any(&md, key, mkey, &nkey);
1454                 explicit_bzero(key, sizeof(key));
1455                 if (error == -1) {
1456                         if (i == tries) {
1457                                 G_ELI_DEBUG(0,
1458                                     "Wrong key for %s. No tries left.",
1459                                     pp->name);
1460                                 g_eli_keyfiles_clear(pp->name);
1461                                 return (NULL);
1462                         }
1463                         if (i > 0) {
1464                                 G_ELI_DEBUG(0,
1465                                     "Wrong key for %s. Tries left: %u.",
1466                                     pp->name, tries - i);
1467                         }
1468                         /* Try again. */
1469                         continue;
1470                 } else if (error > 0) {
1471                         G_ELI_DEBUG(0,
1472                             "Cannot decrypt Master Key for %s (error=%d).",
1473                             pp->name, error);
1474                         g_eli_keyfiles_clear(pp->name);
1475                         return (NULL);
1476                 }
1477                 g_eli_keyfiles_clear(pp->name);
1478                 G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name);
1479                 break;
1480         }
1481 have_key:
1482 
1483 	/*
1484 	 * We have correct key, let's attach provider.
1485 	 */
1486 	gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey);
1487 	explicit_bzero(mkey, sizeof(mkey));
1488 	explicit_bzero(&md, sizeof(md));
1489 	if (gp == NULL) {
1490 		G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name,
1491 		    G_ELI_SUFFIX);
1492 		return (NULL);
1493 	}
1494 	return (gp);
1495 }
1496 
1497 static void
1498 g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1499     struct g_consumer *cp, struct g_provider *pp)
1500 {
1501 	struct g_eli_softc *sc;
1502 
1503 	g_topology_assert();
1504 	sc = gp->softc;
1505 	if (sc == NULL)
1506 		return;
1507 	if (pp != NULL || cp != NULL)
1508 		return;	/* Nothing here. */
1509 
1510 	sbuf_printf(sb, "%s<KeysTotal>%ju</KeysTotal>\n", indent,
1511 	    (uintmax_t)sc->sc_ekeys_total);
1512 	sbuf_printf(sb, "%s<KeysAllocated>%ju</KeysAllocated>\n", indent,
1513 	    (uintmax_t)sc->sc_ekeys_allocated);
1514 	sbuf_printf(sb, "%s<Flags>", indent);
1515 	if (sc->sc_flags == 0)
1516 		sbuf_cat(sb, "NONE");
1517 	else {
1518 		int first = 1;
1519 
1520 #define ADD_FLAG(flag, name)	do {					\
1521 	if (sc->sc_flags & (flag)) {					\
1522 		if (!first)						\
1523 			sbuf_cat(sb, ", ");				\
1524 		else							\
1525 			first = 0;					\
1526 		sbuf_cat(sb, name);					\
1527 	}								\
1528 } while (0)
1529 		ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND");
1530 		ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY");
1531 		ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER");
1532 		ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME");
1533 		ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT");
1534 		ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH");
1535 		ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH");
1536 		ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH");
1537 		ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN");
1538 		ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY");
1539 		ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY");
1540 		ADD_FLAG(G_ELI_FLAG_NODELETE, "NODELETE");
1541 		ADD_FLAG(G_ELI_FLAG_GELIBOOT, "GELIBOOT");
1542 		ADD_FLAG(G_ELI_FLAG_GELIDISPLAYPASS, "GELIDISPLAYPASS");
1543 		ADD_FLAG(G_ELI_FLAG_AUTORESIZE, "AUTORESIZE");
1544 #undef  ADD_FLAG
1545 	}
1546 	sbuf_cat(sb, "</Flags>\n");
1547 
1548 	if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) {
1549 		sbuf_printf(sb, "%s<UsedKey>%u</UsedKey>\n", indent,
1550 		    sc->sc_nkey);
1551 	}
1552 	sbuf_printf(sb, "%s<Version>%u</Version>\n", indent, sc->sc_version);
1553 	sbuf_printf(sb, "%s<Crypto>", indent);
1554 	switch (sc->sc_crypto) {
1555 	case G_ELI_CRYPTO_HW:
1556 		sbuf_cat(sb, "hardware");
1557 		break;
1558 	case G_ELI_CRYPTO_SW:
1559 		sbuf_cat(sb, "software");
1560 		break;
1561 	case G_ELI_CRYPTO_SW_ACCEL:
1562 		sbuf_cat(sb, "accelerated software");
1563 		break;
1564 	default:
1565 		sbuf_cat(sb, "UNKNOWN");
1566 		break;
1567 	}
1568 	sbuf_cat(sb, "</Crypto>\n");
1569 	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
1570 		sbuf_printf(sb,
1571 		    "%s<AuthenticationAlgorithm>%s</AuthenticationAlgorithm>\n",
1572 		    indent, g_eli_algo2str(sc->sc_aalgo));
1573 	}
1574 	sbuf_printf(sb, "%s<KeyLength>%u</KeyLength>\n", indent,
1575 	    sc->sc_ekeylen);
1576 	sbuf_printf(sb, "%s<EncryptionAlgorithm>%s</EncryptionAlgorithm>\n",
1577 	    indent, g_eli_algo2str(sc->sc_ealgo));
1578 	sbuf_printf(sb, "%s<State>%s</State>\n", indent,
1579 	    (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE");
1580 }
1581 
1582 static void
1583 g_eli_shutdown_pre_sync(void *arg, int howto)
1584 {
1585 	struct g_class *mp;
1586 	struct g_geom *gp, *gp2;
1587 	struct g_provider *pp;
1588 	struct g_eli_softc *sc;
1589 
1590 	mp = arg;
1591 	g_topology_lock();
1592 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
1593 		sc = gp->softc;
1594 		if (sc == NULL)
1595 			continue;
1596 		pp = LIST_FIRST(&gp->provider);
1597 		KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name));
1598 		if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0 ||
1599 		    SCHEDULER_STOPPED())
1600 		{
1601 			sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
1602 			gp->access = g_eli_access;
1603 		} else {
1604 			(void) g_eli_destroy(sc, TRUE);
1605 		}
1606 	}
1607 	g_topology_unlock();
1608 }
1609 
1610 static void
1611 g_eli_init(struct g_class *mp)
1612 {
1613 
1614 	g_eli_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
1615 	    g_eli_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
1616 	if (g_eli_pre_sync == NULL)
1617 		G_ELI_DEBUG(0, "Warning! Cannot register shutdown event.");
1618 }
1619 
1620 static void
1621 g_eli_fini(struct g_class *mp)
1622 {
1623 
1624 	if (g_eli_pre_sync != NULL)
1625 		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_eli_pre_sync);
1626 }
1627 
/*
 * Module glue: declare the GEOM class described by g_eli_class under the
 * module name g_eli, record a dependency on the crypto module (the three
 * numbers are presumably the minimum, preferred and maximum accepted
 * versions -- confirm against MODULE_DEPEND(9)), and export version 0 of
 * geom_eli.
 */
DECLARE_GEOM_CLASS(g_eli_class, g_eli);
MODULE_DEPEND(g_eli, crypto, 1, 1, 1);
MODULE_VERSION(geom_eli, 0);
1631