xref: /freebsd/sys/geom/eli/g_eli.c (revision 7dfd9569a2f0637fb9a48157b1c1bfe5709faee3)
1 /*-
2  * Copyright (c) 2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/linker.h>
34 #include <sys/module.h>
35 #include <sys/lock.h>
36 #include <sys/mutex.h>
37 #include <sys/bio.h>
38 #include <sys/sysctl.h>
39 #include <sys/malloc.h>
40 #include <sys/kthread.h>
41 #include <sys/proc.h>
42 #include <sys/sched.h>
43 #include <sys/smp.h>
44 #include <sys/uio.h>
45 #include <sys/vnode.h>
46 
47 #include <vm/uma.h>
48 
49 #include <geom/geom.h>
50 #include <geom/eli/g_eli.h>
51 #include <geom/eli/pkcs5v2.h>
52 
53 
54 MALLOC_DEFINE(M_ELI, "eli data", "GEOM_ELI Data");
55 
56 SYSCTL_DECL(_kern_geom);
57 SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW, 0, "GEOM_ELI stuff");
58 u_int g_eli_debug = 0;
59 TUNABLE_INT("kern.geom.eli.debug", &g_eli_debug);
60 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RW, &g_eli_debug, 0,
61     "Debug level");
62 static u_int g_eli_tries = 3;
63 TUNABLE_INT("kern.geom.eli.tries", &g_eli_tries);
64 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RW, &g_eli_tries, 0,
65     "Number of tries for entering the passphrase");
66 static u_int g_eli_visible_passphrase = 0;
67 TUNABLE_INT("kern.geom.eli.visible_passphrase", &g_eli_visible_passphrase);
68 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RW,
69     &g_eli_visible_passphrase, 0,
70     "Turn on echo when entering the passphrase (for debug purposes only!!)");
71 u_int g_eli_overwrites = 5;
72 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RW, &g_eli_overwrites,
73     0, "Number of times on-disk keys should be overwritten when destroying them");
74 static u_int g_eli_threads = 0;
75 TUNABLE_INT("kern.geom.eli.threads", &g_eli_threads);
76 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RW, &g_eli_threads, 0,
77     "Number of threads doing crypto work");
78 
79 static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp,
80     struct g_geom *gp);
81 static void g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp);
82 
83 static g_taste_t g_eli_taste;
84 static g_dumpconf_t g_eli_dumpconf;
85 
86 struct g_class g_eli_class = {
87 	.name = G_ELI_CLASS_NAME,
88 	.version = G_VERSION,
89 	.ctlreq = g_eli_config,
90 	.taste = g_eli_taste,
91 	.destroy_geom = g_eli_destroy_geom
92 };
93 
94 
95 /*
96  * Code paths:
97  * BIO_READ:
98  *	g_eli_start -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
99  * BIO_WRITE:
100  *	g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
101  */
102 
103 
104 /*
105  * EAGAIN from crypto(9) means, that we were probably balanced to another crypto
106  * accelerator or something like this.
107  * The function updates the SID and rerun the operation.
108  */
109 static int
110 g_eli_crypto_rerun(struct cryptop *crp)
111 {
112 	struct g_eli_softc *sc;
113 	struct g_eli_worker *wr;
114 	struct bio *bp;
115 	int error;
116 
117 	bp = (struct bio *)crp->crp_opaque;
118 	sc = bp->bio_to->geom->softc;
119 	LIST_FOREACH(wr, &sc->sc_workers, w_next) {
120 		if (wr->w_number == bp->bio_pflags)
121 			break;
122 	}
123 	KASSERT(wr != NULL, ("Invalid worker (%u).", bp->bio_pflags));
124 	G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %ju -> %ju).",
125 	    bp->bio_cmd == BIO_READ ? "READ" : "WRITE", (uintmax_t)wr->w_sid,
126 	    (uintmax_t)crp->crp_sid);
127 	wr->w_sid = crp->crp_sid;
128 	crp->crp_etype = 0;
129 	error = crypto_dispatch(crp);
130 	if (error == 0)
131 		return (0);
132 	G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error);
133 	crp->crp_etype = error;
134 	return (error);
135 }
136 
137 /*
138  * The function is called afer reading encrypted data from the provider.
139  *
140  * g_eli_start -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
141  */
142 static void
143 g_eli_read_done(struct bio *bp)
144 {
145 	struct g_eli_softc *sc;
146 	struct bio *pbp;
147 
148 	G_ELI_LOGREQ(2, bp, "Request done.");
149 	pbp = bp->bio_parent;
150 	if (pbp->bio_error == 0)
151 		pbp->bio_error = bp->bio_error;
152 	g_destroy_bio(bp);
153 	if (pbp->bio_error != 0) {
154 		G_ELI_LOGREQ(0, pbp, "%s() failed", __func__);
155 		pbp->bio_completed = 0;
156 		g_io_deliver(pbp, pbp->bio_error);
157 		return;
158 	}
159 	sc = pbp->bio_to->geom->softc;
160 	mtx_lock(&sc->sc_queue_mtx);
161 	bioq_insert_tail(&sc->sc_queue, pbp);
162 	mtx_unlock(&sc->sc_queue_mtx);
163 	wakeup(sc);
164 }
165 
166 /*
167  * The function is called after we read and decrypt data.
168  *
169  * g_eli_start -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> G_ELI_CRYPTO_READ_DONE -> g_io_deliver
170  */
171 static int
172 g_eli_crypto_read_done(struct cryptop *crp)
173 {
174 	struct bio *bp;
175 
176 	if (crp->crp_etype == EAGAIN) {
177 		if (g_eli_crypto_rerun(crp) == 0)
178 			return (0);
179 	}
180 	bp = (struct bio *)crp->crp_opaque;
181 	bp->bio_inbed++;
182 	if (crp->crp_etype == 0) {
183 		G_ELI_DEBUG(3, "Crypto READ request done (%d/%d).",
184 		    bp->bio_inbed, bp->bio_children);
185 		bp->bio_completed += crp->crp_olen;
186 	} else {
187 		G_ELI_DEBUG(1, "Crypto READ request failed (%d/%d) error=%d.",
188 		    bp->bio_inbed, bp->bio_children, crp->crp_etype);
189 		if (bp->bio_error == 0)
190 			bp->bio_error = crp->crp_etype;
191 	}
192 	/*
193 	 * Do we have all sectors already?
194 	 */
195 	if (bp->bio_inbed < bp->bio_children)
196 		return (0);
197 	free(bp->bio_driver2, M_ELI);
198 	bp->bio_driver2 = NULL;
199 	if (bp->bio_error != 0) {
200 		G_ELI_LOGREQ(0, bp, "Crypto READ request failed (error=%d).",
201 		    bp->bio_error);
202 		bp->bio_completed = 0;
203 	}
204 	/*
205 	 * Read is finished, send it up.
206 	 */
207 	g_io_deliver(bp, bp->bio_error);
208 	return (0);
209 }
210 
211 /*
212  * The function is called after we encrypt and write data.
213  *
214  * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver
215  */
216 static void
217 g_eli_write_done(struct bio *bp)
218 {
219 	struct bio *pbp;
220 
221 	G_ELI_LOGREQ(2, bp, "Request done.");
222 	pbp = bp->bio_parent;
223 	if (pbp->bio_error == 0)
224 		pbp->bio_error = bp->bio_error;
225 	free(pbp->bio_driver2, M_ELI);
226 	pbp->bio_driver2 = NULL;
227 	if (pbp->bio_error == 0)
228 		pbp->bio_completed = pbp->bio_length;
229 	else {
230 		G_ELI_LOGREQ(0, pbp, "Crypto WRITE request failed (error=%d).",
231 		    pbp->bio_error);
232 		pbp->bio_completed = 0;
233 	}
234 	g_destroy_bio(bp);
235 	/*
236 	 * Write is finished, send it up.
237 	 */
238 	g_io_deliver(pbp, pbp->bio_error);
239 }
240 
241 /*
242  * The function is called after data encryption.
243  *
244  * g_eli_start -> g_eli_crypto_run -> G_ELI_CRYPTO_WRITE_DONE -> g_io_request -> g_eli_write_done -> g_io_deliver
245  */
246 static int
247 g_eli_crypto_write_done(struct cryptop *crp)
248 {
249 	struct g_geom *gp;
250 	struct g_consumer *cp;
251 	struct bio *bp, *cbp;
252 
253 	if (crp->crp_etype == EAGAIN) {
254 		if (g_eli_crypto_rerun(crp) == 0)
255 			return (0);
256 	}
257 	bp = (struct bio *)crp->crp_opaque;
258 	bp->bio_inbed++;
259 	if (crp->crp_etype == 0) {
260 		G_ELI_DEBUG(3, "Crypto WRITE request done (%d/%d).",
261 		    bp->bio_inbed, bp->bio_children);
262 	} else {
263 		G_ELI_DEBUG(1, "Crypto WRITE request failed (%d/%d) error=%d.",
264 		    bp->bio_inbed, bp->bio_children, crp->crp_etype);
265 		if (bp->bio_error == 0)
266 			bp->bio_error = crp->crp_etype;
267 	}
268 	/*
269 	 * All sectors are already encrypted?
270 	 */
271 	if (bp->bio_inbed < bp->bio_children)
272 		return (0);
273 	bp->bio_inbed = 0;
274 	bp->bio_children = 1;
275 	cbp = bp->bio_driver1;
276 	bp->bio_driver1 = NULL;
277 	if (bp->bio_error != 0) {
278 		G_ELI_LOGREQ(0, bp, "Crypto WRITE request failed (error=%d).",
279 		    bp->bio_error);
280 		free(bp->bio_driver2, M_ELI);
281 		bp->bio_driver2 = NULL;
282 		g_destroy_bio(cbp);
283 		g_io_deliver(bp, bp->bio_error);
284 		return (0);
285 	}
286 	cbp->bio_data = bp->bio_driver2;
287 	cbp->bio_done = g_eli_write_done;
288 	gp = bp->bio_to->geom;
289 	cp = LIST_FIRST(&gp->consumer);
290 	cbp->bio_to = cp->provider;
291 	G_ELI_LOGREQ(2, cbp, "Sending request.");
292 	/*
293 	 * Send encrypted data to the provider.
294 	 */
295 	g_io_request(cbp, cp);
296 	return (0);
297 }
298 
299 /*
300  * This function should never be called, but GEOM made as it set ->orphan()
301  * method for every geom.
302  */
303 static void
304 g_eli_orphan_spoil_assert(struct g_consumer *cp)
305 {
306 
307 	panic("Function %s() called for %s.", __func__, cp->geom->name);
308 }
309 
310 static void
311 g_eli_orphan(struct g_consumer *cp)
312 {
313 	struct g_eli_softc *sc;
314 
315 	g_topology_assert();
316 	sc = cp->geom->softc;
317 	if (sc == NULL)
318 		return;
319 	g_eli_destroy(sc, 1);
320 }
321 
322 /*
323  * BIO_READ : G_ELI_START -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
324  * BIO_WRITE: G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
325  */
326 static void
327 g_eli_start(struct bio *bp)
328 {
329 	struct g_eli_softc *sc;
330 	struct g_consumer *cp;
331 	struct bio *cbp;
332 
333 	sc = bp->bio_to->geom->softc;
334 	KASSERT(sc != NULL,
335 	    ("Provider's error should be set (error=%d)(device=%s).",
336 	    bp->bio_to->error, bp->bio_to->name));
337 	G_ELI_LOGREQ(2, bp, "Request received.");
338 
339 	switch (bp->bio_cmd) {
340 	case BIO_READ:
341 	case BIO_WRITE:
342 	case BIO_GETATTR:
343 		break;
344 	case BIO_DELETE:
345 		/*
346 		 * We could eventually support BIO_DELETE request.
347 		 * It could be done by overwritting requested sector with
348 		 * random data g_eli_overwrites number of times.
349 		 */
350 	default:
351 		g_io_deliver(bp, EOPNOTSUPP);
352 		return;
353 	}
354 	cbp = g_clone_bio(bp);
355 	if (cbp == NULL) {
356 		g_io_deliver(bp, ENOMEM);
357 		return;
358 	}
359 	switch (bp->bio_cmd) {
360 	case BIO_READ:
361 		cbp->bio_done = g_eli_read_done;
362 		cp = LIST_FIRST(&sc->sc_geom->consumer);
363 		cbp->bio_to = cp->provider;
364 		G_ELI_LOGREQ(2, cbp, "Sending request.");
365 		/*
366 		 * Read encrypted data from provider.
367 		 */
368 		g_io_request(cbp, cp);
369 		break;
370 	case BIO_WRITE:
371 		bp->bio_driver1 = cbp;
372 		mtx_lock(&sc->sc_queue_mtx);
373 		bioq_insert_tail(&sc->sc_queue, bp);
374 		mtx_unlock(&sc->sc_queue_mtx);
375 		wakeup(sc);
376 		break;
377 	case BIO_GETATTR:
378 		cbp->bio_done = g_std_done;
379 		cp = LIST_FIRST(&sc->sc_geom->consumer);
380 		cbp->bio_to = cp->provider;
381 		G_ELI_LOGREQ(2, cbp, "Sending request.");
382 		g_io_request(cbp, cp);
383 		break;
384 	}
385 }
386 
387 /*
388  * This is the main function for kernel worker thread when we don't have
389  * hardware acceleration and we have to do cryptography in software.
390  * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM
391  * threads with crypto work.
392  */
393 static void
394 g_eli_worker(void *arg)
395 {
396 	struct g_eli_softc *sc;
397 	struct g_eli_worker *wr;
398 	struct bio *bp;
399 
400 	wr = arg;
401 	sc = wr->w_softc;
402 	mtx_lock_spin(&sched_lock);
403 	sched_prio(curthread, PRIBIO);
404 	if (sc->sc_crypto == G_ELI_CRYPTO_SW && g_eli_threads == 0)
405 		sched_bind(curthread, wr->w_number);
406 	mtx_unlock_spin(&sched_lock);
407 
408 	G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm);
409 
410 	for (;;) {
411 		mtx_lock(&sc->sc_queue_mtx);
412 		bp = bioq_takefirst(&sc->sc_queue);
413 		if (bp == NULL) {
414 			if ((sc->sc_flags & G_ELI_FLAG_DESTROY) != 0) {
415 				LIST_REMOVE(wr, w_next);
416 				crypto_freesession(wr->w_sid);
417 				free(wr, M_ELI);
418 				G_ELI_DEBUG(1, "Thread %s exiting.",
419 				    curthread->td_proc->p_comm);
420 				wakeup(&sc->sc_workers);
421 				mtx_unlock(&sc->sc_queue_mtx);
422 				kthread_exit(0);
423 			}
424 			msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
425 			    "geli:w", 0);
426 			continue;
427 		}
428 		mtx_unlock(&sc->sc_queue_mtx);
429 		g_eli_crypto_run(wr, bp);
430 	}
431 }
432 
433 /*
434  * Here we generate IV. It is unique for every sector.
435  */
436 static void
437 g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv,
438     size_t size)
439 {
440 	u_char hash[SHA256_DIGEST_LENGTH];
441 	SHA256_CTX ctx;
442 
443 	/* Copy precalculated SHA256 context for IV-Key. */
444 	bcopy(&sc->sc_ivctx, &ctx, sizeof(ctx));
445 	SHA256_Update(&ctx, (uint8_t *)&offset, sizeof(offset));
446 	SHA256_Final(hash, &ctx);
447 	bcopy(hash, iv, size);
448 }
449 
450 /*
451  * This is the main function responsible for cryptography (ie. communication
452  * with crypto(9) subsystem).
453  */
454 static void
455 g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp)
456 {
457 	struct g_eli_softc *sc;
458 	struct cryptop *crp;
459 	struct cryptodesc *crd;
460 	struct uio *uio;
461 	struct iovec *iov;
462 	u_int i, nsec, add, secsize;
463 	int err, error;
464 	size_t size;
465 	u_char *p, *data;
466 
467 	G_ELI_LOGREQ(3, bp, "%s", __func__);
468 
469 	bp->bio_pflags = wr->w_number;
470 	sc = wr->w_softc;
471 	secsize = LIST_FIRST(&sc->sc_geom->provider)->sectorsize;
472 	nsec = bp->bio_length / secsize;
473 
474 	/*
475 	 * Calculate how much memory do we need.
476 	 * We need separate crypto operation for every single sector.
477 	 * It is much faster to calculate total amount of needed memory here and
478 	 * do the allocation once instead of allocating memory in pieces (many,
479 	 * many pieces).
480 	 */
481 	size = sizeof(*crp) * nsec;
482 	size += sizeof(*crd) * nsec;
483 	size += sizeof(*uio) * nsec;
484 	size += sizeof(*iov) * nsec;
485 	/*
486 	 * If we write the data we cannot destroy current bio_data content,
487 	 * so we need to allocate more memory for encrypted data.
488 	 */
489 	if (bp->bio_cmd == BIO_WRITE)
490 		size += bp->bio_length;
491 	p = malloc(size, M_ELI, M_WAITOK);
492 
493 	bp->bio_inbed = 0;
494 	bp->bio_children = nsec;
495 	bp->bio_driver2 = p;
496 
497 	if (bp->bio_cmd == BIO_READ)
498 		data = bp->bio_data;
499 	else {
500 		data = p;
501 		p += bp->bio_length;
502 		bcopy(bp->bio_data, data, bp->bio_length);
503 	}
504 
505 	error = 0;
506 	for (i = 0, add = 0; i < nsec; i++, add += secsize) {
507 		crp = (struct cryptop *)p;	p += sizeof(*crp);
508 		crd = (struct cryptodesc *)p;	p += sizeof(*crd);
509 		uio = (struct uio *)p;		p += sizeof(*uio);
510 		iov = (struct iovec *)p;	p += sizeof(*iov);
511 
512 		iov->iov_len = secsize;
513 		iov->iov_base = data;
514 		data += secsize;
515 
516 		uio->uio_iov = iov;
517 		uio->uio_iovcnt = 1;
518 		uio->uio_segflg = UIO_SYSSPACE;
519 		uio->uio_resid = secsize;
520 
521 		crp->crp_sid = wr->w_sid;
522 		crp->crp_ilen = secsize;
523 		crp->crp_olen = secsize;
524 		crp->crp_opaque = (void *)bp;
525 		crp->crp_buf = (void *)uio;
526 		if (bp->bio_cmd == BIO_WRITE)
527 			crp->crp_callback = g_eli_crypto_write_done;
528 		else /* if (bp->bio_cmd == BIO_READ) */
529 			crp->crp_callback = g_eli_crypto_read_done;
530 		crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIFSYNC | CRYPTO_F_REL;
531 		crp->crp_desc = crd;
532 
533 		crd->crd_skip = 0;
534 		crd->crd_len = secsize;
535 		crd->crd_flags = CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT;
536 		if (bp->bio_cmd == BIO_WRITE)
537 			crd->crd_flags |= CRD_F_ENCRYPT;
538 		crd->crd_alg = sc->sc_algo;
539 		crd->crd_key = sc->sc_datakey;
540 		crd->crd_klen = sc->sc_keylen;
541 		g_eli_crypto_ivgen(sc, bp->bio_offset + add, crd->crd_iv,
542 		    sizeof(crd->crd_iv));
543 		crd->crd_next = NULL;
544 
545 		crp->crp_etype = 0;
546 		err = crypto_dispatch(crp);
547 		if (error == 0)
548 			error = err;
549 	}
550 	if (bp->bio_error == 0)
551 		bp->bio_error = error;
552 }
553 
554 int
555 g_eli_read_metadata(struct g_class *mp, struct g_provider *pp,
556     struct g_eli_metadata *md)
557 {
558 	struct g_geom *gp;
559 	struct g_consumer *cp;
560 	u_char *buf = NULL;
561 	int error;
562 
563 	g_topology_assert();
564 
565 	gp = g_new_geomf(mp, "eli:taste");
566 	gp->start = g_eli_start;
567 	gp->access = g_std_access;
568 	/*
569 	 * g_eli_read_metadata() is always called from the event thread.
570 	 * Our geom is created and destroyed in the same event, so there
571 	 * could be no orphan nor spoil event in the meantime.
572 	 */
573 	gp->orphan = g_eli_orphan_spoil_assert;
574 	gp->spoiled = g_eli_orphan_spoil_assert;
575 	cp = g_new_consumer(gp);
576 	error = g_attach(cp, pp);
577 	if (error != 0)
578 		goto end;
579 	error = g_access(cp, 1, 0, 0);
580 	if (error != 0)
581 		goto end;
582 	g_topology_unlock();
583 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
584 	    &error);
585 	g_topology_lock();
586 	if (buf == NULL)
587 		goto end;
588 	eli_metadata_decode(buf, md);
589 end:
590 	if (buf != NULL)
591 		g_free(buf);
592 	if (cp->provider != NULL) {
593 		if (cp->acr == 1)
594 			g_access(cp, -1, 0, 0);
595 		g_detach(cp);
596 	}
597 	g_destroy_consumer(cp);
598 	g_destroy_geom(gp);
599 	return (error);
600 }
601 
602 /*
603  * The function is called when we had last close on provider and user requested
604  * to close it when this situation occur.
605  */
606 static void
607 g_eli_last_close(struct g_eli_softc *sc)
608 {
609 	struct g_geom *gp;
610 	struct g_provider *pp;
611 	char ppname[64];
612 	int error;
613 
614 	g_topology_assert();
615 	gp = sc->sc_geom;
616 	pp = LIST_FIRST(&gp->provider);
617 	strlcpy(ppname, pp->name, sizeof(ppname));
618 	error = g_eli_destroy(sc, 1);
619 	KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).",
620 	    ppname, error));
621 	G_ELI_DEBUG(0, "Detached %s on last close.", ppname);
622 }
623 
624 int
625 g_eli_access(struct g_provider *pp, int dr, int dw, int de)
626 {
627 	struct g_eli_softc *sc;
628 	struct g_geom *gp;
629 
630 	gp = pp->geom;
631 	sc = gp->softc;
632 
633 	if (dw > 0) {
634 		/* Someone is opening us for write, we need to remember that. */
635 		sc->sc_flags |= G_ELI_FLAG_WOPEN;
636 		return (0);
637 	}
638 	/* Is this the last close? */
639 	if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0)
640 		return (0);
641 
642 	/*
643 	 * Automatically detach on last close if requested.
644 	 */
645 	if ((sc->sc_flags & G_ELI_FLAG_RW_DETACH) ||
646 	    (sc->sc_flags & G_ELI_FLAG_WOPEN)) {
647 		g_eli_last_close(sc);
648 	}
649 	return (0);
650 }
651 
652 struct g_geom *
653 g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
654     const struct g_eli_metadata *md, const u_char *mkey, int nkey)
655 {
656 	struct g_eli_softc *sc;
657 	struct g_eli_worker *wr;
658 	struct g_geom *gp;
659 	struct g_provider *pp;
660 	struct g_consumer *cp;
661 	struct cryptoini cri;
662 	u_int i, threads;
663 	int error;
664 
665 	G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX);
666 
667 	gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX);
668 	gp->softc = NULL;	/* for a moment */
669 
670 	sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO);
671 	gp->start = g_eli_start;
672 	/*
673 	 * Spoiling cannot happen actually, because we keep provider open for
674 	 * writing all the time.
675 	 */
676 	gp->spoiled = g_eli_orphan_spoil_assert;
677 	gp->orphan = g_eli_orphan;
678 	/*
679 	 * If detach-on-last-close feature is not enabled, we can simply use
680 	 * g_std_access().
681 	 */
682 	if (md->md_flags & G_ELI_FLAG_WO_DETACH)
683 		gp->access = g_eli_access;
684 	else
685 		gp->access = g_std_access;
686 	gp->dumpconf = g_eli_dumpconf;
687 
688 	sc->sc_crypto = G_ELI_CRYPTO_SW;
689 	sc->sc_flags = md->md_flags;
690 	sc->sc_algo = md->md_algo;
691 	sc->sc_nkey = nkey;
692 	/*
693 	 * Remember the keys in our softc structure.
694 	 */
695 	bcopy(mkey, sc->sc_ivkey, sizeof(sc->sc_ivkey));
696 	mkey += sizeof(sc->sc_ivkey);
697 	bcopy(mkey, sc->sc_datakey, sizeof(sc->sc_datakey));
698 	sc->sc_keylen = md->md_keylen;
699 
700 	/*
701 	 * Precalculate SHA256 for IV generation.
702 	 * This is expensive operation and we can do it only once now or for
703 	 * every access to sector, so now will be much better.
704 	 */
705 	SHA256_Init(&sc->sc_ivctx);
706 	SHA256_Update(&sc->sc_ivctx, sc->sc_ivkey, sizeof(sc->sc_ivkey));
707 
708 	gp->softc = sc;
709 	sc->sc_geom = gp;
710 
711 	bioq_init(&sc->sc_queue);
712 	mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF);
713 
714 	pp = NULL;
715 	cp = g_new_consumer(gp);
716 	error = g_attach(cp, bpp);
717 	if (error != 0) {
718 		if (req != NULL) {
719 			gctl_error(req, "Cannot attach to %s (error=%d).",
720 			    bpp->name, error);
721 		} else {
722 			G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).",
723 			    bpp->name, error);
724 		}
725 		goto failed;
726 	}
727 	/*
728 	 * Keep provider open all the time, so we can run critical tasks,
729 	 * like Master Keys deletion, without wondering if we can open
730 	 * provider or not.
731 	 */
732 	error = g_access(cp, 1, 1, 1);
733 	if (error != 0) {
734 		if (req != NULL) {
735 			gctl_error(req, "Cannot access %s (error=%d).",
736 			    bpp->name, error);
737 		} else {
738 			G_ELI_DEBUG(1, "Cannot access %s (error=%d).",
739 			    bpp->name, error);
740 		}
741 		goto failed;
742 	}
743 
744 	LIST_INIT(&sc->sc_workers);
745 
746 	bzero(&cri, sizeof(cri));
747 	cri.cri_alg = sc->sc_algo;
748 	cri.cri_klen = sc->sc_keylen;
749 	cri.cri_key = sc->sc_datakey;
750 
751 	threads = g_eli_threads;
752 	if (threads == 0)
753 		threads = mp_ncpus;
754 	else if (threads > mp_ncpus) {
755 		/* There is really no need for too many worker threads. */
756 		threads = mp_ncpus;
757 		G_ELI_DEBUG(0, "Reducing number of threads to %u.", threads);
758 	}
759 	for (i = 0; i < threads; i++) {
760 		wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO);
761 		wr->w_softc = sc;
762 		wr->w_number = i;
763 
764 		/*
765 		 * If this is the first pass, try to get hardware support.
766 		 * Use software cryptography, if we cannot get it.
767 		 */
768 		if (i == 0) {
769 			error = crypto_newsession(&wr->w_sid, &cri, 1);
770 			if (error == 0)
771 				sc->sc_crypto = G_ELI_CRYPTO_HW;
772 		}
773 		if (sc->sc_crypto == G_ELI_CRYPTO_SW)
774 			error = crypto_newsession(&wr->w_sid, &cri, 0);
775 		if (error != 0) {
776 			free(wr, M_ELI);
777 			if (req != NULL) {
778 				gctl_error(req, "Cannot set up crypto session "
779 				    "for %s (error=%d).", bpp->name, error);
780 			} else {
781 				G_ELI_DEBUG(1, "Cannot set up crypto session "
782 				    "for %s (error=%d).", bpp->name, error);
783 			}
784 			goto failed;
785 		}
786 
787 		error = kthread_create(g_eli_worker, wr, &wr->w_proc, 0, 0,
788 		    "g_eli[%u] %s", i, bpp->name);
789 		if (error != 0) {
790 			crypto_freesession(wr->w_sid);
791 			free(wr, M_ELI);
792 			if (req != NULL) {
793 				gctl_error(req, "Cannot create kernel thread "
794 				    "for %s (error=%d).", bpp->name, error);
795 			} else {
796 				G_ELI_DEBUG(1, "Cannot create kernel thread "
797 				    "for %s (error=%d).", bpp->name, error);
798 			}
799 			goto failed;
800 		}
801 		LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next);
802 		/* If we have hardware support, one thread is enough. */
803 		if (sc->sc_crypto == G_ELI_CRYPTO_HW)
804 			break;
805 	}
806 
807 	/*
808 	 * Create decrypted provider.
809 	 */
810 	pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX);
811 	pp->sectorsize = md->md_sectorsize;
812 	pp->mediasize = bpp->mediasize;
813 	if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0)
814 		pp->mediasize -= bpp->sectorsize;
815 	pp->mediasize -= (pp->mediasize % pp->sectorsize);
816 	g_error_provider(pp, 0);
817 
818 	G_ELI_DEBUG(0, "Device %s created.", pp->name);
819 	G_ELI_DEBUG(0, "    Cipher: %s", g_eli_algo2str(sc->sc_algo));
820 	G_ELI_DEBUG(0, "Key length: %u", sc->sc_keylen);
821 	G_ELI_DEBUG(0, "    Crypto: %s",
822 	    sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware");
823 	return (gp);
824 failed:
825 	mtx_lock(&sc->sc_queue_mtx);
826 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
827 	wakeup(sc);
828 	/*
829 	 * Wait for kernel threads self destruction.
830 	 */
831 	while (!LIST_EMPTY(&sc->sc_workers)) {
832 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
833 		    "geli:destroy", 0);
834 	}
835 	mtx_destroy(&sc->sc_queue_mtx);
836 	if (cp->provider != NULL) {
837 		if (cp->acr == 1)
838 			g_access(cp, -1, -1, -1);
839 		g_detach(cp);
840 	}
841 	g_destroy_consumer(cp);
842 	g_destroy_geom(gp);
843 	bzero(sc, sizeof(*sc));
844 	free(sc, M_ELI);
845 	return (NULL);
846 }
847 
848 int
849 g_eli_destroy(struct g_eli_softc *sc, boolean_t force)
850 {
851 	struct g_geom *gp;
852 	struct g_provider *pp;
853 
854 	g_topology_assert();
855 
856 	if (sc == NULL)
857 		return (ENXIO);
858 
859 	gp = sc->sc_geom;
860 	pp = LIST_FIRST(&gp->provider);
861 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
862 		if (force) {
863 			G_ELI_DEBUG(1, "Device %s is still open, so it "
864 			    "cannot be definitely removed.", pp->name);
865 		} else {
866 			G_ELI_DEBUG(1,
867 			    "Device %s is still open (r%dw%de%d).", pp->name,
868 			    pp->acr, pp->acw, pp->ace);
869 			return (EBUSY);
870 		}
871 	}
872 
873 	mtx_lock(&sc->sc_queue_mtx);
874 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
875 	wakeup(sc);
876 	while (!LIST_EMPTY(&sc->sc_workers)) {
877 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
878 		    "geli:destroy", 0);
879 	}
880 	mtx_destroy(&sc->sc_queue_mtx);
881 	gp->softc = NULL;
882 	bzero(sc, sizeof(*sc));
883 	free(sc, M_ELI);
884 
885 	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
886 		G_ELI_DEBUG(0, "Device %s destroyed.", gp->name);
887 	g_wither_geom_close(gp, ENXIO);
888 
889 	return (0);
890 }
891 
892 static int
893 g_eli_destroy_geom(struct gctl_req *req __unused,
894     struct g_class *mp __unused, struct g_geom *gp)
895 {
896 	struct g_eli_softc *sc;
897 
898 	sc = gp->softc;
899 	return (g_eli_destroy(sc, 0));
900 }
901 
902 static int
903 g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider)
904 {
905 	u_char *keyfile, *data, *size;
906 	char *file, name[64];
907 	int i;
908 
909 	for (i = 0; ; i++) {
910 		snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i);
911 		keyfile = preload_search_by_type(name);
912 		if (keyfile == NULL)
913 			return (i);	/* Return number of loaded keyfiles. */
914 		data = preload_search_info(keyfile, MODINFO_ADDR);
915 		if (data == NULL) {
916 			G_ELI_DEBUG(0, "Cannot find key file data for %s.",
917 			    name);
918 			return (0);
919 		}
920 		data = *(void **)data;
921 		size = preload_search_info(keyfile, MODINFO_SIZE);
922 		if (size == NULL) {
923 			G_ELI_DEBUG(0, "Cannot find key file size for %s.",
924 			    name);
925 			return (0);
926 		}
927 		file = preload_search_info(keyfile, MODINFO_NAME);
928 		if (file == NULL) {
929 			G_ELI_DEBUG(0, "Cannot find key file name for %s.",
930 			    name);
931 			return (0);
932 		}
933 		G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", file,
934 		    provider, name);
935 		g_eli_crypto_hmac_update(ctx, data, *(size_t *)size);
936 	}
937 }
938 
939 static void
940 g_eli_keyfiles_clear(const char *provider)
941 {
942 	u_char *keyfile, *data, *size;
943 	char name[64];
944 	int i;
945 
946 	for (i = 0; ; i++) {
947 		snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i);
948 		keyfile = preload_search_by_type(name);
949 		if (keyfile == NULL)
950 			return;
951 		data = preload_search_info(keyfile, MODINFO_ADDR);
952 		size = preload_search_info(keyfile, MODINFO_SIZE);
953 		if (data == NULL || size == NULL)
954 			continue;
955 		data = *(void **)data;
956 		bzero(data, *(size_t *)size);
957 	}
958 }
959 
960 /*
961  * Tasting is only made on boot.
962  * We detect providers which should be attached before root is mounted.
963  */
964 static struct g_geom *
965 g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
966 {
967 	struct g_eli_metadata md;
968 	struct g_geom *gp;
969 	struct hmac_ctx ctx;
970 	char passphrase[256];
971 	u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN];
972 	u_int i, nkey, nkeyfiles, tries;
973 	int error;
974 
975 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
976 	g_topology_assert();
977 
978 	if (rootvnode != NULL || g_eli_tries == 0)
979 		return (NULL);
980 
981 	G_ELI_DEBUG(3, "Tasting %s.", pp->name);
982 
983 	error = g_eli_read_metadata(mp, pp, &md);
984 	if (error != 0)
985 		return (NULL);
986 	gp = NULL;
987 
988 	if (strcmp(md.md_magic, G_ELI_MAGIC) != 0)
989 		return (NULL);
990 	if (md.md_version > G_ELI_VERSION) {
991 		printf("geom_eli.ko module is too old to handle %s.\n",
992 		    pp->name);
993 		return (NULL);
994 	}
995 	if (md.md_provsize != pp->mediasize)
996 		return (NULL);
997 	/* Should we attach it on boot? */
998 	if ((md.md_flags & G_ELI_FLAG_BOOT) == 0)
999 		return (NULL);
1000 	if (md.md_keys == 0x00) {
1001 		G_ELI_DEBUG(0, "No valid keys on %s.", pp->name);
1002 		return (NULL);
1003 	}
1004 	if (md.md_iterations == -1) {
1005 		/* If there is no passphrase, we try only once. */
1006 		tries = 1;
1007 	} else {
1008 		/* Ask for the passphrase no more than g_eli_tries times. */
1009 		tries = g_eli_tries;
1010 	}
1011 
1012 	for (i = 0; i < tries; i++) {
1013 		g_eli_crypto_hmac_init(&ctx, NULL, 0);
1014 
1015 		/*
1016 		 * Load all key files.
1017 		 */
1018 		nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name);
1019 
1020 		if (nkeyfiles == 0 && md.md_iterations == -1) {
1021 			/*
1022 			 * No key files and no passphrase, something is
1023 			 * definitely wrong here.
1024 			 * geli(8) doesn't allow for such situation, so assume
1025 			 * that there was really no passphrase and in that case
1026 			 * key files are no properly defined in loader.conf.
1027 			 */
1028 			G_ELI_DEBUG(0,
1029 			    "Found no key files in loader.conf for %s.",
1030 			    pp->name);
1031 			return (NULL);
1032 		}
1033 
1034 		/* Ask for the passphrase if defined. */
1035 		if (md.md_iterations >= 0) {
1036 			printf("Enter passphrase for %s: ", pp->name);
1037 			gets(passphrase, sizeof(passphrase),
1038 			    g_eli_visible_passphrase);
1039 		}
1040 
1041 		/*
1042 		 * Prepare Derived-Key from the user passphrase.
1043 		 */
1044 		if (md.md_iterations == 0) {
1045 			g_eli_crypto_hmac_update(&ctx, md.md_salt,
1046 			    sizeof(md.md_salt));
1047 			g_eli_crypto_hmac_update(&ctx, passphrase,
1048 			    strlen(passphrase));
1049 		} else if (md.md_iterations > 0) {
1050 			u_char dkey[G_ELI_USERKEYLEN];
1051 
1052 			pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt,
1053 			    sizeof(md.md_salt), passphrase, md.md_iterations);
1054 			g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey));
1055 			bzero(dkey, sizeof(dkey));
1056 		}
1057 
1058 		g_eli_crypto_hmac_final(&ctx, key, 0);
1059 
1060 		/*
1061 		 * Decrypt Master-Key.
1062 		 */
1063 		error = g_eli_mkey_decrypt(&md, key, mkey, &nkey);
1064 		bzero(key, sizeof(key));
1065 		if (error == -1) {
1066 			if (i == tries - 1) {
1067 				G_ELI_DEBUG(0,
1068 				    "Wrong key for %s. No tries left.",
1069 				    pp->name);
1070 				g_eli_keyfiles_clear(pp->name);
1071 				return (NULL);
1072 			}
1073 			G_ELI_DEBUG(0, "Wrong key for %s. Tries left: %u.",
1074 			    pp->name, tries - i - 1);
1075 			/* Try again. */
1076 			continue;
1077 		} else if (error > 0) {
1078 			G_ELI_DEBUG(0, "Cannot decrypt Master Key for %s (error=%d).",
1079 			    pp->name, error);
1080 			g_eli_keyfiles_clear(pp->name);
1081 			return (NULL);
1082 		}
1083 		G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name);
1084 		break;
1085 	}
1086 
1087 	/*
1088 	 * We have correct key, let's attach provider.
1089 	 */
1090 	gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey);
1091 	bzero(mkey, sizeof(mkey));
1092 	bzero(&md, sizeof(md));
1093 	if (gp == NULL) {
1094 		G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name,
1095 		    G_ELI_SUFFIX);
1096 		return (NULL);
1097 	}
1098 	return (gp);
1099 }
1100 
1101 static void
1102 g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1103     struct g_consumer *cp, struct g_provider *pp)
1104 {
1105 	struct g_eli_softc *sc;
1106 
1107 	g_topology_assert();
1108 	sc = gp->softc;
1109 	if (sc == NULL)
1110 		return;
1111 	if (pp != NULL || cp != NULL)
1112 		return;	/* Nothing here. */
1113 	sbuf_printf(sb, "%s<Flags>", indent);
1114 	if (sc->sc_flags == 0)
1115 		sbuf_printf(sb, "NONE");
1116 	else {
1117 		int first = 1;
1118 
1119 #define ADD_FLAG(flag, name)	do {					\
1120 	if ((sc->sc_flags & (flag)) != 0) {				\
1121 		if (!first)						\
1122 			sbuf_printf(sb, ", ");				\
1123 		else							\
1124 			first = 0;					\
1125 		sbuf_printf(sb, name);					\
1126 	}								\
1127 } while (0)
1128 		ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME");
1129 		ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT");
1130 		ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH");
1131 		ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH");
1132 		ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN");
1133 		ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY");
1134 #undef  ADD_FLAG
1135 	}
1136 	sbuf_printf(sb, "</Flags>\n");
1137 
1138 	if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0) {
1139 		sbuf_printf(sb, "%s<UsedKey>%u</UsedKey>\n", indent,
1140 		    sc->sc_nkey);
1141 	}
1142 	sbuf_printf(sb, "%s<Crypto>", indent);
1143 	switch (sc->sc_crypto) {
1144 	case G_ELI_CRYPTO_HW:
1145 		sbuf_printf(sb, "hardware");
1146 		break;
1147 	case G_ELI_CRYPTO_SW:
1148 		sbuf_printf(sb, "software");
1149 		break;
1150 	default:
1151 		sbuf_printf(sb, "UNKNOWN");
1152 		break;
1153 	}
1154 	sbuf_printf(sb, "</Crypto>\n");
1155 	sbuf_printf(sb, "%s<KeyLength>%u</KeyLength>\n", indent, sc->sc_keylen);
1156 	sbuf_printf(sb, "%s<Cipher>%s</Cipher>\n", indent,
1157 	    g_eli_algo2str(sc->sc_algo));
1158 }
1159 
1160 DECLARE_GEOM_CLASS(g_eli_class, g_eli);
1161 MODULE_DEPEND(geom_eli, crypto, 1, 1, 1);
1162