xref: /freebsd/sys/crypto/ccp/ccp_hardware.c (revision 22cf89c938886d14f5796fc49f9f020c23ea8eaf)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2017 Chelsio Communications, Inc.
5  * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org>
6  * All rights reserved.
7  * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 #include "opt_ddb.h"
33 
34 #include <sys/param.h>
35 #include <sys/bus.h>
36 #include <sys/lock.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/mutex.h>
40 #include <sys/module.h>
41 #include <sys/rman.h>
42 #include <sys/sglist.h>
43 #include <sys/sysctl.h>
44 
45 #ifdef DDB
46 #include <ddb/ddb.h>
47 #endif
48 
49 #include <dev/pci/pcireg.h>
50 #include <dev/pci/pcivar.h>
51 
52 #include <machine/bus.h>
53 #include <machine/resource.h>
54 #include <machine/vmparam.h>
55 
56 #include <opencrypto/cryptodev.h>
57 #include <opencrypto/xform.h>
58 
59 #include <vm/vm.h>
60 #include <vm/pmap.h>
61 
62 #include "cryptodev_if.h"
63 
64 #include "ccp.h"
65 #include "ccp_hardware.h"
66 #include "ccp_lsb.h"
67 
/* Compile-time check: a struct ccp_desc descriptor must be exactly 32 bytes. */
CTASSERT(sizeof(struct ccp_desc) == 32);
69 
/*
 * Table pairing each XTS unit-size selector (enum ccp_xts_unitsize) with the
 * numeric unit size it stands for (presumably in bytes -- confirm against the
 * users of this table).
 */
static struct ccp_xts_unitsize_map_entry {
	enum ccp_xts_unitsize cxu_id;	/* selector value */
	unsigned cxu_size;		/* unit size the selector represents */
} ccp_xts_unitsize_map[] = {
	{ CCP_XTS_AES_UNIT_SIZE_16, 16 },
	{ CCP_XTS_AES_UNIT_SIZE_512, 512 },
	{ CCP_XTS_AES_UNIT_SIZE_1024, 1024 },
	{ CCP_XTS_AES_UNIT_SIZE_2048, 2048 },
	{ CCP_XTS_AES_UNIT_SIZE_4096, 4096 },
};
80 
/* Root of the hw.ccp sysctl tree. */
SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "ccp node");

/*
 * Descriptor ring order, exported as the read-only tunable
 * hw.ccp.ring_order.  ccp_hw_attach() copies it into the softc and rejects
 * values outside 6..16.
 */
unsigned g_ccp_ring_order = 11;
SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order,
    0, "Set CCP ring order.  (1 << this) == ring size.  Min: 6, Max: 16");
87 
/*
 * Zero buffer, sufficient for padding LSB entries, that does not span a page
 * boundary.  It is 32 bytes long and 32-byte aligned, so it always lies
 * within a single aligned 32-byte unit.
 */
static const char g_zeroes[32] __aligned(32);
93 
94 static inline uint32_t
95 ccp_read_4(struct ccp_softc *sc, uint32_t offset)
96 {
97 	return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset));
98 }
99 
100 static inline void
101 ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value)
102 {
103 	bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value);
104 }
105 
106 static inline uint32_t
107 ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset)
108 {
109 	/*
110 	 * Each queue gets its own 4kB register space.  Queue 0 is at 0x1000.
111 	 */
112 	return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset));
113 }
114 
115 static inline void
116 ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset,
117     uint32_t value)
118 {
119 	ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value);
120 }
121 
122 void
123 ccp_queue_write_tail(struct ccp_queue *qp)
124 {
125 	ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE,
126 	    ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail));
127 }
128 
129 /*
130  * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of
131  * that entry for the queue's private LSB region.
132  */
133 static inline uint8_t
134 ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry)
135 {
136 	return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry));
137 }
138 
139 /*
140  * Given a queue and a reserved LSB entry index, compute the LSB *address* of
141  * that entry for the queue's private LSB region.
142  */
143 static inline uint32_t
144 ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry)
145 {
146 	return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE);
147 }
148 
149 /*
150  * Some terminology:
151  *
152  * LSB - Local Storage Block
153  * =========================
154  *
155  * 8 segments/regions, each containing 16 entries.
156  *
157  * Each entry contains 256 bits (32 bytes).
158  *
159  * Segments are virtually addressed in commands, but accesses cannot cross
160  * segment boundaries.  Virtual map uses an identity mapping by default
161  * (virtual segment N corresponds to physical segment N).
162  *
163  * Access to a physical region can be restricted to any subset of all five
164  * queues.
165  *
166  * "Pass-through" mode
167  * ===================
168  *
169  * Pass-through is a generic DMA engine, much like ioat(4).  Some nice
170  * features:
171  *
172  * - Supports byte-swapping for endian conversion (32- or 256-bit words)
173  * - AND, OR, XOR with fixed 256-bit mask
174  * - CRC32 of data (may be used in tandem with bswap, but not bit operations)
175  * - Read/write of LSB
176  * - Memset
177  *
178  * If bit manipulation mode is enabled, input must be a multiple of 256 bits
179  * (32 bytes).
180  *
181  * If byte-swapping is enabled, input must be a multiple of the word size.
182  *
183  * Zlib mode -- only usable from one queue at a time, single job at a time.
184  * ========================================================================
185  *
186  * Only usable from private host, aka PSP?  Not host processor?
187  *
188  * RNG.
189  * ====
190  *
191  * Raw bits are conditioned with AES and fed through CTR_DRBG.  Output goes in
192  * a ring buffer readable by software.
193  *
194  * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are
195  * implemented on the raw input stream and may be enabled to verify min-entropy
196  * of 0.5 bits per bit.
197  */
198 
199 static void
200 ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
201 {
202 	bus_addr_t *baddr;
203 
204 	KASSERT(error == 0, ("%s: error:%d", __func__, error));
205 	baddr = arg;
206 	*baddr = segs->ds_addr;
207 }
208 
/*
 * Bring up one hardware command queue: decode its LSB access, allocate and
 * DMA-load the descriptor ring, allocate the completion-context ring, program
 * the queue's head/tail/control registers and set the RUN flag.
 *
 * Queues missing from sc->valid_queues are skipped, and queues that turn out
 * to have no LSB access are removed from sc->valid_queues; both cases return
 * 0 without allocating anything.
 *
 * Returns 0 on success, or the error from the failing bus_dma call after
 * releasing the DMA resources acquired here.
 */
static int
ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue)
{
	struct ccp_softc *sc;
	struct ccp_queue *qp;
	void *desc;
	size_t ringsz, num_descriptors;
	int error;

	desc = NULL;
	sc = device_get_softc(dev);
	qp = &sc->queues[queue];

	/*
	 * Don't bother allocating a ring for queues the host isn't allowed to
	 * drive.
	 */
	if ((sc->valid_queues & (1 << queue)) == 0)
		return (0);

	ccp_queue_decode_lsb_regions(sc, lsbmask, queue);

	/* Ignore queues that do not have any LSB access. */
	if (qp->lsb_mask == 0) {
		device_printf(dev, "Ignoring queue %u with no LSB access\n",
		    queue);
		sc->valid_queues &= ~(1 << queue);
		return (0);
	}

	/* The ring holds 2^ring_size_order descriptors. */
	num_descriptors = 1 << sc->ring_size_order;
	ringsz = sizeof(struct ccp_desc) * num_descriptors;

	/*
	 * "Queue_Size" is order - 1.
	 *
	 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits.
	 */
	error = bus_dma_tag_create(bus_get_dma_tag(dev),
	    1 << (5 + sc->ring_size_order),
#if defined(__i386__) && !defined(PAE)
	    0, BUS_SPACE_MAXADDR,
#else
	    (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT,
#endif
	    BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1,
	    ringsz, 0, NULL, NULL, &qp->ring_desc_tag);
	if (error != 0)
		goto out;

	/* Zeroed ring memory; 'desc' stays NULL if this fails. */
	error = bus_dmamem_alloc(qp->ring_desc_tag, &desc,
	    BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map);
	if (error != 0)
		goto out;

	/* ccp_dmamap_cb records the ring's bus address in the queue. */
	error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc,
	    ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK);
	if (error != 0)
		goto out;

	/* One completion context per descriptor slot. */
	qp->desc_ring = desc;
	qp->completions_ring = malloc(num_descriptors *
	    sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK);

	/* Zero control register; among other things, clears the RUN flag. */
	qp->qcontrol = 0;
	ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
	ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0);

	/* Clear any leftover interrupt status flags */
	ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE,
	    ALL_INTERRUPTS);

	qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT;

	/* Head == tail == ring base: the ring starts out empty. */
	ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE,
	    (uint32_t)qp->desc_ring_bus_addr);
	ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE,
	    (uint32_t)qp->desc_ring_bus_addr);

	/*
	 * Enable completion interrupts, as well as error or administrative
	 * halt interrupts.  We don't use administrative halts, but they
	 * shouldn't trip unless we do, so it ought to be harmless.
	 */
	ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE,
	    INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);

	/* Upper bits of the ring address live in the control register. */
	qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT;
	qp->qcontrol |= CMD_Q_RUN;
	ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);

out:
	if (error != 0) {
		/*
		 * NOTE(review): qp->desc_ring is only assigned after the last
		 * failure point above, so this unload looks unreachable for a
		 * ring set up by this call -- confirm.
		 */
		if (qp->desc_ring != NULL)
			bus_dmamap_unload(qp->ring_desc_tag,
			    qp->ring_desc_map);
		if (desc != NULL)
			bus_dmamem_free(qp->ring_desc_tag, desc,
			    qp->ring_desc_map);
		if (qp->ring_desc_tag != NULL)
			bus_dma_tag_destroy(qp->ring_desc_tag);
	}
	return (error);
}
314 
315 static void
316 ccp_hw_detach_queue(device_t dev, unsigned queue)
317 {
318 	struct ccp_softc *sc;
319 	struct ccp_queue *qp;
320 
321 	sc = device_get_softc(dev);
322 	qp = &sc->queues[queue];
323 
324 	/*
325 	 * Don't bother allocating a ring for queues the host isn't allowed to
326 	 * drive.
327 	 */
328 	if ((sc->valid_queues & (1 << queue)) == 0)
329 		return;
330 
331 	free(qp->completions_ring, M_CCP);
332 	bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map);
333 	bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map);
334 	bus_dma_tag_destroy(qp->ring_desc_tag);
335 }
336 
337 static int
338 ccp_map_pci_bar(device_t dev)
339 {
340 	struct ccp_softc *sc;
341 
342 	sc = device_get_softc(dev);
343 
344 	sc->pci_resource_id = PCIR_BAR(2);
345 	sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
346 	    &sc->pci_resource_id, RF_ACTIVE);
347 	if (sc->pci_resource == NULL) {
348 		device_printf(dev, "unable to allocate pci resource\n");
349 		return (ENODEV);
350 	}
351 
352 	sc->pci_resource_id_msix = PCIR_BAR(5);
353 	sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
354 	    &sc->pci_resource_id_msix, RF_ACTIVE);
355 	if (sc->pci_resource_msix == NULL) {
356 		device_printf(dev, "unable to allocate pci resource msix\n");
357 		bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
358 		    sc->pci_resource);
359 		return (ENODEV);
360 	}
361 
362 	sc->pci_bus_tag = rman_get_bustag(sc->pci_resource);
363 	sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource);
364 	return (0);
365 }
366 
367 static void
368 ccp_unmap_pci_bar(device_t dev)
369 {
370 	struct ccp_softc *sc;
371 
372 	sc = device_get_softc(dev);
373 
374 	bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix,
375 	    sc->pci_resource_msix);
376 	bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
377 	    sc->pci_resource);
378 }
379 
/*
 * Table of error codes reported in the low bits of a queue's status
 * register.  Each entry gives the hardware code, its symbolic name, the
 * errno handed to the request's completion callback, and a human-readable
 * description (empty where none has been filled in yet).
 *
 * Codes that are absent from the table are reported as "(reserved)" by
 * ccp_intr_handle_error().
 */
static const struct ccp_error_code {
	uint8_t		ce_code;	/* hardware error code */
	const char	*ce_name;	/* symbolic name */
	int		ce_errno;	/* errno passed to the callback */
	const char	*ce_desc;	/* description; may be "" */
} ccp_error_codes[] = {
	{ 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" },
	{ 0x03, "ILLEGAL_FUNCTION_TYPE", EIO,
	    "A non-supported function type was specified" },
	{ 0x04, "ILLEGAL_FUNCTION_MODE", EIO,
	    "A non-supported function mode was specified" },
	{ 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO,
	    "A CMAC type was specified when ENCRYPT was not specified" },
	{ 0x06, "ILLEGAL_FUNCTION_SIZE", EIO,
	    "A non-supported function size was specified.\n"
	    "AES-CFB: Size was not 127 or 7;\n"
	    "3DES-CFB: Size was not 7;\n"
	    "RSA: See supported size table (7.4.2);\n"
	    "ECC: Size was greater than 576 bits." },
	{ 0x07, "Zlib_MISSING_INIT_EOM", EIO,
	    "Zlib command does not have INIT and EOM set" },
	{ 0x08, "ILLEGAL_FUNCTION_RSVD", EIO,
	    "Reserved bits in a function specification were not 0" },
	{ 0x09, "ILLEGAL_BUFFER_LENGTH", EIO,
	    "The buffer length specified was not correct for the selected engine"
	},
	{ 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n"
	    "Undefined VLSB segment mapping or\n"
	    "mapping to unsupported LSB segment id" },
	{ 0x0B, "ILLEGAL_MEM_ADDR", EFAULT,
	    "The specified source/destination buffer access was illegal:\n"
	    "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n"
	    "Data buffer not completely contained within a single segment; or\n"
	    "Pointer with Fixed=1 is not 32-bit aligned; or\n"
	    "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory."
	},
	{ 0x0C, "ILLEGAL_MEM_SEL", EIO,
	    "A src_mem, dst_mem, or key_mem field was illegal:\n"
	    "A field was set to a reserved value; or\n"
	    "A public command attempted to reference AXI1 (local) or GART memory; or\n"
	    "A Zlib command attempted to use the LSB." },
	{ 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO,
	    "The specified context location was illegal:\n"
	    "Context located in a LSB location disallowed by the LSB protection masks; or\n"
	    "Context not completely contained within a single segment." },
	{ 0x0E, "ILLEGAL_KEY_ADDR", EIO,
	    "The specified key location was illegal:\n"
	    "Key located in a LSB location disallowed by the LSB protection masks; or\n"
	    "Key not completely contained within a single segment." },
	{ 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" },
	/* XXX Could fill out these descriptions too */
	{ 0x13, "IDMA0_AXI_SLVERR", EIO, "" },
	{ 0x14, "IDMA0_AXI_DECERR", EIO, "" },
	{ 0x16, "IDMA1_AXI_SLVERR", EIO, "" },
	{ 0x17, "IDMA1_AXI_DECERR", EIO, "" },
	{ 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" },
	{ 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" },
	{ 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" },
	{ 0x1D, "ZLIB_EXTRA_DATA", EIO, "" },
	{ 0x1E, "ZLIB_BTYPE", EIO, "" },
	{ 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" },
	{ 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" },
	{ 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" },
	{ 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" },
	{ 0x24, "ZLIB_LIMIT_REACHED", EIO, "" },
	{ 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" },
	{ 0x26, "ODMA0_AXI_SLVERR", EIO, "" },
	{ 0x27, "ODMA0_AXI_DECERR", EIO, "" },
	{ 0x29, "ODMA1_AXI_SLVERR", EIO, "" },
	{ 0x2A, "ODMA1_AXI_DECERR", EIO, "" },
	{ 0x2B, "LSB_PARITY_ERR", EIO,
	    "A read from the LSB encountered a parity error" },
};
453 
454 static void
455 ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc)
456 {
457 	struct ccp_completion_ctx *cctx;
458 	const struct ccp_error_code *ec;
459 	struct ccp_softc *sc;
460 	uint32_t status, error, esource, faultblock;
461 	unsigned q, idx;
462 	int errno;
463 
464 	sc = qp->cq_softc;
465 	q = qp->cq_qindex;
466 
467 	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
468 
469 	error = status & STATUS_ERROR_MASK;
470 
471 	/* Decode error status */
472 	ec = NULL;
473 	for (idx = 0; idx < nitems(ccp_error_codes); idx++)
474 		if (ccp_error_codes[idx].ce_code == error) {
475 			ec = &ccp_error_codes[idx];
476 			break;
477 		}
478 
479 	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
480 	    STATUS_ERRORSOURCE_MASK;
481 	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
482 	    STATUS_VLSB_FAULTBLOCK_MASK;
483 	device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n",
484 	    (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
485 	    faultblock);
486 	if (ec != NULL)
487 		device_printf(sc->dev, "Error description: %s\n", ec->ce_desc);
488 
489 	/* TODO Could format the desc nicely here */
490 	idx = desc - qp->desc_ring;
491 	DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx,
492 	    (const void *)desc, " ");
493 
494 	/*
495 	 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status,
496 	 * Zlib Decompress status may be interesting.
497 	 */
498 
499 	while (true) {
500 		/* Keep unused descriptors zero for next use. */
501 		memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx]));
502 
503 		cctx = &qp->completions_ring[idx];
504 
505 		/*
506 		 * Restart procedure described in § 14.2.5.  Could be used by HoC if we
507 		 * used that.
508 		 *
509 		 * Advance HEAD_LO past bad descriptor + any remaining in
510 		 * transaction manually, then restart queue.
511 		 */
512 		idx = (idx + 1) % (1 << sc->ring_size_order);
513 
514 		/* Callback function signals end of transaction */
515 		if (cctx->callback_fn != NULL) {
516 			if (ec == NULL)
517 				errno = EIO;
518 			else
519 				errno = ec->ce_errno;
520 			/* TODO More specific error code */
521 			cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno);
522 			cctx->callback_fn = NULL;
523 			break;
524 		}
525 	}
526 
527 	qp->cq_head = idx;
528 	qp->cq_waiting = false;
529 	wakeup(&qp->cq_tail);
530 	DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
531 	ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE,
532 	    (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE));
533 	ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol);
534 	DPRINTF(sc->dev, "%s: Restarted queue\n", __func__);
535 }
536 
537 static void
538 ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints)
539 {
540 	struct ccp_completion_ctx *cctx;
541 	struct ccp_softc *sc;
542 	const struct ccp_desc *desc;
543 	uint32_t headlo, idx;
544 	unsigned q, completed;
545 
546 	sc = qp->cq_softc;
547 	q = qp->cq_qindex;
548 
549 	mtx_lock(&qp->cq_lock);
550 
551 	/*
552 	 * Hardware HEAD_LO points to the first incomplete descriptor.  Process
553 	 * any submitted and completed descriptors, up to but not including
554 	 * HEAD_LO.
555 	 */
556 	headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
557 	idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
558 
559 	DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx,
560 	    qp->cq_head);
561 	completed = 0;
562 	while (qp->cq_head != idx) {
563 		DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head);
564 
565 		cctx = &qp->completions_ring[qp->cq_head];
566 		if (cctx->callback_fn != NULL) {
567 			cctx->callback_fn(qp, cctx->session,
568 			    cctx->callback_arg, 0);
569 			cctx->callback_fn = NULL;
570 		}
571 
572 		/* Keep unused descriptors zero for next use. */
573 		memset(&qp->desc_ring[qp->cq_head], 0,
574 		    sizeof(qp->desc_ring[qp->cq_head]));
575 
576 		qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order);
577 		completed++;
578 	}
579 	if (completed > 0) {
580 		qp->cq_waiting = false;
581 		wakeup(&qp->cq_tail);
582 	}
583 
584 	DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
585 
586 	/*
587 	 * Desc points to the first incomplete descriptor, at the time we read
588 	 * HEAD_LO.  If there was an error flagged in interrupt status, the HW
589 	 * will not proceed past the erroneous descriptor by itself.
590 	 */
591 	desc = &qp->desc_ring[idx];
592 	if ((ints & INT_ERROR) != 0)
593 		ccp_intr_handle_error(qp, desc);
594 
595 	mtx_unlock(&qp->cq_lock);
596 }
597 
598 static void
599 ccp_intr_handler(void *arg)
600 {
601 	struct ccp_softc *sc = arg;
602 	size_t i;
603 	uint32_t ints;
604 
605 	DPRINTF(sc->dev, "%s: interrupt\n", __func__);
606 
607 	/*
608 	 * We get one global interrupt per PCI device, shared over all of
609 	 * its queues.  Scan each valid queue on interrupt for flags indicating
610 	 * activity.
611 	 */
612 	for (i = 0; i < nitems(sc->queues); i++) {
613 		if ((sc->valid_queues & (1 << i)) == 0)
614 			continue;
615 
616 		ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE);
617 		if (ints == 0)
618 			continue;
619 
620 #if 0
621 		DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__,
622 		    (unsigned)ints, i);
623 #endif
624 		/* Write back 1s to clear interrupt status bits. */
625 		ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints);
626 
627 		/*
628 		 * If there was an error, we still need to run completions on
629 		 * any descriptors prior to the error.  The completions handler
630 		 * invoked below will also handle the error descriptor.
631 		 */
632 		if ((ints & (INT_COMPLETION | INT_ERROR)) != 0)
633 			ccp_intr_run_completions(&sc->queues[i], ints);
634 
635 		if ((ints & INT_QUEUE_STOPPED) != 0)
636 			device_printf(sc->dev, "%s: queue %zu stopped\n",
637 			    __func__, i);
638 	}
639 
640 	/* Re-enable interrupts after processing */
641 	for (i = 0; i < nitems(sc->queues); i++) {
642 		if ((sc->valid_queues & (1 << i)) == 0)
643 			continue;
644 		ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE,
645 		    INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
646 	}
647 }
648 
649 static int
650 ccp_intr_filter(void *arg)
651 {
652 	struct ccp_softc *sc = arg;
653 	size_t i;
654 
655 	/* TODO: Split individual queues into separate taskqueues? */
656 	for (i = 0; i < nitems(sc->queues); i++) {
657 		if ((sc->valid_queues & (1 << i)) == 0)
658 			continue;
659 
660 		/* Mask interrupt until task completes */
661 		ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0);
662 	}
663 
664 	return (FILTER_SCHEDULE_THREAD);
665 }
666 
667 static int
668 ccp_setup_interrupts(struct ccp_softc *sc)
669 {
670 	uint32_t nvec;
671 	int rid, error, n, ridcopy;
672 
673 	n = pci_msix_count(sc->dev);
674 	if (n < 1) {
675 		device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n);
676 		return (ENXIO);
677 	}
678 
679 	nvec = n;
680 	error = pci_alloc_msix(sc->dev, &nvec);
681 	if (error != 0) {
682 		device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__,
683 		    error);
684 		return (error);
685 	}
686 	if (nvec < 1) {
687 		device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n",
688 		    __func__);
689 		return (ENXIO);
690 	}
691 	if (nvec > nitems(sc->intr_res)) {
692 		device_printf(sc->dev, "%s: too many vectors: %u\n", __func__,
693 		    nvec);
694 		nvec = nitems(sc->intr_res);
695 	}
696 
697 	for (rid = 1; rid < 1 + nvec; rid++) {
698 		ridcopy = rid;
699 		sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev,
700 		    SYS_RES_IRQ, &ridcopy, RF_ACTIVE);
701 		if (sc->intr_res[rid - 1] == NULL) {
702 			device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n",
703 			    __func__);
704 			return (ENXIO);
705 		}
706 
707 		sc->intr_tag[rid - 1] = NULL;
708 		error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1],
709 		    INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter,
710 		    ccp_intr_handler, sc, &sc->intr_tag[rid - 1]);
711 		if (error != 0)
712 			device_printf(sc->dev, "%s: setup_intr: %d\n",
713 			    __func__, error);
714 	}
715 	sc->intr_count = nvec;
716 
717 	return (error);
718 }
719 
720 static void
721 ccp_release_interrupts(struct ccp_softc *sc)
722 {
723 	unsigned i;
724 
725 	for (i = 0; i < sc->intr_count; i++) {
726 		if (sc->intr_tag[i] != NULL)
727 			bus_teardown_intr(sc->dev, sc->intr_res[i],
728 			    sc->intr_tag[i]);
729 		if (sc->intr_res[i] != NULL)
730 			bus_release_resource(sc->dev, SYS_RES_IRQ,
731 			    rman_get_rid(sc->intr_res[i]), sc->intr_res[i]);
732 	}
733 
734 	pci_release_msi(sc->dev);
735 }
736 
737 int
738 ccp_hw_attach(device_t dev)
739 {
740 	struct ccp_softc *sc;
741 	uint64_t lsbmask;
742 	uint32_t version, lsbmasklo, lsbmaskhi;
743 	unsigned queue_idx, j;
744 	int error;
745 	bool bars_mapped, interrupts_setup;
746 
747 	queue_idx = 0;
748 	bars_mapped = interrupts_setup = false;
749 	sc = device_get_softc(dev);
750 
751 	error = ccp_map_pci_bar(dev);
752 	if (error != 0) {
753 		device_printf(dev, "%s: couldn't map BAR(s)\n", __func__);
754 		goto out;
755 	}
756 	bars_mapped = true;
757 
758 	error = pci_enable_busmaster(dev);
759 	if (error != 0) {
760 		device_printf(dev, "%s: couldn't enable busmaster\n",
761 		    __func__);
762 		goto out;
763 	}
764 
765 	sc->ring_size_order = g_ccp_ring_order;
766 	if (sc->ring_size_order < 6 || sc->ring_size_order > 16) {
767 		device_printf(dev, "bogus hw.ccp.ring_order\n");
768 		error = EINVAL;
769 		goto out;
770 	}
771 	sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET);
772 
773 	version = ccp_read_4(sc, VERSION_REG);
774 	if ((version & VERSION_NUM_MASK) < 5) {
775 		device_printf(dev,
776 		    "driver supports version 5 and later hardware\n");
777 		error = ENXIO;
778 		goto out;
779 	}
780 
781 	error = ccp_setup_interrupts(sc);
782 	if (error != 0)
783 		goto out;
784 	interrupts_setup = true;
785 
786 	sc->hw_version = version & VERSION_NUM_MASK;
787 	sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) &
788 	    VERSION_NUMVQM_MASK;
789 	sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) &
790 	    VERSION_LSBSIZE_MASK;
791 	sc->hw_features = version & VERSION_CAP_MASK;
792 
793 	/*
794 	 * Copy private LSB mask to public registers to enable access to LSB
795 	 * from all queues allowed by BIOS.
796 	 */
797 	lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET);
798 	lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET);
799 	ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo);
800 	ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi);
801 
802 	lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo;
803 
804 	for (; queue_idx < nitems(sc->queues); queue_idx++) {
805 		error = ccp_hw_attach_queue(dev, lsbmask, queue_idx);
806 		if (error != 0) {
807 			device_printf(dev, "%s: couldn't attach queue %u\n",
808 			    __func__, queue_idx);
809 			goto out;
810 		}
811 	}
812 	ccp_assign_lsb_regions(sc, lsbmask);
813 
814 out:
815 	if (error != 0) {
816 		if (interrupts_setup)
817 			ccp_release_interrupts(sc);
818 		for (j = 0; j < queue_idx; j++)
819 			ccp_hw_detach_queue(dev, j);
820 		if (sc->ring_size_order != 0)
821 			pci_disable_busmaster(dev);
822 		if (bars_mapped)
823 			ccp_unmap_pci_bar(dev);
824 	}
825 	return (error);
826 }
827 
828 void
829 ccp_hw_detach(device_t dev)
830 {
831 	struct ccp_softc *sc;
832 	unsigned i;
833 
834 	sc = device_get_softc(dev);
835 
836 	for (i = 0; i < nitems(sc->queues); i++)
837 		ccp_hw_detach_queue(dev, i);
838 
839 	ccp_release_interrupts(sc);
840 	pci_disable_busmaster(dev);
841 	ccp_unmap_pci_bar(dev);
842 }
843 
844 static int __must_check
845 ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst,
846     enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type,
847     bus_size_t len, enum ccp_passthru_byteswap swapmode,
848     enum ccp_passthru_bitwise bitmode, bool interrupt,
849     const struct ccp_completion_ctx *cctx)
850 {
851 	struct ccp_desc *desc;
852 
853 	if (ccp_queue_get_ring_space(qp) == 0)
854 		return (EAGAIN);
855 
856 	desc = &qp->desc_ring[qp->cq_tail];
857 
858 	memset(desc, 0, sizeof(*desc));
859 	desc->engine = CCP_ENGINE_PASSTHRU;
860 
861 	desc->pt.ioc = interrupt;
862 	desc->pt.byteswap = swapmode;
863 	desc->pt.bitwise = bitmode;
864 	desc->length = len;
865 
866 	desc->src_lo = (uint32_t)src;
867 	desc->src_hi = src >> 32;
868 	desc->src_mem = src_type;
869 
870 	desc->dst_lo = (uint32_t)dst;
871 	desc->dst_hi = dst >> 32;
872 	desc->dst_mem = dst_type;
873 
874 	if (bitmode != CCP_PASSTHRU_BITWISE_NOOP)
875 		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY);
876 
877 	if (cctx != NULL)
878 		memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx));
879 
880 	qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
881 	return (0);
882 }
883 
884 static int __must_check
885 ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb,
886     struct sglist *sgl, bus_size_t len, bool interrupt,
887     const struct ccp_completion_ctx *cctx)
888 {
889 	struct sglist_seg *seg;
890 	size_t i, remain, nb;
891 	int error;
892 
893 	remain = len;
894 	for (i = 0; i < sgl->sg_nseg && remain != 0; i++) {
895 		seg = &sgl->sg_segs[i];
896 		/* crp lengths are int, so 32-bit min() is ok. */
897 		nb = min(remain, seg->ss_len);
898 
899 		if (tolsb)
900 			error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB,
901 			    seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb,
902 			    CCP_PASSTHRU_BYTESWAP_NOOP,
903 			    CCP_PASSTHRU_BITWISE_NOOP,
904 			    (nb == remain) && interrupt, cctx);
905 		else
906 			error = ccp_passthrough(qp, seg->ss_paddr,
907 			    CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb,
908 			    CCP_PASSTHRU_BYTESWAP_NOOP,
909 			    CCP_PASSTHRU_BITWISE_NOOP,
910 			    (nb == remain) && interrupt, cctx);
911 		if (error != 0)
912 			return (error);
913 
914 		remain -= nb;
915 	}
916 	return (0);
917 }
918 
/*
 * Initial hash values for each supported SHA variant.
 *
 * Note that these vectors are in reverse of the usual order: the last word
 * of the standard initial value comes first.  The SHA1 vector has only five
 * words and is zero-padded to eight.
 *
 * The structure is page-aligned.
 */
const struct SHA_vectors {
	uint32_t SHA1[8];
	uint32_t SHA224[8];
	uint32_t SHA256[8];
	uint64_t SHA384[8];
	uint64_t SHA512[8];
} SHA_H __aligned(PAGE_SIZE) = {
	.SHA1 = {
		0xc3d2e1f0ul,
		0x10325476ul,
		0x98badcfeul,
		0xefcdab89ul,
		0x67452301ul,
		0,
		0,
		0,
	},
	.SHA224 = {
		0xbefa4fa4ul,
		0x64f98fa7ul,
		0x68581511ul,
		0xffc00b31ul,
		0xf70e5939ul,
		0x3070dd17ul,
		0x367cd507ul,
		0xc1059ed8ul,
	},
	.SHA256 = {
		0x5be0cd19ul,
		0x1f83d9abul,
		0x9b05688cul,
		0x510e527ful,
		0xa54ff53aul,
		0x3c6ef372ul,
		0xbb67ae85ul,
		0x6a09e667ul,
	},
	.SHA384 = {
		0x47b5481dbefa4fa4ull,
		0xdb0c2e0d64f98fa7ull,
		0x8eb44a8768581511ull,
		0x67332667ffc00b31ull,
		0x152fecd8f70e5939ull,
		0x9159015a3070dd17ull,
		0x629a292a367cd507ull,
		0xcbbb9d5dc1059ed8ull,
	},
	.SHA512 = {
		0x5be0cd19137e2179ull,
		0x1f83d9abfb41bd6bull,
		0x9b05688c2b3e6c1full,
		0x510e527fade682d1ull,
		0xa54ff53a5f1d36f1ull,
		0x3c6ef372fe94f82bull,
		0xbb67ae8584caa73bull,
		0x6a09e667f3bcc908ull,
	},
};
980 /*
981  * Ensure vectors do not cross a page boundary.
982  *
983  * Disabled due to a new Clang error:  "expression is not an integral constant
984  * expression."  GCC (cross toolchain) seems to handle this assertion with
985  * _Static_assert just fine.
986  */
987 #if 0
988 CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H));
989 #endif
990 
/*
 * Per-algorithm description of each SHA variant in use: where its initial
 * hash vector lives in SHA_H and how large it is, the matching opencrypto
 * HMAC transform, and the type value placed in SHA descriptors.
 *
 * The SHA2-224 entry is compiled out.
 */
const struct SHA_Defn {
	enum sha_version version;	/* which SHA variant this describes */
	const void *H_vectors;		/* initial hash vector within SHA_H */
	size_t H_size;			/* size of that vector in bytes */
	const struct auth_hash *axf;	/* opencrypto HMAC transform */
	enum ccp_sha_type engine_type;	/* copied into desc->sha.type */
} SHA_definitions[] = {
	{
		.version = SHA1,
		.H_vectors = SHA_H.SHA1,
		.H_size = sizeof(SHA_H.SHA1),
		.axf = &auth_hash_hmac_sha1,
		.engine_type = CCP_SHA_TYPE_1,
	},
#if 0
	{
		.version = SHA2_224,
		.H_vectors = SHA_H.SHA224,
		.H_size = sizeof(SHA_H.SHA224),
		.axf = &auth_hash_hmac_sha2_224,
		.engine_type = CCP_SHA_TYPE_224,
	},
#endif
	{
		.version = SHA2_256,
		.H_vectors = SHA_H.SHA256,
		.H_size = sizeof(SHA_H.SHA256),
		.axf = &auth_hash_hmac_sha2_256,
		.engine_type = CCP_SHA_TYPE_256,
	},
	{
		.version = SHA2_384,
		.H_vectors = SHA_H.SHA384,
		.H_size = sizeof(SHA_H.SHA384),
		.axf = &auth_hash_hmac_sha2_384,
		.engine_type = CCP_SHA_TYPE_384,
	},
	{
		.version = SHA2_512,
		.H_vectors = SHA_H.SHA512,
		.H_size = sizeof(SHA_H.SHA512),
		.axf = &auth_hash_hmac_sha2_512,
		.engine_type = CCP_SHA_TYPE_512,
	},
};
1036 
1037 static int __must_check
1038 ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn,
1039     vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits)
1040 {
1041 	struct ccp_desc *desc;
1042 
1043 	if (ccp_queue_get_ring_space(qp) == 0)
1044 		return (EAGAIN);
1045 
1046 	desc = &qp->desc_ring[qp->cq_tail];
1047 
1048 	memset(desc, 0, sizeof(*desc));
1049 	desc->engine = CCP_ENGINE_SHA;
1050 	desc->som = start;
1051 	desc->eom = end;
1052 
1053 	desc->sha.type = defn->engine_type;
1054 	desc->length = len;
1055 
1056 	if (end) {
1057 		desc->sha_len_lo = (uint32_t)msgbits;
1058 		desc->sha_len_hi = msgbits >> 32;
1059 	}
1060 
1061 	desc->src_lo = (uint32_t)addr;
1062 	desc->src_hi = addr >> 32;
1063 	desc->src_mem = CCP_MEMTYPE_SYSTEM;
1064 
1065 	desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA);
1066 
1067 	qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
1068 	return (0);
1069 }
1070 
1071 static int __must_check
1072 ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src,
1073     struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx)
1074 {
1075 	const struct SHA_Defn *defn;
1076 	struct sglist_seg *seg;
1077 	size_t i, msgsize, remaining, nb;
1078 	uint32_t lsbaddr;
1079 	int error;
1080 
1081 	for (i = 0; i < nitems(SHA_definitions); i++)
1082 		if (SHA_definitions[i].version == version)
1083 			break;
1084 	if (i == nitems(SHA_definitions))
1085 		return (EINVAL);
1086 	defn = &SHA_definitions[i];
1087 
1088 	/* XXX validate input ??? */
1089 
1090 	/* Load initial SHA state into LSB */
1091 	/* XXX ensure H_vectors don't span page boundaries */
1092 	error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA),
1093 	    CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors),
1094 	    CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE),
1095 	    CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
1096 	    NULL);
1097 	if (error != 0)
1098 		return (error);
1099 
1100 	/* Execute series of SHA updates on correctly sized buffers */
1101 	msgsize = 0;
1102 	for (i = 0; i < sgl_src->sg_nseg; i++) {
1103 		seg = &sgl_src->sg_segs[i];
1104 		msgsize += seg->ss_len;
1105 		error = ccp_sha_single_desc(qp, defn, seg->ss_paddr,
1106 		    seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1,
1107 		    msgsize << 3);
1108 		if (error != 0)
1109 			return (error);
1110 	}
1111 
1112 	/* Copy result out to sgl_dst */
1113 	remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE);
1114 	lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA);
1115 	for (i = 0; i < sgl_dst->sg_nseg; i++) {
1116 		seg = &sgl_dst->sg_segs[i];
1117 		/* crp lengths are int, so 32-bit min() is ok. */
1118 		nb = min(remaining, seg->ss_len);
1119 
1120 		error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM,
1121 		    lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP,
1122 		    CCP_PASSTHRU_BITWISE_NOOP,
1123 		    (cctx != NULL) ? (nb == remaining) : false,
1124 		    (nb == remaining) ? cctx : NULL);
1125 		if (error != 0)
1126 			return (error);
1127 
1128 		remaining -= nb;
1129 		lsbaddr += nb;
1130 		if (remaining == 0)
1131 			break;
1132 	}
1133 
1134 	return (0);
1135 }
1136 
/*
 * Reverse the byte order of a 256-bit value held as four 64-bit words:
 * the words are mirrored (0<->3, 1<->2) and each one is byte-swapped.
 */
static void
byteswap256(uint64_t *buffer)
{
	uint64_t lo, hi;
	unsigned i;

	for (i = 0; i < 2; i++) {
		lo = buffer[i];
		hi = buffer[3 - i];
		buffer[i] = bswap64(hi);
		buffer[3 - i] = bswap64(lo);
	}
}
1150 
1151 /*
1152  * Translate CCP internal LSB hash format into a standard hash ouput.
1153  *
1154  * Manipulates input buffer with byteswap256 operation.
1155  */
1156 static void
1157 ccp_sha_copy_result(char *output, char *buffer, enum sha_version version)
1158 {
1159 	const struct SHA_Defn *defn;
1160 	size_t i;
1161 
1162 	for (i = 0; i < nitems(SHA_definitions); i++)
1163 		if (SHA_definitions[i].version == version)
1164 			break;
1165 	if (i == nitems(SHA_definitions))
1166 		panic("bogus sha version auth_mode %u\n", (unsigned)version);
1167 
1168 	defn = &SHA_definitions[i];
1169 
1170 	/* Swap 256bit manually -- DMA engine can, but with limitations */
1171 	byteswap256((void *)buffer);
1172 	if (defn->axf->hashsize > LSB_ENTRY_SIZE)
1173 		byteswap256((void *)(buffer + LSB_ENTRY_SIZE));
1174 
1175 	switch (defn->version) {
1176 	case SHA1:
1177 		memcpy(output, buffer + 12, defn->axf->hashsize);
1178 		break;
1179 #if 0
1180 	case SHA2_224:
1181 		memcpy(output, buffer + XXX, defn->axf->hashsize);
1182 		break;
1183 #endif
1184 	case SHA2_256:
1185 		memcpy(output, buffer, defn->axf->hashsize);
1186 		break;
1187 	case SHA2_384:
1188 		memcpy(output,
1189 		    buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize,
1190 		    defn->axf->hashsize - LSB_ENTRY_SIZE);
1191 		memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer,
1192 		    LSB_ENTRY_SIZE);
1193 		break;
1194 	case SHA2_512:
1195 		memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE);
1196 		memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE);
1197 		break;
1198 	}
1199 }
1200 
1201 static void
1202 ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s,
1203     struct cryptop *crp, int error)
1204 {
1205 	char ihash[SHA2_512_HASH_LEN /* max hash len */];
1206 	union authctx auth_ctx;
1207 	const struct auth_hash *axf;
1208 
1209 	axf = s->hmac.auth_hash;
1210 
1211 	s->pending--;
1212 
1213 	if (error != 0) {
1214 		crp->crp_etype = error;
1215 		goto out;
1216 	}
1217 
1218 	/* Do remaining outer hash over small inner hash in software */
1219 	axf->Init(&auth_ctx);
1220 	axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize);
1221 	ccp_sha_copy_result(ihash, s->hmac.res, s->hmac.auth_mode);
1222 #if 0
1223 	INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__,
1224 	    (u_char *)ihash, " ");
1225 #endif
1226 	axf->Update(&auth_ctx, ihash, axf->hashsize);
1227 	axf->Final(s->hmac.res, &auth_ctx);
1228 
1229 	if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) {
1230 		crypto_copydata(crp, crp->crp_digest_start, s->hmac.hash_len,
1231 		    ihash);
1232 		if (timingsafe_bcmp(s->hmac.res, ihash, s->hmac.hash_len) != 0)
1233 			crp->crp_etype = EBADMSG;
1234 	} else
1235 		crypto_copyback(crp, crp->crp_digest_start, s->hmac.hash_len,
1236 		    s->hmac.res);
1237 
1238 	/* Avoid leaking key material */
1239 	explicit_bzero(&auth_ctx, sizeof(auth_ctx));
1240 	explicit_bzero(s->hmac.res, sizeof(s->hmac.res));
1241 
1242 out:
1243 	crypto_done(crp);
1244 }
1245 
/*
 * Completion callback for plain HMAC requests; the opaque argument is the
 * cryptop that was queued.
 */
static void
ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
    int error)
{
	struct cryptop *request = vcrp;

	ccp_do_hmac_done(qp, s, request, error);
}
1255 
1256 static int __must_check
1257 ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1258     const struct ccp_completion_ctx *cctx)
1259 {
1260 	device_t dev;
1261 	const struct auth_hash *axf;
1262 	int error;
1263 
1264 	dev = qp->cq_softc->dev;
1265 	axf = s->hmac.auth_hash;
1266 
1267 	/*
1268 	 * Populate the SGL describing inside hash contents.  We want to hash
1269 	 * the ipad (key XOR fixed bit pattern) concatenated with the user
1270 	 * data.
1271 	 */
1272 	sglist_reset(qp->cq_sg_ulptx);
1273 	error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize);
1274 	if (error != 0)
1275 		return (error);
1276 	if (crp->crp_aad_length != 0) {
1277 		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1278 		    crp->crp_aad_start, crp->crp_aad_length);
1279 		if (error != 0)
1280 			return (error);
1281 	}
1282 	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1283 	    crp->crp_payload_start, crp->crp_payload_length);
1284 	if (error != 0) {
1285 		DPRINTF(dev, "%s: sglist too short\n", __func__);
1286 		return (error);
1287 	}
1288 	/* Populate SGL for output -- use hmac.res buffer. */
1289 	sglist_reset(qp->cq_sg_dst);
1290 	error = sglist_append(qp->cq_sg_dst, s->hmac.res,
1291 	    roundup2(axf->hashsize, LSB_ENTRY_SIZE));
1292 	if (error != 0)
1293 		return (error);
1294 
1295 	error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst,
1296 	    cctx);
1297 	if (error != 0) {
1298 		DPRINTF(dev, "%s: ccp_sha error\n", __func__);
1299 		return (error);
1300 	}
1301 	return (0);
1302 }
1303 
1304 int __must_check
1305 ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1306 {
1307 	struct ccp_completion_ctx ctx;
1308 
1309 	ctx.callback_fn = ccp_hmac_done;
1310 	ctx.callback_arg = crp;
1311 	ctx.session = s;
1312 
1313 	return (ccp_do_hmac(qp, s, crp, &ctx));
1314 }
1315 
/*
 * Reverse the order of the 'len' bytes at 'data' in place.
 *
 * Buffers of zero or one byte are left untouched.  The explicit guard
 * matters for len == 0: computing 'len - 1' on a size_t would wrap around
 * and send the swap loop far outside the buffer.
 */
static void
ccp_byteswap(char *data, size_t len)
{
	size_t lo, hi;
	char t;

	if (len < 2)
		return;

	/* Walk inwards from both ends, exchanging one pair per step. */
	for (lo = 0, hi = len - 1; lo < hi; lo++, hi--) {
		t = data[lo];
		data[lo] = data[hi];
		data[hi] = t;
	}
}
1329 
1330 static void
1331 ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1332     int error)
1333 {
1334 	struct cryptop *crp;
1335 
1336 	explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1337 
1338 	crp = vcrp;
1339 
1340 	s->pending--;
1341 
1342 	if (error != 0)
1343 		crp->crp_etype = error;
1344 
1345 	DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp);
1346 	crypto_done(crp);
1347 }
1348 
1349 static void
1350 ccp_collect_iv(struct cryptop *crp, const struct crypto_session_params *csp,
1351     char *iv)
1352 {
1353 
1354 	crypto_read_iv(crp, iv);
1355 
1356 	/*
1357 	 * Append an explicit counter of 1 for GCM.
1358 	 */
1359 	if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16)
1360 		*(uint32_t *)&iv[12] = htobe32(1);
1361 
1362 	if (csp->csp_cipher_alg == CRYPTO_AES_XTS &&
1363 	    csp->csp_ivlen < AES_BLOCK_LEN)
1364 		memset(&iv[csp->csp_ivlen], 0, AES_BLOCK_LEN - csp->csp_ivlen);
1365 
1366 	/* Reverse order of IV material for HW */
1367 	INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__, iv, " ",
1368 	    csp->csp_ivlen);
1369 
1370 	/*
1371 	 * For unknown reasons, XTS mode expects the IV in the reverse byte
1372 	 * order to every other AES mode.
1373 	 */
1374 	if (csp->csp_cipher_alg != CRYPTO_AES_XTS)
1375 		ccp_byteswap(iv, AES_BLOCK_LEN);
1376 }
1377 
1378 static int __must_check
1379 ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src,
1380     size_t len)
1381 {
1382 	int error;
1383 
1384 	sglist_reset(qp->cq_sg_ulptx);
1385 	error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len);
1386 	if (error != 0)
1387 		return (error);
1388 
1389 	error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len,
1390 	    false, NULL);
1391 	return (error);
1392 }
1393 
1394 static int __must_check
1395 ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1396     enum ccp_cipher_dir dir, const struct ccp_completion_ctx *cctx)
1397 {
1398 	struct ccp_desc *desc;
1399 	device_t dev;
1400 	unsigned i;
1401 	enum ccp_xts_unitsize usize;
1402 
1403 	/* IV and Key data are already loaded */
1404 
1405 	dev = qp->cq_softc->dev;
1406 
1407 	for (i = 0; i < nitems(ccp_xts_unitsize_map); i++)
1408 		if (ccp_xts_unitsize_map[i].cxu_size ==
1409 		    crp->crp_payload_length) {
1410 			usize = ccp_xts_unitsize_map[i].cxu_id;
1411 			break;
1412 		}
1413 	if (i >= nitems(ccp_xts_unitsize_map))
1414 		return (EINVAL);
1415 
1416 	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1417 		struct sglist_seg *seg;
1418 
1419 		seg = &qp->cq_sg_ulptx->sg_segs[i];
1420 
1421 		desc = &qp->desc_ring[qp->cq_tail];
1422 		desc->engine = CCP_ENGINE_XTS_AES;
1423 		desc->som = (i == 0);
1424 		desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1425 		desc->ioc = (desc->eom && cctx != NULL);
1426 		DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n",
1427 		    __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1428 		    (int)desc->ioc, (int)dir);
1429 
1430 		if (desc->ioc)
1431 			memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1432 			    sizeof(*cctx));
1433 
1434 		desc->aes_xts.encrypt = dir;
1435 		desc->aes_xts.type = s->blkcipher.cipher_type;
1436 		desc->aes_xts.size = usize;
1437 
1438 		DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__,
1439 		    qp->cq_tail, (unsigned)desc->aes_xts.type,
1440 		    (unsigned)desc->aes_xts.size);
1441 
1442 		desc->length = seg->ss_len;
1443 		desc->src_lo = (uint32_t)seg->ss_paddr;
1444 		desc->src_hi = (seg->ss_paddr >> 32);
1445 		desc->src_mem = CCP_MEMTYPE_SYSTEM;
1446 
1447 		/* Crypt in-place */
1448 		desc->dst_lo = desc->src_lo;
1449 		desc->dst_hi = desc->src_hi;
1450 		desc->dst_mem = desc->src_mem;
1451 
1452 		desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1453 		desc->key_hi = 0;
1454 		desc->key_mem = CCP_MEMTYPE_SB;
1455 
1456 		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1457 
1458 		qp->cq_tail = (qp->cq_tail + 1) %
1459 		    (1 << qp->cq_softc->ring_size_order);
1460 	}
1461 	return (0);
1462 }
1463 
1464 static int __must_check
1465 ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s,
1466     struct cryptop *crp, const struct ccp_completion_ctx *cctx)
1467 {
1468 	const struct crypto_session_params *csp;
1469 	struct ccp_desc *desc;
1470 	char *keydata;
1471 	device_t dev;
1472 	enum ccp_cipher_dir dir;
1473 	int error, iv_len;
1474 	size_t keydata_len;
1475 	unsigned i, j;
1476 
1477 	dev = qp->cq_softc->dev;
1478 
1479 	if (s->blkcipher.key_len == 0 || crp->crp_payload_length == 0) {
1480 		DPRINTF(dev, "%s: empty\n", __func__);
1481 		return (EINVAL);
1482 	}
1483 	if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) {
1484 		DPRINTF(dev, "%s: len modulo: %d\n", __func__,
1485 		    crp->crp_payload_length);
1486 		return (EINVAL);
1487 	}
1488 
1489 	/*
1490 	 * Individual segments must be multiples of AES block size for the HW
1491 	 * to process it.  Non-compliant inputs aren't bogus, just not doable
1492 	 * on this hardware.
1493 	 */
1494 	for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++)
1495 		if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1496 			DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1497 			    qp->cq_sg_crp->sg_segs[i].ss_len);
1498 			return (EINVAL);
1499 		}
1500 
1501 	/* Gather IV/nonce data */
1502 	csp = crypto_get_params(crp->crp_session);
1503 	ccp_collect_iv(crp, csp, s->blkcipher.iv);
1504 	iv_len = csp->csp_ivlen;
1505 	if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1506 		iv_len = AES_BLOCK_LEN;
1507 
1508 	if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1509 		dir = CCP_CIPHER_DIR_ENCRYPT;
1510 	else
1511 		dir = CCP_CIPHER_DIR_DECRYPT;
1512 
1513 	/* Set up passthrough op(s) to copy IV into LSB */
1514 	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1515 	    s->blkcipher.iv, iv_len);
1516 	if (error != 0)
1517 		return (error);
1518 
1519 	/*
1520 	 * Initialize keydata and keydata_len for GCC.  The default case of the
1521 	 * following switch is impossible to reach, but GCC doesn't know that.
1522 	 */
1523 	keydata_len = 0;
1524 	keydata = NULL;
1525 
1526 	switch (csp->csp_cipher_alg) {
1527 	case CRYPTO_AES_XTS:
1528 		for (j = 0; j < nitems(ccp_xts_unitsize_map); j++)
1529 			if (ccp_xts_unitsize_map[j].cxu_size ==
1530 			    crp->crp_payload_length)
1531 				break;
1532 		/* Input buffer must be a supported UnitSize */
1533 		if (j >= nitems(ccp_xts_unitsize_map)) {
1534 			device_printf(dev, "%s: rejected block size: %u\n",
1535 			    __func__, crp->crp_payload_length);
1536 			return (EOPNOTSUPP);
1537 		}
1538 		/* FALLTHROUGH */
1539 	case CRYPTO_AES_CBC:
1540 	case CRYPTO_AES_ICM:
1541 		keydata = s->blkcipher.enckey;
1542 		keydata_len = s->blkcipher.key_len;
1543 		break;
1544 	}
1545 
1546 	INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len,
1547 	    keydata, " ");
1548 	if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1549 		INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " ");
1550 
1551 	/* Reverse order of key material for HW */
1552 	ccp_byteswap(keydata, keydata_len);
1553 
1554 	/* Store key material into LSB to avoid page boundaries */
1555 	if (csp->csp_cipher_alg == CRYPTO_AES_XTS) {
1556 		/*
1557 		 * XTS mode uses 2 256-bit vectors for the primary key and the
1558 		 * tweak key.  For 128-bit keys, the vectors are zero-padded.
1559 		 *
1560 		 * After byteswapping the combined OCF-provided K1:K2 vector
1561 		 * above, we need to reverse the order again so the hardware
1562 		 * gets the swapped keys in the order K1':K2'.
1563 		 */
1564 		error = ccp_do_pst_to_lsb(qp,
1565 		    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata,
1566 		    keydata_len / 2);
1567 		if (error != 0)
1568 			return (error);
1569 		error = ccp_do_pst_to_lsb(qp,
1570 		    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1571 		    keydata + (keydata_len / 2), keydata_len / 2);
1572 
1573 		/* Zero-pad 128 bit keys */
1574 		if (keydata_len == 32) {
1575 			if (error != 0)
1576 				return (error);
1577 			error = ccp_do_pst_to_lsb(qp,
1578 			    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) +
1579 			    keydata_len / 2, g_zeroes, keydata_len / 2);
1580 			if (error != 0)
1581 				return (error);
1582 			error = ccp_do_pst_to_lsb(qp,
1583 			    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) +
1584 			    keydata_len / 2, g_zeroes, keydata_len / 2);
1585 		}
1586 	} else
1587 		error = ccp_do_pst_to_lsb(qp,
1588 		    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata,
1589 		    keydata_len);
1590 	if (error != 0)
1591 		return (error);
1592 
1593 	/*
1594 	 * Point SGLs at the subset of cryptop buffer contents representing the
1595 	 * data.
1596 	 */
1597 	sglist_reset(qp->cq_sg_ulptx);
1598 	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1599 	    crp->crp_payload_start, crp->crp_payload_length);
1600 	if (error != 0)
1601 		return (error);
1602 
1603 	INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__,
1604 	    (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " ");
1605 
1606 	DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail);
1607 
1608 	if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1609 		return (EAGAIN);
1610 
1611 	if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1612 		return (ccp_do_xts(qp, s, crp, dir, cctx));
1613 
1614 	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1615 		struct sglist_seg *seg;
1616 
1617 		seg = &qp->cq_sg_ulptx->sg_segs[i];
1618 
1619 		desc = &qp->desc_ring[qp->cq_tail];
1620 		desc->engine = CCP_ENGINE_AES;
1621 		desc->som = (i == 0);
1622 		desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1623 		desc->ioc = (desc->eom && cctx != NULL);
1624 		DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n",
1625 		    __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1626 		    (int)desc->ioc, (int)dir);
1627 
1628 		if (desc->ioc)
1629 			memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1630 			    sizeof(*cctx));
1631 
1632 		desc->aes.encrypt = dir;
1633 		desc->aes.mode = s->blkcipher.cipher_mode;
1634 		desc->aes.type = s->blkcipher.cipher_type;
1635 		if (csp->csp_cipher_alg == CRYPTO_AES_ICM)
1636 			/*
1637 			 * Size of CTR value in bits, - 1.  ICM mode uses all
1638 			 * 128 bits as counter.
1639 			 */
1640 			desc->aes.size = 127;
1641 
1642 		DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__,
1643 		    qp->cq_tail, (unsigned)desc->aes.mode,
1644 		    (unsigned)desc->aes.type, (unsigned)desc->aes.size);
1645 
1646 		desc->length = seg->ss_len;
1647 		desc->src_lo = (uint32_t)seg->ss_paddr;
1648 		desc->src_hi = (seg->ss_paddr >> 32);
1649 		desc->src_mem = CCP_MEMTYPE_SYSTEM;
1650 
1651 		/* Crypt in-place */
1652 		desc->dst_lo = desc->src_lo;
1653 		desc->dst_hi = desc->src_hi;
1654 		desc->dst_mem = desc->src_mem;
1655 
1656 		desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1657 		desc->key_hi = 0;
1658 		desc->key_mem = CCP_MEMTYPE_SB;
1659 
1660 		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1661 
1662 		qp->cq_tail = (qp->cq_tail + 1) %
1663 		    (1 << qp->cq_softc->ring_size_order);
1664 	}
1665 	return (0);
1666 }
1667 
1668 int __must_check
1669 ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1670 {
1671 	struct ccp_completion_ctx ctx;
1672 
1673 	ctx.callback_fn = ccp_blkcipher_done;
1674 	ctx.session = s;
1675 	ctx.callback_arg = crp;
1676 
1677 	return (ccp_do_blkcipher(qp, s, crp, &ctx));
1678 }
1679 
1680 static void
1681 ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1682     int error)
1683 {
1684 	struct cryptop *crp;
1685 
1686 	explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1687 
1688 	crp = vcrp;
1689 
1690 	ccp_do_hmac_done(qp, s, crp, error);
1691 }
1692 
1693 int __must_check
1694 ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1695 {
1696 	struct ccp_completion_ctx ctx;
1697 	int error;
1698 
1699 	ctx.callback_fn = ccp_authenc_done;
1700 	ctx.session = s;
1701 	ctx.callback_arg = crp;
1702 
1703 	/* Perform first operation */
1704 	if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1705 		error = ccp_do_blkcipher(qp, s, crp, NULL);
1706 	else
1707 		error = ccp_do_hmac(qp, s, crp, NULL);
1708 	if (error != 0)
1709 		return (error);
1710 
1711 	/* Perform second operation */
1712 	if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1713 		error = ccp_do_hmac(qp, s, crp, &ctx);
1714 	else
1715 		error = ccp_do_blkcipher(qp, s, crp, &ctx);
1716 	return (error);
1717 }
1718 
1719 static int __must_check
1720 ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s)
1721 {
1722 	struct ccp_desc *desc;
1723 	struct sglist_seg *seg;
1724 	unsigned i;
1725 
1726 	if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1727 		return (EAGAIN);
1728 
1729 	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1730 		seg = &qp->cq_sg_ulptx->sg_segs[i];
1731 
1732 		desc = &qp->desc_ring[qp->cq_tail];
1733 
1734 		desc->engine = CCP_ENGINE_AES;
1735 		desc->aes.mode = CCP_AES_MODE_GHASH;
1736 		desc->aes.type = s->blkcipher.cipher_type;
1737 		desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD;
1738 
1739 		desc->som = (i == 0);
1740 		desc->length = seg->ss_len;
1741 
1742 		desc->src_lo = (uint32_t)seg->ss_paddr;
1743 		desc->src_hi = (seg->ss_paddr >> 32);
1744 		desc->src_mem = CCP_MEMTYPE_SYSTEM;
1745 
1746 		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1747 
1748 		desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1749 		desc->key_mem = CCP_MEMTYPE_SB;
1750 
1751 		qp->cq_tail = (qp->cq_tail + 1) %
1752 		    (1 << qp->cq_softc->ring_size_order);
1753 	}
1754 	return (0);
1755 }
1756 
1757 static int __must_check
1758 ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s,
1759     enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom)
1760 {
1761 	struct ccp_desc *desc;
1762 
1763 	if (ccp_queue_get_ring_space(qp) == 0)
1764 		return (EAGAIN);
1765 
1766 	desc = &qp->desc_ring[qp->cq_tail];
1767 
1768 	desc->engine = CCP_ENGINE_AES;
1769 	desc->aes.mode = CCP_AES_MODE_GCTR;
1770 	desc->aes.type = s->blkcipher.cipher_type;
1771 	desc->aes.encrypt = dir;
1772 	desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1;
1773 
1774 	desc->som = som;
1775 	desc->eom = eom;
1776 
1777 	/* Trailing bytes will be masked off by aes.size above. */
1778 	desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN);
1779 
1780 	desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr;
1781 	desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32;
1782 	desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM;
1783 
1784 	desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1785 
1786 	desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1787 	desc->key_mem = CCP_MEMTYPE_SB;
1788 
1789 	qp->cq_tail = (qp->cq_tail + 1) %
1790 	    (1 << qp->cq_softc->ring_size_order);
1791 	return (0);
1792 }
1793 
1794 static int __must_check
1795 ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s)
1796 {
1797 	struct ccp_desc *desc;
1798 
1799 	if (ccp_queue_get_ring_space(qp) == 0)
1800 		return (EAGAIN);
1801 
1802 	desc = &qp->desc_ring[qp->cq_tail];
1803 
1804 	desc->engine = CCP_ENGINE_AES;
1805 	desc->aes.mode = CCP_AES_MODE_GHASH;
1806 	desc->aes.type = s->blkcipher.cipher_type;
1807 	desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL;
1808 
1809 	desc->length = GMAC_BLOCK_LEN;
1810 
1811 	desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN);
1812 	desc->src_mem = CCP_MEMTYPE_SB;
1813 
1814 	desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1815 
1816 	desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1817 	desc->key_mem = CCP_MEMTYPE_SB;
1818 
1819 	desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH);
1820 	desc->dst_mem = CCP_MEMTYPE_SB;
1821 
1822 	qp->cq_tail = (qp->cq_tail + 1) %
1823 	    (1 << qp->cq_softc->ring_size_order);
1824 	return (0);
1825 }
1826 
1827 static void
1828 ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1829     int error)
1830 {
1831 	char tag[GMAC_DIGEST_LEN];
1832 	struct cryptop *crp;
1833 
1834 	crp = vcrp;
1835 
1836 	s->pending--;
1837 
1838 	if (error != 0) {
1839 		crp->crp_etype = error;
1840 		goto out;
1841 	}
1842 
1843 	/* Encrypt is done.  Decrypt needs to verify tag. */
1844 	if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1845 		goto out;
1846 
1847 	/* Copy in message tag. */
1848 	crypto_copydata(crp, crp->crp_digest_start, s->gmac.hash_len, tag);
1849 
1850 	/* Verify tag against computed GMAC */
1851 	if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0)
1852 		crp->crp_etype = EBADMSG;
1853 
1854 out:
1855 	explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1856 	explicit_bzero(&s->gmac.final_block, sizeof(s->gmac.final_block));
1857 	crypto_done(crp);
1858 }
1859 
1860 int __must_check
1861 ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1862 {
1863 	const struct crypto_session_params *csp;
1864 	struct ccp_completion_ctx ctx;
1865 	enum ccp_cipher_dir dir;
1866 	device_t dev;
1867 	unsigned i;
1868 	int error;
1869 
1870 	if (s->blkcipher.key_len == 0)
1871 		return (EINVAL);
1872 
1873 	dev = qp->cq_softc->dev;
1874 
1875 	if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1876 		dir = CCP_CIPHER_DIR_ENCRYPT;
1877 	else
1878 		dir = CCP_CIPHER_DIR_DECRYPT;
1879 
1880 	/* Zero initial GHASH portion of context */
1881 	memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv));
1882 
1883 	/* Gather IV data */
1884 	csp = crypto_get_params(crp->crp_session);
1885 	ccp_collect_iv(crp, csp, s->blkcipher.iv);
1886 
1887 	/* Reverse order of key material for HW */
1888 	ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len);
1889 
1890 	/* Prepare input buffer of concatenated lengths for final GHASH */
1891 	be64enc(s->gmac.final_block, (uint64_t)crp->crp_aad_length * 8);
1892 	be64enc(&s->gmac.final_block[8], (uint64_t)crp->crp_payload_length * 8);
1893 
1894 	/* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */
1895 	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1896 	    s->blkcipher.iv, 32);
1897 	if (error != 0)
1898 		return (error);
1899 	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1900 	    s->blkcipher.enckey, s->blkcipher.key_len);
1901 	if (error != 0)
1902 		return (error);
1903 	error = ccp_do_pst_to_lsb(qp,
1904 	    ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block,
1905 	    GMAC_BLOCK_LEN);
1906 	if (error != 0)
1907 		return (error);
1908 
1909 	/* First step - compute GHASH over AAD */
1910 	if (crp->crp_aad_length != 0) {
1911 		sglist_reset(qp->cq_sg_ulptx);
1912 		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1913 		    crp->crp_aad_start, crp->crp_aad_length);
1914 		if (error != 0)
1915 			return (error);
1916 
1917 		/* This engine cannot process non-block multiple AAD data. */
1918 		for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1919 			if ((qp->cq_sg_ulptx->sg_segs[i].ss_len %
1920 			    GMAC_BLOCK_LEN) != 0) {
1921 				DPRINTF(dev, "%s: AD seg modulo: %zu\n",
1922 				    __func__,
1923 				    qp->cq_sg_ulptx->sg_segs[i].ss_len);
1924 				return (EINVAL);
1925 			}
1926 
1927 		error = ccp_do_ghash_aad(qp, s);
1928 		if (error != 0)
1929 			return (error);
1930 	}
1931 
1932 	/* Feed data piece by piece into GCTR */
1933 	sglist_reset(qp->cq_sg_ulptx);
1934 	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1935 	    crp->crp_payload_start, crp->crp_payload_length);
1936 	if (error != 0)
1937 		return (error);
1938 
1939 	/*
1940 	 * All segments except the last must be even multiples of AES block
1941 	 * size for the HW to process it.  Non-compliant inputs aren't bogus,
1942 	 * just not doable on this hardware.
1943 	 *
1944 	 * XXX: Well, the hardware will produce a valid tag for shorter final
1945 	 * segment inputs, but it will still write out a block-sized plaintext
1946 	 * or ciphertext chunk.  For a typical CRP this tramples trailing data,
1947 	 * including the provided message tag.  So, reject such inputs for now.
1948 	 */
1949 	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1950 		if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1951 			DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1952 			    qp->cq_sg_ulptx->sg_segs[i].ss_len);
1953 			return (EINVAL);
1954 		}
1955 
1956 	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1957 		struct sglist_seg *seg;
1958 
1959 		seg = &qp->cq_sg_ulptx->sg_segs[i];
1960 		error = ccp_do_gctr(qp, s, dir, seg,
1961 		    (i == 0 && crp->crp_aad_length == 0),
1962 		    i == (qp->cq_sg_ulptx->sg_nseg - 1));
1963 		if (error != 0)
1964 			return (error);
1965 	}
1966 
1967 	/* Send just initial IV (not GHASH!) to LSB again */
1968 	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1969 	    s->blkcipher.iv, AES_BLOCK_LEN);
1970 	if (error != 0)
1971 		return (error);
1972 
1973 	ctx.callback_fn = ccp_gcm_done;
1974 	ctx.session = s;
1975 	ctx.callback_arg = crp;
1976 
1977 	/* Compute final hash and copy result back */
1978 	error = ccp_do_ghash_final(qp, s);
1979 	if (error != 0)
1980 		return (error);
1981 
1982 	/* When encrypting, copy computed tag out to caller buffer. */
1983 	sglist_reset(qp->cq_sg_ulptx);
1984 	if (dir == CCP_CIPHER_DIR_ENCRYPT)
1985 		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1986 		    crp->crp_digest_start, s->gmac.hash_len);
1987 	else
1988 		/*
1989 		 * For decrypting, copy the computed tag out to our session
1990 		 * buffer to verify in our callback.
1991 		 */
1992 		error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block,
1993 		    s->gmac.hash_len);
1994 	if (error != 0)
1995 		return (error);
1996 	error = ccp_passthrough_sgl(qp,
1997 	    ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx,
1998 	    s->gmac.hash_len, true, &ctx);
1999 	return (error);
2000 }
2001 
2002 #define MAX_TRNG_RETRIES	10
2003 u_int
2004 random_ccp_read(void *v, u_int c)
2005 {
2006 	uint32_t *buf;
2007 	u_int i, j;
2008 
2009 	KASSERT(c % sizeof(*buf) == 0, ("%u not multiple of u_long", c));
2010 
2011 	buf = v;
2012 	for (i = c; i > 0; i -= sizeof(*buf)) {
2013 		for (j = 0; j < MAX_TRNG_RETRIES; j++) {
2014 			*buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET);
2015 			if (*buf != 0)
2016 				break;
2017 		}
2018 		if (j == MAX_TRNG_RETRIES)
2019 			return (0);
2020 		buf++;
2021 	}
2022 	return (c);
2023 
2024 }
2025 
2026 #ifdef DDB
2027 void
2028 db_ccp_show_hw(struct ccp_softc *sc)
2029 {
2030 
2031 	db_printf("  queue mask: 0x%x\n",
2032 	    ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET));
2033 	db_printf("  queue prio: 0x%x\n",
2034 	    ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET));
2035 	db_printf("  reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET));
2036 	db_printf("  trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET));
2037 	db_printf("  cmd timeout: 0x%x\n",
2038 	    ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET));
2039 	db_printf("  lsb public mask lo: 0x%x\n",
2040 	    ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET));
2041 	db_printf("  lsb public mask hi: 0x%x\n",
2042 	    ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET));
2043 	db_printf("  lsb private mask lo: 0x%x\n",
2044 	    ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET));
2045 	db_printf("  lsb private mask hi: 0x%x\n",
2046 	    ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET));
2047 	db_printf("  version: 0x%x\n", ccp_read_4(sc, VERSION_REG));
2048 }
2049 
2050 void
2051 db_ccp_show_queue_hw(struct ccp_queue *qp)
2052 {
2053 	const struct ccp_error_code *ec;
2054 	struct ccp_softc *sc;
2055 	uint32_t status, error, esource, faultblock, headlo, qcontrol;
2056 	unsigned q, i;
2057 
2058 	sc = qp->cq_softc;
2059 	q = qp->cq_qindex;
2060 
2061 	qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE);
2062 	db_printf("  qcontrol: 0x%x%s%s\n", qcontrol,
2063 	    (qcontrol & CMD_Q_RUN) ? " RUN" : "",
2064 	    (qcontrol & CMD_Q_HALTED) ? " HALTED" : "");
2065 	db_printf("  tail_lo: 0x%x\n",
2066 	    ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE));
2067 	headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
2068 	db_printf("  head_lo: 0x%x\n", headlo);
2069 	db_printf("  int enable: 0x%x\n",
2070 	    ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE));
2071 	db_printf("  interrupt status: 0x%x\n",
2072 	    ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE));
2073 	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
2074 	db_printf("  status: 0x%x\n", status);
2075 	db_printf("  int stats: 0x%x\n",
2076 	    ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE));
2077 
2078 	error = status & STATUS_ERROR_MASK;
2079 	if (error == 0)
2080 		return;
2081 
2082 	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
2083 	    STATUS_ERRORSOURCE_MASK;
2084 	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
2085 	    STATUS_VLSB_FAULTBLOCK_MASK;
2086 
2087 	ec = NULL;
2088 	for (i = 0; i < nitems(ccp_error_codes); i++)
2089 		if (ccp_error_codes[i].ce_code == error)
2090 			break;
2091 	if (i < nitems(ccp_error_codes))
2092 		ec = &ccp_error_codes[i];
2093 
2094 	db_printf("  Error: %s (%u) Source: %u Faulting LSB block: %u\n",
2095 	    (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
2096 	    faultblock);
2097 	if (ec != NULL)
2098 		db_printf("  Error description: %s\n", ec->ce_desc);
2099 
2100 	i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
2101 	db_printf("  Bad descriptor idx: %u contents:\n  %32D\n", i,
2102 	    (void *)&qp->desc_ring[i], " ");
2103 }
2104 #endif
2105