xref: /freebsd/sys/crypto/ccp/ccp_hardware.c (revision 19fe57fdb4fd2c18a37f2a972617c8769609cdb8)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017 Chelsio Communications, Inc.
5  * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org>
6  * All rights reserved.
7  * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include "opt_ddb.h"
35 
36 #include <sys/param.h>
37 #include <sys/bus.h>
38 #include <sys/lock.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/module.h>
43 #include <sys/rman.h>
44 #include <sys/sglist.h>
45 #include <sys/sysctl.h>
46 
47 #ifdef DDB
48 #include <ddb/ddb.h>
49 #endif
50 
51 #include <dev/pci/pcireg.h>
52 #include <dev/pci/pcivar.h>
53 
54 #include <machine/bus.h>
55 #include <machine/resource.h>
56 #include <machine/vmparam.h>
57 
58 #include <opencrypto/cryptodev.h>
59 #include <opencrypto/xform.h>
60 
61 #include <vm/vm.h>
62 #include <vm/pmap.h>
63 
64 #include "cryptodev_if.h"
65 
66 #include "ccp.h"
67 #include "ccp_hardware.h"
68 #include "ccp_lsb.h"
69 
70 CTASSERT(sizeof(struct ccp_desc) == 32);
71 
72 static struct ccp_xts_unitsize_map_entry {
73 	enum ccp_xts_unitsize cxu_id;
74 	unsigned cxu_size;
75 } ccp_xts_unitsize_map[] = {
76 	{ CCP_XTS_AES_UNIT_SIZE_16, 16 },
77 	{ CCP_XTS_AES_UNIT_SIZE_512, 512 },
78 	{ CCP_XTS_AES_UNIT_SIZE_1024, 1024 },
79 	{ CCP_XTS_AES_UNIT_SIZE_2048, 2048 },
80 	{ CCP_XTS_AES_UNIT_SIZE_4096, 4096 },
81 };
82 
83 SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
84     "ccp node");
85 
86 unsigned g_ccp_ring_order = 11;
87 SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order,
88     0, "Set CCP ring order.  (1 << this) == ring size.  Min: 6, Max: 16");
89 
90 /*
91  * Zero buffer, sufficient for padding LSB entries, that does not span a page
92  * boundary
93  */
94 static const char g_zeroes[32] __aligned(32);
95 
96 static inline uint32_t
97 ccp_read_4(struct ccp_softc *sc, uint32_t offset)
98 {
99 	return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset));
100 }
101 
102 static inline void
103 ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value)
104 {
105 	bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value);
106 }
107 
108 static inline uint32_t
109 ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset)
110 {
111 	/*
112 	 * Each queue gets its own 4kB register space.  Queue 0 is at 0x1000.
113 	 */
114 	return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset));
115 }
116 
117 static inline void
118 ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset,
119     uint32_t value)
120 {
121 	ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value);
122 }
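
/*
 * Worked example (illustrative): per the comment in ccp_read_queue_4()
 * above, each queue owns a 4kB register window and queue 0 starts at
 * 0x1000, so CMD_Q_STATUS_INCR is presumably that 4kB stride.  Under that
 * assumption, reading register `offset' on queue 2 resolves to MMIO offset
 * (CMD_Q_STATUS_INCR * 3) + offset, i.e. 0x3000 + offset.
 */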
123 
124 void
125 ccp_queue_write_tail(struct ccp_queue *qp)
126 {
127 	ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE,
128 	    ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail));
129 }
130 
131 /*
132  * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of
133  * that entry for the queue's private LSB region.
134  */
135 static inline uint8_t
136 ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry)
137 {
138 	return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry));
139 }
140 
141 /*
142  * Given a queue and a reserved LSB entry index, compute the LSB *address* of
143  * that entry for the queue's private LSB region.
144  */
145 static inline uint32_t
146 ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry)
147 {
148 	return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE);
149 }
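
/*
 * Worked example (illustrative), assuming LSB_REGION_LENGTH is 16 entries
 * and LSB_ENTRY_SIZE is 32 bytes per the terminology comment below: for a
 * queue whose private region is region 2, reserved entry index 3 maps to
 * LSB entry id 2 * 16 + 3 = 35 and LSB byte address 35 * 32 = 1120.
 */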
150 
151 /*
152  * Some terminology:
153  *
154  * LSB - Local Storage Block
155  * =========================
156  *
157  * 8 segments/regions, each containing 16 entries.
158  *
159  * Each entry contains 256 bits (32 bytes).
160  *
161  * Segments are virtually addressed in commands, but accesses cannot cross
162  * segment boundaries.  Virtual map uses an identity mapping by default
163  * (virtual segment N corresponds to physical segment N).
164  *
165  * Access to a physical region can be restricted to any subset of all five
166  * queues.
167  *
168  * "Pass-through" mode
169  * ===================
170  *
171  * Pass-through is a generic DMA engine, much like ioat(4).  Some nice
172  * features:
173  *
174  * - Supports byte-swapping for endian conversion (32- or 256-bit words)
175  * - AND, OR, XOR with fixed 256-bit mask
176  * - CRC32 of data (may be used in tandem with bswap, but not bit operations)
177  * - Read/write of LSB
178  * - Memset
179  *
180  * If bit manipulation mode is enabled, input must be a multiple of 256 bits
181  * (32 bytes).
182  *
183  * If byte-swapping is enabled, input must be a multiple of the word size.
184  *
185  * Zlib mode -- only usable from one queue at a time, single job at a time.
186  * ========================================================================
187  *
188  * Only usable from private host, aka PSP?  Not host processor?
189  *
190  * RNG.
191  * ====
192  *
193  * Raw bits are conditioned with AES and fed through CTR_DRBG.  Output goes in
194  * a ring buffer readable by software.
195  *
196  * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are
197  * implemented on the raw input stream and may be enabled to verify min-entropy
198  * of 0.5 bits per bit.
199  */
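
/*
 * Illustrative sketch only (not compiled): the pass-through engine
 * described above is how this driver stages keys, IVs and hash state into
 * a queue's private LSB.  Assuming `src_busaddr' holds the bus address of
 * 32 bytes of system memory, one LSB entry could be loaded with
 * ccp_passthrough() (defined later in this file) roughly like this:
 */
#if 0
	error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
	    CCP_MEMTYPE_SB, src_busaddr, CCP_MEMTYPE_SYSTEM, LSB_ENTRY_SIZE,
	    CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
	    NULL);
	if (error != 0)
		return (error);
#endif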
200 
201 static void
202 ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
203 {
204 	bus_addr_t *baddr;
205 
206 	KASSERT(error == 0, ("%s: error:%d", __func__, error));
207 	baddr = arg;
208 	*baddr = segs->ds_addr;
209 }
210 
211 static int
212 ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue)
213 {
214 	struct ccp_softc *sc;
215 	struct ccp_queue *qp;
216 	void *desc;
217 	size_t ringsz, num_descriptors;
218 	int error;
219 
220 	desc = NULL;
221 	sc = device_get_softc(dev);
222 	qp = &sc->queues[queue];
223 
224 	/*
225 	 * Don't bother allocating a ring for queues the host isn't allowed to
226 	 * drive.
227 	 */
228 	if ((sc->valid_queues & (1 << queue)) == 0)
229 		return (0);
230 
231 	ccp_queue_decode_lsb_regions(sc, lsbmask, queue);
232 
233 	/* Ignore queues that do not have any LSB access. */
234 	if (qp->lsb_mask == 0) {
235 		device_printf(dev, "Ignoring queue %u with no LSB access\n",
236 		    queue);
237 		sc->valid_queues &= ~(1 << queue);
238 		return (0);
239 	}
240 
241 	num_descriptors = 1 << sc->ring_size_order;
242 	ringsz = sizeof(struct ccp_desc) * num_descriptors;
243 
244 	/*
245 	 * "Queue_Size" is order - 1.
246 	 *
247 	 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits.
248 	 */
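	/*
	 * Worked example (illustrative): with the default hw.ccp.ring_order
	 * of 11, the ring holds 1 << 11 = 2048 descriptors of 32 bytes each
	 * (64kB total), and the tag below requires 1 << (5 + 11) = 64kB
	 * alignment.
	 */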
249 	error = bus_dma_tag_create(bus_get_dma_tag(dev),
250 	    1 << (5 + sc->ring_size_order),
251 #if defined(__i386__) && !defined(PAE)
252 	    0, BUS_SPACE_MAXADDR,
253 #else
254 	    (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT,
255 #endif
256 	    BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1,
257 	    ringsz, 0, NULL, NULL, &qp->ring_desc_tag);
258 	if (error != 0)
259 		goto out;
260 
261 	error = bus_dmamem_alloc(qp->ring_desc_tag, &desc,
262 	    BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map);
263 	if (error != 0)
264 		goto out;
265 
266 	error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc,
267 	    ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK);
268 	if (error != 0)
269 		goto out;
270 
271 	qp->desc_ring = desc;
272 	qp->completions_ring = malloc(num_descriptors *
273 	    sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK);
274 
275 	/* Zero control register; among other things, clears the RUN flag. */
276 	qp->qcontrol = 0;
277 	ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
278 	ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0);
279 
280 	/* Clear any leftover interrupt status flags */
281 	ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE,
282 	    ALL_INTERRUPTS);
283 
284 	qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT;
285 
286 	ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE,
287 	    (uint32_t)qp->desc_ring_bus_addr);
288 	ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE,
289 	    (uint32_t)qp->desc_ring_bus_addr);
290 
291 	/*
292 	 * Enable completion interrupts, as well as error or administrative
293 	 * halt interrupts.  We don't use administrative halts, but they
294 	 * shouldn't trip unless we do, so it ought to be harmless.
295 	 */
296 	ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE,
297 	    INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
298 
299 	qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT;
300 	qp->qcontrol |= CMD_Q_RUN;
301 	ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
302 
303 out:
304 	if (error != 0) {
305 		if (qp->desc_ring != NULL)
306 			bus_dmamap_unload(qp->ring_desc_tag,
307 			    qp->ring_desc_map);
308 		if (desc != NULL)
309 			bus_dmamem_free(qp->ring_desc_tag, desc,
310 			    qp->ring_desc_map);
311 		if (qp->ring_desc_tag != NULL)
312 			bus_dma_tag_destroy(qp->ring_desc_tag);
313 	}
314 	return (error);
315 }
316 
317 static void
318 ccp_hw_detach_queue(device_t dev, unsigned queue)
319 {
320 	struct ccp_softc *sc;
321 	struct ccp_queue *qp;
322 
323 	sc = device_get_softc(dev);
324 	qp = &sc->queues[queue];
325 
326 	/*
327 	 * No ring was allocated for queues the host isn't allowed to drive,
328 	 * so there is nothing to free here.
329 	 */
330 	if ((sc->valid_queues & (1 << queue)) == 0)
331 		return;
332 
333 	free(qp->completions_ring, M_CCP);
334 	bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map);
335 	bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map);
336 	bus_dma_tag_destroy(qp->ring_desc_tag);
337 }
338 
339 static int
340 ccp_map_pci_bar(device_t dev)
341 {
342 	struct ccp_softc *sc;
343 
344 	sc = device_get_softc(dev);
345 
346 	sc->pci_resource_id = PCIR_BAR(2);
347 	sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
348 	    &sc->pci_resource_id, RF_ACTIVE);
349 	if (sc->pci_resource == NULL) {
350 		device_printf(dev, "unable to allocate pci resource\n");
351 		return (ENODEV);
352 	}
353 
354 	sc->pci_resource_id_msix = PCIR_BAR(5);
355 	sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
356 	    &sc->pci_resource_id_msix, RF_ACTIVE);
357 	if (sc->pci_resource_msix == NULL) {
358 		device_printf(dev, "unable to allocate pci resource msix\n");
359 		bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
360 		    sc->pci_resource);
361 		return (ENODEV);
362 	}
363 
364 	sc->pci_bus_tag = rman_get_bustag(sc->pci_resource);
365 	sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource);
366 	return (0);
367 }
368 
369 static void
370 ccp_unmap_pci_bar(device_t dev)
371 {
372 	struct ccp_softc *sc;
373 
374 	sc = device_get_softc(dev);
375 
376 	bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix,
377 	    sc->pci_resource_msix);
378 	bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
379 	    sc->pci_resource);
380 }
381 
382 static const struct ccp_error_code {
383 	uint8_t		ce_code;
384 	const char	*ce_name;
385 	int		ce_errno;
386 	const char	*ce_desc;
387 } ccp_error_codes[] = {
388 	{ 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" },
389 	{ 0x03, "ILLEGAL_FUNCTION_TYPE", EIO,
390 	    "A non-supported function type was specified" },
391 	{ 0x04, "ILLEGAL_FUNCTION_MODE", EIO,
392 	    "A non-supported function mode was specified" },
393 	{ 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO,
394 	    "A CMAC type was specified when ENCRYPT was not specified" },
395 	{ 0x06, "ILLEGAL_FUNCTION_SIZE", EIO,
396 	    "A non-supported function size was specified.\n"
397 	    "AES-CFB: Size was not 127 or 7;\n"
398 	    "3DES-CFB: Size was not 7;\n"
399 	    "RSA: See supported size table (7.4.2);\n"
400 	    "ECC: Size was greater than 576 bits." },
401 	{ 0x07, "Zlib_MISSING_INIT_EOM", EIO,
402 	    "Zlib command does not have INIT and EOM set" },
403 	{ 0x08, "ILLEGAL_FUNCTION_RSVD", EIO,
404 	    "Reserved bits in a function specification were not 0" },
405 	{ 0x09, "ILLEGAL_BUFFER_LENGTH", EIO,
406 	    "The buffer length specified was not correct for the selected engine"
407 	},
408 	{ 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n"
409 	    "Undefined VLSB segment mapping or\n"
410 	    "mapping to unsupported LSB segment id" },
411 	{ 0x0B, "ILLEGAL_MEM_ADDR", EFAULT,
412 	    "The specified source/destination buffer access was illegal:\n"
413 	    "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n"
414 	    "Data buffer not completely contained within a single segment; or\n"
415 	    "Pointer with Fixed=1 is not 32-bit aligned; or\n"
416 	    "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory."
417 	},
418 	{ 0x0C, "ILLEGAL_MEM_SEL", EIO,
419 	    "A src_mem, dst_mem, or key_mem field was illegal:\n"
420 	    "A field was set to a reserved value; or\n"
421 	    "A public command attempted to reference AXI1 (local) or GART memory; or\n"
422 	    "A Zlib command attempted to use the LSB." },
423 	{ 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO,
424 	    "The specified context location was illegal:\n"
425 	    "Context located in a LSB location disallowed by the LSB protection masks; or\n"
426 	    "Context not completely contained within a single segment." },
427 	{ 0x0E, "ILLEGAL_KEY_ADDR", EIO,
428 	    "The specified key location was illegal:\n"
429 	    "Key located in a LSB location disallowed by the LSB protection masks; or\n"
430 	    "Key not completely contained within a single segment." },
431 	{ 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" },
432 	/* XXX Could fill out these descriptions too */
433 	{ 0x13, "IDMA0_AXI_SLVERR", EIO, "" },
434 	{ 0x14, "IDMA0_AXI_DECERR", EIO, "" },
435 	{ 0x16, "IDMA1_AXI_SLVERR", EIO, "" },
436 	{ 0x17, "IDMA1_AXI_DECERR", EIO, "" },
437 	{ 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" },
438 	{ 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" },
439 	{ 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" },
440 	{ 0x1D, "ZLIB_EXTRA_DATA", EIO, "" },
441 	{ 0x1E, "ZLIB_BTYPE", EIO, "" },
442 	{ 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" },
443 	{ 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" },
444 	{ 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" },
445 	{ 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" },
446 	{ 0x24, "ZLIB_LIMIT_REACHED", EIO, "" },
447 	{ 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" },
448 	{ 0x26, "ODMA0_AXI_SLVERR", EIO, "" },
449 	{ 0x27, "ODMA0_AXI_DECERR", EIO, "" },
450 	{ 0x29, "ODMA1_AXI_SLVERR", EIO, "" },
451 	{ 0x2A, "ODMA1_AXI_DECERR", EIO, "" },
452 	{ 0x2B, "LSB_PARITY_ERR", EIO,
453 	    "A read from the LSB encountered a parity error" },
454 };
455 
456 static void
457 ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc)
458 {
459 	struct ccp_completion_ctx *cctx;
460 	const struct ccp_error_code *ec;
461 	struct ccp_softc *sc;
462 	uint32_t status, error, esource, faultblock;
463 	unsigned q, idx;
464 	int errno;
465 
466 	sc = qp->cq_softc;
467 	q = qp->cq_qindex;
468 
469 	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
470 
471 	error = status & STATUS_ERROR_MASK;
472 
473 	/* Decode error status */
474 	ec = NULL;
475 	for (idx = 0; idx < nitems(ccp_error_codes); idx++)
476 		if (ccp_error_codes[idx].ce_code == error) {
477 			ec = &ccp_error_codes[idx];
478 			break;
479 		}
480 
481 	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
482 	    STATUS_ERRORSOURCE_MASK;
483 	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
484 	    STATUS_VLSB_FAULTBLOCK_MASK;
485 	device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n",
486 	    (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
487 	    faultblock);
488 	if (ec != NULL)
489 		device_printf(sc->dev, "Error description: %s\n", ec->ce_desc);
490 
491 	/* TODO Could format the desc nicely here */
492 	idx = desc - qp->desc_ring;
493 	DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx,
494 	    (const void *)desc, " ");
495 
496 	/*
497 	 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status,
498 	 * Zlib Decompress status may be interesting.
499 	 */
500 
501 	while (true) {
502 		/* Keep unused descriptors zero for next use. */
503 		memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx]));
504 
505 		cctx = &qp->completions_ring[idx];
506 
507 		/*
508 		 * Restart procedure described in § 14.2.5.  Could be used by HoC if we
509 		 * used that.
510 		 *
511 		 * Advance HEAD_LO past bad descriptor + any remaining in
512 		 * transaction manually, then restart queue.
513 		 */
514 		idx = (idx + 1) % (1 << sc->ring_size_order);
515 
516 		/* Callback function signals end of transaction */
517 		if (cctx->callback_fn != NULL) {
518 			if (ec == NULL)
519 				errno = EIO;
520 			else
521 				errno = ec->ce_errno;
522 			/* TODO More specific error code */
523 			cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno);
524 			cctx->callback_fn = NULL;
525 			break;
526 		}
527 	}
528 
529 	qp->cq_head = idx;
530 	qp->cq_waiting = false;
531 	wakeup(&qp->cq_tail);
532 	DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
533 	ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE,
534 	    (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE));
535 	ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol);
536 	DPRINTF(sc->dev, "%s: Restarted queue\n", __func__);
537 }
538 
539 static void
540 ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints)
541 {
542 	struct ccp_completion_ctx *cctx;
543 	struct ccp_softc *sc;
544 	const struct ccp_desc *desc;
545 	uint32_t headlo, idx;
546 	unsigned q, completed;
547 
548 	sc = qp->cq_softc;
549 	q = qp->cq_qindex;
550 
551 	mtx_lock(&qp->cq_lock);
552 
553 	/*
554 	 * Hardware HEAD_LO points to the first incomplete descriptor.  Process
555 	 * any submitted and completed descriptors, up to but not including
556 	 * HEAD_LO.
557 	 */
558 	headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
559 	idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
560 
561 	DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx,
562 	    qp->cq_head);
563 	completed = 0;
564 	while (qp->cq_head != idx) {
565 		DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head);
566 
567 		cctx = &qp->completions_ring[qp->cq_head];
568 		if (cctx->callback_fn != NULL) {
569 			cctx->callback_fn(qp, cctx->session,
570 			    cctx->callback_arg, 0);
571 			cctx->callback_fn = NULL;
572 		}
573 
574 		/* Keep unused descriptors zero for next use. */
575 		memset(&qp->desc_ring[qp->cq_head], 0,
576 		    sizeof(qp->desc_ring[qp->cq_head]));
577 
578 		qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order);
579 		completed++;
580 	}
581 	if (completed > 0) {
582 		qp->cq_waiting = false;
583 		wakeup(&qp->cq_tail);
584 	}
585 
586 	DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
587 
588 	/*
589 	 * Desc points to the first incomplete descriptor, at the time we read
590 	 * HEAD_LO.  If there was an error flagged in interrupt status, the HW
591 	 * will not proceed past the erroneous descriptor by itself.
592 	 */
593 	desc = &qp->desc_ring[idx];
594 	if ((ints & INT_ERROR) != 0)
595 		ccp_intr_handle_error(qp, desc);
596 
597 	mtx_unlock(&qp->cq_lock);
598 }
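
/*
 * Worked example (illustrative): HEAD_LO holds a bus address inside the
 * descriptor ring, so the software index used above is recovered by
 * subtracting the ring base and dividing by the descriptor size (32 bytes
 * per the CTASSERT at the top of this file, assuming Q_DESC_SIZE matches).
 * E.g. if the ring base is 0x10000 and HEAD_LO reads 0x10040, the first
 * incomplete descriptor has index 0x40 / 32 = 2.
 */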
599 
600 static void
601 ccp_intr_handler(void *arg)
602 {
603 	struct ccp_softc *sc = arg;
604 	size_t i;
605 	uint32_t ints;
606 
607 	DPRINTF(sc->dev, "%s: interrupt\n", __func__);
608 
609 	/*
610 	 * We get one global interrupt per PCI device, shared over all of
611 	 * its queues.  Scan each valid queue on interrupt for flags indicating
612 	 * activity.
613 	 */
614 	for (i = 0; i < nitems(sc->queues); i++) {
615 		if ((sc->valid_queues & (1 << i)) == 0)
616 			continue;
617 
618 		ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE);
619 		if (ints == 0)
620 			continue;
621 
622 #if 0
623 		DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__,
624 		    (unsigned)ints, i);
625 #endif
626 		/* Write back 1s to clear interrupt status bits. */
627 		ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints);
628 
629 		/*
630 		 * If there was an error, we still need to run completions on
631 		 * any descriptors prior to the error.  The completions handler
632 		 * invoked below will also handle the error descriptor.
633 		 */
634 		if ((ints & (INT_COMPLETION | INT_ERROR)) != 0)
635 			ccp_intr_run_completions(&sc->queues[i], ints);
636 
637 		if ((ints & INT_QUEUE_STOPPED) != 0)
638 			device_printf(sc->dev, "%s: queue %zu stopped\n",
639 			    __func__, i);
640 	}
641 
642 	/* Re-enable interrupts after processing */
643 	for (i = 0; i < nitems(sc->queues); i++) {
644 		if ((sc->valid_queues & (1 << i)) == 0)
645 			continue;
646 		ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE,
647 		    INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
648 	}
649 }
650 
651 static int
652 ccp_intr_filter(void *arg)
653 {
654 	struct ccp_softc *sc = arg;
655 	size_t i;
656 
657 	/* TODO: Split individual queues into separate taskqueues? */
658 	for (i = 0; i < nitems(sc->queues); i++) {
659 		if ((sc->valid_queues & (1 << i)) == 0)
660 			continue;
661 
662 		/* Mask interrupt until task completes */
663 		ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0);
664 	}
665 
666 	return (FILTER_SCHEDULE_THREAD);
667 }
668 
669 static int
670 ccp_setup_interrupts(struct ccp_softc *sc)
671 {
672 	uint32_t nvec;
673 	int rid, error, n, ridcopy;
674 
675 	n = pci_msix_count(sc->dev);
676 	if (n < 1) {
677 		device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n);
678 		return (ENXIO);
679 	}
680 
681 	nvec = n;
682 	error = pci_alloc_msix(sc->dev, &nvec);
683 	if (error != 0) {
684 		device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__,
685 		    error);
686 		return (error);
687 	}
688 	if (nvec < 1) {
689 		device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n",
690 		    __func__);
691 		return (ENXIO);
692 	}
693 	if (nvec > nitems(sc->intr_res)) {
694 		device_printf(sc->dev, "%s: too many vectors: %u\n", __func__,
695 		    nvec);
696 		nvec = nitems(sc->intr_res);
697 	}
698 
699 	for (rid = 1; rid < 1 + nvec; rid++) {
700 		ridcopy = rid;
701 		sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev,
702 		    SYS_RES_IRQ, &ridcopy, RF_ACTIVE);
703 		if (sc->intr_res[rid - 1] == NULL) {
704 			device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n",
705 			    __func__);
706 			return (ENXIO);
707 		}
708 
709 		sc->intr_tag[rid - 1] = NULL;
710 		error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1],
711 		    INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter,
712 		    ccp_intr_handler, sc, &sc->intr_tag[rid - 1]);
713 		if (error != 0)
714 			device_printf(sc->dev, "%s: setup_intr: %d\n",
715 			    __func__, error);
716 	}
717 	sc->intr_count = nvec;
718 
719 	return (error);
720 }
721 
722 static void
723 ccp_release_interrupts(struct ccp_softc *sc)
724 {
725 	unsigned i;
726 
727 	for (i = 0; i < sc->intr_count; i++) {
728 		if (sc->intr_tag[i] != NULL)
729 			bus_teardown_intr(sc->dev, sc->intr_res[i],
730 			    sc->intr_tag[i]);
731 		if (sc->intr_res[i] != NULL)
732 			bus_release_resource(sc->dev, SYS_RES_IRQ,
733 			    rman_get_rid(sc->intr_res[i]), sc->intr_res[i]);
734 	}
735 
736 	pci_release_msi(sc->dev);
737 }
738 
739 int
740 ccp_hw_attach(device_t dev)
741 {
742 	struct ccp_softc *sc;
743 	uint64_t lsbmask;
744 	uint32_t version, lsbmasklo, lsbmaskhi;
745 	unsigned queue_idx, j;
746 	int error;
747 	bool bars_mapped, interrupts_setup;
748 
749 	queue_idx = 0;
750 	bars_mapped = interrupts_setup = false;
751 	sc = device_get_softc(dev);
752 
753 	error = ccp_map_pci_bar(dev);
754 	if (error != 0) {
755 		device_printf(dev, "%s: couldn't map BAR(s)\n", __func__);
756 		goto out;
757 	}
758 	bars_mapped = true;
759 
760 	error = pci_enable_busmaster(dev);
761 	if (error != 0) {
762 		device_printf(dev, "%s: couldn't enable busmaster\n",
763 		    __func__);
764 		goto out;
765 	}
766 
767 	sc->ring_size_order = g_ccp_ring_order;
768 	if (sc->ring_size_order < 6 || sc->ring_size_order > 16) {
769 		device_printf(dev, "bogus hw.ccp.ring_order\n");
770 		error = EINVAL;
771 		goto out;
772 	}
773 	sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET);
774 
775 	version = ccp_read_4(sc, VERSION_REG);
776 	if ((version & VERSION_NUM_MASK) < 5) {
777 		device_printf(dev,
778 		    "driver supports version 5 and later hardware\n");
779 		error = ENXIO;
780 		goto out;
781 	}
782 
783 	error = ccp_setup_interrupts(sc);
784 	if (error != 0)
785 		goto out;
786 	interrupts_setup = true;
787 
788 	sc->hw_version = version & VERSION_NUM_MASK;
789 	sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) &
790 	    VERSION_NUMVQM_MASK;
791 	sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) &
792 	    VERSION_LSBSIZE_MASK;
793 	sc->hw_features = version & VERSION_CAP_MASK;
794 
795 	/*
796 	 * Copy private LSB mask to public registers to enable access to LSB
797 	 * from all queues allowed by BIOS.
798 	 */
799 	lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET);
800 	lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET);
801 	ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo);
802 	ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi);
803 
804 	lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo;
805 
806 	for (; queue_idx < nitems(sc->queues); queue_idx++) {
807 		error = ccp_hw_attach_queue(dev, lsbmask, queue_idx);
808 		if (error != 0) {
809 			device_printf(dev, "%s: couldn't attach queue %u\n",
810 			    __func__, queue_idx);
811 			goto out;
812 		}
813 	}
814 	ccp_assign_lsb_regions(sc, lsbmask);
815 
816 out:
817 	if (error != 0) {
818 		if (interrupts_setup)
819 			ccp_release_interrupts(sc);
820 		for (j = 0; j < queue_idx; j++)
821 			ccp_hw_detach_queue(dev, j);
822 		if (sc->ring_size_order != 0)
823 			pci_disable_busmaster(dev);
824 		if (bars_mapped)
825 			ccp_unmap_pci_bar(dev);
826 	}
827 	return (error);
828 }
829 
830 void
831 ccp_hw_detach(device_t dev)
832 {
833 	struct ccp_softc *sc;
834 	unsigned i;
835 
836 	sc = device_get_softc(dev);
837 
838 	for (i = 0; i < nitems(sc->queues); i++)
839 		ccp_hw_detach_queue(dev, i);
840 
841 	ccp_release_interrupts(sc);
842 	pci_disable_busmaster(dev);
843 	ccp_unmap_pci_bar(dev);
844 }
845 
846 static int __must_check
847 ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst,
848     enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type,
849     bus_size_t len, enum ccp_passthru_byteswap swapmode,
850     enum ccp_passthru_bitwise bitmode, bool interrupt,
851     const struct ccp_completion_ctx *cctx)
852 {
853 	struct ccp_desc *desc;
854 
855 	if (ccp_queue_get_ring_space(qp) == 0)
856 		return (EAGAIN);
857 
858 	desc = &qp->desc_ring[qp->cq_tail];
859 
860 	memset(desc, 0, sizeof(*desc));
861 	desc->engine = CCP_ENGINE_PASSTHRU;
862 
863 	desc->pt.ioc = interrupt;
864 	desc->pt.byteswap = swapmode;
865 	desc->pt.bitwise = bitmode;
866 	desc->length = len;
867 
868 	desc->src_lo = (uint32_t)src;
869 	desc->src_hi = src >> 32;
870 	desc->src_mem = src_type;
871 
872 	desc->dst_lo = (uint32_t)dst;
873 	desc->dst_hi = dst >> 32;
874 	desc->dst_mem = dst_type;
875 
876 	if (bitmode != CCP_PASSTHRU_BITWISE_NOOP)
877 		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY);
878 
879 	if (cctx != NULL)
880 		memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx));
881 
882 	qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
883 	return (0);
884 }
885 
886 static int __must_check
887 ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb,
888     struct sglist *sgl, bus_size_t len, bool interrupt,
889     const struct ccp_completion_ctx *cctx)
890 {
891 	struct sglist_seg *seg;
892 	size_t i, remain, nb;
893 	int error;
894 
895 	remain = len;
896 	for (i = 0; i < sgl->sg_nseg && remain != 0; i++) {
897 		seg = &sgl->sg_segs[i];
898 		/* crd_len is int, so 32-bit min() is ok. */
899 		nb = min(remain, seg->ss_len);
900 
901 		if (tolsb)
902 			error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB,
903 			    seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb,
904 			    CCP_PASSTHRU_BYTESWAP_NOOP,
905 			    CCP_PASSTHRU_BITWISE_NOOP,
906 			    (nb == remain) && interrupt, cctx);
907 		else
908 			error = ccp_passthrough(qp, seg->ss_paddr,
909 			    CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb,
910 			    CCP_PASSTHRU_BYTESWAP_NOOP,
911 			    CCP_PASSTHRU_BITWISE_NOOP,
912 			    (nb == remain) && interrupt, cctx);
913 		if (error != 0)
914 			return (error);
915 
916 		remain -= nb;
917 	}
918 	return (0);
919 }
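
/*
 * Worked example (illustrative): copying 48 bytes to the LSB from an sglist
 * whose first two segments are 32 and 64 bytes long emits two pass-through
 * descriptors -- 32 bytes from segment 0, then the remaining 16 bytes from
 * segment 1 -- and the interrupt flag, if requested, is set only on the
 * descriptor that completes the transfer.
 */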
920 
921 /*
922  * Note that these vectors are in reverse of the usual order.
923  */
924 const struct SHA_vectors {
925 	uint32_t SHA1[8];
926 	uint32_t SHA224[8];
927 	uint32_t SHA256[8];
928 	uint64_t SHA384[8];
929 	uint64_t SHA512[8];
930 } SHA_H __aligned(PAGE_SIZE) = {
931 	.SHA1 = {
932 		0xc3d2e1f0ul,
933 		0x10325476ul,
934 		0x98badcfeul,
935 		0xefcdab89ul,
936 		0x67452301ul,
937 		0,
938 		0,
939 		0,
940 	},
941 	.SHA224 = {
942 		0xbefa4fa4ul,
943 		0x64f98fa7ul,
944 		0x68581511ul,
945 		0xffc00b31ul,
946 		0xf70e5939ul,
947 		0x3070dd17ul,
948 		0x367cd507ul,
949 		0xc1059ed8ul,
950 	},
951 	.SHA256 = {
952 		0x5be0cd19ul,
953 		0x1f83d9abul,
954 		0x9b05688cul,
955 		0x510e527ful,
956 		0xa54ff53aul,
957 		0x3c6ef372ul,
958 		0xbb67ae85ul,
959 		0x6a09e667ul,
960 	},
961 	.SHA384 = {
962 		0x47b5481dbefa4fa4ull,
963 		0xdb0c2e0d64f98fa7ull,
964 		0x8eb44a8768581511ull,
965 		0x67332667ffc00b31ull,
966 		0x152fecd8f70e5939ull,
967 		0x9159015a3070dd17ull,
968 		0x629a292a367cd507ull,
969 		0xcbbb9d5dc1059ed8ull,
970 	},
971 	.SHA512 = {
972 		0x5be0cd19137e2179ull,
973 		0x1f83d9abfb41bd6bull,
974 		0x9b05688c2b3e6c1full,
975 		0x510e527fade682d1ull,
976 		0xa54ff53a5f1d36f1ull,
977 		0x3c6ef372fe94f82bull,
978 		0xbb67ae8584caa73bull,
979 		0x6a09e667f3bcc908ull,
980 	},
981 };
982 /*
983  * Ensure vectors do not cross a page boundary.
984  *
985  * Disabled due to a new Clang error:  "expression is not an integral constant
986  * expression."  GCC (cross toolchain) seems to handle this assertion with
987  * _Static_assert just fine.
988  */
989 #if 0
990 CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H));
991 #endif
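
/*
 * A possible alternative (sketch only, not enabled): the same page-span
 * property could instead be checked at runtime, e.g. from an attach path:
 */
#if 0
	KASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H),
	    ("SHA_H must not span a page boundary"));
#endif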
992 
993 const struct SHA_Defn {
994 	enum sha_version version;
995 	const void *H_vectors;
996 	size_t H_size;
997 	struct auth_hash *axf;
998 	enum ccp_sha_type engine_type;
999 } SHA_definitions[] = {
1000 	{
1001 		.version = SHA1,
1002 		.H_vectors = SHA_H.SHA1,
1003 		.H_size = sizeof(SHA_H.SHA1),
1004 		.axf = &auth_hash_hmac_sha1,
1005 		.engine_type = CCP_SHA_TYPE_1,
1006 	},
1007 #if 0
1008 	{
1009 		.version = SHA2_224,
1010 		.H_vectors = SHA_H.SHA224,
1011 		.H_size = sizeof(SHA_H.SHA224),
1012 		.axf = &auth_hash_hmac_sha2_224,
1013 		.engine_type = CCP_SHA_TYPE_224,
1014 	},
1015 #endif
1016 	{
1017 		.version = SHA2_256,
1018 		.H_vectors = SHA_H.SHA256,
1019 		.H_size = sizeof(SHA_H.SHA256),
1020 		.axf = &auth_hash_hmac_sha2_256,
1021 		.engine_type = CCP_SHA_TYPE_256,
1022 	},
1023 	{
1024 		.version = SHA2_384,
1025 		.H_vectors = SHA_H.SHA384,
1026 		.H_size = sizeof(SHA_H.SHA384),
1027 		.axf = &auth_hash_hmac_sha2_384,
1028 		.engine_type = CCP_SHA_TYPE_384,
1029 	},
1030 	{
1031 		.version = SHA2_512,
1032 		.H_vectors = SHA_H.SHA512,
1033 		.H_size = sizeof(SHA_H.SHA512),
1034 		.axf = &auth_hash_hmac_sha2_512,
1035 		.engine_type = CCP_SHA_TYPE_512,
1036 	},
1037 };
1038 
1039 static int __must_check
1040 ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn,
1041     vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits)
1042 {
1043 	struct ccp_desc *desc;
1044 
1045 	if (ccp_queue_get_ring_space(qp) == 0)
1046 		return (EAGAIN);
1047 
1048 	desc = &qp->desc_ring[qp->cq_tail];
1049 
1050 	memset(desc, 0, sizeof(*desc));
1051 	desc->engine = CCP_ENGINE_SHA;
1052 	desc->som = start;
1053 	desc->eom = end;
1054 
1055 	desc->sha.type = defn->engine_type;
1056 	desc->length = len;
1057 
1058 	if (end) {
1059 		desc->sha_len_lo = (uint32_t)msgbits;
1060 		desc->sha_len_hi = msgbits >> 32;
1061 	}
1062 
1063 	desc->src_lo = (uint32_t)addr;
1064 	desc->src_hi = addr >> 32;
1065 	desc->src_mem = CCP_MEMTYPE_SYSTEM;
1066 
1067 	desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA);
1068 
1069 	qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
1070 	return (0);
1071 }
1072 
1073 static int __must_check
1074 ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src,
1075     struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx)
1076 {
1077 	const struct SHA_Defn *defn;
1078 	struct sglist_seg *seg;
1079 	size_t i, msgsize, remaining, nb;
1080 	uint32_t lsbaddr;
1081 	int error;
1082 
1083 	for (i = 0; i < nitems(SHA_definitions); i++)
1084 		if (SHA_definitions[i].version == version)
1085 			break;
1086 	if (i == nitems(SHA_definitions))
1087 		return (EINVAL);
1088 	defn = &SHA_definitions[i];
1089 
1090 	/* XXX validate input ??? */
1091 
1092 	/* Load initial SHA state into LSB */
1093 	/* XXX ensure H_vectors don't span page boundaries */
1094 	error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA),
1095 	    CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors),
1096 	    CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE),
1097 	    CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
1098 	    NULL);
1099 	if (error != 0)
1100 		return (error);
1101 
1102 	/* Execute series of SHA updates on correctly sized buffers */
1103 	msgsize = 0;
1104 	for (i = 0; i < sgl_src->sg_nseg; i++) {
1105 		seg = &sgl_src->sg_segs[i];
1106 		msgsize += seg->ss_len;
1107 		error = ccp_sha_single_desc(qp, defn, seg->ss_paddr,
1108 		    seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1,
1109 		    msgsize << 3);
1110 		if (error != 0)
1111 			return (error);
1112 	}
1113 
1114 	/* Copy result out to sgl_dst */
1115 	remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE);
1116 	lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA);
1117 	for (i = 0; i < sgl_dst->sg_nseg; i++) {
1118 		seg = &sgl_dst->sg_segs[i];
1119 		/* crd_len is int, so 32-bit min() is ok. */
1120 		nb = min(remaining, seg->ss_len);
1121 
1122 		error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM,
1123 		    lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP,
1124 		    CCP_PASSTHRU_BITWISE_NOOP,
1125 		    (cctx != NULL) ? (nb == remaining) : false,
1126 		    (nb == remaining) ? cctx : NULL);
1127 		if (error != 0)
1128 			return (error);
1129 
1130 		remaining -= nb;
1131 		lsbaddr += nb;
1132 		if (remaining == 0)
1133 			break;
1134 	}
1135 
1136 	return (0);
1137 }
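
/*
 * Worked example (illustrative): the final SHA descriptor carries the total
 * message length in bits.  For a source sglist of three segments of 64, 64
 * and 5 bytes, ccp_sha() issues descriptors with (som, eom) of (1, 0),
 * (0, 0) and (0, 1), and the last one passes msgbits = (64 + 64 + 5) << 3 =
 * 1064.
 */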
1138 
1139 static void
1140 byteswap256(uint64_t *buffer)
1141 {
1142 	uint64_t t;
1143 
1144 	t = bswap64(buffer[3]);
1145 	buffer[3] = bswap64(buffer[0]);
1146 	buffer[0] = t;
1147 
1148 	t = bswap64(buffer[2]);
1149 	buffer[2] = bswap64(buffer[1]);
1150 	buffer[1] = t;
1151 }
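
/*
 * Worked example (illustrative): byteswap256() reverses all 32 bytes of the
 * buffer in place -- byte 0 trades places with byte 31, byte 1 with byte
 * 30, and so on -- by exchanging bswap64()'d 64-bit words.
 */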
1152 
1153 /*
1154  * Translate the CCP's internal LSB hash format into a standard hash output.
1155  *
1156  * Manipulates the input buffer in place via byteswap256().
1157  */
1158 static void
1159 ccp_sha_copy_result(char *output, char *buffer, enum sha_version version)
1160 {
1161 	const struct SHA_Defn *defn;
1162 	size_t i;
1163 
1164 	for (i = 0; i < nitems(SHA_definitions); i++)
1165 		if (SHA_definitions[i].version == version)
1166 			break;
1167 	if (i == nitems(SHA_definitions))
1168 		panic("bogus sha version auth_mode %u\n", (unsigned)version);
1169 
1170 	defn = &SHA_definitions[i];
1171 
1172 	/* Swap 256bit manually -- DMA engine can, but with limitations */
1173 	byteswap256((void *)buffer);
1174 	if (defn->axf->hashsize > LSB_ENTRY_SIZE)
1175 		byteswap256((void *)(buffer + LSB_ENTRY_SIZE));
1176 
1177 	switch (defn->version) {
1178 	case SHA1:
1179 		memcpy(output, buffer + 12, defn->axf->hashsize);
1180 		break;
1181 #if 0
1182 	case SHA2_224:
1183 		memcpy(output, buffer + XXX, defn->axf->hashsize);
1184 		break;
1185 #endif
1186 	case SHA2_256:
1187 		memcpy(output, buffer, defn->axf->hashsize);
1188 		break;
1189 	case SHA2_384:
1190 		memcpy(output,
1191 		    buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize,
1192 		    defn->axf->hashsize - LSB_ENTRY_SIZE);
1193 		memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer,
1194 		    LSB_ENTRY_SIZE);
1195 		break;
1196 	case SHA2_512:
1197 		memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE);
1198 		memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE);
1199 		break;
1200 	}
1201 }
1202 
1203 static void
1204 ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s,
1205     struct cryptop *crp, struct cryptodesc *crd, int error)
1206 {
1207 	char ihash[SHA2_512_HASH_LEN /* max hash len */];
1208 	union authctx auth_ctx;
1209 	struct auth_hash *axf;
1210 
1211 	axf = s->hmac.auth_hash;
1212 
1213 	s->pending--;
1214 
1215 	if (error != 0) {
1216 		crp->crp_etype = error;
1217 		goto out;
1218 	}
1219 
1220 	/* Do remaining outer hash over small inner hash in software */
1221 	axf->Init(&auth_ctx);
1222 	axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize);
1223 	ccp_sha_copy_result(ihash, s->hmac.ipad, s->hmac.auth_mode);
1224 #if 0
1225 	INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__,
1226 	    (u_char *)ihash, " ");
1227 #endif
1228 	axf->Update(&auth_ctx, ihash, axf->hashsize);
1229 	axf->Final(s->hmac.ipad, &auth_ctx);
1230 
1231 	crypto_copyback(crp->crp_flags, crp->crp_buf, crd->crd_inject,
1232 	    s->hmac.hash_len, s->hmac.ipad);
1233 
1234 	/* Avoid leaking key material */
1235 	explicit_bzero(&auth_ctx, sizeof(auth_ctx));
1236 	explicit_bzero(s->hmac.ipad, sizeof(s->hmac.ipad));
1237 	explicit_bzero(s->hmac.opad, sizeof(s->hmac.opad));
1238 
1239 out:
1240 	crypto_done(crp);
1241 }
1242 
1243 static void
1244 ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1245     int error)
1246 {
1247 	struct cryptodesc *crd;
1248 	struct cryptop *crp;
1249 
1250 	crp = vcrp;
1251 	crd = crp->crp_desc;
1252 	ccp_do_hmac_done(qp, s, crp, crd, error);
1253 }
1254 
1255 static int __must_check
1256 ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1257     struct cryptodesc *crd, const struct ccp_completion_ctx *cctx)
1258 {
1259 	device_t dev;
1260 	struct auth_hash *axf;
1261 	int error;
1262 
1263 	dev = qp->cq_softc->dev;
1264 	axf = s->hmac.auth_hash;
1265 
1266 	/*
1267 	 * Populate the SGL describing the inner hash contents.  We want to
1268 	 * hash the ipad (key XOR fixed bit pattern) concatenated with the
1269 	 * user data.
1270 	 */
1271 	sglist_reset(qp->cq_sg_ulptx);
1272 	error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize);
1273 	if (error != 0)
1274 		return (error);
1275 	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1276 	    crd->crd_skip, crd->crd_len);
1277 	if (error != 0) {
1278 		DPRINTF(dev, "%s: sglist too short\n", __func__);
1279 		return (error);
1280 	}
1281 	/* Populate SGL for output -- just reuse hmac.ipad buffer. */
1282 	sglist_reset(qp->cq_sg_dst);
1283 	error = sglist_append(qp->cq_sg_dst, s->hmac.ipad,
1284 	    roundup2(axf->hashsize, LSB_ENTRY_SIZE));
1285 	if (error != 0)
1286 		return (error);
1287 
1288 	error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst,
1289 	    cctx);
1290 	if (error != 0) {
1291 		DPRINTF(dev, "%s: ccp_sha error\n", __func__);
1292 		return (error);
1293 	}
1294 	return (0);
1295 }
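
/*
 * For reference: HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m)).  The
 * bulk inner digest H((K ^ ipad) || m) is computed by the CCP SHA engine in
 * ccp_do_hmac() above; the short outer hash over opad and that digest is
 * then finished in software by ccp_do_hmac_done().
 */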
1296 
1297 int __must_check
1298 ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1299 {
1300 	struct ccp_completion_ctx ctx;
1301 	struct cryptodesc *crd;
1302 
1303 	crd = crp->crp_desc;
1304 
1305 	ctx.callback_fn = ccp_hmac_done;
1306 	ctx.callback_arg = crp;
1307 	ctx.session = s;
1308 
1309 	return (ccp_do_hmac(qp, s, crp, crd, &ctx));
1310 }
1311 
1312 static void
1313 ccp_byteswap(char *data, size_t len)
1314 {
1315 	size_t i;
1316 	char t;
1317 
1318 	len--;
1319 	for (i = 0; i < len; i++, len--) {
1320 		t = data[i];
1321 		data[i] = data[len];
1322 		data[len] = t;
1323 	}
1324 }
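
/*
 * Worked example (illustrative): ccp_byteswap() reverses a buffer in place,
 * so a 16-byte IV b[0]..b[15] becomes b[15]..b[0].  This is used below to
 * put key and IV material into the byte order the hardware expects.
 */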
1325 
1326 static void
1327 ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1328     int error)
1329 {
1330 	struct cryptop *crp;
1331 
1332 	explicit_bzero(&s->blkcipher, sizeof(s->blkcipher));
1333 
1334 	crp = vcrp;
1335 
1336 	s->pending--;
1337 
1338 	if (error != 0)
1339 		crp->crp_etype = error;
1340 
1341 	DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp);
1342 	crypto_done(crp);
1343 }
1344 
1345 static void
1346 ccp_collect_iv(struct ccp_session *s, struct cryptop *crp,
1347     struct cryptodesc *crd)
1348 {
1349 
1350 	if (crd->crd_flags & CRD_F_ENCRYPT) {
1351 		if (crd->crd_flags & CRD_F_IV_EXPLICIT)
1352 			memcpy(s->blkcipher.iv, crd->crd_iv,
1353 			    s->blkcipher.iv_len);
1354 		else
1355 			arc4rand(s->blkcipher.iv, s->blkcipher.iv_len, 0);
1356 		if ((crd->crd_flags & CRD_F_IV_PRESENT) == 0)
1357 			crypto_copyback(crp->crp_flags, crp->crp_buf,
1358 			    crd->crd_inject, s->blkcipher.iv_len,
1359 			    s->blkcipher.iv);
1360 	} else {
1361 		if (crd->crd_flags & CRD_F_IV_EXPLICIT)
1362 			memcpy(s->blkcipher.iv, crd->crd_iv,
1363 			    s->blkcipher.iv_len);
1364 		else
1365 			crypto_copydata(crp->crp_flags, crp->crp_buf,
1366 			    crd->crd_inject, s->blkcipher.iv_len,
1367 			    s->blkcipher.iv);
1368 	}
1369 
1370 	/*
1371 	 * If the input IV is 12 bytes, append an explicit counter of 1.
1372 	 */
1373 	if (crd->crd_alg == CRYPTO_AES_NIST_GCM_16 &&
1374 	    s->blkcipher.iv_len == 12) {
1375 		*(uint32_t *)&s->blkcipher.iv[12] = htobe32(1);
1376 		s->blkcipher.iv_len = AES_BLOCK_LEN;
1377 	}
1378 
1379 	if (crd->crd_alg == CRYPTO_AES_XTS && s->blkcipher.iv_len != AES_BLOCK_LEN) {
1380 		DPRINTF(NULL, "got ivlen != 16: %u\n", s->blkcipher.iv_len);
1381 		if (s->blkcipher.iv_len < AES_BLOCK_LEN)
1382 			memset(&s->blkcipher.iv[s->blkcipher.iv_len], 0,
1383 			    AES_BLOCK_LEN - s->blkcipher.iv_len);
1384 		s->blkcipher.iv_len = AES_BLOCK_LEN;
1385 	}
1386 
1387 	/* Reverse order of IV material for HW */
1388 	INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__,
1389 	    s->blkcipher.iv, " ", s->blkcipher.iv_len);
1390 
1391 	/*
1392 	 * For unknown reasons, XTS mode expects the IV in the reverse byte
1393 	 * order compared to every other AES mode.
1394 	 */
1395 	if (crd->crd_alg != CRYPTO_AES_XTS)
1396 		ccp_byteswap(s->blkcipher.iv, s->blkcipher.iv_len);
1397 }
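
/*
 * Worked example (illustrative): for AES-GCM with a 12-byte nonce N, the
 * code above forms the 16-byte block N || 0x00000001 (a big-endian 32-bit
 * counter of one) and then byte-reverses the whole IV for the hardware,
 * while an XTS tweak is zero-padded to 16 bytes and left unswapped.
 */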
1398 
1399 static int __must_check
1400 ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src,
1401     size_t len)
1402 {
1403 	int error;
1404 
1405 	sglist_reset(qp->cq_sg_ulptx);
1406 	error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len);
1407 	if (error != 0)
1408 		return (error);
1409 
1410 	error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len,
1411 	    false, NULL);
1412 	return (error);
1413 }
1414 
1415 static int __must_check
1416 ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1417     struct cryptodesc *crd, enum ccp_cipher_dir dir,
1418     const struct ccp_completion_ctx *cctx)
1419 {
1420 	struct ccp_desc *desc;
1421 	device_t dev;
1422 	unsigned i;
1423 	enum ccp_xts_unitsize usize;
1424 
1425 	/* IV and Key data are already loaded */
1426 
1427 	dev = qp->cq_softc->dev;
1428 
1429 	for (i = 0; i < nitems(ccp_xts_unitsize_map); i++)
1430 		if (ccp_xts_unitsize_map[i].cxu_size == crd->crd_len) {
1431 			usize = ccp_xts_unitsize_map[i].cxu_id;
1432 			break;
1433 		}
1434 	if (i >= nitems(ccp_xts_unitsize_map))
1435 		return (EINVAL);
1436 
1437 	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1438 		struct sglist_seg *seg;
1439 
1440 		seg = &qp->cq_sg_ulptx->sg_segs[i];
1441 
1442 		desc = &qp->desc_ring[qp->cq_tail];
1443 		desc->engine = CCP_ENGINE_XTS_AES;
1444 		desc->som = (i == 0);
1445 		desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1446 		desc->ioc = (desc->eom && cctx != NULL);
1447 		DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n",
1448 		    __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1449 		    (int)desc->ioc, (int)dir);
1450 
1451 		if (desc->ioc)
1452 			memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1453 			    sizeof(*cctx));
1454 
1455 		desc->aes_xts.encrypt = dir;
1456 		desc->aes_xts.type = s->blkcipher.cipher_type;
1457 		desc->aes_xts.size = usize;
1458 
1459 		DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__,
1460 		    qp->cq_tail, (unsigned)desc->aes_xts.type,
1461 		    (unsigned)desc->aes_xts.size);
1462 
1463 		desc->length = seg->ss_len;
1464 		desc->src_lo = (uint32_t)seg->ss_paddr;
1465 		desc->src_hi = (seg->ss_paddr >> 32);
1466 		desc->src_mem = CCP_MEMTYPE_SYSTEM;
1467 
1468 		/* Crypt in-place */
1469 		desc->dst_lo = desc->src_lo;
1470 		desc->dst_hi = desc->src_hi;
1471 		desc->dst_mem = desc->src_mem;
1472 
1473 		desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1474 		desc->key_hi = 0;
1475 		desc->key_mem = CCP_MEMTYPE_SB;
1476 
1477 		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1478 
1479 		qp->cq_tail = (qp->cq_tail + 1) %
1480 		    (1 << qp->cq_softc->ring_size_order);
1481 	}
1482 	return (0);
1483 }
1484 
1485 static int __must_check
1486 ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s,
1487     struct cryptop *crp, struct cryptodesc *crd,
1488     const struct ccp_completion_ctx *cctx)
1489 {
1490 	struct ccp_desc *desc;
1491 	char *keydata;
1492 	device_t dev;
1493 	enum ccp_cipher_dir dir;
1494 	int error;
1495 	size_t keydata_len;
1496 	unsigned i, j;
1497 
1498 	dev = qp->cq_softc->dev;
1499 
1500 	if (s->blkcipher.key_len == 0 || crd->crd_len == 0) {
1501 		DPRINTF(dev, "%s: empty\n", __func__);
1502 		return (EINVAL);
1503 	}
1504 	if ((crd->crd_len % AES_BLOCK_LEN) != 0) {
1505 		DPRINTF(dev, "%s: len modulo: %d\n", __func__, crd->crd_len);
1506 		return (EINVAL);
1507 	}
1508 
1509 	/*
1510 	 * Individual segments must be multiples of the AES block size for the
1511 	 * HW to process them.  Non-compliant inputs aren't bogus, just not
1512 	 * doable on this hardware.
1513 	 */
1514 	for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++)
1515 		if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1516 			DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1517 			    qp->cq_sg_crp->sg_segs[i].ss_len);
1518 			return (EINVAL);
1519 		}
1520 
1521 	/* Gather IV/nonce data */
1522 	ccp_collect_iv(s, crp, crd);
1523 
1524 	if ((crd->crd_flags & CRD_F_ENCRYPT) != 0)
1525 		dir = CCP_CIPHER_DIR_ENCRYPT;
1526 	else
1527 		dir = CCP_CIPHER_DIR_DECRYPT;
1528 
1529 	/* Set up passthrough op(s) to copy IV into LSB */
1530 	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1531 	    s->blkcipher.iv, s->blkcipher.iv_len);
1532 	if (error != 0)
1533 		return (error);
1534 
1535 	/*
1536 	 * Initialize keydata and keydata_len for GCC.  The default case of the
1537 	 * following switch is impossible to reach, but GCC doesn't know that.
1538 	 */
1539 	keydata_len = 0;
1540 	keydata = NULL;
1541 
1542 	switch (crd->crd_alg) {
1543 	case CRYPTO_AES_XTS:
1544 		for (j = 0; j < nitems(ccp_xts_unitsize_map); j++)
1545 			if (ccp_xts_unitsize_map[j].cxu_size == crd->crd_len)
1546 				break;
1547 		/* Input buffer must be a supported UnitSize */
1548 		if (j >= nitems(ccp_xts_unitsize_map)) {
1549 			device_printf(dev, "%s: rejected block size: %u\n",
1550 			    __func__, crd->crd_len);
1551 			return (EOPNOTSUPP);
1552 		}
1553 		/* FALLTHROUGH */
1554 	case CRYPTO_AES_CBC:
1555 	case CRYPTO_AES_ICM:
1556 		keydata = s->blkcipher.enckey;
1557 		keydata_len = s->blkcipher.key_len;
1558 		break;
1559 	}
1560 
1561 	INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len,
1562 	    keydata, " ");
1563 	if (crd->crd_alg == CRYPTO_AES_XTS)
1564 		INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " ");
1565 
1566 	/* Reverse order of key material for HW */
1567 	ccp_byteswap(keydata, keydata_len);
1568 
1569 	/* Store key material into LSB to avoid page boundaries */
1570 	if (crd->crd_alg == CRYPTO_AES_XTS) {
1571 		/*
1572 		 * XTS mode uses 2 256-bit vectors for the primary key and the
1573 		 * tweak key.  For 128-bit keys, the vectors are zero-padded.
1574 		 *
1575 		 * After byteswapping the combined OCF-provided K1:K2 vector
1576 		 * above, we need to reverse the order again so the hardware
1577 		 * gets the swapped keys in the order K1':K2'.
1578 		 */
1579 		error = ccp_do_pst_to_lsb(qp,
1580 		    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata,
1581 		    keydata_len / 2);
1582 		if (error != 0)
1583 			return (error);
1584 		error = ccp_do_pst_to_lsb(qp,
1585 		    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1586 		    keydata + (keydata_len / 2), keydata_len / 2);
1587 
1588 		/* Zero-pad 128 bit keys */
1589 		if (keydata_len == 32) {
1590 			if (error != 0)
1591 				return (error);
1592 			error = ccp_do_pst_to_lsb(qp,
1593 			    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) +
1594 			    keydata_len / 2, g_zeroes, keydata_len / 2);
1595 			if (error != 0)
1596 				return (error);
1597 			error = ccp_do_pst_to_lsb(qp,
1598 			    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) +
1599 			    keydata_len / 2, g_zeroes, keydata_len / 2);
1600 		}
1601 	} else
1602 		error = ccp_do_pst_to_lsb(qp,
1603 		    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata,
1604 		    keydata_len);
1605 	if (error != 0)
1606 		return (error);
1607 
1608 	/*
1609 	 * Point SGLs at the subset of cryptop buffer contents representing the
1610 	 * data.
1611 	 */
1612 	sglist_reset(qp->cq_sg_ulptx);
1613 	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1614 	    crd->crd_skip, crd->crd_len);
1615 	if (error != 0)
1616 		return (error);
1617 
1618 	INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__,
1619 	    (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " ");
1620 
1621 	DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail);
1622 
1623 	if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1624 		return (EAGAIN);
1625 
1626 	if (crd->crd_alg == CRYPTO_AES_XTS)
1627 		return (ccp_do_xts(qp, s, crp, crd, dir, cctx));
1628 
1629 	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1630 		struct sglist_seg *seg;
1631 
1632 		seg = &qp->cq_sg_ulptx->sg_segs[i];
1633 
1634 		desc = &qp->desc_ring[qp->cq_tail];
1635 		desc->engine = CCP_ENGINE_AES;
1636 		desc->som = (i == 0);
1637 		desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1638 		desc->ioc = (desc->eom && cctx != NULL);
1639 		DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n",
1640 		    __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1641 		    (int)desc->ioc, (int)dir);
1642 
1643 		if (desc->ioc)
1644 			memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1645 			    sizeof(*cctx));
1646 
1647 		desc->aes.encrypt = dir;
1648 		desc->aes.mode = s->blkcipher.cipher_mode;
1649 		desc->aes.type = s->blkcipher.cipher_type;
1650 		if (crd->crd_alg == CRYPTO_AES_ICM)
1651 			/*
1652 			 * Size of the CTR value in bits, minus 1.  ICM
1653 			 * mode uses all 128 bits as the counter.
1654 			 */
1655 			desc->aes.size = 127;
1656 
1657 		DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__,
1658 		    qp->cq_tail, (unsigned)desc->aes.mode,
1659 		    (unsigned)desc->aes.type, (unsigned)desc->aes.size);
1660 
1661 		desc->length = seg->ss_len;
1662 		desc->src_lo = (uint32_t)seg->ss_paddr;
1663 		desc->src_hi = (seg->ss_paddr >> 32);
1664 		desc->src_mem = CCP_MEMTYPE_SYSTEM;
1665 
1666 		/* Crypt in-place */
1667 		desc->dst_lo = desc->src_lo;
1668 		desc->dst_hi = desc->src_hi;
1669 		desc->dst_mem = desc->src_mem;
1670 
1671 		desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1672 		desc->key_hi = 0;
1673 		desc->key_mem = CCP_MEMTYPE_SB;
1674 
1675 		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1676 
1677 		qp->cq_tail = (qp->cq_tail + 1) %
1678 		    (1 << qp->cq_softc->ring_size_order);
1679 	}
1680 	return (0);
1681 }
1682 
1683 int __must_check
1684 ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1685 {
1686 	struct ccp_completion_ctx ctx;
1687 	struct cryptodesc *crd;
1688 
1689 	crd = crp->crp_desc;
1690 
1691 	ctx.callback_fn = ccp_blkcipher_done;
1692 	ctx.session = s;
1693 	ctx.callback_arg = crp;
1694 
1695 	return (ccp_do_blkcipher(qp, s, crp, crd, &ctx));
1696 }
1697 
1698 static void
1699 ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1700     int error)
1701 {
1702 	struct cryptodesc *crda;
1703 	struct cryptop *crp;
1704 
1705 	explicit_bzero(&s->blkcipher, sizeof(s->blkcipher));
1706 
1707 	crp = vcrp;
1708 	if (s->cipher_first)
1709 		crda = crp->crp_desc->crd_next;
1710 	else
1711 		crda = crp->crp_desc;
1712 
1713 	ccp_do_hmac_done(qp, s, crp, crda, error);
1714 }
1715 
1716 int __must_check
1717 ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1718     struct cryptodesc *crda, struct cryptodesc *crde)
1719 {
1720 	struct ccp_completion_ctx ctx;
1721 	int error;
1722 
1723 	ctx.callback_fn = ccp_authenc_done;
1724 	ctx.session = s;
1725 	ctx.callback_arg = crp;
1726 
1727 	/* Perform first operation */
1728 	if (s->cipher_first)
1729 		error = ccp_do_blkcipher(qp, s, crp, crde, NULL);
1730 	else
1731 		error = ccp_do_hmac(qp, s, crp, crda, NULL);
1732 	if (error != 0)
1733 		return (error);
1734 
1735 	/* Perform second operation */
1736 	if (s->cipher_first)
1737 		error = ccp_do_hmac(qp, s, crp, crda, &ctx);
1738 	else
1739 		error = ccp_do_blkcipher(qp, s, crp, crde, &ctx);
1740 	return (error);
1741 }
1742 
1743 static int __must_check
1744 ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s)
1745 {
1746 	struct ccp_desc *desc;
1747 	struct sglist_seg *seg;
1748 	unsigned i;
1749 
1750 	if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1751 		return (EAGAIN);
1752 
1753 	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1754 		seg = &qp->cq_sg_ulptx->sg_segs[i];
1755 
1756 		desc = &qp->desc_ring[qp->cq_tail];
1757 
1758 		desc->engine = CCP_ENGINE_AES;
1759 		desc->aes.mode = CCP_AES_MODE_GHASH;
1760 		desc->aes.type = s->blkcipher.cipher_type;
1761 		desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD;
1762 
1763 		desc->som = (i == 0);
1764 		desc->length = seg->ss_len;
1765 
1766 		desc->src_lo = (uint32_t)seg->ss_paddr;
1767 		desc->src_hi = (seg->ss_paddr >> 32);
1768 		desc->src_mem = CCP_MEMTYPE_SYSTEM;
1769 
1770 		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1771 
1772 		desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1773 		desc->key_mem = CCP_MEMTYPE_SB;
1774 
1775 		qp->cq_tail = (qp->cq_tail + 1) %
1776 		    (1 << qp->cq_softc->ring_size_order);
1777 	}
1778 	return (0);
1779 }
1780 
1781 static int __must_check
1782 ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s,
1783     enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom)
1784 {
1785 	struct ccp_desc *desc;
1786 
1787 	if (ccp_queue_get_ring_space(qp) == 0)
1788 		return (EAGAIN);
1789 
1790 	desc = &qp->desc_ring[qp->cq_tail];
1791 
1792 	desc->engine = CCP_ENGINE_AES;
1793 	desc->aes.mode = CCP_AES_MODE_GCTR;
1794 	desc->aes.type = s->blkcipher.cipher_type;
1795 	desc->aes.encrypt = dir;
1796 	desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1;
1797 
1798 	desc->som = som;
1799 	desc->eom = eom;
1800 
1801 	/* Trailing bytes will be masked off by aes.size above. */
1802 	desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN);
1803 
1804 	desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr;
1805 	desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32;
1806 	desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM;
1807 
1808 	desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1809 
1810 	desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1811 	desc->key_mem = CCP_MEMTYPE_SB;
1812 
1813 	qp->cq_tail = (qp->cq_tail + 1) %
1814 	    (1 << qp->cq_softc->ring_size_order);
1815 	return (0);
1816 }
1817 
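/*
 * Emit the final GHASH descriptor: hash the block of encoded AAD and
 * payload bit lengths staged in LSB_ENTRY_GHASH_IN by ccp_gcm() and
 * leave the resulting tag in LSB_ENTRY_GHASH for the later
 * pass-through copy out.
 */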
1818 static int __must_check
1819 ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s)
1820 {
1821 	struct ccp_desc *desc;
1822 
1823 	if (ccp_queue_get_ring_space(qp) == 0)
1824 		return (EAGAIN);
1825 
1826 	desc = &qp->desc_ring[qp->cq_tail];
1827 
1828 	desc->engine = CCP_ENGINE_AES;
1829 	desc->aes.mode = CCP_AES_MODE_GHASH;
1830 	desc->aes.type = s->blkcipher.cipher_type;
1831 	desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL;
1832 
1833 	desc->length = GMAC_BLOCK_LEN;
1834 
1835 	desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN);
1836 	desc->src_mem = CCP_MEMTYPE_SB;
1837 
1838 	desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1839 
1840 	desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1841 	desc->key_mem = CCP_MEMTYPE_SB;
1842 
1843 	desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH);
1844 	desc->dst_mem = CCP_MEMTYPE_SB;
1845 
1846 	qp->cq_tail = (qp->cq_tail + 1) %
1847 	    (1 << qp->cq_softc->ring_size_order);
1848 	return (0);
1849 }
1850 
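/*
 * GCM completion handler.  Encrypt requests are already finished; the
 * computed tag was copied directly into the caller's buffer.  Decrypt
 * requests compare the tag supplied with the request against the GHASH
 * result saved in s->gmac.final_block and flag EBADMSG on mismatch.
 * Cipher and GMAC session state are cleared in either case.
 */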
1851 static void
1852 ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1853     int error)
1854 {
1855 	char tag[GMAC_DIGEST_LEN];
1856 	struct cryptodesc *crde, *crda;
1857 	struct cryptop *crp;
1858 
1859 	crp = vcrp;
1860 	if (s->cipher_first) {
1861 		crde = crp->crp_desc;
1862 		crda = crp->crp_desc->crd_next;
1863 	} else {
1864 		crde = crp->crp_desc->crd_next;
1865 		crda = crp->crp_desc;
1866 	}
1867 
1868 	s->pending--;
1869 
1870 	if (error != 0) {
1871 		crp->crp_etype = error;
1872 		goto out;
1873 	}
1874 
1875 	/* Encrypt is done.  Decrypt needs to verify tag. */
1876 	if ((crde->crd_flags & CRD_F_ENCRYPT) != 0)
1877 		goto out;
1878 
1879 	/* Copy in message tag. */
1880 	crypto_copydata(crp->crp_flags, crp->crp_buf, crda->crd_inject,
1881 	    sizeof(tag), tag);
1882 
1883 	/* Verify tag against computed GMAC */
1884 	if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0)
1885 		crp->crp_etype = EBADMSG;
1886 
1887 out:
1888 	explicit_bzero(&s->blkcipher, sizeof(s->blkcipher));
1889 	explicit_bzero(&s->gmac, sizeof(s->gmac));
1890 	crypto_done(crp);
1891 }
1892 
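/*
 * Queue a full AES-GCM request as one chain of CCP descriptors:
 *
 *  1. pass the IV + zero GHASH block, byteswapped key, and encoded
 *     AAD/payload bit lengths through to the queue's LSB entries;
 *  2. GHASH the AAD, if any (block-multiple segments only);
 *  3. run GCTR over the payload segment by segment;
 *  4. restore the original IV and compute the final GHASH;
 *  5. pass the tag back out, either to the caller's buffer (encrypt)
 *     or into session state for verification (decrypt).
 *
 * Only the final descriptor carries the completion context, so
 * ccp_gcm_done() runs once for the whole chain.
 */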
1893 int __must_check
1894 ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1895     struct cryptodesc *crda, struct cryptodesc *crde)
1896 {
1897 	struct ccp_completion_ctx ctx;
1898 	enum ccp_cipher_dir dir;
1899 	device_t dev;
1900 	unsigned i;
1901 	int error;
1902 
1903 	if (s->blkcipher.key_len == 0)
1904 		return (EINVAL);
1905 
1906 	/*
1907 	 * AAD is only permitted before the cipher/plain text, not
1908 	 * after.
1909 	 */
1910 	if (crda->crd_len + crda->crd_skip > crde->crd_len + crde->crd_skip)
1911 		return (EINVAL);
1912 
1913 	dev = qp->cq_softc->dev;
1914 
1915 	if ((crde->crd_flags & CRD_F_ENCRYPT) != 0)
1916 		dir = CCP_CIPHER_DIR_ENCRYPT;
1917 	else
1918 		dir = CCP_CIPHER_DIR_DECRYPT;
1919 
1920 	/* Zero initial GHASH portion of context */
1921 	memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv));
1922 
1923 	/* Gather IV data */
1924 	ccp_collect_iv(s, crp, crde);
1925 
1926 	/* Reverse order of key material for HW */
1927 	ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len);
1928 
1929 	/* Prepare input buffer of concatenated lengths for final GHASH */
1930 	be64enc(s->gmac.final_block, (uint64_t)crda->crd_len * 8);
1931 	be64enc(&s->gmac.final_block[8], (uint64_t)crde->crd_len * 8);
1932 
1933 	/* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */
1934 	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1935 	    s->blkcipher.iv, 32);
1936 	if (error != 0)
1937 		return (error);
1938 	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1939 	    s->blkcipher.enckey, s->blkcipher.key_len);
1940 	if (error != 0)
1941 		return (error);
1942 	error = ccp_do_pst_to_lsb(qp,
1943 	    ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block,
1944 	    GMAC_BLOCK_LEN);
1945 	if (error != 0)
1946 		return (error);
1947 
1948 	/* First step - compute GHASH over AAD */
1949 	if (crda->crd_len != 0) {
1950 		sglist_reset(qp->cq_sg_ulptx);
1951 		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1952 		    crda->crd_skip, crda->crd_len);
1953 		if (error != 0)
1954 			return (error);
1955 
1956 		/* This engine cannot process AAD that is not a block multiple. */
1957 		for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1958 			if ((qp->cq_sg_ulptx->sg_segs[i].ss_len %
1959 			    GMAC_BLOCK_LEN) != 0) {
1960 				DPRINTF(dev, "%s: AD seg modulo: %zu\n",
1961 				    __func__,
1962 				    qp->cq_sg_ulptx->sg_segs[i].ss_len);
1963 				return (EINVAL);
1964 			}
1965 
1966 		error = ccp_do_ghash_aad(qp, s);
1967 		if (error != 0)
1968 			return (error);
1969 	}
1970 
1971 	/* Feed data piece by piece into GCTR */
1972 	sglist_reset(qp->cq_sg_ulptx);
1973 	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1974 	    crde->crd_skip, crde->crd_len);
1975 	if (error != 0)
1976 		return (error);
1977 
1978 	/*
1979 	 * All segments except the last must be even multiples of AES block
1980 	 * size for the HW to process it.  Non-compliant inputs aren't bogus,
1981 	 * just not doable on this hardware.
1982 	 *
1983 	 * XXX: Well, the hardware will produce a valid tag for shorter final
1984 	 * segment inputs, but it will still write out a block-sized plaintext
1985 	 * or ciphertext chunk.  For a typical CRP this tramples trailing data,
1986 	 * including the provided message tag.  So, reject such inputs for now.
1987 	 */
1988 	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1989 		if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1990 			DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1991 			    qp->cq_sg_ulptx->sg_segs[i].ss_len);
1992 			return (EINVAL);
1993 		}
1994 
1995 	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1996 		struct sglist_seg *seg;
1997 
1998 		seg = &qp->cq_sg_ulptx->sg_segs[i];
1999 		error = ccp_do_gctr(qp, s, dir, seg,
2000 		    (i == 0 && crda->crd_len == 0),
2001 		    i == (qp->cq_sg_ulptx->sg_nseg - 1));
2002 		if (error != 0)
2003 			return (error);
2004 	}
2005 
2006 	/* Send just initial IV (not GHASH!) to LSB again */
2007 	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
2008 	    s->blkcipher.iv, s->blkcipher.iv_len);
2009 	if (error != 0)
2010 		return (error);
2011 
2012 	ctx.callback_fn = ccp_gcm_done;
2013 	ctx.session = s;
2014 	ctx.callback_arg = crp;
2015 
2016 	/* Compute final hash and copy result back */
2017 	error = ccp_do_ghash_final(qp, s);
2018 	if (error != 0)
2019 		return (error);
2020 
2021 	/* When encrypting, copy computed tag out to caller buffer. */
2022 	sglist_reset(qp->cq_sg_ulptx);
2023 	if (dir == CCP_CIPHER_DIR_ENCRYPT)
2024 		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
2025 		    crda->crd_inject, s->gmac.hash_len);
2026 	else
2027 		/*
2028 		 * For decrypting, copy the computed tag out to our session
2029 		 * buffer to verify in our callback.
2030 		 */
2031 		error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block,
2032 		    s->gmac.hash_len);
2033 	if (error != 0)
2034 		return (error);
2035 	error = ccp_passthrough_sgl(qp,
2036 	    ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx,
2037 	    s->gmac.hash_len, true, &ctx);
2038 	return (error);
2039 }
2040 
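/*
 * Pull 32-bit words from the CCP TRNG output register for use as an
 * entropy source.  A zero read means the TRNG had nothing available;
 * retry up to MAX_TRNG_RETRIES times per word and report zero bytes
 * harvested if it stays dry.
 */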
2041 #define MAX_TRNG_RETRIES	10
2042 u_int
2043 random_ccp_read(void *v, u_int c)
2044 {
2045 	uint32_t *buf;
2046 	u_int i, j;
2047 
2048 	KASSERT(c % sizeof(*buf) == 0, ("%u not multiple of uint32_t", c));
2049 
2050 	buf = v;
2051 	for (i = c; i > 0; i -= sizeof(*buf)) {
2052 		for (j = 0; j < MAX_TRNG_RETRIES; j++) {
2053 			*buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET);
2054 			if (*buf != 0)
2055 				break;
2056 		}
2057 		if (j == MAX_TRNG_RETRIES)
2058 			return (0);
2059 		buf++;
2060 	}
2061 	return (c);
2063 }
2064 
2065 #ifdef DDB
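/*
 * DDB helper: dump the global (device-wide) CCP registers of interest.
 */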
2066 void
2067 db_ccp_show_hw(struct ccp_softc *sc)
2068 {
2069 
2070 	db_printf("  queue mask: 0x%x\n",
2071 	    ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET));
2072 	db_printf("  queue prio: 0x%x\n",
2073 	    ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET));
2074 	db_printf("  reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET));
2075 	db_printf("  trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET));
2076 	db_printf("  cmd timeout: 0x%x\n",
2077 	    ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET));
2078 	db_printf("  lsb public mask lo: 0x%x\n",
2079 	    ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET));
2080 	db_printf("  lsb public mask hi: 0x%x\n",
2081 	    ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET));
2082 	db_printf("  lsb private mask lo: 0x%x\n",
2083 	    ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET));
2084 	db_printf("  lsb private mask hi: 0x%x\n",
2085 	    ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET));
2086 	db_printf("  version: 0x%x\n", ccp_read_4(sc, VERSION_REG));
2087 }
2088 
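/*
 * DDB helper: dump one command queue's control and status registers
 * and, if the status word reports an error, decode the error code and
 * print the offending descriptor.
 */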
2089 void
2090 db_ccp_show_queue_hw(struct ccp_queue *qp)
2091 {
2092 	const struct ccp_error_code *ec;
2093 	struct ccp_softc *sc;
2094 	uint32_t status, error, esource, faultblock, headlo, qcontrol;
2095 	unsigned q, i;
2096 
2097 	sc = qp->cq_softc;
2098 	q = qp->cq_qindex;
2099 
2100 	qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE);
2101 	db_printf("  qcontrol: 0x%x%s%s\n", qcontrol,
2102 	    (qcontrol & CMD_Q_RUN) ? " RUN" : "",
2103 	    (qcontrol & CMD_Q_HALTED) ? " HALTED" : "");
2104 	db_printf("  tail_lo: 0x%x\n",
2105 	    ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE));
2106 	headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
2107 	db_printf("  head_lo: 0x%x\n", headlo);
2108 	db_printf("  int enable: 0x%x\n",
2109 	    ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE));
2110 	db_printf("  interrupt status: 0x%x\n",
2111 	    ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE));
2112 	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
2113 	db_printf("  status: 0x%x\n", status);
2114 	db_printf("  int stats: 0x%x\n",
2115 	    ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE));
2116 
2117 	error = status & STATUS_ERROR_MASK;
2118 	if (error == 0)
2119 		return;
2120 
2121 	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
2122 	    STATUS_ERRORSOURCE_MASK;
2123 	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
2124 	    STATUS_VLSB_FAULTBLOCK_MASK;
2125 
2126 	ec = NULL;
2127 	for (i = 0; i < nitems(ccp_error_codes); i++)
2128 		if (ccp_error_codes[i].ce_code == error)
2129 			break;
2130 	if (i < nitems(ccp_error_codes))
2131 		ec = &ccp_error_codes[i];
2132 
2133 	db_printf("  Error: %s (%u) Source: %u Faulting LSB block: %u\n",
2134 	    (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
2135 	    faultblock);
2136 	if (ec != NULL)
2137 		db_printf("  Error description: %s\n", ec->ce_desc);
2138 
2139 	i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
2140 	db_printf("  Bad descriptor idx: %u contents:\n  %32D\n", i,
2141 	    (void *)&qp->desc_ring[i], " ");
2142 }
2143 #endif
2144