1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2017 Chelsio Communications, Inc.
5 * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org>
6 * All rights reserved.
7 * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 #include "opt_ddb.h"
33
34 #include <sys/param.h>
35 #include <sys/bus.h>
36 #include <sys/lock.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/mutex.h>
40 #include <sys/module.h>
41 #include <sys/rman.h>
42 #include <sys/sglist.h>
43 #include <sys/sysctl.h>
44
45 #ifdef DDB
46 #include <ddb/ddb.h>
47 #endif
48
49 #include <dev/pci/pcireg.h>
50 #include <dev/pci/pcivar.h>
51
52 #include <machine/bus.h>
53 #include <machine/resource.h>
54 #include <machine/vmparam.h>
55
56 #include <opencrypto/cryptodev.h>
57 #include <opencrypto/xform.h>
58
59 #include <vm/vm.h>
60 #include <vm/pmap.h>
61
62 #include "cryptodev_if.h"
63
64 #include "ccp.h"
65 #include "ccp_hardware.h"
66 #include "ccp_lsb.h"
67
68 CTASSERT(sizeof(struct ccp_desc) == 32);
69
70 static struct ccp_xts_unitsize_map_entry {
71 enum ccp_xts_unitsize cxu_id;
72 unsigned cxu_size;
73 } ccp_xts_unitsize_map[] = {
74 { CCP_XTS_AES_UNIT_SIZE_16, 16 },
75 { CCP_XTS_AES_UNIT_SIZE_512, 512 },
76 { CCP_XTS_AES_UNIT_SIZE_1024, 1024 },
77 { CCP_XTS_AES_UNIT_SIZE_2048, 2048 },
78 { CCP_XTS_AES_UNIT_SIZE_4096, 4096 },
79 };
80
81 SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
82 "ccp node");
83
84 unsigned g_ccp_ring_order = 11;
85 SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order,
86 0, "Set CCP ring order. (1 << this) == ring size. Min: 6, Max: 16");
87
88 /*
89 * Zero buffer, sufficient for padding LSB entries, that does not span a page
90 * boundary
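 * (32 bytes is exactly one LSB entry, per the LSB notes below; the 32-byte
 * alignment keeps the buffer within a single page.)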
91 */
92 static const char g_zeroes[32] __aligned(32);
93
94 static inline uint32_t
95 ccp_read_4(struct ccp_softc *sc, uint32_t offset)
96 {
97 return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset));
98 }
99
100 static inline void
101 ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value)
102 {
103 bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value);
104 }
105
106 static inline uint32_t
107 ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset)
108 {
109 /*
110 * Each queue gets its own 4kB register space. Queue 0 is at 0x1000.
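	 * E.g., with that 4kB (0x1000) stride, queue 2's registers begin at 0x3000.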
111 */
112 return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset));
113 }
114
115 static inline void
116 ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset,
117 uint32_t value)
118 {
119 ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value);
120 }
121
122 void
123 ccp_queue_write_tail(struct ccp_queue *qp)
124 {
125 ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE,
126 ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail));
127 }
128
129 /*
130 * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of
131 * that entry for the queue's private LSB region.
132 */
133 static inline uint8_t
134 ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry)
135 {
136 return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry));
137 }
138
139 /*
140 * Given a queue and a reserved LSB entry index, compute the LSB *address* of
141 * that entry for the queue's private LSB region.
142 */
143 static inline uint32_t
144 ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry)
145 {
146 return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE);
147 }
148
149 /*
150 * Some terminology:
151 *
152 * LSB - Local Storage Block
153 * =========================
154 *
155 * 8 segments/regions, each containing 16 entries.
156 *
157 * Each entry contains 256 bits (32 bytes).
158 *
159 * Segments are virtually addressed in commands, but accesses cannot cross
160 * segment boundaries. Virtual map uses an identity mapping by default
161 * (virtual segment N corresponds to physical segment N).
162 *
163 * Access to a physical region can be restricted to any subset of all five
164 * queues.
165 *
166 * "Pass-through" mode
167 * ===================
168 *
169 * Pass-through is a generic DMA engine, much like ioat(4). Some nice
170 * features:
171 *
172 * - Supports byte-swapping for endian conversion (32- or 256-bit words)
173 * - AND, OR, XOR with fixed 256-bit mask
174 * - CRC32 of data (may be used in tandem with bswap, but not bit operations)
175 * - Read/write of LSB
176 * - Memset
177 *
178 * If bit manipulation mode is enabled, input must be a multiple of 256 bits
179 * (32 bytes).
180 *
181 * If byte-swapping is enabled, input must be a multiple of the word size.
182 *
183 * Zlib mode -- only usable from one queue at a time, single job at a time.
184 * ========================================================================
185 *
186 * Only usable from private host, aka PSP? Not host processor?
187 *
188 * RNG.
189 * ====
190 *
191 * Raw bits are conditioned with AES and fed through CTR_DRBG. Output goes in
192 * a ring buffer readable by software.
193 *
194 * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are
195 * implemented on the raw input stream and may be enabled to verify min-entropy
196 * of 0.5 bits per bit.
197 */
198
199 static void
200 ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
201 {
202 bus_addr_t *baddr;
203
204 KASSERT(error == 0, ("%s: error:%d", __func__, error));
205 baddr = arg;
206 *baddr = segs->ds_addr;
207 }
208
209 static int
210 ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue)
211 {
212 struct ccp_softc *sc;
213 struct ccp_queue *qp;
214 void *desc;
215 size_t ringsz, num_descriptors;
216 int error;
217
218 desc = NULL;
219 sc = device_get_softc(dev);
220 qp = &sc->queues[queue];
221
222 /*
223 * Don't bother allocating a ring for queues the host isn't allowed to
224 * drive.
225 */
226 if ((sc->valid_queues & (1 << queue)) == 0)
227 return (0);
228
229 ccp_queue_decode_lsb_regions(sc, lsbmask, queue);
230
231 /* Ignore queues that do not have any LSB access. */
232 if (qp->lsb_mask == 0) {
233 device_printf(dev, "Ignoring queue %u with no LSB access\n",
234 queue);
235 sc->valid_queues &= ~(1 << queue);
236 return (0);
237 }
238
239 num_descriptors = 1 << sc->ring_size_order;
240 ringsz = sizeof(struct ccp_desc) * num_descriptors;
241
242 /*
243 * "Queue_Size" is order - 1.
244 *
245 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits.
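	 *
	 * E.g., the default ring order of 11 gives 2048 32-byte descriptors (a
	 * 64kB ring), which must start on a 64kB (1 << 16) boundary.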
246 */
247 error = bus_dma_tag_create(bus_get_dma_tag(dev),
248 1 << (5 + sc->ring_size_order),
249 #if defined(__i386__) && !defined(PAE)
250 0, BUS_SPACE_MAXADDR,
251 #else
252 (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT,
253 #endif
254 BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1,
255 ringsz, 0, NULL, NULL, &qp->ring_desc_tag);
256 if (error != 0)
257 goto out;
258
259 error = bus_dmamem_alloc(qp->ring_desc_tag, &desc,
260 BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map);
261 if (error != 0)
262 goto out;
263
264 error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc,
265 ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK);
266 if (error != 0)
267 goto out;
268
269 qp->desc_ring = desc;
270 qp->completions_ring = malloc(num_descriptors *
271 sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK);
272
273 /* Zero control register; among other things, clears the RUN flag. */
274 qp->qcontrol = 0;
275 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
276 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0);
277
278 /* Clear any leftover interrupt status flags */
279 ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE,
280 ALL_INTERRUPTS);
281
282 qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT;
283
284 ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE,
285 (uint32_t)qp->desc_ring_bus_addr);
286 ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE,
287 (uint32_t)qp->desc_ring_bus_addr);
288
289 /*
290 * Enable completion interrupts, as well as error or administrative
291 * halt interrupts. We don't use administrative halts, but they
292 * shouldn't trip unless we do, so it ought to be harmless.
293 */
294 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE,
295 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
296
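	/* The upper 32 bits of the ring base address are programmed via qcontrol. */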
297 qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT;
298 qp->qcontrol |= CMD_Q_RUN;
299 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
300
301 out:
302 if (error != 0) {
303 if (qp->desc_ring != NULL)
304 bus_dmamap_unload(qp->ring_desc_tag,
305 qp->ring_desc_map);
306 if (desc != NULL)
307 bus_dmamem_free(qp->ring_desc_tag, desc,
308 qp->ring_desc_map);
309 if (qp->ring_desc_tag != NULL)
310 bus_dma_tag_destroy(qp->ring_desc_tag);
311 }
312 return (error);
313 }
314
315 static void
316 ccp_hw_detach_queue(device_t dev, unsigned queue)
317 {
318 struct ccp_softc *sc;
319 struct ccp_queue *qp;
320
321 sc = device_get_softc(dev);
322 qp = &sc->queues[queue];
323
324 /*
325 * Don't bother allocating a ring for queues the host isn't allowed to
326 * drive.
327 */
328 if ((sc->valid_queues & (1 << queue)) == 0)
329 return;
330
331 free(qp->completions_ring, M_CCP);
332 bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map);
333 bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map);
334 bus_dma_tag_destroy(qp->ring_desc_tag);
335 }
336
337 static int
338 ccp_map_pci_bar(device_t dev)
339 {
340 struct ccp_softc *sc;
341
342 sc = device_get_softc(dev);
343
344 sc->pci_resource_id = PCIR_BAR(2);
345 sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
346 &sc->pci_resource_id, RF_ACTIVE);
347 if (sc->pci_resource == NULL) {
348 device_printf(dev, "unable to allocate pci resource\n");
349 return (ENODEV);
350 }
351
352 sc->pci_resource_id_msix = PCIR_BAR(5);
353 sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
354 &sc->pci_resource_id_msix, RF_ACTIVE);
355 if (sc->pci_resource_msix == NULL) {
356 device_printf(dev, "unable to allocate pci resource msix\n");
357 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
358 sc->pci_resource);
359 return (ENODEV);
360 }
361
362 sc->pci_bus_tag = rman_get_bustag(sc->pci_resource);
363 sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource);
364 return (0);
365 }
366
367 static void
368 ccp_unmap_pci_bar(device_t dev)
369 {
370 struct ccp_softc *sc;
371
372 sc = device_get_softc(dev);
373
374 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix,
375 sc->pci_resource_msix);
376 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
377 sc->pci_resource);
378 }
379
380 const static struct ccp_error_code {
381 uint8_t ce_code;
382 const char *ce_name;
383 int ce_errno;
384 const char *ce_desc;
385 } ccp_error_codes[] = {
386 { 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" },
387 { 0x03, "ILLEGAL_FUNCTION_TYPE", EIO,
388 "A non-supported function type was specified" },
389 { 0x04, "ILLEGAL_FUNCTION_MODE", EIO,
390 "A non-supported function mode was specified" },
391 { 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO,
392 "A CMAC type was specified when ENCRYPT was not specified" },
393 { 0x06, "ILLEGAL_FUNCTION_SIZE", EIO,
394 "A non-supported function size was specified.\n"
395 "AES-CFB: Size was not 127 or 7;\n"
396 "3DES-CFB: Size was not 7;\n"
397 "RSA: See supported size table (7.4.2);\n"
398 "ECC: Size was greater than 576 bits." },
399 { 0x07, "Zlib_MISSING_INIT_EOM", EIO,
400 "Zlib command does not have INIT and EOM set" },
401 { 0x08, "ILLEGAL_FUNCTION_RSVD", EIO,
402 "Reserved bits in a function specification were not 0" },
403 { 0x09, "ILLEGAL_BUFFER_LENGTH", EIO,
404 "The buffer length specified was not correct for the selected engine"
405 },
406 { 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n"
407 "Undefined VLSB segment mapping or\n"
408 "mapping to unsupported LSB segment id" },
409 { 0x0B, "ILLEGAL_MEM_ADDR", EFAULT,
410 "The specified source/destination buffer access was illegal:\n"
411 "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n"
412 "Data buffer not completely contained within a single segment; or\n"
413 "Pointer with Fixed=1 is not 32-bit aligned; or\n"
414 "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory."
415 },
416 { 0x0C, "ILLEGAL_MEM_SEL", EIO,
417 "A src_mem, dst_mem, or key_mem field was illegal:\n"
418 "A field was set to a reserved value; or\n"
419 "A public command attempted to reference AXI1 (local) or GART memory; or\n"
420 	    "A Zlib command attempted to use the LSB." },
421 { 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO,
422 "The specified context location was illegal:\n"
423 "Context located in a LSB location disallowed by the LSB protection masks; or\n"
424 "Context not completely contained within a single segment." },
425 { 0x0E, "ILLEGAL_KEY_ADDR", EIO,
426 "The specified key location was illegal:\n"
427 "Key located in a LSB location disallowed by the LSB protection masks; or\n"
428 "Key not completely contained within a single segment." },
429 { 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" },
430 /* XXX Could fill out these descriptions too */
431 { 0x13, "IDMA0_AXI_SLVERR", EIO, "" },
432 { 0x14, "IDMA0_AXI_DECERR", EIO, "" },
433 { 0x16, "IDMA1_AXI_SLVERR", EIO, "" },
434 { 0x17, "IDMA1_AXI_DECERR", EIO, "" },
435 { 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" },
436 { 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" },
437 { 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" },
438 { 0x1D, "ZLIB_EXTRA_DATA", EIO, "" },
439 { 0x1E, "ZLIB_BTYPE", EIO, "" },
440 { 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" },
441 { 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" },
442 { 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" },
443 { 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" },
444 { 0x24, "ZLIB_LIMIT_REACHED", EIO, "" },
445 { 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" },
446 { 0x26, "ODMA0_AXI_SLVERR", EIO, "" },
447 { 0x27, "ODMA0_AXI_DECERR", EIO, "" },
448 { 0x29, "ODMA1_AXI_SLVERR", EIO, "" },
449 { 0x2A, "ODMA1_AXI_DECERR", EIO, "" },
450 { 0x2B, "LSB_PARITY_ERR", EIO,
451 "A read from the LSB encountered a parity error" },
452 };
453
454 static void
455 ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc)
456 {
457 struct ccp_completion_ctx *cctx;
458 const struct ccp_error_code *ec;
459 struct ccp_softc *sc;
460 uint32_t status, error, esource, faultblock;
461 unsigned q, idx;
462 int errno;
463
464 sc = qp->cq_softc;
465 q = qp->cq_qindex;
466
467 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
468
469 error = status & STATUS_ERROR_MASK;
470
471 /* Decode error status */
472 ec = NULL;
473 for (idx = 0; idx < nitems(ccp_error_codes); idx++)
474 if (ccp_error_codes[idx].ce_code == error) {
475 ec = &ccp_error_codes[idx];
476 break;
477 }
478
479 esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
480 STATUS_ERRORSOURCE_MASK;
481 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
482 STATUS_VLSB_FAULTBLOCK_MASK;
483 device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n",
484 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
485 faultblock);
486 if (ec != NULL)
487 device_printf(sc->dev, "Error description: %s\n", ec->ce_desc);
488
489 /* TODO Could format the desc nicely here */
490 idx = desc - qp->desc_ring;
491 DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx,
492 (const void *)desc, " ");
493
494 /*
495 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status,
496 * Zlib Decompress status may be interesting.
497 */
498
499 while (true) {
500 /* Keep unused descriptors zero for next use. */
501 memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx]));
502
503 cctx = &qp->completions_ring[idx];
504
505 /*
506 * Restart procedure described in § 14.2.5. Could be used by HoC if we
507 * used that.
508 *
509 * Advance HEAD_LO past bad descriptor + any remaining in
510 * transaction manually, then restart queue.
511 */
512 idx = (idx + 1) % (1 << sc->ring_size_order);
513
514 /* Callback function signals end of transaction */
515 if (cctx->callback_fn != NULL) {
516 if (ec == NULL)
517 errno = EIO;
518 else
519 errno = ec->ce_errno;
520 /* TODO More specific error code */
521 cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno);
522 cctx->callback_fn = NULL;
523 break;
524 }
525 }
526
527 qp->cq_head = idx;
528 qp->cq_waiting = false;
529 wakeup(&qp->cq_tail);
530 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
531 ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE,
532 (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE));
533 ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol);
534 DPRINTF(sc->dev, "%s: Restarted queue\n", __func__);
535 }
536
537 static void
538 ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints)
539 {
540 struct ccp_completion_ctx *cctx;
541 struct ccp_softc *sc;
542 const struct ccp_desc *desc;
543 uint32_t headlo, idx;
544 unsigned q, completed;
545
546 sc = qp->cq_softc;
547 q = qp->cq_qindex;
548
549 mtx_lock(&qp->cq_lock);
550
551 /*
552 * Hardware HEAD_LO points to the first incomplete descriptor. Process
553 * any submitted and completed descriptors, up to but not including
554 * HEAD_LO.
555 */
556 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
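	/* Convert the hardware head bus address back into a ring index. */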
557 idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
558
559 DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx,
560 qp->cq_head);
561 completed = 0;
562 while (qp->cq_head != idx) {
563 DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head);
564
565 cctx = &qp->completions_ring[qp->cq_head];
566 if (cctx->callback_fn != NULL) {
567 cctx->callback_fn(qp, cctx->session,
568 cctx->callback_arg, 0);
569 cctx->callback_fn = NULL;
570 }
571
572 /* Keep unused descriptors zero for next use. */
573 memset(&qp->desc_ring[qp->cq_head], 0,
574 sizeof(qp->desc_ring[qp->cq_head]));
575
576 qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order);
577 completed++;
578 }
579 if (completed > 0) {
580 qp->cq_waiting = false;
581 wakeup(&qp->cq_tail);
582 }
583
584 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
585
586 /*
587 * Desc points to the first incomplete descriptor, at the time we read
588 * HEAD_LO. If there was an error flagged in interrupt status, the HW
589 * will not proceed past the erroneous descriptor by itself.
590 */
591 desc = &qp->desc_ring[idx];
592 if ((ints & INT_ERROR) != 0)
593 ccp_intr_handle_error(qp, desc);
594
595 mtx_unlock(&qp->cq_lock);
596 }
597
598 static void
599 ccp_intr_handler(void *arg)
600 {
601 struct ccp_softc *sc = arg;
602 size_t i;
603 uint32_t ints;
604
605 DPRINTF(sc->dev, "%s: interrupt\n", __func__);
606
607 /*
608 * We get one global interrupt per PCI device, shared over all of
609 * its queues. Scan each valid queue on interrupt for flags indicating
610 * activity.
611 */
612 for (i = 0; i < nitems(sc->queues); i++) {
613 if ((sc->valid_queues & (1 << i)) == 0)
614 continue;
615
616 ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE);
617 if (ints == 0)
618 continue;
619
620 #if 0
621 DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__,
622 (unsigned)ints, i);
623 #endif
624 /* Write back 1s to clear interrupt status bits. */
625 ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints);
626
627 /*
628 * If there was an error, we still need to run completions on
629 * any descriptors prior to the error. The completions handler
630 * invoked below will also handle the error descriptor.
631 */
632 if ((ints & (INT_COMPLETION | INT_ERROR)) != 0)
633 ccp_intr_run_completions(&sc->queues[i], ints);
634
635 if ((ints & INT_QUEUE_STOPPED) != 0)
636 device_printf(sc->dev, "%s: queue %zu stopped\n",
637 __func__, i);
638 }
639
640 /* Re-enable interrupts after processing */
641 for (i = 0; i < nitems(sc->queues); i++) {
642 if ((sc->valid_queues & (1 << i)) == 0)
643 continue;
644 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE,
645 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
646 }
647 }
648
649 static int
650 ccp_intr_filter(void *arg)
651 {
652 struct ccp_softc *sc = arg;
653 size_t i;
654
655 /* TODO: Split individual queues into separate taskqueues? */
656 for (i = 0; i < nitems(sc->queues); i++) {
657 if ((sc->valid_queues & (1 << i)) == 0)
658 continue;
659
660 /* Mask interrupt until task completes */
661 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0);
662 }
663
664 return (FILTER_SCHEDULE_THREAD);
665 }
666
667 static int
668 ccp_setup_interrupts(struct ccp_softc *sc)
669 {
670 uint32_t nvec;
671 int rid, error, n, ridcopy;
672
673 n = pci_msix_count(sc->dev);
674 if (n < 1) {
675 device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n);
676 return (ENXIO);
677 }
678
679 nvec = n;
680 error = pci_alloc_msix(sc->dev, &nvec);
681 if (error != 0) {
682 device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__,
683 error);
684 return (error);
685 }
686 if (nvec < 1) {
687 device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n",
688 __func__);
689 return (ENXIO);
690 }
691 if (nvec > nitems(sc->intr_res)) {
692 device_printf(sc->dev, "%s: too many vectors: %u\n", __func__,
693 nvec);
694 nvec = nitems(sc->intr_res);
695 }
696
697 for (rid = 1; rid < 1 + nvec; rid++) {
698 ridcopy = rid;
699 sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev,
700 SYS_RES_IRQ, &ridcopy, RF_ACTIVE);
701 if (sc->intr_res[rid - 1] == NULL) {
702 device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n",
703 __func__);
704 return (ENXIO);
705 }
706
707 sc->intr_tag[rid - 1] = NULL;
708 error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1],
709 INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter,
710 ccp_intr_handler, sc, &sc->intr_tag[rid - 1]);
711 if (error != 0)
712 device_printf(sc->dev, "%s: setup_intr: %d\n",
713 __func__, error);
714 }
715 sc->intr_count = nvec;
716
717 return (error);
718 }
719
720 static void
721 ccp_release_interrupts(struct ccp_softc *sc)
722 {
723 unsigned i;
724
725 for (i = 0; i < sc->intr_count; i++) {
726 if (sc->intr_tag[i] != NULL)
727 bus_teardown_intr(sc->dev, sc->intr_res[i],
728 sc->intr_tag[i]);
729 if (sc->intr_res[i] != NULL)
730 bus_release_resource(sc->dev, SYS_RES_IRQ,
731 rman_get_rid(sc->intr_res[i]), sc->intr_res[i]);
732 }
733
734 pci_release_msi(sc->dev);
735 }
736
737 int
738 ccp_hw_attach(device_t dev)
739 {
740 struct ccp_softc *sc;
741 uint64_t lsbmask;
742 uint32_t version, lsbmasklo, lsbmaskhi;
743 unsigned queue_idx, j;
744 int error;
745 bool bars_mapped, interrupts_setup;
746
747 queue_idx = 0;
748 bars_mapped = interrupts_setup = false;
749 sc = device_get_softc(dev);
750
751 error = ccp_map_pci_bar(dev);
752 if (error != 0) {
753 device_printf(dev, "%s: couldn't map BAR(s)\n", __func__);
754 goto out;
755 }
756 bars_mapped = true;
757
758 error = pci_enable_busmaster(dev);
759 if (error != 0) {
760 device_printf(dev, "%s: couldn't enable busmaster\n",
761 __func__);
762 goto out;
763 }
764
765 sc->ring_size_order = g_ccp_ring_order;
766 if (sc->ring_size_order < 6 || sc->ring_size_order > 16) {
767 device_printf(dev, "bogus hw.ccp.ring_order\n");
768 error = EINVAL;
769 goto out;
770 }
771 sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET);
772
773 version = ccp_read_4(sc, VERSION_REG);
774 if ((version & VERSION_NUM_MASK) < 5) {
775 device_printf(dev,
776 "driver supports version 5 and later hardware\n");
777 error = ENXIO;
778 goto out;
779 }
780
781 error = ccp_setup_interrupts(sc);
782 if (error != 0)
783 goto out;
784 interrupts_setup = true;
785
786 sc->hw_version = version & VERSION_NUM_MASK;
787 sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) &
788 VERSION_NUMVQM_MASK;
789 sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) &
790 VERSION_LSBSIZE_MASK;
791 sc->hw_features = version & VERSION_CAP_MASK;
792
793 /*
794 * Copy private LSB mask to public registers to enable access to LSB
795 * from all queues allowed by BIOS.
796 */
797 lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET);
798 lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET);
799 ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo);
800 ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi);
801
802 lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo;
803
804 for (; queue_idx < nitems(sc->queues); queue_idx++) {
805 error = ccp_hw_attach_queue(dev, lsbmask, queue_idx);
806 if (error != 0) {
807 device_printf(dev, "%s: couldn't attach queue %u\n",
808 __func__, queue_idx);
809 goto out;
810 }
811 }
812 ccp_assign_lsb_regions(sc, lsbmask);
813
814 out:
815 if (error != 0) {
816 if (interrupts_setup)
817 ccp_release_interrupts(sc);
818 for (j = 0; j < queue_idx; j++)
819 ccp_hw_detach_queue(dev, j);
820 if (sc->ring_size_order != 0)
821 pci_disable_busmaster(dev);
822 if (bars_mapped)
823 ccp_unmap_pci_bar(dev);
824 }
825 return (error);
826 }
827
828 void
829 ccp_hw_detach(device_t dev)
830 {
831 struct ccp_softc *sc;
832 unsigned i;
833
834 sc = device_get_softc(dev);
835
836 for (i = 0; i < nitems(sc->queues); i++)
837 ccp_hw_detach_queue(dev, i);
838
839 ccp_release_interrupts(sc);
840 pci_disable_busmaster(dev);
841 ccp_unmap_pci_bar(dev);
842 }
843
844 static int __must_check
845 ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst,
846 enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type,
847 bus_size_t len, enum ccp_passthru_byteswap swapmode,
848 enum ccp_passthru_bitwise bitmode, bool interrupt,
849 const struct ccp_completion_ctx *cctx)
850 {
851 struct ccp_desc *desc;
852
853 if (ccp_queue_get_ring_space(qp) == 0)
854 return (EAGAIN);
855
856 desc = &qp->desc_ring[qp->cq_tail];
857
858 memset(desc, 0, sizeof(*desc));
859 desc->engine = CCP_ENGINE_PASSTHRU;
860
861 desc->pt.ioc = interrupt;
862 desc->pt.byteswap = swapmode;
863 desc->pt.bitwise = bitmode;
864 desc->length = len;
865
866 desc->src_lo = (uint32_t)src;
867 desc->src_hi = src >> 32;
868 desc->src_mem = src_type;
869
870 desc->dst_lo = (uint32_t)dst;
871 desc->dst_hi = dst >> 32;
872 desc->dst_mem = dst_type;
873
874 if (bitmode != CCP_PASSTHRU_BITWISE_NOOP)
875 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY);
876
877 if (cctx != NULL)
878 memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx));
879
880 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
881 return (0);
882 }
883
884 static int __must_check
885 ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb,
886 struct sglist *sgl, bus_size_t len, bool interrupt,
887 const struct ccp_completion_ctx *cctx)
888 {
889 struct sglist_seg *seg;
890 size_t i, remain, nb;
891 int error;
892
893 remain = len;
894 for (i = 0; i < sgl->sg_nseg && remain != 0; i++) {
895 seg = &sgl->sg_segs[i];
896 /* crp lengths are int, so 32-bit min() is ok. */
897 nb = min(remain, seg->ss_len);
898
899 if (tolsb)
900 error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB,
901 seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb,
902 CCP_PASSTHRU_BYTESWAP_NOOP,
903 CCP_PASSTHRU_BITWISE_NOOP,
904 (nb == remain) && interrupt, cctx);
905 else
906 error = ccp_passthrough(qp, seg->ss_paddr,
907 CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb,
908 CCP_PASSTHRU_BYTESWAP_NOOP,
909 CCP_PASSTHRU_BITWISE_NOOP,
910 (nb == remain) && interrupt, cctx);
911 if (error != 0)
912 return (error);
913
914 remain -= nb;
915 }
916 return (0);
917 }
918
919 /*
920 * Note that these vectors are in reverse of the usual order.
921 */
922 const struct SHA_vectors {
923 uint32_t SHA1[8];
924 uint32_t SHA224[8];
925 uint32_t SHA256[8];
926 uint64_t SHA384[8];
927 uint64_t SHA512[8];
928 } SHA_H __aligned(PAGE_SIZE) = {
929 .SHA1 = {
930 0xc3d2e1f0ul,
931 0x10325476ul,
932 0x98badcfeul,
933 0xefcdab89ul,
934 0x67452301ul,
935 0,
936 0,
937 0,
938 },
939 .SHA224 = {
940 0xbefa4fa4ul,
941 0x64f98fa7ul,
942 0x68581511ul,
943 0xffc00b31ul,
944 0xf70e5939ul,
945 0x3070dd17ul,
946 0x367cd507ul,
947 0xc1059ed8ul,
948 },
949 .SHA256 = {
950 0x5be0cd19ul,
951 0x1f83d9abul,
952 0x9b05688cul,
953 0x510e527ful,
954 0xa54ff53aul,
955 0x3c6ef372ul,
956 0xbb67ae85ul,
957 0x6a09e667ul,
958 },
959 .SHA384 = {
960 0x47b5481dbefa4fa4ull,
961 0xdb0c2e0d64f98fa7ull,
962 0x8eb44a8768581511ull,
963 0x67332667ffc00b31ull,
964 0x152fecd8f70e5939ull,
965 0x9159015a3070dd17ull,
966 0x629a292a367cd507ull,
967 0xcbbb9d5dc1059ed8ull,
968 },
969 .SHA512 = {
970 0x5be0cd19137e2179ull,
971 0x1f83d9abfb41bd6bull,
972 0x9b05688c2b3e6c1full,
973 0x510e527fade682d1ull,
974 0xa54ff53a5f1d36f1ull,
975 0x3c6ef372fe94f82bull,
976 0xbb67ae8584caa73bull,
977 0x6a09e667f3bcc908ull,
978 },
979 };
980 /*
981 * Ensure vectors do not cross a page boundary.
982 *
983 * Disabled due to a new Clang error: "expression is not an integral constant
984 * expression." GCC (cross toolchain) seems to handle this assertion with
985 * _Static_assert just fine.
986 */
987 #if 0
988 CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H));
989 #endif
990
991 const struct SHA_Defn {
992 enum sha_version version;
993 const void *H_vectors;
994 size_t H_size;
995 const struct auth_hash *axf;
996 enum ccp_sha_type engine_type;
997 } SHA_definitions[] = {
998 {
999 .version = SHA1,
1000 .H_vectors = SHA_H.SHA1,
1001 .H_size = sizeof(SHA_H.SHA1),
1002 .axf = &auth_hash_hmac_sha1,
1003 .engine_type = CCP_SHA_TYPE_1,
1004 },
1005 #if 0
1006 {
1007 .version = SHA2_224,
1008 .H_vectors = SHA_H.SHA224,
1009 .H_size = sizeof(SHA_H.SHA224),
1010 .axf = &auth_hash_hmac_sha2_224,
1011 .engine_type = CCP_SHA_TYPE_224,
1012 },
1013 #endif
1014 {
1015 .version = SHA2_256,
1016 .H_vectors = SHA_H.SHA256,
1017 .H_size = sizeof(SHA_H.SHA256),
1018 .axf = &auth_hash_hmac_sha2_256,
1019 .engine_type = CCP_SHA_TYPE_256,
1020 },
1021 {
1022 .version = SHA2_384,
1023 .H_vectors = SHA_H.SHA384,
1024 .H_size = sizeof(SHA_H.SHA384),
1025 .axf = &auth_hash_hmac_sha2_384,
1026 .engine_type = CCP_SHA_TYPE_384,
1027 },
1028 {
1029 .version = SHA2_512,
1030 .H_vectors = SHA_H.SHA512,
1031 .H_size = sizeof(SHA_H.SHA512),
1032 .axf = &auth_hash_hmac_sha2_512,
1033 .engine_type = CCP_SHA_TYPE_512,
1034 },
1035 };
1036
1037 static int __must_check
1038 ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn,
1039 vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits)
1040 {
1041 struct ccp_desc *desc;
1042
1043 if (ccp_queue_get_ring_space(qp) == 0)
1044 return (EAGAIN);
1045
1046 desc = &qp->desc_ring[qp->cq_tail];
1047
1048 memset(desc, 0, sizeof(*desc));
1049 desc->engine = CCP_ENGINE_SHA;
1050 desc->som = start;
1051 desc->eom = end;
1052
1053 desc->sha.type = defn->engine_type;
1054 desc->length = len;
1055
1056 if (end) {
1057 desc->sha_len_lo = (uint32_t)msgbits;
1058 desc->sha_len_hi = msgbits >> 32;
1059 }
1060
1061 desc->src_lo = (uint32_t)addr;
1062 desc->src_hi = addr >> 32;
1063 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1064
1065 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA);
1066
1067 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
1068 return (0);
1069 }
1070
1071 static int __must_check
1072 ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src,
1073 struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx)
1074 {
1075 const struct SHA_Defn *defn;
1076 struct sglist_seg *seg;
1077 size_t i, msgsize, remaining, nb;
1078 uint32_t lsbaddr;
1079 int error;
1080
1081 for (i = 0; i < nitems(SHA_definitions); i++)
1082 if (SHA_definitions[i].version == version)
1083 break;
1084 if (i == nitems(SHA_definitions))
1085 return (EINVAL);
1086 defn = &SHA_definitions[i];
1087
1088 /* XXX validate input ??? */
1089
1090 /* Load initial SHA state into LSB */
1091 /* XXX ensure H_vectors don't span page boundaries */
1092 error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA),
1093 CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors),
1094 CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE),
1095 CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
1096 NULL);
1097 if (error != 0)
1098 return (error);
1099
1100 /* Execute series of SHA updates on correctly sized buffers */
1101 msgsize = 0;
1102 for (i = 0; i < sgl_src->sg_nseg; i++) {
1103 seg = &sgl_src->sg_segs[i];
1104 msgsize += seg->ss_len;
1105 error = ccp_sha_single_desc(qp, defn, seg->ss_paddr,
1106 seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1,
1107 msgsize << 3);
1108 if (error != 0)
1109 return (error);
1110 }
1111
1112 /* Copy result out to sgl_dst */
1113 remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE);
1114 lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA);
1115 for (i = 0; i < sgl_dst->sg_nseg; i++) {
1116 seg = &sgl_dst->sg_segs[i];
1117 /* crp lengths are int, so 32-bit min() is ok. */
1118 nb = min(remaining, seg->ss_len);
1119
1120 error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM,
1121 lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP,
1122 CCP_PASSTHRU_BITWISE_NOOP,
1123 (cctx != NULL) ? (nb == remaining) : false,
1124 (nb == remaining) ? cctx : NULL);
1125 if (error != 0)
1126 return (error);
1127
1128 remaining -= nb;
1129 lsbaddr += nb;
1130 if (remaining == 0)
1131 break;
1132 }
1133
1134 return (0);
1135 }
1136
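/*
 * Byte-swap a 256-bit LSB entry in place, treating it as four 64-bit words;
 * the net effect is a full 32-byte reversal.
 */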
1137 static void
1138 byteswap256(uint64_t *buffer)
1139 {
1140 uint64_t t;
1141
1142 t = bswap64(buffer[3]);
1143 buffer[3] = bswap64(buffer[0]);
1144 buffer[0] = t;
1145
1146 t = bswap64(buffer[2]);
1147 buffer[2] = bswap64(buffer[1]);
1148 buffer[1] = t;
1149 }
1150
1151 /*
1152  * Translate CCP internal LSB hash format into a standard hash output.
1153 *
1154 * Manipulates input buffer with byteswap256 operation.
1155 */
1156 static void
1157 ccp_sha_copy_result(char *output, char *buffer, enum sha_version version)
1158 {
1159 const struct SHA_Defn *defn;
1160 size_t i;
1161
1162 for (i = 0; i < nitems(SHA_definitions); i++)
1163 if (SHA_definitions[i].version == version)
1164 break;
1165 if (i == nitems(SHA_definitions))
1166 panic("bogus sha version auth_mode %u\n", (unsigned)version);
1167
1168 defn = &SHA_definitions[i];
1169
1170 /* Swap 256bit manually -- DMA engine can, but with limitations */
1171 byteswap256((void *)buffer);
1172 if (defn->axf->hashsize > LSB_ENTRY_SIZE)
1173 byteswap256((void *)(buffer + LSB_ENTRY_SIZE));
1174
1175 switch (defn->version) {
1176 case SHA1:
1177 memcpy(output, buffer + 12, defn->axf->hashsize);
1178 break;
1179 #if 0
1180 case SHA2_224:
1181 memcpy(output, buffer + XXX, defn->axf->hashsize);
1182 break;
1183 #endif
1184 case SHA2_256:
1185 memcpy(output, buffer, defn->axf->hashsize);
1186 break;
1187 case SHA2_384:
1188 memcpy(output,
1189 buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize,
1190 defn->axf->hashsize - LSB_ENTRY_SIZE);
1191 memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer,
1192 LSB_ENTRY_SIZE);
1193 break;
1194 case SHA2_512:
1195 memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE);
1196 memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE);
1197 break;
1198 }
1199 }
1200
1201 static void
1202 ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s,
1203 struct cryptop *crp, int error)
1204 {
1205 char ihash[SHA2_512_HASH_LEN /* max hash len */];
1206 union authctx auth_ctx;
1207 const struct auth_hash *axf;
1208
1209 axf = s->hmac.auth_hash;
1210
1211 s->pending--;
1212
1213 if (error != 0) {
1214 crp->crp_etype = error;
1215 goto out;
1216 }
1217
1218 /* Do remaining outer hash over small inner hash in software */
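	/* i.e., H(opad || H(ipad || message)), where the inner hash came from the CCP. */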
1219 axf->Init(&auth_ctx);
1220 axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize);
1221 ccp_sha_copy_result(ihash, s->hmac.res, s->hmac.auth_mode);
1222 #if 0
1223 INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__,
1224 (u_char *)ihash, " ");
1225 #endif
1226 axf->Update(&auth_ctx, ihash, axf->hashsize);
1227 axf->Final(s->hmac.res, &auth_ctx);
1228
1229 if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) {
1230 crypto_copydata(crp, crp->crp_digest_start, s->hmac.hash_len,
1231 ihash);
1232 if (timingsafe_bcmp(s->hmac.res, ihash, s->hmac.hash_len) != 0)
1233 crp->crp_etype = EBADMSG;
1234 } else
1235 crypto_copyback(crp, crp->crp_digest_start, s->hmac.hash_len,
1236 s->hmac.res);
1237
1238 /* Avoid leaking key material */
1239 explicit_bzero(&auth_ctx, sizeof(auth_ctx));
1240 explicit_bzero(s->hmac.res, sizeof(s->hmac.res));
1241
1242 out:
1243 crypto_done(crp);
1244 }
1245
1246 static void
1247 ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1248 int error)
1249 {
1250 struct cryptop *crp;
1251
1252 crp = vcrp;
1253 ccp_do_hmac_done(qp, s, crp, error);
1254 }
1255
1256 static int __must_check
1257 ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1258 const struct ccp_completion_ctx *cctx)
1259 {
1260 device_t dev;
1261 const struct auth_hash *axf;
1262 int error;
1263
1264 dev = qp->cq_softc->dev;
1265 axf = s->hmac.auth_hash;
1266
1267 /*
1268 * Populate the SGL describing inside hash contents. We want to hash
1269 * the ipad (key XOR fixed bit pattern) concatenated with the user
1270 * data.
1271 */
1272 sglist_reset(qp->cq_sg_ulptx);
1273 error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize);
1274 if (error != 0)
1275 return (error);
1276 if (crp->crp_aad_length != 0) {
1277 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1278 crp->crp_aad_start, crp->crp_aad_length);
1279 if (error != 0)
1280 return (error);
1281 }
1282 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1283 crp->crp_payload_start, crp->crp_payload_length);
1284 if (error != 0) {
1285 DPRINTF(dev, "%s: sglist too short\n", __func__);
1286 return (error);
1287 }
1288 /* Populate SGL for output -- use hmac.res buffer. */
1289 sglist_reset(qp->cq_sg_dst);
1290 error = sglist_append(qp->cq_sg_dst, s->hmac.res,
1291 roundup2(axf->hashsize, LSB_ENTRY_SIZE));
1292 if (error != 0)
1293 return (error);
1294
1295 error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst,
1296 cctx);
1297 if (error != 0) {
1298 DPRINTF(dev, "%s: ccp_sha error\n", __func__);
1299 return (error);
1300 }
1301 return (0);
1302 }
1303
1304 int __must_check
1305 ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1306 {
1307 struct ccp_completion_ctx ctx;
1308
1309 ctx.callback_fn = ccp_hmac_done;
1310 ctx.callback_arg = crp;
1311 ctx.session = s;
1312
1313 return (ccp_do_hmac(qp, s, crp, &ctx));
1314 }
1315
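/*
 * Reverse the bytes of a buffer in place; the CCP consumes key and IV
 * material in reverse byte order.
 */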
1316 static void
1317 ccp_byteswap(char *data, size_t len)
1318 {
1319 size_t i;
1320 char t;
1321
1322 len--;
1323 for (i = 0; i < len; i++, len--) {
1324 t = data[i];
1325 data[i] = data[len];
1326 data[len] = t;
1327 }
1328 }
1329
1330 static void
1331 ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1332 int error)
1333 {
1334 struct cryptop *crp;
1335
1336 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1337
1338 crp = vcrp;
1339
1340 s->pending--;
1341
1342 if (error != 0)
1343 crp->crp_etype = error;
1344
1345 DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp);
1346 crypto_done(crp);
1347 }
1348
1349 static void
1350 ccp_collect_iv(struct cryptop *crp, const struct crypto_session_params *csp,
1351 char *iv)
1352 {
1353
1354 crypto_read_iv(crp, iv);
1355
1356 /*
1357 * Append an explicit counter of 1 for GCM.
1358 */
1359 if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16)
1360 *(uint32_t *)&iv[12] = htobe32(1);
1361
1362 if (csp->csp_cipher_alg == CRYPTO_AES_XTS &&
1363 csp->csp_ivlen < AES_BLOCK_LEN)
1364 memset(&iv[csp->csp_ivlen], 0, AES_BLOCK_LEN - csp->csp_ivlen);
1365
1366 /* Reverse order of IV material for HW */
1367 INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__, iv, " ",
1368 csp->csp_ivlen);
1369
1370 /*
1371 	 * For unknown reasons, XTS mode expects the IV in the reverse byte
1372 	 * order compared to every other AES mode.
1373 */
1374 if (csp->csp_cipher_alg != CRYPTO_AES_XTS)
1375 ccp_byteswap(iv, AES_BLOCK_LEN);
1376 }
1377
1378 static int __must_check
1379 ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src,
1380 size_t len)
1381 {
1382 int error;
1383
1384 sglist_reset(qp->cq_sg_ulptx);
1385 error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len);
1386 if (error != 0)
1387 return (error);
1388
1389 error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len,
1390 false, NULL);
1391 return (error);
1392 }
1393
1394 static int __must_check
1395 ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1396 enum ccp_cipher_dir dir, const struct ccp_completion_ctx *cctx)
1397 {
1398 struct ccp_desc *desc;
1399 device_t dev;
1400 unsigned i;
1401 enum ccp_xts_unitsize usize;
1402
1403 /* IV and Key data are already loaded */
1404
1405 dev = qp->cq_softc->dev;
1406
1407 for (i = 0; i < nitems(ccp_xts_unitsize_map); i++)
1408 if (ccp_xts_unitsize_map[i].cxu_size ==
1409 crp->crp_payload_length) {
1410 usize = ccp_xts_unitsize_map[i].cxu_id;
1411 break;
1412 }
1413 if (i >= nitems(ccp_xts_unitsize_map))
1414 return (EINVAL);
1415
1416 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1417 struct sglist_seg *seg;
1418
1419 seg = &qp->cq_sg_ulptx->sg_segs[i];
1420
1421 desc = &qp->desc_ring[qp->cq_tail];
1422 desc->engine = CCP_ENGINE_XTS_AES;
1423 desc->som = (i == 0);
1424 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1425 desc->ioc = (desc->eom && cctx != NULL);
1426 DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n",
1427 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1428 (int)desc->ioc, (int)dir);
1429
1430 if (desc->ioc)
1431 memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1432 sizeof(*cctx));
1433
1434 desc->aes_xts.encrypt = dir;
1435 desc->aes_xts.type = s->blkcipher.cipher_type;
1436 desc->aes_xts.size = usize;
1437
1438 DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__,
1439 qp->cq_tail, (unsigned)desc->aes_xts.type,
1440 (unsigned)desc->aes_xts.size);
1441
1442 desc->length = seg->ss_len;
1443 desc->src_lo = (uint32_t)seg->ss_paddr;
1444 desc->src_hi = (seg->ss_paddr >> 32);
1445 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1446
1447 /* Crypt in-place */
1448 desc->dst_lo = desc->src_lo;
1449 desc->dst_hi = desc->src_hi;
1450 desc->dst_mem = desc->src_mem;
1451
1452 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1453 desc->key_hi = 0;
1454 desc->key_mem = CCP_MEMTYPE_SB;
1455
1456 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1457
1458 qp->cq_tail = (qp->cq_tail + 1) %
1459 (1 << qp->cq_softc->ring_size_order);
1460 }
1461 return (0);
1462 }
1463
1464 static int __must_check
1465 ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s,
1466 struct cryptop *crp, const struct ccp_completion_ctx *cctx)
1467 {
1468 const struct crypto_session_params *csp;
1469 struct ccp_desc *desc;
1470 char *keydata;
1471 device_t dev;
1472 enum ccp_cipher_dir dir;
1473 int error, iv_len;
1474 size_t keydata_len;
1475 unsigned i, j;
1476
1477 dev = qp->cq_softc->dev;
1478
1479 if (s->blkcipher.key_len == 0 || crp->crp_payload_length == 0) {
1480 DPRINTF(dev, "%s: empty\n", __func__);
1481 return (EINVAL);
1482 }
1483 if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) {
1484 DPRINTF(dev, "%s: len modulo: %d\n", __func__,
1485 crp->crp_payload_length);
1486 return (EINVAL);
1487 }
1488
1489 /*
1490 * Individual segments must be multiples of AES block size for the HW
1491 * to process it. Non-compliant inputs aren't bogus, just not doable
1492 * on this hardware.
1493 */
1494 for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++)
1495 if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1496 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1497 qp->cq_sg_crp->sg_segs[i].ss_len);
1498 return (EINVAL);
1499 }
1500
1501 /* Gather IV/nonce data */
1502 csp = crypto_get_params(crp->crp_session);
1503 ccp_collect_iv(crp, csp, s->blkcipher.iv);
1504 iv_len = csp->csp_ivlen;
1505 if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1506 iv_len = AES_BLOCK_LEN;
1507
1508 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1509 dir = CCP_CIPHER_DIR_ENCRYPT;
1510 else
1511 dir = CCP_CIPHER_DIR_DECRYPT;
1512
1513 /* Set up passthrough op(s) to copy IV into LSB */
1514 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1515 s->blkcipher.iv, iv_len);
1516 if (error != 0)
1517 return (error);
1518
1519 /*
1520 * Initialize keydata and keydata_len for GCC. The default case of the
1521 * following switch is impossible to reach, but GCC doesn't know that.
1522 */
1523 keydata_len = 0;
1524 keydata = NULL;
1525
1526 switch (csp->csp_cipher_alg) {
1527 case CRYPTO_AES_XTS:
1528 for (j = 0; j < nitems(ccp_xts_unitsize_map); j++)
1529 if (ccp_xts_unitsize_map[j].cxu_size ==
1530 crp->crp_payload_length)
1531 break;
1532 /* Input buffer must be a supported UnitSize */
1533 if (j >= nitems(ccp_xts_unitsize_map)) {
1534 device_printf(dev, "%s: rejected block size: %u\n",
1535 __func__, crp->crp_payload_length);
1536 return (EOPNOTSUPP);
1537 }
1538 /* FALLTHROUGH */
1539 case CRYPTO_AES_CBC:
1540 case CRYPTO_AES_ICM:
1541 keydata = s->blkcipher.enckey;
1542 keydata_len = s->blkcipher.key_len;
1543 break;
1544 }
1545
1546 INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len,
1547 keydata, " ");
1548 if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1549 INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " ");
1550
1551 /* Reverse order of key material for HW */
1552 ccp_byteswap(keydata, keydata_len);
1553
1554 /* Store key material into LSB to avoid page boundaries */
1555 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) {
1556 /*
1557 * XTS mode uses 2 256-bit vectors for the primary key and the
1558 * tweak key. For 128-bit keys, the vectors are zero-padded.
1559 *
1560 * After byteswapping the combined OCF-provided K1:K2 vector
1561 * above, we need to reverse the order again so the hardware
1562 * gets the swapped keys in the order K1':K2'.
1563 */
1564 error = ccp_do_pst_to_lsb(qp,
1565 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata,
1566 keydata_len / 2);
1567 if (error != 0)
1568 return (error);
1569 error = ccp_do_pst_to_lsb(qp,
1570 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1571 keydata + (keydata_len / 2), keydata_len / 2);
1572
1573 /* Zero-pad 128 bit keys */
1574 if (keydata_len == 32) {
1575 if (error != 0)
1576 return (error);
1577 error = ccp_do_pst_to_lsb(qp,
1578 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) +
1579 keydata_len / 2, g_zeroes, keydata_len / 2);
1580 if (error != 0)
1581 return (error);
1582 error = ccp_do_pst_to_lsb(qp,
1583 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) +
1584 keydata_len / 2, g_zeroes, keydata_len / 2);
1585 }
1586 } else
1587 error = ccp_do_pst_to_lsb(qp,
1588 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata,
1589 keydata_len);
1590 if (error != 0)
1591 return (error);
1592
1593 /*
1594 * Point SGLs at the subset of cryptop buffer contents representing the
1595 * data.
1596 */
1597 sglist_reset(qp->cq_sg_ulptx);
1598 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1599 crp->crp_payload_start, crp->crp_payload_length);
1600 if (error != 0)
1601 return (error);
1602
1603 INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__,
1604 (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " ");
1605
1606 DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail);
1607
1608 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1609 return (EAGAIN);
1610
1611 if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1612 return (ccp_do_xts(qp, s, crp, dir, cctx));
1613
1614 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1615 struct sglist_seg *seg;
1616
1617 seg = &qp->cq_sg_ulptx->sg_segs[i];
1618
1619 desc = &qp->desc_ring[qp->cq_tail];
1620 desc->engine = CCP_ENGINE_AES;
1621 desc->som = (i == 0);
1622 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1623 desc->ioc = (desc->eom && cctx != NULL);
1624 DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n",
1625 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1626 (int)desc->ioc, (int)dir);
1627
1628 if (desc->ioc)
1629 memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1630 sizeof(*cctx));
1631
1632 desc->aes.encrypt = dir;
1633 desc->aes.mode = s->blkcipher.cipher_mode;
1634 desc->aes.type = s->blkcipher.cipher_type;
1635 if (csp->csp_cipher_alg == CRYPTO_AES_ICM)
1636 /*
1637 * Size of CTR value in bits, - 1. ICM mode uses all
1638 * 128 bits as counter.
1639 */
1640 desc->aes.size = 127;
1641
1642 DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__,
1643 qp->cq_tail, (unsigned)desc->aes.mode,
1644 (unsigned)desc->aes.type, (unsigned)desc->aes.size);
1645
1646 desc->length = seg->ss_len;
1647 desc->src_lo = (uint32_t)seg->ss_paddr;
1648 desc->src_hi = (seg->ss_paddr >> 32);
1649 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1650
1651 /* Crypt in-place */
1652 desc->dst_lo = desc->src_lo;
1653 desc->dst_hi = desc->src_hi;
1654 desc->dst_mem = desc->src_mem;
1655
1656 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1657 desc->key_hi = 0;
1658 desc->key_mem = CCP_MEMTYPE_SB;
1659
1660 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1661
1662 qp->cq_tail = (qp->cq_tail + 1) %
1663 (1 << qp->cq_softc->ring_size_order);
1664 }
1665 return (0);
1666 }
1667
1668 int __must_check
1669 ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1670 {
1671 struct ccp_completion_ctx ctx;
1672
1673 ctx.callback_fn = ccp_blkcipher_done;
1674 ctx.session = s;
1675 ctx.callback_arg = crp;
1676
1677 return (ccp_do_blkcipher(qp, s, crp, &ctx));
1678 }
1679
1680 static void
1681 ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1682 int error)
1683 {
1684 struct cryptop *crp;
1685
1686 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1687
1688 crp = vcrp;
1689
1690 ccp_do_hmac_done(qp, s, crp, error);
1691 }
1692
1693 int __must_check
1694 ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1695 {
1696 struct ccp_completion_ctx ctx;
1697 int error;
1698
1699 ctx.callback_fn = ccp_authenc_done;
1700 ctx.session = s;
1701 ctx.callback_arg = crp;
1702
1703 /* Perform first operation */
1704 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1705 error = ccp_do_blkcipher(qp, s, crp, NULL);
1706 else
1707 error = ccp_do_hmac(qp, s, crp, NULL);
1708 if (error != 0)
1709 return (error);
1710
1711 /* Perform second operation */
1712 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1713 error = ccp_do_hmac(qp, s, crp, &ctx);
1714 else
1715 error = ccp_do_blkcipher(qp, s, crp, &ctx);
1716 return (error);
1717 }
1718
1719 static int __must_check
1720 ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s)
1721 {
1722 struct ccp_desc *desc;
1723 struct sglist_seg *seg;
1724 unsigned i;
1725
1726 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1727 return (EAGAIN);
1728
1729 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1730 seg = &qp->cq_sg_ulptx->sg_segs[i];
1731
1732 desc = &qp->desc_ring[qp->cq_tail];
1733
1734 desc->engine = CCP_ENGINE_AES;
1735 desc->aes.mode = CCP_AES_MODE_GHASH;
1736 desc->aes.type = s->blkcipher.cipher_type;
1737 desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD;
1738
1739 desc->som = (i == 0);
1740 desc->length = seg->ss_len;
1741
1742 desc->src_lo = (uint32_t)seg->ss_paddr;
1743 desc->src_hi = (seg->ss_paddr >> 32);
1744 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1745
1746 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1747
1748 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1749 desc->key_mem = CCP_MEMTYPE_SB;
1750
1751 qp->cq_tail = (qp->cq_tail + 1) %
1752 (1 << qp->cq_softc->ring_size_order);
1753 }
1754 return (0);
1755 }
1756
1757 static int __must_check
1758 ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s,
1759 enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom)
1760 {
1761 struct ccp_desc *desc;
1762
1763 if (ccp_queue_get_ring_space(qp) == 0)
1764 return (EAGAIN);
1765
1766 desc = &qp->desc_ring[qp->cq_tail];
1767
1768 desc->engine = CCP_ENGINE_AES;
1769 desc->aes.mode = CCP_AES_MODE_GCTR;
1770 desc->aes.type = s->blkcipher.cipher_type;
1771 desc->aes.encrypt = dir;
1772 desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1;
1773
1774 desc->som = som;
1775 desc->eom = eom;
1776
1777 /* Trailing bytes will be masked off by aes.size above. */
1778 desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN);
1779
1780 desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr;
1781 desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32;
1782 desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM;
1783
1784 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1785
1786 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1787 desc->key_mem = CCP_MEMTYPE_SB;
1788
1789 qp->cq_tail = (qp->cq_tail + 1) %
1790 (1 << qp->cq_softc->ring_size_order);
1791 return (0);
1792 }
1793
1794 static int __must_check
1795 ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s)
1796 {
1797 struct ccp_desc *desc;
1798
1799 if (ccp_queue_get_ring_space(qp) == 0)
1800 return (EAGAIN);
1801
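/*
 * Final GHASH step: hash the length block previously staged in
 * LSB_ENTRY_GHASH_IN and leave the resulting tag in LSB_ENTRY_GHASH, from
 * which ccp_gcm() copies it out with a passthrough operation.
 */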
1802 desc = &qp->desc_ring[qp->cq_tail];
1803
1804 desc->engine = CCP_ENGINE_AES;
1805 desc->aes.mode = CCP_AES_MODE_GHASH;
1806 desc->aes.type = s->blkcipher.cipher_type;
1807 desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL;
1808
1809 desc->length = GMAC_BLOCK_LEN;
1810
1811 desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN);
1812 desc->src_mem = CCP_MEMTYPE_SB;
1813
1814 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1815
1816 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1817 desc->key_mem = CCP_MEMTYPE_SB;
1818
1819 desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH);
1820 desc->dst_mem = CCP_MEMTYPE_SB;
1821
1822 qp->cq_tail = (qp->cq_tail + 1) %
1823 (1 << qp->cq_softc->ring_size_order);
1824 return (0);
1825 }
1826
1827 static void
1828 ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1829 int error)
1830 {
1831 char tag[GMAC_DIGEST_LEN];
1832 struct cryptop *crp;
1833
1834 crp = vcrp;
1835
1836 s->pending--;
1837
1838 if (error != 0) {
1839 crp->crp_etype = error;
1840 goto out;
1841 }
1842
1843 /* Encrypt is done. Decrypt needs to verify tag. */
1844 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1845 goto out;
1846
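/*
 * For decryption the engine's computed tag was copied into
 * s->gmac.final_block by the final passthrough queued in ccp_gcm(); compare
 * it in constant time against the tag supplied with the request.
 */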
1847 /* Copy in message tag. */
1848 crypto_copydata(crp, crp->crp_digest_start, s->gmac.hash_len, tag);
1849
1850 /* Verify tag against computed GMAC */
1851 if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0)
1852 crp->crp_etype = EBADMSG;
1853
1854 out:
1855 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1856 explicit_bzero(&s->gmac.final_block, sizeof(s->gmac.final_block));
1857 crypto_done(crp);
1858 }
1859
1860 int __must_check
1861 ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1862 {
1863 const struct crypto_session_params *csp;
1864 struct ccp_completion_ctx ctx;
1865 enum ccp_cipher_dir dir;
1866 device_t dev;
1867 unsigned i;
1868 int error;
1869
1870 if (s->blkcipher.key_len == 0)
1871 return (EINVAL);
1872
1873 dev = qp->cq_softc->dev;
1874
1875 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1876 dir = CCP_CIPHER_DIR_ENCRYPT;
1877 else
1878 dir = CCP_CIPHER_DIR_DECRYPT;
1879
1880 /* Zero initial GHASH portion of context */
1881 memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv));
1882
1883 /* Gather IV data */
1884 csp = crypto_get_params(crp->crp_session);
1885 ccp_collect_iv(crp, csp, s->blkcipher.iv);
1886
1887 /* Reverse order of key material for HW */
1888 ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len);
1889
1890 /* Prepare input buffer of concatenated lengths for final GHASH */
1891 be64enc(s->gmac.final_block, (uint64_t)crp->crp_aad_length * 8);
1892 be64enc(&s->gmac.final_block[8], (uint64_t)crp->crp_payload_length * 8);
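/*
 * Per the GCM spec, the final GHASH input block is len(AAD) || len(C), each
 * expressed as a 64-bit big-endian count of bits.
 */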
1893
1894 /* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */
1895 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1896 s->blkcipher.iv, 32);
1897 if (error != 0)
1898 return (error);
1899 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1900 s->blkcipher.enckey, s->blkcipher.key_len);
1901 if (error != 0)
1902 return (error);
1903 error = ccp_do_pst_to_lsb(qp,
1904 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block,
1905 GMAC_BLOCK_LEN);
1906 if (error != 0)
1907 return (error);
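/*
 * The LSB now holds everything the engine needs: the IV with a zeroed GHASH
 * context in LSB_ENTRY_IV, the byte-reversed key in LSB_ENTRY_KEY, and the
 * length block in LSB_ENTRY_GHASH_IN.
 */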
1908
1909 /* First step - compute GHASH over AAD */
1910 if (crp->crp_aad_length != 0) {
1911 sglist_reset(qp->cq_sg_ulptx);
1912 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1913 crp->crp_aad_start, crp->crp_aad_length);
1914 if (error != 0)
1915 return (error);
1916
1917 /* The engine cannot process AAD segments that are not block-size multiples. */
1918 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1919 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len %
1920 GMAC_BLOCK_LEN) != 0) {
1921 DPRINTF(dev, "%s: AAD seg modulo: %zu\n",
1922 __func__,
1923 qp->cq_sg_ulptx->sg_segs[i].ss_len);
1924 return (EINVAL);
1925 }
1926
1927 error = ccp_do_ghash_aad(qp, s);
1928 if (error != 0)
1929 return (error);
1930 }
1931
1932 /* Feed data piece by piece into GCTR */
1933 sglist_reset(qp->cq_sg_ulptx);
1934 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1935 crp->crp_payload_start, crp->crp_payload_length);
1936 if (error != 0)
1937 return (error);
1938
1939 /*
1940 * All segments except the last must be even multiples of AES block
1941 * size for the HW to process it. Non-compliant inputs aren't bogus,
1942 * just not doable on this hardware.
1943 *
1944 * XXX: Well, the hardware will produce a valid tag for shorter final
1945 * segment inputs, but it will still write out a block-sized plaintext
1946 * or ciphertext chunk. For a typical CRP this tramples trailing data,
1947 * including the provided message tag. So, reject such inputs for now.
1948 */
1949 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1950 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1951 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1952 qp->cq_sg_ulptx->sg_segs[i].ss_len);
1953 return (EINVAL);
1954 }
1955
1956 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1957 struct sglist_seg *seg;
1958
1959 seg = &qp->cq_sg_ulptx->sg_segs[i];
1960 error = ccp_do_gctr(qp, s, dir, seg,
1961 (i == 0 && crp->crp_aad_length == 0),
1962 i == (qp->cq_sg_ulptx->sg_nseg - 1));
1963 if (error != 0)
1964 return (error);
1965 }
1966
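/*
 * GCM forms the tag by encrypting the final GHASH with the initial counter
 * block (T = E_K(J0) ^ GHASH), so the original IV presumably has to be
 * restored here for the GHASH-final step to produce the right tag.
 */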
1967 /* Send just initial IV (not GHASH!) to LSB again */
1968 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1969 s->blkcipher.iv, AES_BLOCK_LEN);
1970 if (error != 0)
1971 return (error);
1972
1973 ctx.callback_fn = ccp_gcm_done;
1974 ctx.session = s;
1975 ctx.callback_arg = crp;
1976
1977 /* Compute final hash and copy result back */
1978 error = ccp_do_ghash_final(qp, s);
1979 if (error != 0)
1980 return (error);
1981
1982 /* When encrypting, copy computed tag out to caller buffer. */
1983 sglist_reset(qp->cq_sg_ulptx);
1984 if (dir == CCP_CIPHER_DIR_ENCRYPT)
1985 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1986 crp->crp_digest_start, s->gmac.hash_len);
1987 else
1988 /*
1989 * For decrypting, copy the computed tag out to our session
1990 * buffer to verify in our callback.
1991 */
1992 error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block,
1993 s->gmac.hash_len);
1994 if (error != 0)
1995 return (error);
1996 error = ccp_passthrough_sgl(qp,
1997 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx,
1998 s->gmac.hash_len, true, &ctx);
1999 return (error);
2000 }
2001
2002 #define MAX_TRNG_RETRIES 10
2003 u_int
2004 random_ccp_read(void *v, u_int c)
2005 {
2006 uint32_t *buf;
2007 u_int i, j;
2008
2009 KASSERT(c % sizeof(*buf) == 0, ("%u not a multiple of sizeof(uint32_t)", c));
2010
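/*
 * A zero value from the TRNG output register is taken to mean no entropy was
 * available yet; retry each word up to MAX_TRNG_RETRIES times and report a
 * zero-length read if the engine never produces output.
 */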
2011 buf = v;
2012 for (i = c; i > 0; i -= sizeof(*buf)) {
2013 for (j = 0; j < MAX_TRNG_RETRIES; j++) {
2014 *buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET);
2015 if (*buf != 0)
2016 break;
2017 }
2018 if (j == MAX_TRNG_RETRIES)
2019 return (0);
2020 buf++;
2021 }
2022 return (c);
2023
2024 }
2025
2026 #ifdef DDB
2027 void
2028 db_ccp_show_hw(struct ccp_softc *sc)
2029 {
2030
2031 db_printf(" queue mask: 0x%x\n",
2032 ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET));
2033 db_printf(" queue prio: 0x%x\n",
2034 ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET));
2035 db_printf(" reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET));
2036 db_printf(" trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET));
2037 db_printf(" cmd timeout: 0x%x\n",
2038 ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET));
2039 db_printf(" lsb public mask lo: 0x%x\n",
2040 ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET));
2041 db_printf(" lsb public mask hi: 0x%x\n",
2042 ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET));
2043 db_printf(" lsb private mask lo: 0x%x\n",
2044 ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET));
2045 db_printf(" lsb private mask hi: 0x%x\n",
2046 ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET));
2047 db_printf(" version: 0x%x\n", ccp_read_4(sc, VERSION_REG));
2048 }
2049
2050 void
2051 db_ccp_show_queue_hw(struct ccp_queue *qp)
2052 {
2053 const struct ccp_error_code *ec;
2054 struct ccp_softc *sc;
2055 uint32_t status, error, esource, faultblock, headlo, qcontrol;
2056 unsigned q, i;
2057
2058 sc = qp->cq_softc;
2059 q = qp->cq_qindex;
2060
2061 qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE);
2062 db_printf(" qcontrol: 0x%x%s%s\n", qcontrol,
2063 (qcontrol & CMD_Q_RUN) ? " RUN" : "",
2064 (qcontrol & CMD_Q_HALTED) ? " HALTED" : "");
2065 db_printf(" tail_lo: 0x%x\n",
2066 ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE));
2067 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
2068 db_printf(" head_lo: 0x%x\n", headlo);
2069 db_printf(" int enable: 0x%x\n",
2070 ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE));
2071 db_printf(" interrupt status: 0x%x\n",
2072 ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE));
2073 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
2074 db_printf(" status: 0x%x\n", status);
2075 db_printf(" int stats: 0x%x\n",
2076 ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE));
2077
2078 error = status & STATUS_ERROR_MASK;
2079 if (error == 0)
2080 return;
2081
2082 esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
2083 STATUS_ERRORSOURCE_MASK;
2084 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
2085 STATUS_VLSB_FAULTBLOCK_MASK;
2086
2087 ec = NULL;
2088 for (i = 0; i < nitems(ccp_error_codes); i++)
2089 if (ccp_error_codes[i].ce_code == error)
2090 break;
2091 if (i < nitems(ccp_error_codes))
2092 ec = &ccp_error_codes[i];
2093
2094 db_printf(" Error: %s (%u) Source: %u Faulting LSB block: %u\n",
2095 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
2096 faultblock);
2097 if (ec != NULL)
2098 db_printf(" Error description: %s\n", ec->ce_desc);
2099
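/*
 * On error, the queue head register points at the descriptor that faulted;
 * convert its bus address back into a ring index so the raw descriptor can
 * be dumped.
 */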
2100 i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
2101 db_printf(" Bad descriptor idx: %u contents:\n %32D\n", i,
2102 (void *)&qp->desc_ring[i], " ");
2103 }
2104 #endif
2105