1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2017 Chelsio Communications, Inc. 5 * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org> 6 * All rights reserved. 7 * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org> 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_ddb.h" 35 36 #include <sys/param.h> 37 #include <sys/bus.h> 38 #include <sys/lock.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/mutex.h> 42 #include <sys/module.h> 43 #include <sys/rman.h> 44 #include <sys/sglist.h> 45 #include <sys/sysctl.h> 46 47 #ifdef DDB 48 #include <ddb/ddb.h> 49 #endif 50 51 #include <dev/pci/pcireg.h> 52 #include <dev/pci/pcivar.h> 53 54 #include <machine/bus.h> 55 #include <machine/resource.h> 56 #include <machine/vmparam.h> 57 58 #include <opencrypto/cryptodev.h> 59 #include <opencrypto/xform.h> 60 61 #include <vm/vm.h> 62 #include <vm/pmap.h> 63 64 #include "cryptodev_if.h" 65 66 #include "ccp.h" 67 #include "ccp_hardware.h" 68 #include "ccp_lsb.h" 69 70 CTASSERT(sizeof(struct ccp_desc) == 32); 71 72 static struct ccp_xts_unitsize_map_entry { 73 enum ccp_xts_unitsize cxu_id; 74 unsigned cxu_size; 75 } ccp_xts_unitsize_map[] = { 76 { CCP_XTS_AES_UNIT_SIZE_16, 16 }, 77 { CCP_XTS_AES_UNIT_SIZE_512, 512 }, 78 { CCP_XTS_AES_UNIT_SIZE_1024, 1024 }, 79 { CCP_XTS_AES_UNIT_SIZE_2048, 2048 }, 80 { CCP_XTS_AES_UNIT_SIZE_4096, 4096 }, 81 }; 82 83 SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 84 "ccp node"); 85 86 unsigned g_ccp_ring_order = 11; 87 SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order, 88 0, "Set CCP ring order. (1 << this) == ring size. 
Min: 6, Max: 16"); 89 90 /* 91 * Zero buffer, sufficient for padding LSB entries, that does not span a page 92 * boundary 93 */ 94 static const char g_zeroes[32] __aligned(32); 95 96 static inline uint32_t 97 ccp_read_4(struct ccp_softc *sc, uint32_t offset) 98 { 99 return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset)); 100 } 101 102 static inline void 103 ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value) 104 { 105 bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value); 106 } 107 108 static inline uint32_t 109 ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset) 110 { 111 /* 112 * Each queue gets its own 4kB register space. Queue 0 is at 0x1000. 113 */ 114 return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset)); 115 } 116 117 static inline void 118 ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset, 119 uint32_t value) 120 { 121 ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value); 122 } 123 124 void 125 ccp_queue_write_tail(struct ccp_queue *qp) 126 { 127 ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE, 128 ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail)); 129 } 130 131 /* 132 * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of 133 * that entry for the queue's private LSB region. 134 */ 135 static inline uint8_t 136 ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry) 137 { 138 return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry)); 139 } 140 141 /* 142 * Given a queue and a reserved LSB entry index, compute the LSB *address* of 143 * that entry for the queue's private LSB region. 144 */ 145 static inline uint32_t 146 ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry) 147 { 148 return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE); 149 } 150 151 /* 152 * Some terminology: 153 * 154 * LSB - Local Storage Block 155 * ========================= 156 * 157 * 8 segments/regions, each containing 16 entries. 158 * 159 * Each entry contains 256 bits (32 bytes). 160 * 161 * Segments are virtually addressed in commands, but accesses cannot cross 162 * segment boundaries. Virtual map uses an identity mapping by default 163 * (virtual segment N corresponds to physical segment N). 164 * 165 * Access to a physical region can be restricted to any subset of all five 166 * queues. 167 * 168 * "Pass-through" mode 169 * =================== 170 * 171 * Pass-through is a generic DMA engine, much like ioat(4). Some nice 172 * features: 173 * 174 * - Supports byte-swapping for endian conversion (32- or 256-bit words) 175 * - AND, OR, XOR with fixed 256-bit mask 176 * - CRC32 of data (may be used in tandem with bswap, but not bit operations) 177 * - Read/write of LSB 178 * - Memset 179 * 180 * If bit manipulation mode is enabled, input must be a multiple of 256 bits 181 * (32 bytes). 182 * 183 * If byte-swapping is enabled, input must be a multiple of the word size. 184 * 185 * Zlib mode -- only usable from one queue at a time, single job at a time. 186 * ======================================================================== 187 * 188 * Only usable from private host, aka PSP? Not host processor? 189 * 190 * RNG. 191 * ==== 192 * 193 * Raw bits are conditioned with AES and fed through CTR_DRBG. Output goes in 194 * a ring buffer readable by software. 
 *
 * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are
 * implemented on the raw input stream and may be enabled to verify min-entropy
 * of 0.5 bits per bit.
 */

static void
ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *baddr;

	KASSERT(error == 0, ("%s: error:%d", __func__, error));
	baddr = arg;
	*baddr = segs->ds_addr;
}

static int
ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue)
{
	struct ccp_softc *sc;
	struct ccp_queue *qp;
	void *desc;
	size_t ringsz, num_descriptors;
	int error;

	desc = NULL;
	sc = device_get_softc(dev);
	qp = &sc->queues[queue];

	/*
	 * Don't bother allocating a ring for queues the host isn't allowed to
	 * drive.
	 */
	if ((sc->valid_queues & (1 << queue)) == 0)
		return (0);

	ccp_queue_decode_lsb_regions(sc, lsbmask, queue);

	/* Ignore queues that do not have any LSB access. */
	if (qp->lsb_mask == 0) {
		device_printf(dev, "Ignoring queue %u with no LSB access\n",
		    queue);
		sc->valid_queues &= ~(1 << queue);
		return (0);
	}

	num_descriptors = 1 << sc->ring_size_order;
	ringsz = sizeof(struct ccp_desc) * num_descriptors;

	/*
	 * "Queue_Size" is order - 1.
	 *
	 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits.
	 */
	error = bus_dma_tag_create(bus_get_dma_tag(dev),
	    1 << (5 + sc->ring_size_order),
#if defined(__i386__) && !defined(PAE)
	    0, BUS_SPACE_MAXADDR,
#else
	    (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT,
#endif
	    BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1,
	    ringsz, 0, NULL, NULL, &qp->ring_desc_tag);
	if (error != 0)
		goto out;

	error = bus_dmamem_alloc(qp->ring_desc_tag, &desc,
	    BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map);
	if (error != 0)
		goto out;

	error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc,
	    ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK);
	if (error != 0)
		goto out;

	qp->desc_ring = desc;
	qp->completions_ring = malloc(num_descriptors *
	    sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK);

	/* Zero control register; among other things, clears the RUN flag. */
	qp->qcontrol = 0;
	ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
	ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0);

	/* Clear any leftover interrupt status flags */
	ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE,
	    ALL_INTERRUPTS);

	qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT;

	ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE,
	    (uint32_t)qp->desc_ring_bus_addr);
	ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE,
	    (uint32_t)qp->desc_ring_bus_addr);

	/*
	 * Enable completion interrupts, as well as error or administrative
	 * halt interrupts.  We don't use administrative halts, but they
	 * shouldn't trip unless we do, so it ought to be harmless.
295 */ 296 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 297 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED); 298 299 qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT; 300 qp->qcontrol |= CMD_Q_RUN; 301 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol); 302 303 out: 304 if (error != 0) { 305 if (qp->desc_ring != NULL) 306 bus_dmamap_unload(qp->ring_desc_tag, 307 qp->ring_desc_map); 308 if (desc != NULL) 309 bus_dmamem_free(qp->ring_desc_tag, desc, 310 qp->ring_desc_map); 311 if (qp->ring_desc_tag != NULL) 312 bus_dma_tag_destroy(qp->ring_desc_tag); 313 } 314 return (error); 315 } 316 317 static void 318 ccp_hw_detach_queue(device_t dev, unsigned queue) 319 { 320 struct ccp_softc *sc; 321 struct ccp_queue *qp; 322 323 sc = device_get_softc(dev); 324 qp = &sc->queues[queue]; 325 326 /* 327 * Don't bother allocating a ring for queues the host isn't allowed to 328 * drive. 329 */ 330 if ((sc->valid_queues & (1 << queue)) == 0) 331 return; 332 333 free(qp->completions_ring, M_CCP); 334 bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map); 335 bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map); 336 bus_dma_tag_destroy(qp->ring_desc_tag); 337 } 338 339 static int 340 ccp_map_pci_bar(device_t dev) 341 { 342 struct ccp_softc *sc; 343 344 sc = device_get_softc(dev); 345 346 sc->pci_resource_id = PCIR_BAR(2); 347 sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 348 &sc->pci_resource_id, RF_ACTIVE); 349 if (sc->pci_resource == NULL) { 350 device_printf(dev, "unable to allocate pci resource\n"); 351 return (ENODEV); 352 } 353 354 sc->pci_resource_id_msix = PCIR_BAR(5); 355 sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 356 &sc->pci_resource_id_msix, RF_ACTIVE); 357 if (sc->pci_resource_msix == NULL) { 358 device_printf(dev, "unable to allocate pci resource msix\n"); 359 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id, 360 sc->pci_resource); 361 return (ENODEV); 362 } 363 364 sc->pci_bus_tag = rman_get_bustag(sc->pci_resource); 365 sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource); 366 return (0); 367 } 368 369 static void 370 ccp_unmap_pci_bar(device_t dev) 371 { 372 struct ccp_softc *sc; 373 374 sc = device_get_softc(dev); 375 376 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix, 377 sc->pci_resource_msix); 378 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id, 379 sc->pci_resource); 380 } 381 382 const static struct ccp_error_code { 383 uint8_t ce_code; 384 const char *ce_name; 385 int ce_errno; 386 const char *ce_desc; 387 } ccp_error_codes[] = { 388 { 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" }, 389 { 0x03, "ILLEGAL_FUNCTION_TYPE", EIO, 390 "A non-supported function type was specified" }, 391 { 0x04, "ILLEGAL_FUNCTION_MODE", EIO, 392 "A non-supported function mode was specified" }, 393 { 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO, 394 "A CMAC type was specified when ENCRYPT was not specified" }, 395 { 0x06, "ILLEGAL_FUNCTION_SIZE", EIO, 396 "A non-supported function size was specified.\n" 397 "AES-CFB: Size was not 127 or 7;\n" 398 "3DES-CFB: Size was not 7;\n" 399 "RSA: See supported size table (7.4.2);\n" 400 "ECC: Size was greater than 576 bits." 
	},
	{ 0x07, "Zlib_MISSING_INIT_EOM", EIO,
	    "Zlib command does not have INIT and EOM set" },
	{ 0x08, "ILLEGAL_FUNCTION_RSVD", EIO,
	    "Reserved bits in a function specification were not 0" },
	{ 0x09, "ILLEGAL_BUFFER_LENGTH", EIO,
	    "The buffer length specified was not correct for the selected engine"
	},
	{ 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n"
	    "Undefined VLSB segment mapping or\n"
	    "mapping to unsupported LSB segment id" },
	{ 0x0B, "ILLEGAL_MEM_ADDR", EFAULT,
	    "The specified source/destination buffer access was illegal:\n"
	    "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n"
	    "Data buffer not completely contained within a single segment; or\n"
	    "Pointer with Fixed=1 is not 32-bit aligned; or\n"
	    "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory."
	},
	{ 0x0C, "ILLEGAL_MEM_SEL", EIO,
	    "A src_mem, dst_mem, or key_mem field was illegal:\n"
	    "A field was set to a reserved value; or\n"
	    "A public command attempted to reference AXI1 (local) or GART memory; or\n"
	    "A Zlib command attempted to use the LSB." },
	{ 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO,
	    "The specified context location was illegal:\n"
	    "Context located in a LSB location disallowed by the LSB protection masks; or\n"
	    "Context not completely contained within a single segment." },
	{ 0x0E, "ILLEGAL_KEY_ADDR", EIO,
	    "The specified key location was illegal:\n"
	    "Key located in a LSB location disallowed by the LSB protection masks; or\n"
	    "Key not completely contained within a single segment." },
	{ 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" },
	/* XXX Could fill out these descriptions too */
	{ 0x13, "IDMA0_AXI_SLVERR", EIO, "" },
	{ 0x14, "IDMA0_AXI_DECERR", EIO, "" },
	{ 0x16, "IDMA1_AXI_SLVERR", EIO, "" },
	{ 0x17, "IDMA1_AXI_DECERR", EIO, "" },
	{ 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" },
	{ 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" },
	{ 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" },
	{ 0x1D, "ZLIB_EXTRA_DATA", EIO, "" },
	{ 0x1E, "ZLIB_BTYPE", EIO, "" },
	{ 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" },
	{ 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" },
	{ 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" },
	{ 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" },
	{ 0x24, "ZLIB_LIMIT_REACHED", EIO, "" },
	{ 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" },
	{ 0x26, "ODMA0_AXI_SLVERR", EIO, "" },
	{ 0x27, "ODMA0_AXI_DECERR", EIO, "" },
	{ 0x29, "ODMA1_AXI_SLVERR", EIO, "" },
	{ 0x2A, "ODMA1_AXI_DECERR", EIO, "" },
	{ 0x2B, "LSB_PARITY_ERR", EIO,
	    "A read from the LSB encountered a parity error" },
};

static void
ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc)
{
	struct ccp_completion_ctx *cctx;
	const struct ccp_error_code *ec;
	struct ccp_softc *sc;
	uint32_t status, error, esource, faultblock;
	unsigned q, idx;
	int errno;

	sc = qp->cq_softc;
	q = qp->cq_qindex;

	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);

	error = status & STATUS_ERROR_MASK;

	/* Decode error status */
	ec = NULL;
	for (idx = 0; idx < nitems(ccp_error_codes); idx++)
		if (ccp_error_codes[idx].ce_code == error) {
			ec = &ccp_error_codes[idx];
			break;
		}

	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
	    STATUS_ERRORSOURCE_MASK;
	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
	    STATUS_VLSB_FAULTBLOCK_MASK;
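	/* Report the decoded error code, source, and faulting LSB block. */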
device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n", 486 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource, 487 faultblock); 488 if (ec != NULL) 489 device_printf(sc->dev, "Error description: %s\n", ec->ce_desc); 490 491 /* TODO Could format the desc nicely here */ 492 idx = desc - qp->desc_ring; 493 DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx, 494 (const void *)desc, " "); 495 496 /* 497 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status, 498 * Zlib Decompress status may be interesting. 499 */ 500 501 while (true) { 502 /* Keep unused descriptors zero for next use. */ 503 memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx])); 504 505 cctx = &qp->completions_ring[idx]; 506 507 /* 508 * Restart procedure described in § 14.2.5. Could be used by HoC if we 509 * used that. 510 * 511 * Advance HEAD_LO past bad descriptor + any remaining in 512 * transaction manually, then restart queue. 513 */ 514 idx = (idx + 1) % (1 << sc->ring_size_order); 515 516 /* Callback function signals end of transaction */ 517 if (cctx->callback_fn != NULL) { 518 if (ec == NULL) 519 errno = EIO; 520 else 521 errno = ec->ce_errno; 522 /* TODO More specific error code */ 523 cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno); 524 cctx->callback_fn = NULL; 525 break; 526 } 527 } 528 529 qp->cq_head = idx; 530 qp->cq_waiting = false; 531 wakeup(&qp->cq_tail); 532 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head); 533 ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE, 534 (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE)); 535 ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol); 536 DPRINTF(sc->dev, "%s: Restarted queue\n", __func__); 537 } 538 539 static void 540 ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints) 541 { 542 struct ccp_completion_ctx *cctx; 543 struct ccp_softc *sc; 544 const struct ccp_desc *desc; 545 uint32_t headlo, idx; 546 unsigned q, completed; 547 548 sc = qp->cq_softc; 549 q = qp->cq_qindex; 550 551 mtx_lock(&qp->cq_lock); 552 553 /* 554 * Hardware HEAD_LO points to the first incomplete descriptor. Process 555 * any submitted and completed descriptors, up to but not including 556 * HEAD_LO. 557 */ 558 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE); 559 idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE; 560 561 DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx, 562 qp->cq_head); 563 completed = 0; 564 while (qp->cq_head != idx) { 565 DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head); 566 567 cctx = &qp->completions_ring[qp->cq_head]; 568 if (cctx->callback_fn != NULL) { 569 cctx->callback_fn(qp, cctx->session, 570 cctx->callback_arg, 0); 571 cctx->callback_fn = NULL; 572 } 573 574 /* Keep unused descriptors zero for next use. */ 575 memset(&qp->desc_ring[qp->cq_head], 0, 576 sizeof(qp->desc_ring[qp->cq_head])); 577 578 qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order); 579 completed++; 580 } 581 if (completed > 0) { 582 qp->cq_waiting = false; 583 wakeup(&qp->cq_tail); 584 } 585 586 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head); 587 588 /* 589 * Desc points to the first incomplete descriptor, at the time we read 590 * HEAD_LO. If there was an error flagged in interrupt status, the HW 591 * will not proceed past the erroneous descriptor by itself. 
592 */ 593 desc = &qp->desc_ring[idx]; 594 if ((ints & INT_ERROR) != 0) 595 ccp_intr_handle_error(qp, desc); 596 597 mtx_unlock(&qp->cq_lock); 598 } 599 600 static void 601 ccp_intr_handler(void *arg) 602 { 603 struct ccp_softc *sc = arg; 604 size_t i; 605 uint32_t ints; 606 607 DPRINTF(sc->dev, "%s: interrupt\n", __func__); 608 609 /* 610 * We get one global interrupt per PCI device, shared over all of 611 * its queues. Scan each valid queue on interrupt for flags indicating 612 * activity. 613 */ 614 for (i = 0; i < nitems(sc->queues); i++) { 615 if ((sc->valid_queues & (1 << i)) == 0) 616 continue; 617 618 ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE); 619 if (ints == 0) 620 continue; 621 622 #if 0 623 DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__, 624 (unsigned)ints, i); 625 #endif 626 /* Write back 1s to clear interrupt status bits. */ 627 ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints); 628 629 /* 630 * If there was an error, we still need to run completions on 631 * any descriptors prior to the error. The completions handler 632 * invoked below will also handle the error descriptor. 633 */ 634 if ((ints & (INT_COMPLETION | INT_ERROR)) != 0) 635 ccp_intr_run_completions(&sc->queues[i], ints); 636 637 if ((ints & INT_QUEUE_STOPPED) != 0) 638 device_printf(sc->dev, "%s: queue %zu stopped\n", 639 __func__, i); 640 } 641 642 /* Re-enable interrupts after processing */ 643 for (i = 0; i < nitems(sc->queues); i++) { 644 if ((sc->valid_queues & (1 << i)) == 0) 645 continue; 646 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 647 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED); 648 } 649 } 650 651 static int 652 ccp_intr_filter(void *arg) 653 { 654 struct ccp_softc *sc = arg; 655 size_t i; 656 657 /* TODO: Split individual queues into separate taskqueues? 
*/ 658 for (i = 0; i < nitems(sc->queues); i++) { 659 if ((sc->valid_queues & (1 << i)) == 0) 660 continue; 661 662 /* Mask interrupt until task completes */ 663 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0); 664 } 665 666 return (FILTER_SCHEDULE_THREAD); 667 } 668 669 static int 670 ccp_setup_interrupts(struct ccp_softc *sc) 671 { 672 uint32_t nvec; 673 int rid, error, n, ridcopy; 674 675 n = pci_msix_count(sc->dev); 676 if (n < 1) { 677 device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n); 678 return (ENXIO); 679 } 680 681 nvec = n; 682 error = pci_alloc_msix(sc->dev, &nvec); 683 if (error != 0) { 684 device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__, 685 error); 686 return (error); 687 } 688 if (nvec < 1) { 689 device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n", 690 __func__); 691 return (ENXIO); 692 } 693 if (nvec > nitems(sc->intr_res)) { 694 device_printf(sc->dev, "%s: too many vectors: %u\n", __func__, 695 nvec); 696 nvec = nitems(sc->intr_res); 697 } 698 699 for (rid = 1; rid < 1 + nvec; rid++) { 700 ridcopy = rid; 701 sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev, 702 SYS_RES_IRQ, &ridcopy, RF_ACTIVE); 703 if (sc->intr_res[rid - 1] == NULL) { 704 device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n", 705 __func__); 706 return (ENXIO); 707 } 708 709 sc->intr_tag[rid - 1] = NULL; 710 error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1], 711 INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter, 712 ccp_intr_handler, sc, &sc->intr_tag[rid - 1]); 713 if (error != 0) 714 device_printf(sc->dev, "%s: setup_intr: %d\n", 715 __func__, error); 716 } 717 sc->intr_count = nvec; 718 719 return (error); 720 } 721 722 static void 723 ccp_release_interrupts(struct ccp_softc *sc) 724 { 725 unsigned i; 726 727 for (i = 0; i < sc->intr_count; i++) { 728 if (sc->intr_tag[i] != NULL) 729 bus_teardown_intr(sc->dev, sc->intr_res[i], 730 sc->intr_tag[i]); 731 if (sc->intr_res[i] != NULL) 732 bus_release_resource(sc->dev, SYS_RES_IRQ, 733 rman_get_rid(sc->intr_res[i]), sc->intr_res[i]); 734 } 735 736 pci_release_msi(sc->dev); 737 } 738 739 int 740 ccp_hw_attach(device_t dev) 741 { 742 struct ccp_softc *sc; 743 uint64_t lsbmask; 744 uint32_t version, lsbmasklo, lsbmaskhi; 745 unsigned queue_idx, j; 746 int error; 747 bool bars_mapped, interrupts_setup; 748 749 queue_idx = 0; 750 bars_mapped = interrupts_setup = false; 751 sc = device_get_softc(dev); 752 753 error = ccp_map_pci_bar(dev); 754 if (error != 0) { 755 device_printf(dev, "%s: couldn't map BAR(s)\n", __func__); 756 goto out; 757 } 758 bars_mapped = true; 759 760 error = pci_enable_busmaster(dev); 761 if (error != 0) { 762 device_printf(dev, "%s: couldn't enable busmaster\n", 763 __func__); 764 goto out; 765 } 766 767 sc->ring_size_order = g_ccp_ring_order; 768 if (sc->ring_size_order < 6 || sc->ring_size_order > 16) { 769 device_printf(dev, "bogus hw.ccp.ring_order\n"); 770 error = EINVAL; 771 goto out; 772 } 773 sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET); 774 775 version = ccp_read_4(sc, VERSION_REG); 776 if ((version & VERSION_NUM_MASK) < 5) { 777 device_printf(dev, 778 "driver supports version 5 and later hardware\n"); 779 error = ENXIO; 780 goto out; 781 } 782 783 error = ccp_setup_interrupts(sc); 784 if (error != 0) 785 goto out; 786 interrupts_setup = true; 787 788 sc->hw_version = version & VERSION_NUM_MASK; 789 sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) & 790 VERSION_NUMVQM_MASK; 791 sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) & 792 VERSION_LSBSIZE_MASK; 793 
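	/* Capability bits of the version register are recorded in hw_features. */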
sc->hw_features = version & VERSION_CAP_MASK; 794 795 /* 796 * Copy private LSB mask to public registers to enable access to LSB 797 * from all queues allowed by BIOS. 798 */ 799 lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET); 800 lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET); 801 ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo); 802 ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi); 803 804 lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo; 805 806 for (; queue_idx < nitems(sc->queues); queue_idx++) { 807 error = ccp_hw_attach_queue(dev, lsbmask, queue_idx); 808 if (error != 0) { 809 device_printf(dev, "%s: couldn't attach queue %u\n", 810 __func__, queue_idx); 811 goto out; 812 } 813 } 814 ccp_assign_lsb_regions(sc, lsbmask); 815 816 out: 817 if (error != 0) { 818 if (interrupts_setup) 819 ccp_release_interrupts(sc); 820 for (j = 0; j < queue_idx; j++) 821 ccp_hw_detach_queue(dev, j); 822 if (sc->ring_size_order != 0) 823 pci_disable_busmaster(dev); 824 if (bars_mapped) 825 ccp_unmap_pci_bar(dev); 826 } 827 return (error); 828 } 829 830 void 831 ccp_hw_detach(device_t dev) 832 { 833 struct ccp_softc *sc; 834 unsigned i; 835 836 sc = device_get_softc(dev); 837 838 for (i = 0; i < nitems(sc->queues); i++) 839 ccp_hw_detach_queue(dev, i); 840 841 ccp_release_interrupts(sc); 842 pci_disable_busmaster(dev); 843 ccp_unmap_pci_bar(dev); 844 } 845 846 static int __must_check 847 ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst, 848 enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type, 849 bus_size_t len, enum ccp_passthru_byteswap swapmode, 850 enum ccp_passthru_bitwise bitmode, bool interrupt, 851 const struct ccp_completion_ctx *cctx) 852 { 853 struct ccp_desc *desc; 854 855 if (ccp_queue_get_ring_space(qp) == 0) 856 return (EAGAIN); 857 858 desc = &qp->desc_ring[qp->cq_tail]; 859 860 memset(desc, 0, sizeof(*desc)); 861 desc->engine = CCP_ENGINE_PASSTHRU; 862 863 desc->pt.ioc = interrupt; 864 desc->pt.byteswap = swapmode; 865 desc->pt.bitwise = bitmode; 866 desc->length = len; 867 868 desc->src_lo = (uint32_t)src; 869 desc->src_hi = src >> 32; 870 desc->src_mem = src_type; 871 872 desc->dst_lo = (uint32_t)dst; 873 desc->dst_hi = dst >> 32; 874 desc->dst_mem = dst_type; 875 876 if (bitmode != CCP_PASSTHRU_BITWISE_NOOP) 877 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY); 878 879 if (cctx != NULL) 880 memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx)); 881 882 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); 883 return (0); 884 } 885 886 static int __must_check 887 ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb, 888 struct sglist *sgl, bus_size_t len, bool interrupt, 889 const struct ccp_completion_ctx *cctx) 890 { 891 struct sglist_seg *seg; 892 size_t i, remain, nb; 893 int error; 894 895 remain = len; 896 for (i = 0; i < sgl->sg_nseg && remain != 0; i++) { 897 seg = &sgl->sg_segs[i]; 898 /* crp lengths are int, so 32-bit min() is ok. 
*/ 899 nb = min(remain, seg->ss_len); 900 901 if (tolsb) 902 error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB, 903 seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb, 904 CCP_PASSTHRU_BYTESWAP_NOOP, 905 CCP_PASSTHRU_BITWISE_NOOP, 906 (nb == remain) && interrupt, cctx); 907 else 908 error = ccp_passthrough(qp, seg->ss_paddr, 909 CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb, 910 CCP_PASSTHRU_BYTESWAP_NOOP, 911 CCP_PASSTHRU_BITWISE_NOOP, 912 (nb == remain) && interrupt, cctx); 913 if (error != 0) 914 return (error); 915 916 remain -= nb; 917 } 918 return (0); 919 } 920 921 /* 922 * Note that these vectors are in reverse of the usual order. 923 */ 924 const struct SHA_vectors { 925 uint32_t SHA1[8]; 926 uint32_t SHA224[8]; 927 uint32_t SHA256[8]; 928 uint64_t SHA384[8]; 929 uint64_t SHA512[8]; 930 } SHA_H __aligned(PAGE_SIZE) = { 931 .SHA1 = { 932 0xc3d2e1f0ul, 933 0x10325476ul, 934 0x98badcfeul, 935 0xefcdab89ul, 936 0x67452301ul, 937 0, 938 0, 939 0, 940 }, 941 .SHA224 = { 942 0xbefa4fa4ul, 943 0x64f98fa7ul, 944 0x68581511ul, 945 0xffc00b31ul, 946 0xf70e5939ul, 947 0x3070dd17ul, 948 0x367cd507ul, 949 0xc1059ed8ul, 950 }, 951 .SHA256 = { 952 0x5be0cd19ul, 953 0x1f83d9abul, 954 0x9b05688cul, 955 0x510e527ful, 956 0xa54ff53aul, 957 0x3c6ef372ul, 958 0xbb67ae85ul, 959 0x6a09e667ul, 960 }, 961 .SHA384 = { 962 0x47b5481dbefa4fa4ull, 963 0xdb0c2e0d64f98fa7ull, 964 0x8eb44a8768581511ull, 965 0x67332667ffc00b31ull, 966 0x152fecd8f70e5939ull, 967 0x9159015a3070dd17ull, 968 0x629a292a367cd507ull, 969 0xcbbb9d5dc1059ed8ull, 970 }, 971 .SHA512 = { 972 0x5be0cd19137e2179ull, 973 0x1f83d9abfb41bd6bull, 974 0x9b05688c2b3e6c1full, 975 0x510e527fade682d1ull, 976 0xa54ff53a5f1d36f1ull, 977 0x3c6ef372fe94f82bull, 978 0xbb67ae8584caa73bull, 979 0x6a09e667f3bcc908ull, 980 }, 981 }; 982 /* 983 * Ensure vectors do not cross a page boundary. 984 * 985 * Disabled due to a new Clang error: "expression is not an integral constant 986 * expression." GCC (cross toolchain) seems to handle this assertion with 987 * _Static_assert just fine. 
988 */ 989 #if 0 990 CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H)); 991 #endif 992 993 const struct SHA_Defn { 994 enum sha_version version; 995 const void *H_vectors; 996 size_t H_size; 997 struct auth_hash *axf; 998 enum ccp_sha_type engine_type; 999 } SHA_definitions[] = { 1000 { 1001 .version = SHA1, 1002 .H_vectors = SHA_H.SHA1, 1003 .H_size = sizeof(SHA_H.SHA1), 1004 .axf = &auth_hash_hmac_sha1, 1005 .engine_type = CCP_SHA_TYPE_1, 1006 }, 1007 #if 0 1008 { 1009 .version = SHA2_224, 1010 .H_vectors = SHA_H.SHA224, 1011 .H_size = sizeof(SHA_H.SHA224), 1012 .axf = &auth_hash_hmac_sha2_224, 1013 .engine_type = CCP_SHA_TYPE_224, 1014 }, 1015 #endif 1016 { 1017 .version = SHA2_256, 1018 .H_vectors = SHA_H.SHA256, 1019 .H_size = sizeof(SHA_H.SHA256), 1020 .axf = &auth_hash_hmac_sha2_256, 1021 .engine_type = CCP_SHA_TYPE_256, 1022 }, 1023 { 1024 .version = SHA2_384, 1025 .H_vectors = SHA_H.SHA384, 1026 .H_size = sizeof(SHA_H.SHA384), 1027 .axf = &auth_hash_hmac_sha2_384, 1028 .engine_type = CCP_SHA_TYPE_384, 1029 }, 1030 { 1031 .version = SHA2_512, 1032 .H_vectors = SHA_H.SHA512, 1033 .H_size = sizeof(SHA_H.SHA512), 1034 .axf = &auth_hash_hmac_sha2_512, 1035 .engine_type = CCP_SHA_TYPE_512, 1036 }, 1037 }; 1038 1039 static int __must_check 1040 ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn, 1041 vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits) 1042 { 1043 struct ccp_desc *desc; 1044 1045 if (ccp_queue_get_ring_space(qp) == 0) 1046 return (EAGAIN); 1047 1048 desc = &qp->desc_ring[qp->cq_tail]; 1049 1050 memset(desc, 0, sizeof(*desc)); 1051 desc->engine = CCP_ENGINE_SHA; 1052 desc->som = start; 1053 desc->eom = end; 1054 1055 desc->sha.type = defn->engine_type; 1056 desc->length = len; 1057 1058 if (end) { 1059 desc->sha_len_lo = (uint32_t)msgbits; 1060 desc->sha_len_hi = msgbits >> 32; 1061 } 1062 1063 desc->src_lo = (uint32_t)addr; 1064 desc->src_hi = addr >> 32; 1065 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1066 1067 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA); 1068 1069 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); 1070 return (0); 1071 } 1072 1073 static int __must_check 1074 ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src, 1075 struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx) 1076 { 1077 const struct SHA_Defn *defn; 1078 struct sglist_seg *seg; 1079 size_t i, msgsize, remaining, nb; 1080 uint32_t lsbaddr; 1081 int error; 1082 1083 for (i = 0; i < nitems(SHA_definitions); i++) 1084 if (SHA_definitions[i].version == version) 1085 break; 1086 if (i == nitems(SHA_definitions)) 1087 return (EINVAL); 1088 defn = &SHA_definitions[i]; 1089 1090 /* XXX validate input ??? 
	 */

	/* Load initial SHA state into LSB */
	/* XXX ensure H_vectors don't span page boundaries */
	error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA),
	    CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors),
	    CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE),
	    CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
	    NULL);
	if (error != 0)
		return (error);

	/* Execute series of SHA updates on correctly sized buffers */
	msgsize = 0;
	for (i = 0; i < sgl_src->sg_nseg; i++) {
		seg = &sgl_src->sg_segs[i];
		msgsize += seg->ss_len;
		error = ccp_sha_single_desc(qp, defn, seg->ss_paddr,
		    seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1,
		    msgsize << 3);
		if (error != 0)
			return (error);
	}

	/* Copy result out to sgl_dst */
	remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE);
	lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA);
	for (i = 0; i < sgl_dst->sg_nseg; i++) {
		seg = &sgl_dst->sg_segs[i];
		/* crp lengths are int, so 32-bit min() is ok. */
		nb = min(remaining, seg->ss_len);

		error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM,
		    lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP,
		    CCP_PASSTHRU_BITWISE_NOOP,
		    (cctx != NULL) ? (nb == remaining) : false,
		    (nb == remaining) ? cctx : NULL);
		if (error != 0)
			return (error);

		remaining -= nb;
		lsbaddr += nb;
		if (remaining == 0)
			break;
	}

	return (0);
}

static void
byteswap256(uint64_t *buffer)
{
	uint64_t t;

	t = bswap64(buffer[3]);
	buffer[3] = bswap64(buffer[0]);
	buffer[0] = t;

	t = bswap64(buffer[2]);
	buffer[2] = bswap64(buffer[1]);
	buffer[1] = t;
}

/*
 * Translate CCP internal LSB hash format into a standard hash output.
 *
 * Manipulates input buffer with byteswap256 operation.
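 *
 * (byteswap256() above swaps buffer[0] with buffer[3] and buffer[1] with
 * buffer[2] via bswap64(), i.e., it reverses all 32 bytes of one LSB entry;
 * the LSB keeps the running hash state in the same reversed order as the
 * initial SHA_H vectors, so each 256-bit entry is reversed here before the
 * per-algorithm copy below.)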
1157 */ 1158 static void 1159 ccp_sha_copy_result(char *output, char *buffer, enum sha_version version) 1160 { 1161 const struct SHA_Defn *defn; 1162 size_t i; 1163 1164 for (i = 0; i < nitems(SHA_definitions); i++) 1165 if (SHA_definitions[i].version == version) 1166 break; 1167 if (i == nitems(SHA_definitions)) 1168 panic("bogus sha version auth_mode %u\n", (unsigned)version); 1169 1170 defn = &SHA_definitions[i]; 1171 1172 /* Swap 256bit manually -- DMA engine can, but with limitations */ 1173 byteswap256((void *)buffer); 1174 if (defn->axf->hashsize > LSB_ENTRY_SIZE) 1175 byteswap256((void *)(buffer + LSB_ENTRY_SIZE)); 1176 1177 switch (defn->version) { 1178 case SHA1: 1179 memcpy(output, buffer + 12, defn->axf->hashsize); 1180 break; 1181 #if 0 1182 case SHA2_224: 1183 memcpy(output, buffer + XXX, defn->axf->hashsize); 1184 break; 1185 #endif 1186 case SHA2_256: 1187 memcpy(output, buffer, defn->axf->hashsize); 1188 break; 1189 case SHA2_384: 1190 memcpy(output, 1191 buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize, 1192 defn->axf->hashsize - LSB_ENTRY_SIZE); 1193 memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer, 1194 LSB_ENTRY_SIZE); 1195 break; 1196 case SHA2_512: 1197 memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE); 1198 memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE); 1199 break; 1200 } 1201 } 1202 1203 static void 1204 ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s, 1205 struct cryptop *crp, int error) 1206 { 1207 char ihash[SHA2_512_HASH_LEN /* max hash len */]; 1208 union authctx auth_ctx; 1209 struct auth_hash *axf; 1210 1211 axf = s->hmac.auth_hash; 1212 1213 s->pending--; 1214 1215 if (error != 0) { 1216 crp->crp_etype = error; 1217 goto out; 1218 } 1219 1220 /* Do remaining outer hash over small inner hash in software */ 1221 axf->Init(&auth_ctx); 1222 axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize); 1223 ccp_sha_copy_result(ihash, s->hmac.res, s->hmac.auth_mode); 1224 #if 0 1225 INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__, 1226 (u_char *)ihash, " "); 1227 #endif 1228 axf->Update(&auth_ctx, ihash, axf->hashsize); 1229 axf->Final(s->hmac.res, &auth_ctx); 1230 1231 if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) { 1232 crypto_copydata(crp, crp->crp_digest_start, s->hmac.hash_len, 1233 ihash); 1234 if (timingsafe_bcmp(s->hmac.res, ihash, s->hmac.hash_len) != 0) 1235 crp->crp_etype = EBADMSG; 1236 } else 1237 crypto_copyback(crp, crp->crp_digest_start, s->hmac.hash_len, 1238 s->hmac.res); 1239 1240 /* Avoid leaking key material */ 1241 explicit_bzero(&auth_ctx, sizeof(auth_ctx)); 1242 explicit_bzero(s->hmac.res, sizeof(s->hmac.res)); 1243 1244 out: 1245 crypto_done(crp); 1246 } 1247 1248 static void 1249 ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1250 int error) 1251 { 1252 struct cryptop *crp; 1253 1254 crp = vcrp; 1255 ccp_do_hmac_done(qp, s, crp, error); 1256 } 1257 1258 static int __must_check 1259 ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, 1260 const struct ccp_completion_ctx *cctx) 1261 { 1262 device_t dev; 1263 struct auth_hash *axf; 1264 int error; 1265 1266 dev = qp->cq_softc->dev; 1267 axf = s->hmac.auth_hash; 1268 1269 /* 1270 * Populate the SGL describing inside hash contents. We want to hash 1271 * the ipad (key XOR fixed bit pattern) concatenated with the user 1272 * data. 
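 *
 * (HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m)); the engine computes the
 * inner hash H((K ^ ipad) || m) here, and ccp_do_hmac_done() finishes the
 * outer hash over that result in software.)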
1273 */ 1274 sglist_reset(qp->cq_sg_ulptx); 1275 error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize); 1276 if (error != 0) 1277 return (error); 1278 if (crp->crp_aad_length != 0) { 1279 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1280 crp->crp_aad_start, crp->crp_aad_length); 1281 if (error != 0) 1282 return (error); 1283 } 1284 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1285 crp->crp_payload_start, crp->crp_payload_length); 1286 if (error != 0) { 1287 DPRINTF(dev, "%s: sglist too short\n", __func__); 1288 return (error); 1289 } 1290 /* Populate SGL for output -- use hmac.res buffer. */ 1291 sglist_reset(qp->cq_sg_dst); 1292 error = sglist_append(qp->cq_sg_dst, s->hmac.res, 1293 roundup2(axf->hashsize, LSB_ENTRY_SIZE)); 1294 if (error != 0) 1295 return (error); 1296 1297 error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst, 1298 cctx); 1299 if (error != 0) { 1300 DPRINTF(dev, "%s: ccp_sha error\n", __func__); 1301 return (error); 1302 } 1303 return (0); 1304 } 1305 1306 int __must_check 1307 ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1308 { 1309 struct ccp_completion_ctx ctx; 1310 1311 ctx.callback_fn = ccp_hmac_done; 1312 ctx.callback_arg = crp; 1313 ctx.session = s; 1314 1315 return (ccp_do_hmac(qp, s, crp, &ctx)); 1316 } 1317 1318 static void 1319 ccp_byteswap(char *data, size_t len) 1320 { 1321 size_t i; 1322 char t; 1323 1324 len--; 1325 for (i = 0; i < len; i++, len--) { 1326 t = data[i]; 1327 data[i] = data[len]; 1328 data[len] = t; 1329 } 1330 } 1331 1332 static void 1333 ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1334 int error) 1335 { 1336 struct cryptop *crp; 1337 1338 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv)); 1339 1340 crp = vcrp; 1341 1342 s->pending--; 1343 1344 if (error != 0) 1345 crp->crp_etype = error; 1346 1347 DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp); 1348 crypto_done(crp); 1349 } 1350 1351 static void 1352 ccp_collect_iv(struct cryptop *crp, const struct crypto_session_params *csp, 1353 char *iv) 1354 { 1355 1356 crypto_read_iv(crp, iv); 1357 1358 /* 1359 * If the input IV is 12 bytes, append an explicit counter of 1. 1360 */ 1361 if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16 && 1362 csp->csp_ivlen == 12) 1363 *(uint32_t *)&iv[12] = htobe32(1); 1364 1365 if (csp->csp_cipher_alg == CRYPTO_AES_XTS && 1366 csp->csp_ivlen < AES_BLOCK_LEN) 1367 memset(&iv[csp->csp_ivlen], 0, AES_BLOCK_LEN - csp->csp_ivlen); 1368 1369 /* Reverse order of IV material for HW */ 1370 INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__, iv, " ", 1371 csp->csp_ivlen); 1372 1373 /* 1374 * For unknown reasons, XTS mode expects the IV in the reverse byte 1375 * order to every other AES mode. 
1376 */ 1377 if (csp->csp_cipher_alg != CRYPTO_AES_XTS) 1378 ccp_byteswap(iv, AES_BLOCK_LEN); 1379 } 1380 1381 static int __must_check 1382 ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src, 1383 size_t len) 1384 { 1385 int error; 1386 1387 sglist_reset(qp->cq_sg_ulptx); 1388 error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len); 1389 if (error != 0) 1390 return (error); 1391 1392 error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len, 1393 false, NULL); 1394 return (error); 1395 } 1396 1397 static int __must_check 1398 ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, 1399 enum ccp_cipher_dir dir, const struct ccp_completion_ctx *cctx) 1400 { 1401 struct ccp_desc *desc; 1402 device_t dev; 1403 unsigned i; 1404 enum ccp_xts_unitsize usize; 1405 1406 /* IV and Key data are already loaded */ 1407 1408 dev = qp->cq_softc->dev; 1409 1410 for (i = 0; i < nitems(ccp_xts_unitsize_map); i++) 1411 if (ccp_xts_unitsize_map[i].cxu_size == 1412 crp->crp_payload_length) { 1413 usize = ccp_xts_unitsize_map[i].cxu_id; 1414 break; 1415 } 1416 if (i >= nitems(ccp_xts_unitsize_map)) 1417 return (EINVAL); 1418 1419 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1420 struct sglist_seg *seg; 1421 1422 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1423 1424 desc = &qp->desc_ring[qp->cq_tail]; 1425 desc->engine = CCP_ENGINE_XTS_AES; 1426 desc->som = (i == 0); 1427 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1); 1428 desc->ioc = (desc->eom && cctx != NULL); 1429 DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n", 1430 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom, 1431 (int)desc->ioc, (int)dir); 1432 1433 if (desc->ioc) 1434 memcpy(&qp->completions_ring[qp->cq_tail], cctx, 1435 sizeof(*cctx)); 1436 1437 desc->aes_xts.encrypt = dir; 1438 desc->aes_xts.type = s->blkcipher.cipher_type; 1439 desc->aes_xts.size = usize; 1440 1441 DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__, 1442 qp->cq_tail, (unsigned)desc->aes_xts.type, 1443 (unsigned)desc->aes_xts.size); 1444 1445 desc->length = seg->ss_len; 1446 desc->src_lo = (uint32_t)seg->ss_paddr; 1447 desc->src_hi = (seg->ss_paddr >> 32); 1448 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1449 1450 /* Crypt in-place */ 1451 desc->dst_lo = desc->src_lo; 1452 desc->dst_hi = desc->src_hi; 1453 desc->dst_mem = desc->src_mem; 1454 1455 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1456 desc->key_hi = 0; 1457 desc->key_mem = CCP_MEMTYPE_SB; 1458 1459 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1460 1461 qp->cq_tail = (qp->cq_tail + 1) % 1462 (1 << qp->cq_softc->ring_size_order); 1463 } 1464 return (0); 1465 } 1466 1467 static int __must_check 1468 ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s, 1469 struct cryptop *crp, const struct ccp_completion_ctx *cctx) 1470 { 1471 const struct crypto_session_params *csp; 1472 struct ccp_desc *desc; 1473 char *keydata; 1474 device_t dev; 1475 enum ccp_cipher_dir dir; 1476 int error, iv_len; 1477 size_t keydata_len; 1478 unsigned i, j; 1479 1480 dev = qp->cq_softc->dev; 1481 1482 if (s->blkcipher.key_len == 0 || crp->crp_payload_length == 0) { 1483 DPRINTF(dev, "%s: empty\n", __func__); 1484 return (EINVAL); 1485 } 1486 if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) { 1487 DPRINTF(dev, "%s: len modulo: %d\n", __func__, 1488 crp->crp_payload_length); 1489 return (EINVAL); 1490 } 1491 1492 /* 1493 * Individual segments must be multiples of AES block size for the HW 1494 * to process it. 
Non-compliant inputs aren't bogus, just not doable 1495 * on this hardware. 1496 */ 1497 for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++) 1498 if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) { 1499 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__, 1500 qp->cq_sg_crp->sg_segs[i].ss_len); 1501 return (EINVAL); 1502 } 1503 1504 /* Gather IV/nonce data */ 1505 csp = crypto_get_params(crp->crp_session); 1506 ccp_collect_iv(crp, csp, s->blkcipher.iv); 1507 iv_len = csp->csp_ivlen; 1508 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) 1509 iv_len = AES_BLOCK_LEN; 1510 1511 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1512 dir = CCP_CIPHER_DIR_ENCRYPT; 1513 else 1514 dir = CCP_CIPHER_DIR_DECRYPT; 1515 1516 /* Set up passthrough op(s) to copy IV into LSB */ 1517 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 1518 s->blkcipher.iv, iv_len); 1519 if (error != 0) 1520 return (error); 1521 1522 /* 1523 * Initialize keydata and keydata_len for GCC. The default case of the 1524 * following switch is impossible to reach, but GCC doesn't know that. 1525 */ 1526 keydata_len = 0; 1527 keydata = NULL; 1528 1529 switch (csp->csp_cipher_alg) { 1530 case CRYPTO_AES_XTS: 1531 for (j = 0; j < nitems(ccp_xts_unitsize_map); j++) 1532 if (ccp_xts_unitsize_map[j].cxu_size == 1533 crp->crp_payload_length) 1534 break; 1535 /* Input buffer must be a supported UnitSize */ 1536 if (j >= nitems(ccp_xts_unitsize_map)) { 1537 device_printf(dev, "%s: rejected block size: %u\n", 1538 __func__, crp->crp_payload_length); 1539 return (EOPNOTSUPP); 1540 } 1541 /* FALLTHROUGH */ 1542 case CRYPTO_AES_CBC: 1543 case CRYPTO_AES_ICM: 1544 keydata = s->blkcipher.enckey; 1545 keydata_len = s->blkcipher.key_len; 1546 break; 1547 } 1548 1549 INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len, 1550 keydata, " "); 1551 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) 1552 INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " "); 1553 1554 /* Reverse order of key material for HW */ 1555 ccp_byteswap(keydata, keydata_len); 1556 1557 /* Store key material into LSB to avoid page boundaries */ 1558 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) { 1559 /* 1560 * XTS mode uses 2 256-bit vectors for the primary key and the 1561 * tweak key. For 128-bit keys, the vectors are zero-padded. 1562 * 1563 * After byteswapping the combined OCF-provided K1:K2 vector 1564 * above, we need to reverse the order again so the hardware 1565 * gets the swapped keys in the order K1':K2'. 1566 */ 1567 error = ccp_do_pst_to_lsb(qp, 1568 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata, 1569 keydata_len / 2); 1570 if (error != 0) 1571 return (error); 1572 error = ccp_do_pst_to_lsb(qp, 1573 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), 1574 keydata + (keydata_len / 2), keydata_len / 2); 1575 1576 /* Zero-pad 128 bit keys */ 1577 if (keydata_len == 32) { 1578 if (error != 0) 1579 return (error); 1580 error = ccp_do_pst_to_lsb(qp, 1581 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) + 1582 keydata_len / 2, g_zeroes, keydata_len / 2); 1583 if (error != 0) 1584 return (error); 1585 error = ccp_do_pst_to_lsb(qp, 1586 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) + 1587 keydata_len / 2, g_zeroes, keydata_len / 2); 1588 } 1589 } else 1590 error = ccp_do_pst_to_lsb(qp, 1591 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata, 1592 keydata_len); 1593 if (error != 0) 1594 return (error); 1595 1596 /* 1597 * Point SGLs at the subset of cryptop buffer contents representing the 1598 * data. 
1599 */ 1600 sglist_reset(qp->cq_sg_ulptx); 1601 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1602 crp->crp_payload_start, crp->crp_payload_length); 1603 if (error != 0) 1604 return (error); 1605 1606 INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__, 1607 (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " "); 1608 1609 DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail); 1610 1611 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg) 1612 return (EAGAIN); 1613 1614 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) 1615 return (ccp_do_xts(qp, s, crp, dir, cctx)); 1616 1617 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1618 struct sglist_seg *seg; 1619 1620 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1621 1622 desc = &qp->desc_ring[qp->cq_tail]; 1623 desc->engine = CCP_ENGINE_AES; 1624 desc->som = (i == 0); 1625 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1); 1626 desc->ioc = (desc->eom && cctx != NULL); 1627 DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n", 1628 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom, 1629 (int)desc->ioc, (int)dir); 1630 1631 if (desc->ioc) 1632 memcpy(&qp->completions_ring[qp->cq_tail], cctx, 1633 sizeof(*cctx)); 1634 1635 desc->aes.encrypt = dir; 1636 desc->aes.mode = s->blkcipher.cipher_mode; 1637 desc->aes.type = s->blkcipher.cipher_type; 1638 if (csp->csp_cipher_alg == CRYPTO_AES_ICM) 1639 /* 1640 * Size of CTR value in bits, - 1. ICM mode uses all 1641 * 128 bits as counter. 1642 */ 1643 desc->aes.size = 127; 1644 1645 DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__, 1646 qp->cq_tail, (unsigned)desc->aes.mode, 1647 (unsigned)desc->aes.type, (unsigned)desc->aes.size); 1648 1649 desc->length = seg->ss_len; 1650 desc->src_lo = (uint32_t)seg->ss_paddr; 1651 desc->src_hi = (seg->ss_paddr >> 32); 1652 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1653 1654 /* Crypt in-place */ 1655 desc->dst_lo = desc->src_lo; 1656 desc->dst_hi = desc->src_hi; 1657 desc->dst_mem = desc->src_mem; 1658 1659 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1660 desc->key_hi = 0; 1661 desc->key_mem = CCP_MEMTYPE_SB; 1662 1663 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1664 1665 qp->cq_tail = (qp->cq_tail + 1) % 1666 (1 << qp->cq_softc->ring_size_order); 1667 } 1668 return (0); 1669 } 1670 1671 int __must_check 1672 ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1673 { 1674 struct ccp_completion_ctx ctx; 1675 1676 ctx.callback_fn = ccp_blkcipher_done; 1677 ctx.session = s; 1678 ctx.callback_arg = crp; 1679 1680 return (ccp_do_blkcipher(qp, s, crp, &ctx)); 1681 } 1682 1683 static void 1684 ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1685 int error) 1686 { 1687 struct cryptop *crp; 1688 1689 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv)); 1690 1691 crp = vcrp; 1692 1693 ccp_do_hmac_done(qp, s, crp, error); 1694 } 1695 1696 int __must_check 1697 ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1698 { 1699 struct ccp_completion_ctx ctx; 1700 int error; 1701 1702 ctx.callback_fn = ccp_authenc_done; 1703 ctx.session = s; 1704 ctx.callback_arg = crp; 1705 1706 /* Perform first operation */ 1707 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1708 error = ccp_do_blkcipher(qp, s, crp, NULL); 1709 else 1710 error = ccp_do_hmac(qp, s, crp, NULL); 1711 if (error != 0) 1712 return (error); 1713 1714 /* Perform second operation */ 1715 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1716 error = ccp_do_hmac(qp, s, crp, &ctx); 1717 
else 1718 error = ccp_do_blkcipher(qp, s, crp, &ctx); 1719 return (error); 1720 } 1721 1722 static int __must_check 1723 ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s) 1724 { 1725 struct ccp_desc *desc; 1726 struct sglist_seg *seg; 1727 unsigned i; 1728 1729 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg) 1730 return (EAGAIN); 1731 1732 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1733 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1734 1735 desc = &qp->desc_ring[qp->cq_tail]; 1736 1737 desc->engine = CCP_ENGINE_AES; 1738 desc->aes.mode = CCP_AES_MODE_GHASH; 1739 desc->aes.type = s->blkcipher.cipher_type; 1740 desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD; 1741 1742 desc->som = (i == 0); 1743 desc->length = seg->ss_len; 1744 1745 desc->src_lo = (uint32_t)seg->ss_paddr; 1746 desc->src_hi = (seg->ss_paddr >> 32); 1747 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1748 1749 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1750 1751 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1752 desc->key_mem = CCP_MEMTYPE_SB; 1753 1754 qp->cq_tail = (qp->cq_tail + 1) % 1755 (1 << qp->cq_softc->ring_size_order); 1756 } 1757 return (0); 1758 } 1759 1760 static int __must_check 1761 ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s, 1762 enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom) 1763 { 1764 struct ccp_desc *desc; 1765 1766 if (ccp_queue_get_ring_space(qp) == 0) 1767 return (EAGAIN); 1768 1769 desc = &qp->desc_ring[qp->cq_tail]; 1770 1771 desc->engine = CCP_ENGINE_AES; 1772 desc->aes.mode = CCP_AES_MODE_GCTR; 1773 desc->aes.type = s->blkcipher.cipher_type; 1774 desc->aes.encrypt = dir; 1775 desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1; 1776 1777 desc->som = som; 1778 desc->eom = eom; 1779 1780 /* Trailing bytes will be masked off by aes.size above. 
*/ 1781 desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN); 1782 1783 desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr; 1784 desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32; 1785 desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM; 1786 1787 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1788 1789 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1790 desc->key_mem = CCP_MEMTYPE_SB; 1791 1792 qp->cq_tail = (qp->cq_tail + 1) % 1793 (1 << qp->cq_softc->ring_size_order); 1794 return (0); 1795 } 1796 1797 static int __must_check 1798 ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s) 1799 { 1800 struct ccp_desc *desc; 1801 1802 if (ccp_queue_get_ring_space(qp) == 0) 1803 return (EAGAIN); 1804 1805 desc = &qp->desc_ring[qp->cq_tail]; 1806 1807 desc->engine = CCP_ENGINE_AES; 1808 desc->aes.mode = CCP_AES_MODE_GHASH; 1809 desc->aes.type = s->blkcipher.cipher_type; 1810 desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL; 1811 1812 desc->length = GMAC_BLOCK_LEN; 1813 1814 desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN); 1815 desc->src_mem = CCP_MEMTYPE_SB; 1816 1817 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1818 1819 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1820 desc->key_mem = CCP_MEMTYPE_SB; 1821 1822 desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH); 1823 desc->dst_mem = CCP_MEMTYPE_SB; 1824 1825 qp->cq_tail = (qp->cq_tail + 1) % 1826 (1 << qp->cq_softc->ring_size_order); 1827 return (0); 1828 } 1829 1830 static void 1831 ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1832 int error) 1833 { 1834 char tag[GMAC_DIGEST_LEN]; 1835 struct cryptop *crp; 1836 1837 crp = vcrp; 1838 1839 s->pending--; 1840 1841 if (error != 0) { 1842 crp->crp_etype = error; 1843 goto out; 1844 } 1845 1846 /* Encrypt is done. Decrypt needs to verify tag. */ 1847 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1848 goto out; 1849 1850 /* Copy in message tag. 
*/ 1851 crypto_copydata(crp, crp->crp_digest_start, s->gmac.hash_len, tag); 1852 1853 /* Verify tag against computed GMAC */ 1854 if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0) 1855 crp->crp_etype = EBADMSG; 1856 1857 out: 1858 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv)); 1859 explicit_bzero(&s->gmac.final_block, sizeof(s->gmac.final_block)); 1860 crypto_done(crp); 1861 } 1862 1863 int __must_check 1864 ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1865 { 1866 const struct crypto_session_params *csp; 1867 struct ccp_completion_ctx ctx; 1868 enum ccp_cipher_dir dir; 1869 device_t dev; 1870 unsigned i; 1871 int error; 1872 1873 if (s->blkcipher.key_len == 0) 1874 return (EINVAL); 1875 1876 dev = qp->cq_softc->dev; 1877 1878 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1879 dir = CCP_CIPHER_DIR_ENCRYPT; 1880 else 1881 dir = CCP_CIPHER_DIR_DECRYPT; 1882 1883 /* Zero initial GHASH portion of context */ 1884 memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv)); 1885 1886 /* Gather IV data */ 1887 csp = crypto_get_params(crp->crp_session); 1888 ccp_collect_iv(crp, csp, s->blkcipher.iv); 1889 1890 /* Reverse order of key material for HW */ 1891 ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len); 1892 1893 /* Prepare input buffer of concatenated lengths for final GHASH */ 1894 be64enc(s->gmac.final_block, (uint64_t)crp->crp_aad_length * 8); 1895 be64enc(&s->gmac.final_block[8], (uint64_t)crp->crp_payload_length * 8); 1896 1897 /* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */ 1898 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 1899 s->blkcipher.iv, 32); 1900 if (error != 0) 1901 return (error); 1902 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), 1903 s->blkcipher.enckey, s->blkcipher.key_len); 1904 if (error != 0) 1905 return (error); 1906 error = ccp_do_pst_to_lsb(qp, 1907 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block, 1908 GMAC_BLOCK_LEN); 1909 if (error != 0) 1910 return (error); 1911 1912 /* First step - compute GHASH over AAD */ 1913 if (crp->crp_aad_length != 0) { 1914 sglist_reset(qp->cq_sg_ulptx); 1915 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1916 crp->crp_aad_start, crp->crp_aad_length); 1917 if (error != 0) 1918 return (error); 1919 1920 /* This engine cannot process non-block multiple AAD data. */ 1921 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) 1922 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % 1923 GMAC_BLOCK_LEN) != 0) { 1924 DPRINTF(dev, "%s: AD seg modulo: %zu\n", 1925 __func__, 1926 qp->cq_sg_ulptx->sg_segs[i].ss_len); 1927 return (EINVAL); 1928 } 1929 1930 error = ccp_do_ghash_aad(qp, s); 1931 if (error != 0) 1932 return (error); 1933 } 1934 1935 /* Feed data piece by piece into GCTR */ 1936 sglist_reset(qp->cq_sg_ulptx); 1937 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1938 crp->crp_payload_start, crp->crp_payload_length); 1939 if (error != 0) 1940 return (error); 1941 1942 /* 1943 * All segments except the last must be even multiples of AES block 1944 * size for the HW to process it. Non-compliant inputs aren't bogus, 1945 * just not doable on this hardware. 1946 * 1947 * XXX: Well, the hardware will produce a valid tag for shorter final 1948 * segment inputs, but it will still write out a block-sized plaintext 1949 * or ciphertext chunk. For a typical CRP this tramples trailing data, 1950 * including the provided message tag. So, reject such inputs for now. 
	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
		if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
			DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
			    qp->cq_sg_ulptx->sg_segs[i].ss_len);
			return (EINVAL);
		}

	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
		struct sglist_seg *seg;

		seg = &qp->cq_sg_ulptx->sg_segs[i];
		error = ccp_do_gctr(qp, s, dir, seg,
		    (i == 0 && crp->crp_aad_length == 0),
		    i == (qp->cq_sg_ulptx->sg_nseg - 1));
		if (error != 0)
			return (error);
	}

	/* Send just initial IV (not GHASH!) to LSB again */
	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
	    s->blkcipher.iv, AES_BLOCK_LEN);
	if (error != 0)
		return (error);

	ctx.callback_fn = ccp_gcm_done;
	ctx.session = s;
	ctx.callback_arg = crp;

	/* Compute final hash and copy result back */
	error = ccp_do_ghash_final(qp, s);
	if (error != 0)
		return (error);

	/* When encrypting, copy computed tag out to caller buffer. */
	sglist_reset(qp->cq_sg_ulptx);
	if (dir == CCP_CIPHER_DIR_ENCRYPT)
		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
		    crp->crp_digest_start, s->gmac.hash_len);
	else
		/*
		 * For decrypting, copy the computed tag out to our session
		 * buffer to verify in our callback.
		 */
		error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block,
		    s->gmac.hash_len);
	if (error != 0)
		return (error);
	error = ccp_passthrough_sgl(qp,
	    ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx,
	    s->gmac.hash_len, true, &ctx);
	return (error);
}

#define	MAX_TRNG_RETRIES	10
u_int
random_ccp_read(void *v, u_int c)
{
	uint32_t *buf;
	u_int i, j;

	KASSERT(c % sizeof(*buf) == 0, ("%u not a multiple of uint32_t", c));

	buf = v;
	for (i = c; i > 0; i -= sizeof(*buf)) {
		/* Treat a zero read as 'no data yet' and retry a few times. */
		for (j = 0; j < MAX_TRNG_RETRIES; j++) {
			*buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET);
			if (*buf != 0)
				break;
		}
		if (j == MAX_TRNG_RETRIES)
			return (0);
		buf++;
	}
	return (c);
}

#ifdef DDB
void
db_ccp_show_hw(struct ccp_softc *sc)
{

	db_printf("  queue mask: 0x%x\n",
	    ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET));
	db_printf("  queue prio: 0x%x\n",
	    ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET));
	db_printf("  reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET));
	db_printf("  trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET));
	db_printf("  cmd timeout: 0x%x\n",
	    ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET));
	db_printf("  lsb public mask lo: 0x%x\n",
	    ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET));
	db_printf("  lsb public mask hi: 0x%x\n",
	    ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET));
	db_printf("  lsb private mask lo: 0x%x\n",
	    ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET));
	db_printf("  lsb private mask hi: 0x%x\n",
	    ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET));
	db_printf("  version: 0x%x\n", ccp_read_4(sc, VERSION_REG));
}

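/*
 * Dump one command queue's control and status registers for DDB.  If the
 * status register reports an error, decode it and print the offending
 * descriptor as well.
 */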
void
db_ccp_show_queue_hw(struct ccp_queue *qp)
{
	const struct ccp_error_code *ec;
	struct ccp_softc *sc;
	uint32_t status, error, esource, faultblock, headlo, qcontrol;
	unsigned q, i;

	sc = qp->cq_softc;
	q = qp->cq_qindex;

	qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE);
	db_printf("  qcontrol: 0x%x%s%s\n", qcontrol,
	    (qcontrol & CMD_Q_RUN) ? " RUN" : "",
	    (qcontrol & CMD_Q_HALTED) ? " HALTED" : "");
	db_printf("  tail_lo: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE));
	headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
	db_printf("  head_lo: 0x%x\n", headlo);
	db_printf("  int enable: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE));
	db_printf("  interrupt status: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE));
	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
	db_printf("  status: 0x%x\n", status);
	db_printf("  int stats: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE));

	error = status & STATUS_ERROR_MASK;
	if (error == 0)
		return;

	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
	    STATUS_ERRORSOURCE_MASK;
	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
	    STATUS_VLSB_FAULTBLOCK_MASK;

	ec = NULL;
	for (i = 0; i < nitems(ccp_error_codes); i++)
		if (ccp_error_codes[i].ce_code == error)
			break;
	if (i < nitems(ccp_error_codes))
		ec = &ccp_error_codes[i];

	db_printf("  Error: %s (%u) Source: %u Faulting LSB block: %u\n",
	    (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
	    faultblock);
	if (ec != NULL)
		db_printf("  Error description: %s\n", ec->ce_desc);

	i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
	db_printf("  Bad descriptor idx: %u contents:\n %32D\n", i,
	    (void *)&qp->desc_ring[i], " ");
}
#endif