1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2017 Chelsio Communications, Inc. 5 * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org> 6 * All rights reserved. 7 * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org> 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_ddb.h" 35 36 #include <sys/param.h> 37 #include <sys/bus.h> 38 #include <sys/lock.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/mutex.h> 42 #include <sys/module.h> 43 #include <sys/rman.h> 44 #include <sys/sglist.h> 45 #include <sys/sysctl.h> 46 47 #ifdef DDB 48 #include <ddb/ddb.h> 49 #endif 50 51 #include <dev/pci/pcireg.h> 52 #include <dev/pci/pcivar.h> 53 54 #include <machine/bus.h> 55 #include <machine/resource.h> 56 #include <machine/vmparam.h> 57 58 #include <opencrypto/cryptodev.h> 59 #include <opencrypto/xform.h> 60 61 #include <vm/vm.h> 62 #include <vm/pmap.h> 63 64 #include "cryptodev_if.h" 65 66 #include "ccp.h" 67 #include "ccp_hardware.h" 68 #include "ccp_lsb.h" 69 70 CTASSERT(sizeof(struct ccp_desc) == 32); 71 72 static struct ccp_xts_unitsize_map_entry { 73 enum ccp_xts_unitsize cxu_id; 74 unsigned cxu_size; 75 } ccp_xts_unitsize_map[] = { 76 { CCP_XTS_AES_UNIT_SIZE_16, 16 }, 77 { CCP_XTS_AES_UNIT_SIZE_512, 512 }, 78 { CCP_XTS_AES_UNIT_SIZE_1024, 1024 }, 79 { CCP_XTS_AES_UNIT_SIZE_2048, 2048 }, 80 { CCP_XTS_AES_UNIT_SIZE_4096, 4096 }, 81 }; 82 83 SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 84 "ccp node"); 85 86 unsigned g_ccp_ring_order = 11; 87 SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order, 88 0, "Set CCP ring order. (1 << this) == ring size. 
Min: 6, Max: 16"); 89 90 /* 91 * Zero buffer, sufficient for padding LSB entries, that does not span a page 92 * boundary 93 */ 94 static const char g_zeroes[32] __aligned(32); 95 96 static inline uint32_t 97 ccp_read_4(struct ccp_softc *sc, uint32_t offset) 98 { 99 return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset)); 100 } 101 102 static inline void 103 ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value) 104 { 105 bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value); 106 } 107 108 static inline uint32_t 109 ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset) 110 { 111 /* 112 * Each queue gets its own 4kB register space. Queue 0 is at 0x1000. 113 */ 114 return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset)); 115 } 116 117 static inline void 118 ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset, 119 uint32_t value) 120 { 121 ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value); 122 } 123 124 void 125 ccp_queue_write_tail(struct ccp_queue *qp) 126 { 127 ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE, 128 ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail)); 129 } 130 131 /* 132 * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of 133 * that entry for the queue's private LSB region. 134 */ 135 static inline uint8_t 136 ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry) 137 { 138 return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry)); 139 } 140 141 /* 142 * Given a queue and a reserved LSB entry index, compute the LSB *address* of 143 * that entry for the queue's private LSB region. 144 */ 145 static inline uint32_t 146 ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry) 147 { 148 return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE); 149 } 150 151 /* 152 * Some terminology: 153 * 154 * LSB - Local Storage Block 155 * ========================= 156 * 157 * 8 segments/regions, each containing 16 entries. 158 * 159 * Each entry contains 256 bits (32 bytes). 160 * 161 * Segments are virtually addressed in commands, but accesses cannot cross 162 * segment boundaries. Virtual map uses an identity mapping by default 163 * (virtual segment N corresponds to physical segment N). 164 * 165 * Access to a physical region can be restricted to any subset of all five 166 * queues. 167 * 168 * "Pass-through" mode 169 * =================== 170 * 171 * Pass-through is a generic DMA engine, much like ioat(4). Some nice 172 * features: 173 * 174 * - Supports byte-swapping for endian conversion (32- or 256-bit words) 175 * - AND, OR, XOR with fixed 256-bit mask 176 * - CRC32 of data (may be used in tandem with bswap, but not bit operations) 177 * - Read/write of LSB 178 * - Memset 179 * 180 * If bit manipulation mode is enabled, input must be a multiple of 256 bits 181 * (32 bytes). 182 * 183 * If byte-swapping is enabled, input must be a multiple of the word size. 184 * 185 * Zlib mode -- only usable from one queue at a time, single job at a time. 186 * ======================================================================== 187 * 188 * Only usable from private host, aka PSP? Not host processor? 189 * 190 * RNG. 191 * ==== 192 * 193 * Raw bits are conditioned with AES and fed through CTR_DRBG. Output goes in 194 * a ring buffer readable by software. 
195 * 196 * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are 197 * implemented on the raw input stream and may be enabled to verify min-entropy 198 * of 0.5 bits per bit. 199 */ 200 201 static void 202 ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 203 { 204 bus_addr_t *baddr; 205 206 KASSERT(error == 0, ("%s: error:%d", __func__, error)); 207 baddr = arg; 208 *baddr = segs->ds_addr; 209 } 210 211 static int 212 ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue) 213 { 214 struct ccp_softc *sc; 215 struct ccp_queue *qp; 216 void *desc; 217 size_t ringsz, num_descriptors; 218 int error; 219 220 desc = NULL; 221 sc = device_get_softc(dev); 222 qp = &sc->queues[queue]; 223 224 /* 225 * Don't bother allocating a ring for queues the host isn't allowed to 226 * drive. 227 */ 228 if ((sc->valid_queues & (1 << queue)) == 0) 229 return (0); 230 231 ccp_queue_decode_lsb_regions(sc, lsbmask, queue); 232 233 /* Ignore queues that do not have any LSB access. */ 234 if (qp->lsb_mask == 0) { 235 device_printf(dev, "Ignoring queue %u with no LSB access\n", 236 queue); 237 sc->valid_queues &= ~(1 << queue); 238 return (0); 239 } 240 241 num_descriptors = 1 << sc->ring_size_order; 242 ringsz = sizeof(struct ccp_desc) * num_descriptors; 243 244 /* 245 * "Queue_Size" is order - 1. 246 * 247 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits. 248 */ 249 error = bus_dma_tag_create(bus_get_dma_tag(dev), 250 1 << (5 + sc->ring_size_order), 251 #if defined(__i386__) && !defined(PAE) 252 0, BUS_SPACE_MAXADDR, 253 #else 254 (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT, 255 #endif 256 BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1, 257 ringsz, 0, NULL, NULL, &qp->ring_desc_tag); 258 if (error != 0) 259 goto out; 260 261 error = bus_dmamem_alloc(qp->ring_desc_tag, &desc, 262 BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map); 263 if (error != 0) 264 goto out; 265 266 error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc, 267 ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK); 268 if (error != 0) 269 goto out; 270 271 qp->desc_ring = desc; 272 qp->completions_ring = malloc(num_descriptors * 273 sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK); 274 275 /* Zero control register; among other things, clears the RUN flag. */ 276 qp->qcontrol = 0; 277 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol); 278 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0); 279 280 /* Clear any leftover interrupt status flags */ 281 ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE, 282 ALL_INTERRUPTS); 283 284 qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT; 285 286 ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE, 287 (uint32_t)qp->desc_ring_bus_addr); 288 ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE, 289 (uint32_t)qp->desc_ring_bus_addr); 290 291 /* 292 * Enable completion interrupts, as well as error or administrative 293 * halt interrupts. We don't use administrative halts, but they 294 * shouldn't trip unless we do, so it ought to be harmless. 
295 */ 296 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 297 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED); 298 299 qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT; 300 qp->qcontrol |= CMD_Q_RUN; 301 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol); 302 303 out: 304 if (error != 0) { 305 if (qp->desc_ring != NULL) 306 bus_dmamap_unload(qp->ring_desc_tag, 307 qp->ring_desc_map); 308 if (desc != NULL) 309 bus_dmamem_free(qp->ring_desc_tag, desc, 310 qp->ring_desc_map); 311 if (qp->ring_desc_tag != NULL) 312 bus_dma_tag_destroy(qp->ring_desc_tag); 313 } 314 return (error); 315 } 316 317 static void 318 ccp_hw_detach_queue(device_t dev, unsigned queue) 319 { 320 struct ccp_softc *sc; 321 struct ccp_queue *qp; 322 323 sc = device_get_softc(dev); 324 qp = &sc->queues[queue]; 325 326 /* 327 * Don't bother allocating a ring for queues the host isn't allowed to 328 * drive. 329 */ 330 if ((sc->valid_queues & (1 << queue)) == 0) 331 return; 332 333 free(qp->completions_ring, M_CCP); 334 bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map); 335 bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map); 336 bus_dma_tag_destroy(qp->ring_desc_tag); 337 } 338 339 static int 340 ccp_map_pci_bar(device_t dev) 341 { 342 struct ccp_softc *sc; 343 344 sc = device_get_softc(dev); 345 346 sc->pci_resource_id = PCIR_BAR(2); 347 sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 348 &sc->pci_resource_id, RF_ACTIVE); 349 if (sc->pci_resource == NULL) { 350 device_printf(dev, "unable to allocate pci resource\n"); 351 return (ENODEV); 352 } 353 354 sc->pci_resource_id_msix = PCIR_BAR(5); 355 sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 356 &sc->pci_resource_id_msix, RF_ACTIVE); 357 if (sc->pci_resource_msix == NULL) { 358 device_printf(dev, "unable to allocate pci resource msix\n"); 359 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id, 360 sc->pci_resource); 361 return (ENODEV); 362 } 363 364 sc->pci_bus_tag = rman_get_bustag(sc->pci_resource); 365 sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource); 366 return (0); 367 } 368 369 static void 370 ccp_unmap_pci_bar(device_t dev) 371 { 372 struct ccp_softc *sc; 373 374 sc = device_get_softc(dev); 375 376 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix, 377 sc->pci_resource_msix); 378 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id, 379 sc->pci_resource); 380 } 381 382 const static struct ccp_error_code { 383 uint8_t ce_code; 384 const char *ce_name; 385 int ce_errno; 386 const char *ce_desc; 387 } ccp_error_codes[] = { 388 { 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" }, 389 { 0x03, "ILLEGAL_FUNCTION_TYPE", EIO, 390 "A non-supported function type was specified" }, 391 { 0x04, "ILLEGAL_FUNCTION_MODE", EIO, 392 "A non-supported function mode was specified" }, 393 { 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO, 394 "A CMAC type was specified when ENCRYPT was not specified" }, 395 { 0x06, "ILLEGAL_FUNCTION_SIZE", EIO, 396 "A non-supported function size was specified.\n" 397 "AES-CFB: Size was not 127 or 7;\n" 398 "3DES-CFB: Size was not 7;\n" 399 "RSA: See supported size table (7.4.2);\n" 400 "ECC: Size was greater than 576 bits." 
}, 401 { 0x07, "Zlib_MISSING_INIT_EOM", EIO, 402 "Zlib command does not have INIT and EOM set" }, 403 { 0x08, "ILLEGAL_FUNCTION_RSVD", EIO, 404 "Reserved bits in a function specification were not 0" }, 405 { 0x09, "ILLEGAL_BUFFER_LENGTH", EIO, 406 "The buffer length specified was not correct for the selected engine" 407 }, 408 { 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n" 409 "Undefined VLSB segment mapping or\n" 410 "mapping to unsupported LSB segment id" }, 411 { 0x0B, "ILLEGAL_MEM_ADDR", EFAULT, 412 "The specified source/destination buffer access was illegal:\n" 413 "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n" 414 "Data buffer not completely contained within a single segment; or\n" 415 "Pointer with Fixed=1 is not 32-bit aligned; or\n" 416 "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory." 417 }, 418 { 0x0C, "ILLEGAL_MEM_SEL", EIO, 419 "A src_mem, dst_mem, or key_mem field was illegal:\n" 420 "A field was set to a reserved value; or\n" 421 "A public command attempted to reference AXI1 (local) or GART memory; or\n" 422 "A Zlib command attmpted to use the LSB." }, 423 { 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO, 424 "The specified context location was illegal:\n" 425 "Context located in a LSB location disallowed by the LSB protection masks; or\n" 426 "Context not completely contained within a single segment." }, 427 { 0x0E, "ILLEGAL_KEY_ADDR", EIO, 428 "The specified key location was illegal:\n" 429 "Key located in a LSB location disallowed by the LSB protection masks; or\n" 430 "Key not completely contained within a single segment." }, 431 { 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" }, 432 /* XXX Could fill out these descriptions too */ 433 { 0x13, "IDMA0_AXI_SLVERR", EIO, "" }, 434 { 0x14, "IDMA0_AXI_DECERR", EIO, "" }, 435 { 0x16, "IDMA1_AXI_SLVERR", EIO, "" }, 436 { 0x17, "IDMA1_AXI_DECERR", EIO, "" }, 437 { 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" }, 438 { 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" }, 439 { 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" }, 440 { 0x1D, "ZLIB_EXTRA_DATA", EIO, "" }, 441 { 0x1E, "ZLIB_BTYPE", EIO, "" }, 442 { 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" }, 443 { 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" }, 444 { 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" }, 445 { 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" }, 446 { 0x24, "ZLIB_LIMIT_REACHED", EIO, "" }, 447 { 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" }, 448 { 0x26, "ODMA0_AXI_SLVERR", EIO, "" }, 449 { 0x27, "ODMA0_AXI_DECERR", EIO, "" }, 450 { 0x29, "ODMA1_AXI_SLVERR", EIO, "" }, 451 { 0x2A, "ODMA1_AXI_DECERR", EIO, "" }, 452 { 0x2B, "LSB_PARITY_ERR", EIO, 453 "A read from the LSB encountered a parity error" }, 454 }; 455 456 static void 457 ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc) 458 { 459 struct ccp_completion_ctx *cctx; 460 const struct ccp_error_code *ec; 461 struct ccp_softc *sc; 462 uint32_t status, error, esource, faultblock; 463 unsigned q, idx; 464 int errno; 465 466 sc = qp->cq_softc; 467 q = qp->cq_qindex; 468 469 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE); 470 471 error = status & STATUS_ERROR_MASK; 472 473 /* Decode error status */ 474 ec = NULL; 475 for (idx = 0; idx < nitems(ccp_error_codes); idx++) 476 if (ccp_error_codes[idx].ce_code == error) { 477 ec = &ccp_error_codes[idx]; 478 break; 479 } 480 481 esource = (status >> STATUS_ERRORSOURCE_SHIFT) & 482 STATUS_ERRORSOURCE_MASK; 483 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) & 484 STATUS_VLSB_FAULTBLOCK_MASK; 485 
device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n", 486 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource, 487 faultblock); 488 if (ec != NULL) 489 device_printf(sc->dev, "Error description: %s\n", ec->ce_desc); 490 491 /* TODO Could format the desc nicely here */ 492 idx = desc - qp->desc_ring; 493 DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx, 494 (const void *)desc, " "); 495 496 /* 497 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status, 498 * Zlib Decompress status may be interesting. 499 */ 500 501 while (true) { 502 /* Keep unused descriptors zero for next use. */ 503 memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx])); 504 505 cctx = &qp->completions_ring[idx]; 506 507 /* 508 * Restart procedure described in § 14.2.5. Could be used by HoC if we 509 * used that. 510 * 511 * Advance HEAD_LO past bad descriptor + any remaining in 512 * transaction manually, then restart queue. 513 */ 514 idx = (idx + 1) % (1 << sc->ring_size_order); 515 516 /* Callback function signals end of transaction */ 517 if (cctx->callback_fn != NULL) { 518 if (ec == NULL) 519 errno = EIO; 520 else 521 errno = ec->ce_errno; 522 /* TODO More specific error code */ 523 cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno); 524 cctx->callback_fn = NULL; 525 break; 526 } 527 } 528 529 qp->cq_head = idx; 530 qp->cq_waiting = false; 531 wakeup(&qp->cq_tail); 532 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head); 533 ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE, 534 (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE)); 535 ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol); 536 DPRINTF(sc->dev, "%s: Restarted queue\n", __func__); 537 } 538 539 static void 540 ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints) 541 { 542 struct ccp_completion_ctx *cctx; 543 struct ccp_softc *sc; 544 const struct ccp_desc *desc; 545 uint32_t headlo, idx; 546 unsigned q, completed; 547 548 sc = qp->cq_softc; 549 q = qp->cq_qindex; 550 551 mtx_lock(&qp->cq_lock); 552 553 /* 554 * Hardware HEAD_LO points to the first incomplete descriptor. Process 555 * any submitted and completed descriptors, up to but not including 556 * HEAD_LO. 557 */ 558 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE); 559 idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE; 560 561 DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx, 562 qp->cq_head); 563 completed = 0; 564 while (qp->cq_head != idx) { 565 DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head); 566 567 cctx = &qp->completions_ring[qp->cq_head]; 568 if (cctx->callback_fn != NULL) { 569 cctx->callback_fn(qp, cctx->session, 570 cctx->callback_arg, 0); 571 cctx->callback_fn = NULL; 572 } 573 574 /* Keep unused descriptors zero for next use. */ 575 memset(&qp->desc_ring[qp->cq_head], 0, 576 sizeof(qp->desc_ring[qp->cq_head])); 577 578 qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order); 579 completed++; 580 } 581 if (completed > 0) { 582 qp->cq_waiting = false; 583 wakeup(&qp->cq_tail); 584 } 585 586 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head); 587 588 /* 589 * Desc points to the first incomplete descriptor, at the time we read 590 * HEAD_LO. If there was an error flagged in interrupt status, the HW 591 * will not proceed past the erroneous descriptor by itself. 
592 */ 593 desc = &qp->desc_ring[idx]; 594 if ((ints & INT_ERROR) != 0) 595 ccp_intr_handle_error(qp, desc); 596 597 mtx_unlock(&qp->cq_lock); 598 } 599 600 static void 601 ccp_intr_handler(void *arg) 602 { 603 struct ccp_softc *sc = arg; 604 size_t i; 605 uint32_t ints; 606 607 DPRINTF(sc->dev, "%s: interrupt\n", __func__); 608 609 /* 610 * We get one global interrupt per PCI device, shared over all of 611 * its queues. Scan each valid queue on interrupt for flags indicating 612 * activity. 613 */ 614 for (i = 0; i < nitems(sc->queues); i++) { 615 if ((sc->valid_queues & (1 << i)) == 0) 616 continue; 617 618 ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE); 619 if (ints == 0) 620 continue; 621 622 #if 0 623 DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__, 624 (unsigned)ints, i); 625 #endif 626 /* Write back 1s to clear interrupt status bits. */ 627 ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints); 628 629 /* 630 * If there was an error, we still need to run completions on 631 * any descriptors prior to the error. The completions handler 632 * invoked below will also handle the error descriptor. 633 */ 634 if ((ints & (INT_COMPLETION | INT_ERROR)) != 0) 635 ccp_intr_run_completions(&sc->queues[i], ints); 636 637 if ((ints & INT_QUEUE_STOPPED) != 0) 638 device_printf(sc->dev, "%s: queue %zu stopped\n", 639 __func__, i); 640 } 641 642 /* Re-enable interrupts after processing */ 643 for (i = 0; i < nitems(sc->queues); i++) { 644 if ((sc->valid_queues & (1 << i)) == 0) 645 continue; 646 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 647 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED); 648 } 649 } 650 651 static int 652 ccp_intr_filter(void *arg) 653 { 654 struct ccp_softc *sc = arg; 655 size_t i; 656 657 /* TODO: Split individual queues into separate taskqueues? 
*/ 658 for (i = 0; i < nitems(sc->queues); i++) { 659 if ((sc->valid_queues & (1 << i)) == 0) 660 continue; 661 662 /* Mask interrupt until task completes */ 663 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0); 664 } 665 666 return (FILTER_SCHEDULE_THREAD); 667 } 668 669 static int 670 ccp_setup_interrupts(struct ccp_softc *sc) 671 { 672 uint32_t nvec; 673 int rid, error, n, ridcopy; 674 675 n = pci_msix_count(sc->dev); 676 if (n < 1) { 677 device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n); 678 return (ENXIO); 679 } 680 681 nvec = n; 682 error = pci_alloc_msix(sc->dev, &nvec); 683 if (error != 0) { 684 device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__, 685 error); 686 return (error); 687 } 688 if (nvec < 1) { 689 device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n", 690 __func__); 691 return (ENXIO); 692 } 693 if (nvec > nitems(sc->intr_res)) { 694 device_printf(sc->dev, "%s: too many vectors: %u\n", __func__, 695 nvec); 696 nvec = nitems(sc->intr_res); 697 } 698 699 for (rid = 1; rid < 1 + nvec; rid++) { 700 ridcopy = rid; 701 sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev, 702 SYS_RES_IRQ, &ridcopy, RF_ACTIVE); 703 if (sc->intr_res[rid - 1] == NULL) { 704 device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n", 705 __func__); 706 return (ENXIO); 707 } 708 709 sc->intr_tag[rid - 1] = NULL; 710 error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1], 711 INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter, 712 ccp_intr_handler, sc, &sc->intr_tag[rid - 1]); 713 if (error != 0) 714 device_printf(sc->dev, "%s: setup_intr: %d\n", 715 __func__, error); 716 } 717 sc->intr_count = nvec; 718 719 return (error); 720 } 721 722 static void 723 ccp_release_interrupts(struct ccp_softc *sc) 724 { 725 unsigned i; 726 727 for (i = 0; i < sc->intr_count; i++) { 728 if (sc->intr_tag[i] != NULL) 729 bus_teardown_intr(sc->dev, sc->intr_res[i], 730 sc->intr_tag[i]); 731 if (sc->intr_res[i] != NULL) 732 bus_release_resource(sc->dev, SYS_RES_IRQ, 733 rman_get_rid(sc->intr_res[i]), sc->intr_res[i]); 734 } 735 736 pci_release_msi(sc->dev); 737 } 738 739 int 740 ccp_hw_attach(device_t dev) 741 { 742 struct ccp_softc *sc; 743 uint64_t lsbmask; 744 uint32_t version, lsbmasklo, lsbmaskhi; 745 unsigned queue_idx, j; 746 int error; 747 bool bars_mapped, interrupts_setup; 748 749 queue_idx = 0; 750 bars_mapped = interrupts_setup = false; 751 sc = device_get_softc(dev); 752 753 error = ccp_map_pci_bar(dev); 754 if (error != 0) { 755 device_printf(dev, "%s: couldn't map BAR(s)\n", __func__); 756 goto out; 757 } 758 bars_mapped = true; 759 760 error = pci_enable_busmaster(dev); 761 if (error != 0) { 762 device_printf(dev, "%s: couldn't enable busmaster\n", 763 __func__); 764 goto out; 765 } 766 767 sc->ring_size_order = g_ccp_ring_order; 768 if (sc->ring_size_order < 6 || sc->ring_size_order > 16) { 769 device_printf(dev, "bogus hw.ccp.ring_order\n"); 770 error = EINVAL; 771 goto out; 772 } 773 sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET); 774 775 version = ccp_read_4(sc, VERSION_REG); 776 if ((version & VERSION_NUM_MASK) < 5) { 777 device_printf(dev, 778 "driver supports version 5 and later hardware\n"); 779 error = ENXIO; 780 goto out; 781 } 782 783 error = ccp_setup_interrupts(sc); 784 if (error != 0) 785 goto out; 786 interrupts_setup = true; 787 788 sc->hw_version = version & VERSION_NUM_MASK; 789 sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) & 790 VERSION_NUMVQM_MASK; 791 sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) & 792 VERSION_LSBSIZE_MASK; 793 
sc->hw_features = version & VERSION_CAP_MASK; 794 795 /* 796 * Copy private LSB mask to public registers to enable access to LSB 797 * from all queues allowed by BIOS. 798 */ 799 lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET); 800 lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET); 801 ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo); 802 ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi); 803 804 lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo; 805 806 for (; queue_idx < nitems(sc->queues); queue_idx++) { 807 error = ccp_hw_attach_queue(dev, lsbmask, queue_idx); 808 if (error != 0) { 809 device_printf(dev, "%s: couldn't attach queue %u\n", 810 __func__, queue_idx); 811 goto out; 812 } 813 } 814 ccp_assign_lsb_regions(sc, lsbmask); 815 816 out: 817 if (error != 0) { 818 if (interrupts_setup) 819 ccp_release_interrupts(sc); 820 for (j = 0; j < queue_idx; j++) 821 ccp_hw_detach_queue(dev, j); 822 if (sc->ring_size_order != 0) 823 pci_disable_busmaster(dev); 824 if (bars_mapped) 825 ccp_unmap_pci_bar(dev); 826 } 827 return (error); 828 } 829 830 void 831 ccp_hw_detach(device_t dev) 832 { 833 struct ccp_softc *sc; 834 unsigned i; 835 836 sc = device_get_softc(dev); 837 838 for (i = 0; i < nitems(sc->queues); i++) 839 ccp_hw_detach_queue(dev, i); 840 841 ccp_release_interrupts(sc); 842 pci_disable_busmaster(dev); 843 ccp_unmap_pci_bar(dev); 844 } 845 846 static int __must_check 847 ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst, 848 enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type, 849 bus_size_t len, enum ccp_passthru_byteswap swapmode, 850 enum ccp_passthru_bitwise bitmode, bool interrupt, 851 const struct ccp_completion_ctx *cctx) 852 { 853 struct ccp_desc *desc; 854 855 if (ccp_queue_get_ring_space(qp) == 0) 856 return (EAGAIN); 857 858 desc = &qp->desc_ring[qp->cq_tail]; 859 860 memset(desc, 0, sizeof(*desc)); 861 desc->engine = CCP_ENGINE_PASSTHRU; 862 863 desc->pt.ioc = interrupt; 864 desc->pt.byteswap = swapmode; 865 desc->pt.bitwise = bitmode; 866 desc->length = len; 867 868 desc->src_lo = (uint32_t)src; 869 desc->src_hi = src >> 32; 870 desc->src_mem = src_type; 871 872 desc->dst_lo = (uint32_t)dst; 873 desc->dst_hi = dst >> 32; 874 desc->dst_mem = dst_type; 875 876 if (bitmode != CCP_PASSTHRU_BITWISE_NOOP) 877 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY); 878 879 if (cctx != NULL) 880 memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx)); 881 882 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); 883 return (0); 884 } 885 886 static int __must_check 887 ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb, 888 struct sglist *sgl, bus_size_t len, bool interrupt, 889 const struct ccp_completion_ctx *cctx) 890 { 891 struct sglist_seg *seg; 892 size_t i, remain, nb; 893 int error; 894 895 remain = len; 896 for (i = 0; i < sgl->sg_nseg && remain != 0; i++) { 897 seg = &sgl->sg_segs[i]; 898 /* crp lengths are int, so 32-bit min() is ok. 
*/ 899 nb = min(remain, seg->ss_len); 900 901 if (tolsb) 902 error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB, 903 seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb, 904 CCP_PASSTHRU_BYTESWAP_NOOP, 905 CCP_PASSTHRU_BITWISE_NOOP, 906 (nb == remain) && interrupt, cctx); 907 else 908 error = ccp_passthrough(qp, seg->ss_paddr, 909 CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb, 910 CCP_PASSTHRU_BYTESWAP_NOOP, 911 CCP_PASSTHRU_BITWISE_NOOP, 912 (nb == remain) && interrupt, cctx); 913 if (error != 0) 914 return (error); 915 916 remain -= nb; 917 } 918 return (0); 919 } 920 921 /* 922 * Note that these vectors are in reverse of the usual order. 923 */ 924 const struct SHA_vectors { 925 uint32_t SHA1[8]; 926 uint32_t SHA224[8]; 927 uint32_t SHA256[8]; 928 uint64_t SHA384[8]; 929 uint64_t SHA512[8]; 930 } SHA_H __aligned(PAGE_SIZE) = { 931 .SHA1 = { 932 0xc3d2e1f0ul, 933 0x10325476ul, 934 0x98badcfeul, 935 0xefcdab89ul, 936 0x67452301ul, 937 0, 938 0, 939 0, 940 }, 941 .SHA224 = { 942 0xbefa4fa4ul, 943 0x64f98fa7ul, 944 0x68581511ul, 945 0xffc00b31ul, 946 0xf70e5939ul, 947 0x3070dd17ul, 948 0x367cd507ul, 949 0xc1059ed8ul, 950 }, 951 .SHA256 = { 952 0x5be0cd19ul, 953 0x1f83d9abul, 954 0x9b05688cul, 955 0x510e527ful, 956 0xa54ff53aul, 957 0x3c6ef372ul, 958 0xbb67ae85ul, 959 0x6a09e667ul, 960 }, 961 .SHA384 = { 962 0x47b5481dbefa4fa4ull, 963 0xdb0c2e0d64f98fa7ull, 964 0x8eb44a8768581511ull, 965 0x67332667ffc00b31ull, 966 0x152fecd8f70e5939ull, 967 0x9159015a3070dd17ull, 968 0x629a292a367cd507ull, 969 0xcbbb9d5dc1059ed8ull, 970 }, 971 .SHA512 = { 972 0x5be0cd19137e2179ull, 973 0x1f83d9abfb41bd6bull, 974 0x9b05688c2b3e6c1full, 975 0x510e527fade682d1ull, 976 0xa54ff53a5f1d36f1ull, 977 0x3c6ef372fe94f82bull, 978 0xbb67ae8584caa73bull, 979 0x6a09e667f3bcc908ull, 980 }, 981 }; 982 /* 983 * Ensure vectors do not cross a page boundary. 984 * 985 * Disabled due to a new Clang error: "expression is not an integral constant 986 * expression." GCC (cross toolchain) seems to handle this assertion with 987 * _Static_assert just fine. 
988 */ 989 #if 0 990 CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H)); 991 #endif 992 993 const struct SHA_Defn { 994 enum sha_version version; 995 const void *H_vectors; 996 size_t H_size; 997 const struct auth_hash *axf; 998 enum ccp_sha_type engine_type; 999 } SHA_definitions[] = { 1000 { 1001 .version = SHA1, 1002 .H_vectors = SHA_H.SHA1, 1003 .H_size = sizeof(SHA_H.SHA1), 1004 .axf = &auth_hash_hmac_sha1, 1005 .engine_type = CCP_SHA_TYPE_1, 1006 }, 1007 #if 0 1008 { 1009 .version = SHA2_224, 1010 .H_vectors = SHA_H.SHA224, 1011 .H_size = sizeof(SHA_H.SHA224), 1012 .axf = &auth_hash_hmac_sha2_224, 1013 .engine_type = CCP_SHA_TYPE_224, 1014 }, 1015 #endif 1016 { 1017 .version = SHA2_256, 1018 .H_vectors = SHA_H.SHA256, 1019 .H_size = sizeof(SHA_H.SHA256), 1020 .axf = &auth_hash_hmac_sha2_256, 1021 .engine_type = CCP_SHA_TYPE_256, 1022 }, 1023 { 1024 .version = SHA2_384, 1025 .H_vectors = SHA_H.SHA384, 1026 .H_size = sizeof(SHA_H.SHA384), 1027 .axf = &auth_hash_hmac_sha2_384, 1028 .engine_type = CCP_SHA_TYPE_384, 1029 }, 1030 { 1031 .version = SHA2_512, 1032 .H_vectors = SHA_H.SHA512, 1033 .H_size = sizeof(SHA_H.SHA512), 1034 .axf = &auth_hash_hmac_sha2_512, 1035 .engine_type = CCP_SHA_TYPE_512, 1036 }, 1037 }; 1038 1039 static int __must_check 1040 ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn, 1041 vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits) 1042 { 1043 struct ccp_desc *desc; 1044 1045 if (ccp_queue_get_ring_space(qp) == 0) 1046 return (EAGAIN); 1047 1048 desc = &qp->desc_ring[qp->cq_tail]; 1049 1050 memset(desc, 0, sizeof(*desc)); 1051 desc->engine = CCP_ENGINE_SHA; 1052 desc->som = start; 1053 desc->eom = end; 1054 1055 desc->sha.type = defn->engine_type; 1056 desc->length = len; 1057 1058 if (end) { 1059 desc->sha_len_lo = (uint32_t)msgbits; 1060 desc->sha_len_hi = msgbits >> 32; 1061 } 1062 1063 desc->src_lo = (uint32_t)addr; 1064 desc->src_hi = addr >> 32; 1065 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1066 1067 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA); 1068 1069 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); 1070 return (0); 1071 } 1072 1073 static int __must_check 1074 ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src, 1075 struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx) 1076 { 1077 const struct SHA_Defn *defn; 1078 struct sglist_seg *seg; 1079 size_t i, msgsize, remaining, nb; 1080 uint32_t lsbaddr; 1081 int error; 1082 1083 for (i = 0; i < nitems(SHA_definitions); i++) 1084 if (SHA_definitions[i].version == version) 1085 break; 1086 if (i == nitems(SHA_definitions)) 1087 return (EINVAL); 1088 defn = &SHA_definitions[i]; 1089 1090 /* XXX validate input ??? 
*/ 1091 1092 /* Load initial SHA state into LSB */ 1093 /* XXX ensure H_vectors don't span page boundaries */ 1094 error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA), 1095 CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors), 1096 CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE), 1097 CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false, 1098 NULL); 1099 if (error != 0) 1100 return (error); 1101 1102 /* Execute series of SHA updates on correctly sized buffers */ 1103 msgsize = 0; 1104 for (i = 0; i < sgl_src->sg_nseg; i++) { 1105 seg = &sgl_src->sg_segs[i]; 1106 msgsize += seg->ss_len; 1107 error = ccp_sha_single_desc(qp, defn, seg->ss_paddr, 1108 seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1, 1109 msgsize << 3); 1110 if (error != 0) 1111 return (error); 1112 } 1113 1114 /* Copy result out to sgl_dst */ 1115 remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE); 1116 lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA); 1117 for (i = 0; i < sgl_dst->sg_nseg; i++) { 1118 seg = &sgl_dst->sg_segs[i]; 1119 /* crp lengths are int, so 32-bit min() is ok. */ 1120 nb = min(remaining, seg->ss_len); 1121 1122 error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM, 1123 lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP, 1124 CCP_PASSTHRU_BITWISE_NOOP, 1125 (cctx != NULL) ? (nb == remaining) : false, 1126 (nb == remaining) ? cctx : NULL); 1127 if (error != 0) 1128 return (error); 1129 1130 remaining -= nb; 1131 lsbaddr += nb; 1132 if (remaining == 0) 1133 break; 1134 } 1135 1136 return (0); 1137 } 1138 1139 static void 1140 byteswap256(uint64_t *buffer) 1141 { 1142 uint64_t t; 1143 1144 t = bswap64(buffer[3]); 1145 buffer[3] = bswap64(buffer[0]); 1146 buffer[0] = t; 1147 1148 t = bswap64(buffer[2]); 1149 buffer[2] = bswap64(buffer[1]); 1150 buffer[1] = t; 1151 } 1152 1153 /* 1154 * Translate CCP internal LSB hash format into a standard hash ouput. 1155 * 1156 * Manipulates input buffer with byteswap256 operation. 
1157 */ 1158 static void 1159 ccp_sha_copy_result(char *output, char *buffer, enum sha_version version) 1160 { 1161 const struct SHA_Defn *defn; 1162 size_t i; 1163 1164 for (i = 0; i < nitems(SHA_definitions); i++) 1165 if (SHA_definitions[i].version == version) 1166 break; 1167 if (i == nitems(SHA_definitions)) 1168 panic("bogus sha version auth_mode %u\n", (unsigned)version); 1169 1170 defn = &SHA_definitions[i]; 1171 1172 /* Swap 256bit manually -- DMA engine can, but with limitations */ 1173 byteswap256((void *)buffer); 1174 if (defn->axf->hashsize > LSB_ENTRY_SIZE) 1175 byteswap256((void *)(buffer + LSB_ENTRY_SIZE)); 1176 1177 switch (defn->version) { 1178 case SHA1: 1179 memcpy(output, buffer + 12, defn->axf->hashsize); 1180 break; 1181 #if 0 1182 case SHA2_224: 1183 memcpy(output, buffer + XXX, defn->axf->hashsize); 1184 break; 1185 #endif 1186 case SHA2_256: 1187 memcpy(output, buffer, defn->axf->hashsize); 1188 break; 1189 case SHA2_384: 1190 memcpy(output, 1191 buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize, 1192 defn->axf->hashsize - LSB_ENTRY_SIZE); 1193 memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer, 1194 LSB_ENTRY_SIZE); 1195 break; 1196 case SHA2_512: 1197 memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE); 1198 memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE); 1199 break; 1200 } 1201 } 1202 1203 static void 1204 ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s, 1205 struct cryptop *crp, int error) 1206 { 1207 char ihash[SHA2_512_HASH_LEN /* max hash len */]; 1208 union authctx auth_ctx; 1209 const struct auth_hash *axf; 1210 1211 axf = s->hmac.auth_hash; 1212 1213 s->pending--; 1214 1215 if (error != 0) { 1216 crp->crp_etype = error; 1217 goto out; 1218 } 1219 1220 /* Do remaining outer hash over small inner hash in software */ 1221 axf->Init(&auth_ctx); 1222 axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize); 1223 ccp_sha_copy_result(ihash, s->hmac.res, s->hmac.auth_mode); 1224 #if 0 1225 INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__, 1226 (u_char *)ihash, " "); 1227 #endif 1228 axf->Update(&auth_ctx, ihash, axf->hashsize); 1229 axf->Final(s->hmac.res, &auth_ctx); 1230 1231 if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) { 1232 crypto_copydata(crp, crp->crp_digest_start, s->hmac.hash_len, 1233 ihash); 1234 if (timingsafe_bcmp(s->hmac.res, ihash, s->hmac.hash_len) != 0) 1235 crp->crp_etype = EBADMSG; 1236 } else 1237 crypto_copyback(crp, crp->crp_digest_start, s->hmac.hash_len, 1238 s->hmac.res); 1239 1240 /* Avoid leaking key material */ 1241 explicit_bzero(&auth_ctx, sizeof(auth_ctx)); 1242 explicit_bzero(s->hmac.res, sizeof(s->hmac.res)); 1243 1244 out: 1245 crypto_done(crp); 1246 } 1247 1248 static void 1249 ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1250 int error) 1251 { 1252 struct cryptop *crp; 1253 1254 crp = vcrp; 1255 ccp_do_hmac_done(qp, s, crp, error); 1256 } 1257 1258 static int __must_check 1259 ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, 1260 const struct ccp_completion_ctx *cctx) 1261 { 1262 device_t dev; 1263 const struct auth_hash *axf; 1264 int error; 1265 1266 dev = qp->cq_softc->dev; 1267 axf = s->hmac.auth_hash; 1268 1269 /* 1270 * Populate the SGL describing inside hash contents. We want to hash 1271 * the ipad (key XOR fixed bit pattern) concatenated with the user 1272 * data. 
1273 */ 1274 sglist_reset(qp->cq_sg_ulptx); 1275 error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize); 1276 if (error != 0) 1277 return (error); 1278 if (crp->crp_aad_length != 0) { 1279 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1280 crp->crp_aad_start, crp->crp_aad_length); 1281 if (error != 0) 1282 return (error); 1283 } 1284 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1285 crp->crp_payload_start, crp->crp_payload_length); 1286 if (error != 0) { 1287 DPRINTF(dev, "%s: sglist too short\n", __func__); 1288 return (error); 1289 } 1290 /* Populate SGL for output -- use hmac.res buffer. */ 1291 sglist_reset(qp->cq_sg_dst); 1292 error = sglist_append(qp->cq_sg_dst, s->hmac.res, 1293 roundup2(axf->hashsize, LSB_ENTRY_SIZE)); 1294 if (error != 0) 1295 return (error); 1296 1297 error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst, 1298 cctx); 1299 if (error != 0) { 1300 DPRINTF(dev, "%s: ccp_sha error\n", __func__); 1301 return (error); 1302 } 1303 return (0); 1304 } 1305 1306 int __must_check 1307 ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1308 { 1309 struct ccp_completion_ctx ctx; 1310 1311 ctx.callback_fn = ccp_hmac_done; 1312 ctx.callback_arg = crp; 1313 ctx.session = s; 1314 1315 return (ccp_do_hmac(qp, s, crp, &ctx)); 1316 } 1317 1318 static void 1319 ccp_byteswap(char *data, size_t len) 1320 { 1321 size_t i; 1322 char t; 1323 1324 len--; 1325 for (i = 0; i < len; i++, len--) { 1326 t = data[i]; 1327 data[i] = data[len]; 1328 data[len] = t; 1329 } 1330 } 1331 1332 static void 1333 ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1334 int error) 1335 { 1336 struct cryptop *crp; 1337 1338 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv)); 1339 1340 crp = vcrp; 1341 1342 s->pending--; 1343 1344 if (error != 0) 1345 crp->crp_etype = error; 1346 1347 DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp); 1348 crypto_done(crp); 1349 } 1350 1351 static void 1352 ccp_collect_iv(struct cryptop *crp, const struct crypto_session_params *csp, 1353 char *iv) 1354 { 1355 1356 crypto_read_iv(crp, iv); 1357 1358 /* 1359 * Append an explicit counter of 1 for GCM. 1360 */ 1361 if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16) 1362 *(uint32_t *)&iv[12] = htobe32(1); 1363 1364 if (csp->csp_cipher_alg == CRYPTO_AES_XTS && 1365 csp->csp_ivlen < AES_BLOCK_LEN) 1366 memset(&iv[csp->csp_ivlen], 0, AES_BLOCK_LEN - csp->csp_ivlen); 1367 1368 /* Reverse order of IV material for HW */ 1369 INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__, iv, " ", 1370 csp->csp_ivlen); 1371 1372 /* 1373 * For unknown reasons, XTS mode expects the IV in the reverse byte 1374 * order to every other AES mode. 
1375 */ 1376 if (csp->csp_cipher_alg != CRYPTO_AES_XTS) 1377 ccp_byteswap(iv, AES_BLOCK_LEN); 1378 } 1379 1380 static int __must_check 1381 ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src, 1382 size_t len) 1383 { 1384 int error; 1385 1386 sglist_reset(qp->cq_sg_ulptx); 1387 error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len); 1388 if (error != 0) 1389 return (error); 1390 1391 error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len, 1392 false, NULL); 1393 return (error); 1394 } 1395 1396 static int __must_check 1397 ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, 1398 enum ccp_cipher_dir dir, const struct ccp_completion_ctx *cctx) 1399 { 1400 struct ccp_desc *desc; 1401 device_t dev; 1402 unsigned i; 1403 enum ccp_xts_unitsize usize; 1404 1405 /* IV and Key data are already loaded */ 1406 1407 dev = qp->cq_softc->dev; 1408 1409 for (i = 0; i < nitems(ccp_xts_unitsize_map); i++) 1410 if (ccp_xts_unitsize_map[i].cxu_size == 1411 crp->crp_payload_length) { 1412 usize = ccp_xts_unitsize_map[i].cxu_id; 1413 break; 1414 } 1415 if (i >= nitems(ccp_xts_unitsize_map)) 1416 return (EINVAL); 1417 1418 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1419 struct sglist_seg *seg; 1420 1421 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1422 1423 desc = &qp->desc_ring[qp->cq_tail]; 1424 desc->engine = CCP_ENGINE_XTS_AES; 1425 desc->som = (i == 0); 1426 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1); 1427 desc->ioc = (desc->eom && cctx != NULL); 1428 DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n", 1429 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom, 1430 (int)desc->ioc, (int)dir); 1431 1432 if (desc->ioc) 1433 memcpy(&qp->completions_ring[qp->cq_tail], cctx, 1434 sizeof(*cctx)); 1435 1436 desc->aes_xts.encrypt = dir; 1437 desc->aes_xts.type = s->blkcipher.cipher_type; 1438 desc->aes_xts.size = usize; 1439 1440 DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__, 1441 qp->cq_tail, (unsigned)desc->aes_xts.type, 1442 (unsigned)desc->aes_xts.size); 1443 1444 desc->length = seg->ss_len; 1445 desc->src_lo = (uint32_t)seg->ss_paddr; 1446 desc->src_hi = (seg->ss_paddr >> 32); 1447 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1448 1449 /* Crypt in-place */ 1450 desc->dst_lo = desc->src_lo; 1451 desc->dst_hi = desc->src_hi; 1452 desc->dst_mem = desc->src_mem; 1453 1454 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1455 desc->key_hi = 0; 1456 desc->key_mem = CCP_MEMTYPE_SB; 1457 1458 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1459 1460 qp->cq_tail = (qp->cq_tail + 1) % 1461 (1 << qp->cq_softc->ring_size_order); 1462 } 1463 return (0); 1464 } 1465 1466 static int __must_check 1467 ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s, 1468 struct cryptop *crp, const struct ccp_completion_ctx *cctx) 1469 { 1470 const struct crypto_session_params *csp; 1471 struct ccp_desc *desc; 1472 char *keydata; 1473 device_t dev; 1474 enum ccp_cipher_dir dir; 1475 int error, iv_len; 1476 size_t keydata_len; 1477 unsigned i, j; 1478 1479 dev = qp->cq_softc->dev; 1480 1481 if (s->blkcipher.key_len == 0 || crp->crp_payload_length == 0) { 1482 DPRINTF(dev, "%s: empty\n", __func__); 1483 return (EINVAL); 1484 } 1485 if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) { 1486 DPRINTF(dev, "%s: len modulo: %d\n", __func__, 1487 crp->crp_payload_length); 1488 return (EINVAL); 1489 } 1490 1491 /* 1492 * Individual segments must be multiples of AES block size for the HW 1493 * to process it. 
Non-compliant inputs aren't bogus, just not doable 1494 * on this hardware. 1495 */ 1496 for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++) 1497 if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) { 1498 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__, 1499 qp->cq_sg_crp->sg_segs[i].ss_len); 1500 return (EINVAL); 1501 } 1502 1503 /* Gather IV/nonce data */ 1504 csp = crypto_get_params(crp->crp_session); 1505 ccp_collect_iv(crp, csp, s->blkcipher.iv); 1506 iv_len = csp->csp_ivlen; 1507 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) 1508 iv_len = AES_BLOCK_LEN; 1509 1510 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1511 dir = CCP_CIPHER_DIR_ENCRYPT; 1512 else 1513 dir = CCP_CIPHER_DIR_DECRYPT; 1514 1515 /* Set up passthrough op(s) to copy IV into LSB */ 1516 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 1517 s->blkcipher.iv, iv_len); 1518 if (error != 0) 1519 return (error); 1520 1521 /* 1522 * Initialize keydata and keydata_len for GCC. The default case of the 1523 * following switch is impossible to reach, but GCC doesn't know that. 1524 */ 1525 keydata_len = 0; 1526 keydata = NULL; 1527 1528 switch (csp->csp_cipher_alg) { 1529 case CRYPTO_AES_XTS: 1530 for (j = 0; j < nitems(ccp_xts_unitsize_map); j++) 1531 if (ccp_xts_unitsize_map[j].cxu_size == 1532 crp->crp_payload_length) 1533 break; 1534 /* Input buffer must be a supported UnitSize */ 1535 if (j >= nitems(ccp_xts_unitsize_map)) { 1536 device_printf(dev, "%s: rejected block size: %u\n", 1537 __func__, crp->crp_payload_length); 1538 return (EOPNOTSUPP); 1539 } 1540 /* FALLTHROUGH */ 1541 case CRYPTO_AES_CBC: 1542 case CRYPTO_AES_ICM: 1543 keydata = s->blkcipher.enckey; 1544 keydata_len = s->blkcipher.key_len; 1545 break; 1546 } 1547 1548 INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len, 1549 keydata, " "); 1550 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) 1551 INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " "); 1552 1553 /* Reverse order of key material for HW */ 1554 ccp_byteswap(keydata, keydata_len); 1555 1556 /* Store key material into LSB to avoid page boundaries */ 1557 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) { 1558 /* 1559 * XTS mode uses 2 256-bit vectors for the primary key and the 1560 * tweak key. For 128-bit keys, the vectors are zero-padded. 1561 * 1562 * After byteswapping the combined OCF-provided K1:K2 vector 1563 * above, we need to reverse the order again so the hardware 1564 * gets the swapped keys in the order K1':K2'. 1565 */ 1566 error = ccp_do_pst_to_lsb(qp, 1567 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata, 1568 keydata_len / 2); 1569 if (error != 0) 1570 return (error); 1571 error = ccp_do_pst_to_lsb(qp, 1572 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), 1573 keydata + (keydata_len / 2), keydata_len / 2); 1574 1575 /* Zero-pad 128 bit keys */ 1576 if (keydata_len == 32) { 1577 if (error != 0) 1578 return (error); 1579 error = ccp_do_pst_to_lsb(qp, 1580 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) + 1581 keydata_len / 2, g_zeroes, keydata_len / 2); 1582 if (error != 0) 1583 return (error); 1584 error = ccp_do_pst_to_lsb(qp, 1585 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) + 1586 keydata_len / 2, g_zeroes, keydata_len / 2); 1587 } 1588 } else 1589 error = ccp_do_pst_to_lsb(qp, 1590 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata, 1591 keydata_len); 1592 if (error != 0) 1593 return (error); 1594 1595 /* 1596 * Point SGLs at the subset of cryptop buffer contents representing the 1597 * data. 
1598 */ 1599 sglist_reset(qp->cq_sg_ulptx); 1600 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1601 crp->crp_payload_start, crp->crp_payload_length); 1602 if (error != 0) 1603 return (error); 1604 1605 INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__, 1606 (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " "); 1607 1608 DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail); 1609 1610 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg) 1611 return (EAGAIN); 1612 1613 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) 1614 return (ccp_do_xts(qp, s, crp, dir, cctx)); 1615 1616 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1617 struct sglist_seg *seg; 1618 1619 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1620 1621 desc = &qp->desc_ring[qp->cq_tail]; 1622 desc->engine = CCP_ENGINE_AES; 1623 desc->som = (i == 0); 1624 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1); 1625 desc->ioc = (desc->eom && cctx != NULL); 1626 DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n", 1627 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom, 1628 (int)desc->ioc, (int)dir); 1629 1630 if (desc->ioc) 1631 memcpy(&qp->completions_ring[qp->cq_tail], cctx, 1632 sizeof(*cctx)); 1633 1634 desc->aes.encrypt = dir; 1635 desc->aes.mode = s->blkcipher.cipher_mode; 1636 desc->aes.type = s->blkcipher.cipher_type; 1637 if (csp->csp_cipher_alg == CRYPTO_AES_ICM) 1638 /* 1639 * Size of CTR value in bits, - 1. ICM mode uses all 1640 * 128 bits as counter. 1641 */ 1642 desc->aes.size = 127; 1643 1644 DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__, 1645 qp->cq_tail, (unsigned)desc->aes.mode, 1646 (unsigned)desc->aes.type, (unsigned)desc->aes.size); 1647 1648 desc->length = seg->ss_len; 1649 desc->src_lo = (uint32_t)seg->ss_paddr; 1650 desc->src_hi = (seg->ss_paddr >> 32); 1651 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1652 1653 /* Crypt in-place */ 1654 desc->dst_lo = desc->src_lo; 1655 desc->dst_hi = desc->src_hi; 1656 desc->dst_mem = desc->src_mem; 1657 1658 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1659 desc->key_hi = 0; 1660 desc->key_mem = CCP_MEMTYPE_SB; 1661 1662 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1663 1664 qp->cq_tail = (qp->cq_tail + 1) % 1665 (1 << qp->cq_softc->ring_size_order); 1666 } 1667 return (0); 1668 } 1669 1670 int __must_check 1671 ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1672 { 1673 struct ccp_completion_ctx ctx; 1674 1675 ctx.callback_fn = ccp_blkcipher_done; 1676 ctx.session = s; 1677 ctx.callback_arg = crp; 1678 1679 return (ccp_do_blkcipher(qp, s, crp, &ctx)); 1680 } 1681 1682 static void 1683 ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1684 int error) 1685 { 1686 struct cryptop *crp; 1687 1688 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv)); 1689 1690 crp = vcrp; 1691 1692 ccp_do_hmac_done(qp, s, crp, error); 1693 } 1694 1695 int __must_check 1696 ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1697 { 1698 struct ccp_completion_ctx ctx; 1699 int error; 1700 1701 ctx.callback_fn = ccp_authenc_done; 1702 ctx.session = s; 1703 ctx.callback_arg = crp; 1704 1705 /* Perform first operation */ 1706 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1707 error = ccp_do_blkcipher(qp, s, crp, NULL); 1708 else 1709 error = ccp_do_hmac(qp, s, crp, NULL); 1710 if (error != 0) 1711 return (error); 1712 1713 /* Perform second operation */ 1714 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1715 error = ccp_do_hmac(qp, s, crp, &ctx); 1716 
else 1717 error = ccp_do_blkcipher(qp, s, crp, &ctx); 1718 return (error); 1719 } 1720 1721 static int __must_check 1722 ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s) 1723 { 1724 struct ccp_desc *desc; 1725 struct sglist_seg *seg; 1726 unsigned i; 1727 1728 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg) 1729 return (EAGAIN); 1730 1731 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1732 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1733 1734 desc = &qp->desc_ring[qp->cq_tail]; 1735 1736 desc->engine = CCP_ENGINE_AES; 1737 desc->aes.mode = CCP_AES_MODE_GHASH; 1738 desc->aes.type = s->blkcipher.cipher_type; 1739 desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD; 1740 1741 desc->som = (i == 0); 1742 desc->length = seg->ss_len; 1743 1744 desc->src_lo = (uint32_t)seg->ss_paddr; 1745 desc->src_hi = (seg->ss_paddr >> 32); 1746 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1747 1748 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1749 1750 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1751 desc->key_mem = CCP_MEMTYPE_SB; 1752 1753 qp->cq_tail = (qp->cq_tail + 1) % 1754 (1 << qp->cq_softc->ring_size_order); 1755 } 1756 return (0); 1757 } 1758 1759 static int __must_check 1760 ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s, 1761 enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom) 1762 { 1763 struct ccp_desc *desc; 1764 1765 if (ccp_queue_get_ring_space(qp) == 0) 1766 return (EAGAIN); 1767 1768 desc = &qp->desc_ring[qp->cq_tail]; 1769 1770 desc->engine = CCP_ENGINE_AES; 1771 desc->aes.mode = CCP_AES_MODE_GCTR; 1772 desc->aes.type = s->blkcipher.cipher_type; 1773 desc->aes.encrypt = dir; 1774 desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1; 1775 1776 desc->som = som; 1777 desc->eom = eom; 1778 1779 /* Trailing bytes will be masked off by aes.size above. 
*/ 1780 desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN); 1781 1782 desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr; 1783 desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32; 1784 desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM; 1785 1786 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1787 1788 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1789 desc->key_mem = CCP_MEMTYPE_SB; 1790 1791 qp->cq_tail = (qp->cq_tail + 1) % 1792 (1 << qp->cq_softc->ring_size_order); 1793 return (0); 1794 } 1795 1796 static int __must_check 1797 ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s) 1798 { 1799 struct ccp_desc *desc; 1800 1801 if (ccp_queue_get_ring_space(qp) == 0) 1802 return (EAGAIN); 1803 1804 desc = &qp->desc_ring[qp->cq_tail]; 1805 1806 desc->engine = CCP_ENGINE_AES; 1807 desc->aes.mode = CCP_AES_MODE_GHASH; 1808 desc->aes.type = s->blkcipher.cipher_type; 1809 desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL; 1810 1811 desc->length = GMAC_BLOCK_LEN; 1812 1813 desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN); 1814 desc->src_mem = CCP_MEMTYPE_SB; 1815 1816 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1817 1818 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1819 desc->key_mem = CCP_MEMTYPE_SB; 1820 1821 desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH); 1822 desc->dst_mem = CCP_MEMTYPE_SB; 1823 1824 qp->cq_tail = (qp->cq_tail + 1) % 1825 (1 << qp->cq_softc->ring_size_order); 1826 return (0); 1827 } 1828 1829 static void 1830 ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1831 int error) 1832 { 1833 char tag[GMAC_DIGEST_LEN]; 1834 struct cryptop *crp; 1835 1836 crp = vcrp; 1837 1838 s->pending--; 1839 1840 if (error != 0) { 1841 crp->crp_etype = error; 1842 goto out; 1843 } 1844 1845 /* Encrypt is done. Decrypt needs to verify tag. */ 1846 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1847 goto out; 1848 1849 /* Copy in message tag. 
*/ 1850 crypto_copydata(crp, crp->crp_digest_start, s->gmac.hash_len, tag); 1851 1852 /* Verify tag against computed GMAC */ 1853 if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0) 1854 crp->crp_etype = EBADMSG; 1855 1856 out: 1857 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv)); 1858 explicit_bzero(&s->gmac.final_block, sizeof(s->gmac.final_block)); 1859 crypto_done(crp); 1860 } 1861 1862 int __must_check 1863 ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1864 { 1865 const struct crypto_session_params *csp; 1866 struct ccp_completion_ctx ctx; 1867 enum ccp_cipher_dir dir; 1868 device_t dev; 1869 unsigned i; 1870 int error; 1871 1872 if (s->blkcipher.key_len == 0) 1873 return (EINVAL); 1874 1875 dev = qp->cq_softc->dev; 1876 1877 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1878 dir = CCP_CIPHER_DIR_ENCRYPT; 1879 else 1880 dir = CCP_CIPHER_DIR_DECRYPT; 1881 1882 /* Zero initial GHASH portion of context */ 1883 memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv)); 1884 1885 /* Gather IV data */ 1886 csp = crypto_get_params(crp->crp_session); 1887 ccp_collect_iv(crp, csp, s->blkcipher.iv); 1888 1889 /* Reverse order of key material for HW */ 1890 ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len); 1891 1892 /* Prepare input buffer of concatenated lengths for final GHASH */ 1893 be64enc(s->gmac.final_block, (uint64_t)crp->crp_aad_length * 8); 1894 be64enc(&s->gmac.final_block[8], (uint64_t)crp->crp_payload_length * 8); 1895 1896 /* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */ 1897 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 1898 s->blkcipher.iv, 32); 1899 if (error != 0) 1900 return (error); 1901 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), 1902 s->blkcipher.enckey, s->blkcipher.key_len); 1903 if (error != 0) 1904 return (error); 1905 error = ccp_do_pst_to_lsb(qp, 1906 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block, 1907 GMAC_BLOCK_LEN); 1908 if (error != 0) 1909 return (error); 1910 1911 /* First step - compute GHASH over AAD */ 1912 if (crp->crp_aad_length != 0) { 1913 sglist_reset(qp->cq_sg_ulptx); 1914 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1915 crp->crp_aad_start, crp->crp_aad_length); 1916 if (error != 0) 1917 return (error); 1918 1919 /* This engine cannot process non-block multiple AAD data. */ 1920 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) 1921 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % 1922 GMAC_BLOCK_LEN) != 0) { 1923 DPRINTF(dev, "%s: AD seg modulo: %zu\n", 1924 __func__, 1925 qp->cq_sg_ulptx->sg_segs[i].ss_len); 1926 return (EINVAL); 1927 } 1928 1929 error = ccp_do_ghash_aad(qp, s); 1930 if (error != 0) 1931 return (error); 1932 } 1933 1934 /* Feed data piece by piece into GCTR */ 1935 sglist_reset(qp->cq_sg_ulptx); 1936 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1937 crp->crp_payload_start, crp->crp_payload_length); 1938 if (error != 0) 1939 return (error); 1940 1941 /* 1942 * All segments except the last must be even multiples of AES block 1943 * size for the HW to process it. Non-compliant inputs aren't bogus, 1944 * just not doable on this hardware. 1945 * 1946 * XXX: Well, the hardware will produce a valid tag for shorter final 1947 * segment inputs, but it will still write out a block-sized plaintext 1948 * or ciphertext chunk. For a typical CRP this tramples trailing data, 1949 * including the provided message tag. So, reject such inputs for now. 
#define	MAX_TRNG_RETRIES	10
u_int
random_ccp_read(void *v, u_int c)
{
	uint32_t *buf;
	u_int i, j;

	KASSERT(c % sizeof(*buf) == 0, ("%u not multiple of uint32_t", c));

	buf = v;
	for (i = c; i > 0; i -= sizeof(*buf)) {
		for (j = 0; j < MAX_TRNG_RETRIES; j++) {
			*buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET);
			if (*buf != 0)
				break;
		}
		if (j == MAX_TRNG_RETRIES)
			return (0);
		buf++;
	}
	return (c);
}
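/*
 * Illustrative sketch only, never called: random_ccp_read() expects the
 * byte count to be a multiple of four and returns either the full count or
 * 0 if some word read back as zero for MAX_TRNG_RETRIES attempts.  A
 * hypothetical caller might check for that like this.
 */
static bool __unused
ccp_trng_read_sketch(uint32_t words[4])
{

	/* Full byte count on success, 0 if the TRNG yielded nothing. */
	return (random_ccp_read(words, sizeof(uint32_t) * 4) != 0);
}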
#ifdef DDB
void
db_ccp_show_hw(struct ccp_softc *sc)
{

	db_printf(" queue mask: 0x%x\n",
	    ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET));
	db_printf(" queue prio: 0x%x\n",
	    ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET));
	db_printf(" reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET));
	db_printf(" trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET));
	db_printf(" cmd timeout: 0x%x\n",
	    ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET));
	db_printf(" lsb public mask lo: 0x%x\n",
	    ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET));
	db_printf(" lsb public mask hi: 0x%x\n",
	    ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET));
	db_printf(" lsb private mask lo: 0x%x\n",
	    ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET));
	db_printf(" lsb private mask hi: 0x%x\n",
	    ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET));
	db_printf(" version: 0x%x\n", ccp_read_4(sc, VERSION_REG));
}

void
db_ccp_show_queue_hw(struct ccp_queue *qp)
{
	const struct ccp_error_code *ec;
	struct ccp_softc *sc;
	uint32_t status, error, esource, faultblock, headlo, qcontrol;
	unsigned q, i;

	sc = qp->cq_softc;
	q = qp->cq_qindex;

	qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE);
	db_printf(" qcontrol: 0x%x%s%s\n", qcontrol,
	    (qcontrol & CMD_Q_RUN) ? " RUN" : "",
	    (qcontrol & CMD_Q_HALTED) ? " HALTED" : "");
	db_printf(" tail_lo: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE));
	headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
	db_printf(" head_lo: 0x%x\n", headlo);
	db_printf(" int enable: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE));
	db_printf(" interrupt status: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE));
	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
	db_printf(" status: 0x%x\n", status);
	db_printf(" int stats: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE));

	error = status & STATUS_ERROR_MASK;
	if (error == 0)
		return;

	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
	    STATUS_ERRORSOURCE_MASK;
	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
	    STATUS_VLSB_FAULTBLOCK_MASK;

	ec = NULL;
	for (i = 0; i < nitems(ccp_error_codes); i++)
		if (ccp_error_codes[i].ce_code == error)
			break;
	if (i < nitems(ccp_error_codes))
		ec = &ccp_error_codes[i];

	db_printf(" Error: %s (%u) Source: %u Faulting LSB block: %u\n",
	    (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
	    faultblock);
	if (ec != NULL)
		db_printf(" Error description: %s\n", ec->ce_desc);

	i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
	db_printf(" Bad descriptor idx: %u contents:\n %32D\n", i,
	    (void *)&qp->desc_ring[i], " ");
}
#endif
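#ifdef DDB
/*
 * Illustrative sketch only, not used above: CMD_Q_HEAD_LO_BASE holds the
 * low 32 bits of the bus address the queue is currently fetching from, so
 * the faulting descriptor index printed by db_ccp_show_queue_hw() is the
 * offset from the ring's bus address divided by the descriptor size.  The
 * helper name is hypothetical.
 */
static inline unsigned __unused
ccp_queue_head_index_sketch(const struct ccp_queue *qp, uint32_t headlo)
{

	return ((headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE);
}
#endif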