1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2017 Chelsio Communications, Inc. 5 * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org> 6 * All rights reserved. 7 * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org> 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include "opt_ddb.h" 33 34 #include <sys/param.h> 35 #include <sys/bus.h> 36 #include <sys/lock.h> 37 #include <sys/kernel.h> 38 #include <sys/malloc.h> 39 #include <sys/mutex.h> 40 #include <sys/module.h> 41 #include <sys/rman.h> 42 #include <sys/sglist.h> 43 #include <sys/sysctl.h> 44 45 #ifdef DDB 46 #include <ddb/ddb.h> 47 #endif 48 49 #include <dev/pci/pcireg.h> 50 #include <dev/pci/pcivar.h> 51 52 #include <machine/bus.h> 53 #include <machine/resource.h> 54 #include <machine/vmparam.h> 55 56 #include <opencrypto/cryptodev.h> 57 #include <opencrypto/xform.h> 58 59 #include <vm/vm.h> 60 #include <vm/pmap.h> 61 62 #include "cryptodev_if.h" 63 64 #include "ccp.h" 65 #include "ccp_hardware.h" 66 #include "ccp_lsb.h" 67 68 CTASSERT(sizeof(struct ccp_desc) == 32); 69 70 static struct ccp_xts_unitsize_map_entry { 71 enum ccp_xts_unitsize cxu_id; 72 unsigned cxu_size; 73 } ccp_xts_unitsize_map[] = { 74 { CCP_XTS_AES_UNIT_SIZE_16, 16 }, 75 { CCP_XTS_AES_UNIT_SIZE_512, 512 }, 76 { CCP_XTS_AES_UNIT_SIZE_1024, 1024 }, 77 { CCP_XTS_AES_UNIT_SIZE_2048, 2048 }, 78 { CCP_XTS_AES_UNIT_SIZE_4096, 4096 }, 79 }; 80 81 SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 82 "ccp node"); 83 84 unsigned g_ccp_ring_order = 11; 85 SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order, 86 0, "Set CCP ring order. (1 << this) == ring size. 
Min: 6, Max: 16"); 87 88 /* 89 * Zero buffer, sufficient for padding LSB entries, that does not span a page 90 * boundary 91 */ 92 static const char g_zeroes[32] __aligned(32); 93 94 static inline uint32_t 95 ccp_read_4(struct ccp_softc *sc, uint32_t offset) 96 { 97 return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset)); 98 } 99 100 static inline void 101 ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value) 102 { 103 bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value); 104 } 105 106 static inline uint32_t 107 ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset) 108 { 109 /* 110 * Each queue gets its own 4kB register space. Queue 0 is at 0x1000. 111 */ 112 return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset)); 113 } 114 115 static inline void 116 ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset, 117 uint32_t value) 118 { 119 ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value); 120 } 121 122 void 123 ccp_queue_write_tail(struct ccp_queue *qp) 124 { 125 ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE, 126 ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail)); 127 } 128 129 /* 130 * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of 131 * that entry for the queue's private LSB region. 132 */ 133 static inline uint8_t 134 ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry) 135 { 136 return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry)); 137 } 138 139 /* 140 * Given a queue and a reserved LSB entry index, compute the LSB *address* of 141 * that entry for the queue's private LSB region. 142 */ 143 static inline uint32_t 144 ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry) 145 { 146 return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE); 147 } 148 149 /* 150 * Some terminology: 151 * 152 * LSB - Local Storage Block 153 * ========================= 154 * 155 * 8 segments/regions, each containing 16 entries. 156 * 157 * Each entry contains 256 bits (32 bytes). 158 * 159 * Segments are virtually addressed in commands, but accesses cannot cross 160 * segment boundaries. Virtual map uses an identity mapping by default 161 * (virtual segment N corresponds to physical segment N). 162 * 163 * Access to a physical region can be restricted to any subset of all five 164 * queues. 165 * 166 * "Pass-through" mode 167 * =================== 168 * 169 * Pass-through is a generic DMA engine, much like ioat(4). Some nice 170 * features: 171 * 172 * - Supports byte-swapping for endian conversion (32- or 256-bit words) 173 * - AND, OR, XOR with fixed 256-bit mask 174 * - CRC32 of data (may be used in tandem with bswap, but not bit operations) 175 * - Read/write of LSB 176 * - Memset 177 * 178 * If bit manipulation mode is enabled, input must be a multiple of 256 bits 179 * (32 bytes). 180 * 181 * If byte-swapping is enabled, input must be a multiple of the word size. 182 * 183 * Zlib mode -- only usable from one queue at a time, single job at a time. 184 * ======================================================================== 185 * 186 * Only usable from private host, aka PSP? Not host processor? 187 * 188 * RNG. 189 * ==== 190 * 191 * Raw bits are conditioned with AES and fed through CTR_DRBG. Output goes in 192 * a ring buffer readable by software. 
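 * A read of zero from the TRNG output register is treated as "no output
 * available yet"; random_ccp_read() below retries such reads up to
 * MAX_TRNG_RETRIES times before giving up.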
193 * 194 * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are 195 * implemented on the raw input stream and may be enabled to verify min-entropy 196 * of 0.5 bits per bit. 197 */ 198 199 static void 200 ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 201 { 202 bus_addr_t *baddr; 203 204 KASSERT(error == 0, ("%s: error:%d", __func__, error)); 205 baddr = arg; 206 *baddr = segs->ds_addr; 207 } 208 209 static int 210 ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue) 211 { 212 struct ccp_softc *sc; 213 struct ccp_queue *qp; 214 void *desc; 215 size_t ringsz, num_descriptors; 216 int error; 217 218 desc = NULL; 219 sc = device_get_softc(dev); 220 qp = &sc->queues[queue]; 221 222 /* 223 * Don't bother allocating a ring for queues the host isn't allowed to 224 * drive. 225 */ 226 if ((sc->valid_queues & (1 << queue)) == 0) 227 return (0); 228 229 ccp_queue_decode_lsb_regions(sc, lsbmask, queue); 230 231 /* Ignore queues that do not have any LSB access. */ 232 if (qp->lsb_mask == 0) { 233 device_printf(dev, "Ignoring queue %u with no LSB access\n", 234 queue); 235 sc->valid_queues &= ~(1 << queue); 236 return (0); 237 } 238 239 num_descriptors = 1 << sc->ring_size_order; 240 ringsz = sizeof(struct ccp_desc) * num_descriptors; 241 242 /* 243 * "Queue_Size" is order - 1. 244 * 245 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits. 246 */ 247 error = bus_dma_tag_create(bus_get_dma_tag(dev), 248 1 << (5 + sc->ring_size_order), 249 #if defined(__i386__) && !defined(PAE) 250 0, BUS_SPACE_MAXADDR, 251 #else 252 (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT, 253 #endif 254 BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1, 255 ringsz, 0, NULL, NULL, &qp->ring_desc_tag); 256 if (error != 0) 257 goto out; 258 259 error = bus_dmamem_alloc(qp->ring_desc_tag, &desc, 260 BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map); 261 if (error != 0) 262 goto out; 263 264 error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc, 265 ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK); 266 if (error != 0) 267 goto out; 268 269 qp->desc_ring = desc; 270 qp->completions_ring = malloc(num_descriptors * 271 sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK); 272 273 /* Zero control register; among other things, clears the RUN flag. */ 274 qp->qcontrol = 0; 275 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol); 276 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0); 277 278 /* Clear any leftover interrupt status flags */ 279 ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE, 280 ALL_INTERRUPTS); 281 282 qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT; 283 284 ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE, 285 (uint32_t)qp->desc_ring_bus_addr); 286 ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE, 287 (uint32_t)qp->desc_ring_bus_addr); 288 289 /* 290 * Enable completion interrupts, as well as error or administrative 291 * halt interrupts. We don't use administrative halts, but they 292 * shouldn't trip unless we do, so it ought to be harmless. 
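 * ccp_intr_filter() masks this enable register while an interrupt is being
 * serviced, and ccp_intr_handler() re-arms the same three bits once every
 * valid queue has been scanned.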
293 */ 294 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 295 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED); 296 297 qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT; 298 qp->qcontrol |= CMD_Q_RUN; 299 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol); 300 301 out: 302 if (error != 0) { 303 if (qp->desc_ring != NULL) 304 bus_dmamap_unload(qp->ring_desc_tag, 305 qp->ring_desc_map); 306 if (desc != NULL) 307 bus_dmamem_free(qp->ring_desc_tag, desc, 308 qp->ring_desc_map); 309 if (qp->ring_desc_tag != NULL) 310 bus_dma_tag_destroy(qp->ring_desc_tag); 311 } 312 return (error); 313 } 314 315 static void 316 ccp_hw_detach_queue(device_t dev, unsigned queue) 317 { 318 struct ccp_softc *sc; 319 struct ccp_queue *qp; 320 321 sc = device_get_softc(dev); 322 qp = &sc->queues[queue]; 323 324 /* 325 * Don't bother allocating a ring for queues the host isn't allowed to 326 * drive. 327 */ 328 if ((sc->valid_queues & (1 << queue)) == 0) 329 return; 330 331 free(qp->completions_ring, M_CCP); 332 bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map); 333 bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map); 334 bus_dma_tag_destroy(qp->ring_desc_tag); 335 } 336 337 static int 338 ccp_map_pci_bar(device_t dev) 339 { 340 struct ccp_softc *sc; 341 342 sc = device_get_softc(dev); 343 344 sc->pci_resource_id = PCIR_BAR(2); 345 sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 346 &sc->pci_resource_id, RF_ACTIVE); 347 if (sc->pci_resource == NULL) { 348 device_printf(dev, "unable to allocate pci resource\n"); 349 return (ENODEV); 350 } 351 352 sc->pci_resource_id_msix = PCIR_BAR(5); 353 sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 354 &sc->pci_resource_id_msix, RF_ACTIVE); 355 if (sc->pci_resource_msix == NULL) { 356 device_printf(dev, "unable to allocate pci resource msix\n"); 357 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id, 358 sc->pci_resource); 359 return (ENODEV); 360 } 361 362 sc->pci_bus_tag = rman_get_bustag(sc->pci_resource); 363 sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource); 364 return (0); 365 } 366 367 static void 368 ccp_unmap_pci_bar(device_t dev) 369 { 370 struct ccp_softc *sc; 371 372 sc = device_get_softc(dev); 373 374 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix, 375 sc->pci_resource_msix); 376 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id, 377 sc->pci_resource); 378 } 379 380 const static struct ccp_error_code { 381 uint8_t ce_code; 382 const char *ce_name; 383 int ce_errno; 384 const char *ce_desc; 385 } ccp_error_codes[] = { 386 { 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" }, 387 { 0x03, "ILLEGAL_FUNCTION_TYPE", EIO, 388 "A non-supported function type was specified" }, 389 { 0x04, "ILLEGAL_FUNCTION_MODE", EIO, 390 "A non-supported function mode was specified" }, 391 { 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO, 392 "A CMAC type was specified when ENCRYPT was not specified" }, 393 { 0x06, "ILLEGAL_FUNCTION_SIZE", EIO, 394 "A non-supported function size was specified.\n" 395 "AES-CFB: Size was not 127 or 7;\n" 396 "3DES-CFB: Size was not 7;\n" 397 "RSA: See supported size table (7.4.2);\n" 398 "ECC: Size was greater than 576 bits." 
}, 399 { 0x07, "Zlib_MISSING_INIT_EOM", EIO, 400 "Zlib command does not have INIT and EOM set" }, 401 { 0x08, "ILLEGAL_FUNCTION_RSVD", EIO, 402 "Reserved bits in a function specification were not 0" }, 403 { 0x09, "ILLEGAL_BUFFER_LENGTH", EIO, 404 "The buffer length specified was not correct for the selected engine" 405 }, 406 { 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n" 407 "Undefined VLSB segment mapping or\n" 408 "mapping to unsupported LSB segment id" }, 409 { 0x0B, "ILLEGAL_MEM_ADDR", EFAULT, 410 "The specified source/destination buffer access was illegal:\n" 411 "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n" 412 "Data buffer not completely contained within a single segment; or\n" 413 "Pointer with Fixed=1 is not 32-bit aligned; or\n" 414 "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory." 415 }, 416 { 0x0C, "ILLEGAL_MEM_SEL", EIO, 417 "A src_mem, dst_mem, or key_mem field was illegal:\n" 418 "A field was set to a reserved value; or\n" 419 "A public command attempted to reference AXI1 (local) or GART memory; or\n" 420 "A Zlib command attempted to use the LSB." }, 421 { 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO, 422 "The specified context location was illegal:\n" 423 "Context located in a LSB location disallowed by the LSB protection masks; or\n" 424 "Context not completely contained within a single segment." }, 425 { 0x0E, "ILLEGAL_KEY_ADDR", EIO, 426 "The specified key location was illegal:\n" 427 "Key located in a LSB location disallowed by the LSB protection masks; or\n" 428 "Key not completely contained within a single segment." }, 429 { 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" }, 430 /* XXX Could fill out these descriptions too */ 431 { 0x13, "IDMA0_AXI_SLVERR", EIO, "" }, 432 { 0x14, "IDMA0_AXI_DECERR", EIO, "" }, 433 { 0x16, "IDMA1_AXI_SLVERR", EIO, "" }, 434 { 0x17, "IDMA1_AXI_DECERR", EIO, "" }, 435 { 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" }, 436 { 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" }, 437 { 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" }, 438 { 0x1D, "ZLIB_EXTRA_DATA", EIO, "" }, 439 { 0x1E, "ZLIB_BTYPE", EIO, "" }, 440 { 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" }, 441 { 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" }, 442 { 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" }, 443 { 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" }, 444 { 0x24, "ZLIB_LIMIT_REACHED", EIO, "" }, 445 { 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" }, 446 { 0x26, "ODMA0_AXI_SLVERR", EIO, "" }, 447 { 0x27, "ODMA0_AXI_DECERR", EIO, "" }, 448 { 0x29, "ODMA1_AXI_SLVERR", EIO, "" }, 449 { 0x2A, "ODMA1_AXI_DECERR", EIO, "" }, 450 { 0x2B, "LSB_PARITY_ERR", EIO, 451 "A read from the LSB encountered a parity error" }, 452 }; 453 454 static void 455 ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc) 456 { 457 struct ccp_completion_ctx *cctx; 458 const struct ccp_error_code *ec; 459 struct ccp_softc *sc; 460 uint32_t status, error, esource, faultblock; 461 unsigned q, idx; 462 int errno; 463 464 sc = qp->cq_softc; 465 q = qp->cq_qindex; 466 467 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE); 468 469 error = status & STATUS_ERROR_MASK; 470 471 /* Decode error status */ 472 ec = NULL; 473 for (idx = 0; idx < nitems(ccp_error_codes); idx++) 474 if (ccp_error_codes[idx].ce_code == error) { 475 ec = &ccp_error_codes[idx]; 476 break; 477 } 478 479 esource = (status >> STATUS_ERRORSOURCE_SHIFT) & 480 STATUS_ERRORSOURCE_MASK; 481 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) & 482 STATUS_VLSB_FAULTBLOCK_MASK; 483
device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n", 484 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource, 485 faultblock); 486 if (ec != NULL) 487 device_printf(sc->dev, "Error description: %s\n", ec->ce_desc); 488 489 /* TODO Could format the desc nicely here */ 490 idx = desc - qp->desc_ring; 491 DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx, 492 (const void *)desc, " "); 493 494 /* 495 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status, 496 * Zlib Decompress status may be interesting. 497 */ 498 499 while (true) { 500 /* Keep unused descriptors zero for next use. */ 501 memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx])); 502 503 cctx = &qp->completions_ring[idx]; 504 505 /* 506 * Restart procedure described in § 14.2.5. Could be used by HoC if we 507 * used that. 508 * 509 * Advance HEAD_LO past bad descriptor + any remaining in 510 * transaction manually, then restart queue. 511 */ 512 idx = (idx + 1) % (1 << sc->ring_size_order); 513 514 /* Callback function signals end of transaction */ 515 if (cctx->callback_fn != NULL) { 516 if (ec == NULL) 517 errno = EIO; 518 else 519 errno = ec->ce_errno; 520 /* TODO More specific error code */ 521 cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno); 522 cctx->callback_fn = NULL; 523 break; 524 } 525 } 526 527 qp->cq_head = idx; 528 qp->cq_waiting = false; 529 wakeup(&qp->cq_tail); 530 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head); 531 ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE, 532 (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE)); 533 ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol); 534 DPRINTF(sc->dev, "%s: Restarted queue\n", __func__); 535 } 536 537 static void 538 ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints) 539 { 540 struct ccp_completion_ctx *cctx; 541 struct ccp_softc *sc; 542 const struct ccp_desc *desc; 543 uint32_t headlo, idx; 544 unsigned q, completed; 545 546 sc = qp->cq_softc; 547 q = qp->cq_qindex; 548 549 mtx_lock(&qp->cq_lock); 550 551 /* 552 * Hardware HEAD_LO points to the first incomplete descriptor. Process 553 * any submitted and completed descriptors, up to but not including 554 * HEAD_LO. 555 */ 556 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE); 557 idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE; 558 559 DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx, 560 qp->cq_head); 561 completed = 0; 562 while (qp->cq_head != idx) { 563 DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head); 564 565 cctx = &qp->completions_ring[qp->cq_head]; 566 if (cctx->callback_fn != NULL) { 567 cctx->callback_fn(qp, cctx->session, 568 cctx->callback_arg, 0); 569 cctx->callback_fn = NULL; 570 } 571 572 /* Keep unused descriptors zero for next use. */ 573 memset(&qp->desc_ring[qp->cq_head], 0, 574 sizeof(qp->desc_ring[qp->cq_head])); 575 576 qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order); 577 completed++; 578 } 579 if (completed > 0) { 580 qp->cq_waiting = false; 581 wakeup(&qp->cq_tail); 582 } 583 584 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head); 585 586 /* 587 * Desc points to the first incomplete descriptor, at the time we read 588 * HEAD_LO. If there was an error flagged in interrupt status, the HW 589 * will not proceed past the erroneous descriptor by itself. 
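 * ccp_intr_handle_error() decodes the queue status register, completes the
 * offending transaction's callback with an error, then advances HEAD_LO past
 * the bad descriptor and restarts the queue.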
590 */ 591 desc = &qp->desc_ring[idx]; 592 if ((ints & INT_ERROR) != 0) 593 ccp_intr_handle_error(qp, desc); 594 595 mtx_unlock(&qp->cq_lock); 596 } 597 598 static void 599 ccp_intr_handler(void *arg) 600 { 601 struct ccp_softc *sc = arg; 602 size_t i; 603 uint32_t ints; 604 605 DPRINTF(sc->dev, "%s: interrupt\n", __func__); 606 607 /* 608 * We get one global interrupt per PCI device, shared over all of 609 * its queues. Scan each valid queue on interrupt for flags indicating 610 * activity. 611 */ 612 for (i = 0; i < nitems(sc->queues); i++) { 613 if ((sc->valid_queues & (1 << i)) == 0) 614 continue; 615 616 ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE); 617 if (ints == 0) 618 continue; 619 620 #if 0 621 DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__, 622 (unsigned)ints, i); 623 #endif 624 /* Write back 1s to clear interrupt status bits. */ 625 ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints); 626 627 /* 628 * If there was an error, we still need to run completions on 629 * any descriptors prior to the error. The completions handler 630 * invoked below will also handle the error descriptor. 631 */ 632 if ((ints & (INT_COMPLETION | INT_ERROR)) != 0) 633 ccp_intr_run_completions(&sc->queues[i], ints); 634 635 if ((ints & INT_QUEUE_STOPPED) != 0) 636 device_printf(sc->dev, "%s: queue %zu stopped\n", 637 __func__, i); 638 } 639 640 /* Re-enable interrupts after processing */ 641 for (i = 0; i < nitems(sc->queues); i++) { 642 if ((sc->valid_queues & (1 << i)) == 0) 643 continue; 644 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 645 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED); 646 } 647 } 648 649 static int 650 ccp_intr_filter(void *arg) 651 { 652 struct ccp_softc *sc = arg; 653 size_t i; 654 655 /* TODO: Split individual queues into separate taskqueues? 
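 * For now, every vector shares the same filter and handler and the handler
 * scans all queues: the filter masks each valid queue's interrupt enables
 * and defers to the ithread, which re-arms them after processing.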
*/ 656 for (i = 0; i < nitems(sc->queues); i++) { 657 if ((sc->valid_queues & (1 << i)) == 0) 658 continue; 659 660 /* Mask interrupt until task completes */ 661 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0); 662 } 663 664 return (FILTER_SCHEDULE_THREAD); 665 } 666 667 static int 668 ccp_setup_interrupts(struct ccp_softc *sc) 669 { 670 uint32_t nvec; 671 int rid, error, n, ridcopy; 672 673 n = pci_msix_count(sc->dev); 674 if (n < 1) { 675 device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n); 676 return (ENXIO); 677 } 678 679 nvec = n; 680 error = pci_alloc_msix(sc->dev, &nvec); 681 if (error != 0) { 682 device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__, 683 error); 684 return (error); 685 } 686 if (nvec < 1) { 687 device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n", 688 __func__); 689 return (ENXIO); 690 } 691 if (nvec > nitems(sc->intr_res)) { 692 device_printf(sc->dev, "%s: too many vectors: %u\n", __func__, 693 nvec); 694 nvec = nitems(sc->intr_res); 695 } 696 697 for (rid = 1; rid < 1 + nvec; rid++) { 698 ridcopy = rid; 699 sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev, 700 SYS_RES_IRQ, &ridcopy, RF_ACTIVE); 701 if (sc->intr_res[rid - 1] == NULL) { 702 device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n", 703 __func__); 704 return (ENXIO); 705 } 706 707 sc->intr_tag[rid - 1] = NULL; 708 error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1], 709 INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter, 710 ccp_intr_handler, sc, &sc->intr_tag[rid - 1]); 711 if (error != 0) 712 device_printf(sc->dev, "%s: setup_intr: %d\n", 713 __func__, error); 714 } 715 sc->intr_count = nvec; 716 717 return (error); 718 } 719 720 static void 721 ccp_release_interrupts(struct ccp_softc *sc) 722 { 723 unsigned i; 724 725 for (i = 0; i < sc->intr_count; i++) { 726 if (sc->intr_tag[i] != NULL) 727 bus_teardown_intr(sc->dev, sc->intr_res[i], 728 sc->intr_tag[i]); 729 if (sc->intr_res[i] != NULL) 730 bus_release_resource(sc->dev, SYS_RES_IRQ, 731 rman_get_rid(sc->intr_res[i]), sc->intr_res[i]); 732 } 733 734 pci_release_msi(sc->dev); 735 } 736 737 int 738 ccp_hw_attach(device_t dev) 739 { 740 struct ccp_softc *sc; 741 uint64_t lsbmask; 742 uint32_t version, lsbmasklo, lsbmaskhi; 743 unsigned queue_idx, j; 744 int error; 745 bool bars_mapped, interrupts_setup; 746 747 queue_idx = 0; 748 bars_mapped = interrupts_setup = false; 749 sc = device_get_softc(dev); 750 751 error = ccp_map_pci_bar(dev); 752 if (error != 0) { 753 device_printf(dev, "%s: couldn't map BAR(s)\n", __func__); 754 goto out; 755 } 756 bars_mapped = true; 757 758 error = pci_enable_busmaster(dev); 759 if (error != 0) { 760 device_printf(dev, "%s: couldn't enable busmaster\n", 761 __func__); 762 goto out; 763 } 764 765 sc->ring_size_order = g_ccp_ring_order; 766 if (sc->ring_size_order < 6 || sc->ring_size_order > 16) { 767 device_printf(dev, "bogus hw.ccp.ring_order\n"); 768 error = EINVAL; 769 goto out; 770 } 771 sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET); 772 773 version = ccp_read_4(sc, VERSION_REG); 774 if ((version & VERSION_NUM_MASK) < 5) { 775 device_printf(dev, 776 "driver supports version 5 and later hardware\n"); 777 error = ENXIO; 778 goto out; 779 } 780 781 error = ccp_setup_interrupts(sc); 782 if (error != 0) 783 goto out; 784 interrupts_setup = true; 785 786 sc->hw_version = version & VERSION_NUM_MASK; 787 sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) & 788 VERSION_NUMVQM_MASK; 789 sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) & 790 VERSION_LSBSIZE_MASK; 791 
sc->hw_features = version & VERSION_CAP_MASK; 792 793 /* 794 * Copy private LSB mask to public registers to enable access to LSB 795 * from all queues allowed by BIOS. 796 */ 797 lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET); 798 lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET); 799 ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo); 800 ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi); 801 802 lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo; 803 804 for (; queue_idx < nitems(sc->queues); queue_idx++) { 805 error = ccp_hw_attach_queue(dev, lsbmask, queue_idx); 806 if (error != 0) { 807 device_printf(dev, "%s: couldn't attach queue %u\n", 808 __func__, queue_idx); 809 goto out; 810 } 811 } 812 ccp_assign_lsb_regions(sc, lsbmask); 813 814 out: 815 if (error != 0) { 816 if (interrupts_setup) 817 ccp_release_interrupts(sc); 818 for (j = 0; j < queue_idx; j++) 819 ccp_hw_detach_queue(dev, j); 820 if (sc->ring_size_order != 0) 821 pci_disable_busmaster(dev); 822 if (bars_mapped) 823 ccp_unmap_pci_bar(dev); 824 } 825 return (error); 826 } 827 828 void 829 ccp_hw_detach(device_t dev) 830 { 831 struct ccp_softc *sc; 832 unsigned i; 833 834 sc = device_get_softc(dev); 835 836 for (i = 0; i < nitems(sc->queues); i++) 837 ccp_hw_detach_queue(dev, i); 838 839 ccp_release_interrupts(sc); 840 pci_disable_busmaster(dev); 841 ccp_unmap_pci_bar(dev); 842 } 843 844 static int __must_check 845 ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst, 846 enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type, 847 bus_size_t len, enum ccp_passthru_byteswap swapmode, 848 enum ccp_passthru_bitwise bitmode, bool interrupt, 849 const struct ccp_completion_ctx *cctx) 850 { 851 struct ccp_desc *desc; 852 853 if (ccp_queue_get_ring_space(qp) == 0) 854 return (EAGAIN); 855 856 desc = &qp->desc_ring[qp->cq_tail]; 857 858 memset(desc, 0, sizeof(*desc)); 859 desc->engine = CCP_ENGINE_PASSTHRU; 860 861 desc->pt.ioc = interrupt; 862 desc->pt.byteswap = swapmode; 863 desc->pt.bitwise = bitmode; 864 desc->length = len; 865 866 desc->src_lo = (uint32_t)src; 867 desc->src_hi = src >> 32; 868 desc->src_mem = src_type; 869 870 desc->dst_lo = (uint32_t)dst; 871 desc->dst_hi = dst >> 32; 872 desc->dst_mem = dst_type; 873 874 if (bitmode != CCP_PASSTHRU_BITWISE_NOOP) 875 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY); 876 877 if (cctx != NULL) 878 memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx)); 879 880 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); 881 return (0); 882 } 883 884 static int __must_check 885 ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb, 886 struct sglist *sgl, bus_size_t len, bool interrupt, 887 const struct ccp_completion_ctx *cctx) 888 { 889 struct sglist_seg *seg; 890 size_t i, remain, nb; 891 int error; 892 893 remain = len; 894 for (i = 0; i < sgl->sg_nseg && remain != 0; i++) { 895 seg = &sgl->sg_segs[i]; 896 /* crp lengths are int, so 32-bit min() is ok. 
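Each chunk is queued as its own pass-through descriptor; the interrupt-on-completion flag is only requested for the final chunk.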
*/ 897 nb = min(remain, seg->ss_len); 898 899 if (tolsb) 900 error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB, 901 seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb, 902 CCP_PASSTHRU_BYTESWAP_NOOP, 903 CCP_PASSTHRU_BITWISE_NOOP, 904 (nb == remain) && interrupt, cctx); 905 else 906 error = ccp_passthrough(qp, seg->ss_paddr, 907 CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb, 908 CCP_PASSTHRU_BYTESWAP_NOOP, 909 CCP_PASSTHRU_BITWISE_NOOP, 910 (nb == remain) && interrupt, cctx); 911 if (error != 0) 912 return (error); 913 914 remain -= nb; 915 } 916 return (0); 917 } 918 919 /* 920 * Note that these vectors are in reverse of the usual order. 921 */ 922 const struct SHA_vectors { 923 uint32_t SHA1[8]; 924 uint32_t SHA224[8]; 925 uint32_t SHA256[8]; 926 uint64_t SHA384[8]; 927 uint64_t SHA512[8]; 928 } SHA_H __aligned(PAGE_SIZE) = { 929 .SHA1 = { 930 0xc3d2e1f0ul, 931 0x10325476ul, 932 0x98badcfeul, 933 0xefcdab89ul, 934 0x67452301ul, 935 0, 936 0, 937 0, 938 }, 939 .SHA224 = { 940 0xbefa4fa4ul, 941 0x64f98fa7ul, 942 0x68581511ul, 943 0xffc00b31ul, 944 0xf70e5939ul, 945 0x3070dd17ul, 946 0x367cd507ul, 947 0xc1059ed8ul, 948 }, 949 .SHA256 = { 950 0x5be0cd19ul, 951 0x1f83d9abul, 952 0x9b05688cul, 953 0x510e527ful, 954 0xa54ff53aul, 955 0x3c6ef372ul, 956 0xbb67ae85ul, 957 0x6a09e667ul, 958 }, 959 .SHA384 = { 960 0x47b5481dbefa4fa4ull, 961 0xdb0c2e0d64f98fa7ull, 962 0x8eb44a8768581511ull, 963 0x67332667ffc00b31ull, 964 0x152fecd8f70e5939ull, 965 0x9159015a3070dd17ull, 966 0x629a292a367cd507ull, 967 0xcbbb9d5dc1059ed8ull, 968 }, 969 .SHA512 = { 970 0x5be0cd19137e2179ull, 971 0x1f83d9abfb41bd6bull, 972 0x9b05688c2b3e6c1full, 973 0x510e527fade682d1ull, 974 0xa54ff53a5f1d36f1ull, 975 0x3c6ef372fe94f82bull, 976 0xbb67ae8584caa73bull, 977 0x6a09e667f3bcc908ull, 978 }, 979 }; 980 /* 981 * Ensure vectors do not cross a page boundary. 982 * 983 * Disabled due to a new Clang error: "expression is not an integral constant 984 * expression." GCC (cross toolchain) seems to handle this assertion with 985 * _Static_assert just fine. 
986 */ 987 #if 0 988 CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H)); 989 #endif 990 991 const struct SHA_Defn { 992 enum sha_version version; 993 const void *H_vectors; 994 size_t H_size; 995 const struct auth_hash *axf; 996 enum ccp_sha_type engine_type; 997 } SHA_definitions[] = { 998 { 999 .version = SHA1, 1000 .H_vectors = SHA_H.SHA1, 1001 .H_size = sizeof(SHA_H.SHA1), 1002 .axf = &auth_hash_hmac_sha1, 1003 .engine_type = CCP_SHA_TYPE_1, 1004 }, 1005 #if 0 1006 { 1007 .version = SHA2_224, 1008 .H_vectors = SHA_H.SHA224, 1009 .H_size = sizeof(SHA_H.SHA224), 1010 .axf = &auth_hash_hmac_sha2_224, 1011 .engine_type = CCP_SHA_TYPE_224, 1012 }, 1013 #endif 1014 { 1015 .version = SHA2_256, 1016 .H_vectors = SHA_H.SHA256, 1017 .H_size = sizeof(SHA_H.SHA256), 1018 .axf = &auth_hash_hmac_sha2_256, 1019 .engine_type = CCP_SHA_TYPE_256, 1020 }, 1021 { 1022 .version = SHA2_384, 1023 .H_vectors = SHA_H.SHA384, 1024 .H_size = sizeof(SHA_H.SHA384), 1025 .axf = &auth_hash_hmac_sha2_384, 1026 .engine_type = CCP_SHA_TYPE_384, 1027 }, 1028 { 1029 .version = SHA2_512, 1030 .H_vectors = SHA_H.SHA512, 1031 .H_size = sizeof(SHA_H.SHA512), 1032 .axf = &auth_hash_hmac_sha2_512, 1033 .engine_type = CCP_SHA_TYPE_512, 1034 }, 1035 }; 1036 1037 static int __must_check 1038 ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn, 1039 vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits) 1040 { 1041 struct ccp_desc *desc; 1042 1043 if (ccp_queue_get_ring_space(qp) == 0) 1044 return (EAGAIN); 1045 1046 desc = &qp->desc_ring[qp->cq_tail]; 1047 1048 memset(desc, 0, sizeof(*desc)); 1049 desc->engine = CCP_ENGINE_SHA; 1050 desc->som = start; 1051 desc->eom = end; 1052 1053 desc->sha.type = defn->engine_type; 1054 desc->length = len; 1055 1056 if (end) { 1057 desc->sha_len_lo = (uint32_t)msgbits; 1058 desc->sha_len_hi = msgbits >> 32; 1059 } 1060 1061 desc->src_lo = (uint32_t)addr; 1062 desc->src_hi = addr >> 32; 1063 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1064 1065 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA); 1066 1067 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); 1068 return (0); 1069 } 1070 1071 static int __must_check 1072 ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src, 1073 struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx) 1074 { 1075 const struct SHA_Defn *defn; 1076 struct sglist_seg *seg; 1077 size_t i, msgsize, remaining, nb; 1078 uint32_t lsbaddr; 1079 int error; 1080 1081 for (i = 0; i < nitems(SHA_definitions); i++) 1082 if (SHA_definitions[i].version == version) 1083 break; 1084 if (i == nitems(SHA_definitions)) 1085 return (EINVAL); 1086 defn = &SHA_definitions[i]; 1087 1088 /* XXX validate input ??? 
*/ 1089 1090 /* Load initial SHA state into LSB */ 1091 /* XXX ensure H_vectors don't span page boundaries */ 1092 error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA), 1093 CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors), 1094 CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE), 1095 CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false, 1096 NULL); 1097 if (error != 0) 1098 return (error); 1099 1100 /* Execute series of SHA updates on correctly sized buffers */ 1101 msgsize = 0; 1102 for (i = 0; i < sgl_src->sg_nseg; i++) { 1103 seg = &sgl_src->sg_segs[i]; 1104 msgsize += seg->ss_len; 1105 error = ccp_sha_single_desc(qp, defn, seg->ss_paddr, 1106 seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1, 1107 msgsize << 3); 1108 if (error != 0) 1109 return (error); 1110 } 1111 1112 /* Copy result out to sgl_dst */ 1113 remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE); 1114 lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA); 1115 for (i = 0; i < sgl_dst->sg_nseg; i++) { 1116 seg = &sgl_dst->sg_segs[i]; 1117 /* crp lengths are int, so 32-bit min() is ok. */ 1118 nb = min(remaining, seg->ss_len); 1119 1120 error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM, 1121 lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP, 1122 CCP_PASSTHRU_BITWISE_NOOP, 1123 (cctx != NULL) ? (nb == remaining) : false, 1124 (nb == remaining) ? cctx : NULL); 1125 if (error != 0) 1126 return (error); 1127 1128 remaining -= nb; 1129 lsbaddr += nb; 1130 if (remaining == 0) 1131 break; 1132 } 1133 1134 return (0); 1135 } 1136 1137 static void 1138 byteswap256(uint64_t *buffer) 1139 { 1140 uint64_t t; 1141 1142 t = bswap64(buffer[3]); 1143 buffer[3] = bswap64(buffer[0]); 1144 buffer[0] = t; 1145 1146 t = bswap64(buffer[2]); 1147 buffer[2] = bswap64(buffer[1]); 1148 buffer[1] = t; 1149 } 1150 1151 /* 1152 * Translate CCP internal LSB hash format into a standard hash output. 1153 * 1154 * Manipulates input buffer with byteswap256 operation.
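 * The hardware keeps the state words in reverse order (matching the reversed
 * SHA_H initial vectors above); byteswap256() restores byte order within each
 * 32-byte LSB entry and the per-version memcpy()s below extract the digest.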
1155 */ 1156 static void 1157 ccp_sha_copy_result(char *output, char *buffer, enum sha_version version) 1158 { 1159 const struct SHA_Defn *defn; 1160 size_t i; 1161 1162 for (i = 0; i < nitems(SHA_definitions); i++) 1163 if (SHA_definitions[i].version == version) 1164 break; 1165 if (i == nitems(SHA_definitions)) 1166 panic("bogus sha version auth_mode %u\n", (unsigned)version); 1167 1168 defn = &SHA_definitions[i]; 1169 1170 /* Swap 256bit manually -- DMA engine can, but with limitations */ 1171 byteswap256((void *)buffer); 1172 if (defn->axf->hashsize > LSB_ENTRY_SIZE) 1173 byteswap256((void *)(buffer + LSB_ENTRY_SIZE)); 1174 1175 switch (defn->version) { 1176 case SHA1: 1177 memcpy(output, buffer + 12, defn->axf->hashsize); 1178 break; 1179 #if 0 1180 case SHA2_224: 1181 memcpy(output, buffer + XXX, defn->axf->hashsize); 1182 break; 1183 #endif 1184 case SHA2_256: 1185 memcpy(output, buffer, defn->axf->hashsize); 1186 break; 1187 case SHA2_384: 1188 memcpy(output, 1189 buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize, 1190 defn->axf->hashsize - LSB_ENTRY_SIZE); 1191 memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer, 1192 LSB_ENTRY_SIZE); 1193 break; 1194 case SHA2_512: 1195 memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE); 1196 memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE); 1197 break; 1198 } 1199 } 1200 1201 static void 1202 ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s, 1203 struct cryptop *crp, int error) 1204 { 1205 char ihash[SHA2_512_HASH_LEN /* max hash len */]; 1206 union authctx auth_ctx; 1207 const struct auth_hash *axf; 1208 1209 axf = s->hmac.auth_hash; 1210 1211 s->pending--; 1212 1213 if (error != 0) { 1214 crp->crp_etype = error; 1215 goto out; 1216 } 1217 1218 /* Do remaining outer hash over small inner hash in software */ 1219 axf->Init(&auth_ctx); 1220 axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize); 1221 ccp_sha_copy_result(ihash, s->hmac.res, s->hmac.auth_mode); 1222 #if 0 1223 INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__, 1224 (u_char *)ihash, " "); 1225 #endif 1226 axf->Update(&auth_ctx, ihash, axf->hashsize); 1227 axf->Final(s->hmac.res, &auth_ctx); 1228 1229 if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) { 1230 crypto_copydata(crp, crp->crp_digest_start, s->hmac.hash_len, 1231 ihash); 1232 if (timingsafe_bcmp(s->hmac.res, ihash, s->hmac.hash_len) != 0) 1233 crp->crp_etype = EBADMSG; 1234 } else 1235 crypto_copyback(crp, crp->crp_digest_start, s->hmac.hash_len, 1236 s->hmac.res); 1237 1238 /* Avoid leaking key material */ 1239 explicit_bzero(&auth_ctx, sizeof(auth_ctx)); 1240 explicit_bzero(s->hmac.res, sizeof(s->hmac.res)); 1241 1242 out: 1243 crypto_done(crp); 1244 } 1245 1246 static void 1247 ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1248 int error) 1249 { 1250 struct cryptop *crp; 1251 1252 crp = vcrp; 1253 ccp_do_hmac_done(qp, s, crp, error); 1254 } 1255 1256 static int __must_check 1257 ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, 1258 const struct ccp_completion_ctx *cctx) 1259 { 1260 device_t dev; 1261 const struct auth_hash *axf; 1262 int error; 1263 1264 dev = qp->cq_softc->dev; 1265 axf = s->hmac.auth_hash; 1266 1267 /* 1268 * Populate the SGL describing inside hash contents. We want to hash 1269 * the ipad (key XOR fixed bit pattern) concatenated with the user 1270 * data. 
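 * That is, the engine computes the inner hash over (key ^ ipad) || AAD ||
 * payload; the outer hash over the opad and this inner result is finished in
 * software by ccp_do_hmac_done().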
1271 */ 1272 sglist_reset(qp->cq_sg_ulptx); 1273 error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize); 1274 if (error != 0) 1275 return (error); 1276 if (crp->crp_aad_length != 0) { 1277 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1278 crp->crp_aad_start, crp->crp_aad_length); 1279 if (error != 0) 1280 return (error); 1281 } 1282 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1283 crp->crp_payload_start, crp->crp_payload_length); 1284 if (error != 0) { 1285 DPRINTF(dev, "%s: sglist too short\n", __func__); 1286 return (error); 1287 } 1288 /* Populate SGL for output -- use hmac.res buffer. */ 1289 sglist_reset(qp->cq_sg_dst); 1290 error = sglist_append(qp->cq_sg_dst, s->hmac.res, 1291 roundup2(axf->hashsize, LSB_ENTRY_SIZE)); 1292 if (error != 0) 1293 return (error); 1294 1295 error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst, 1296 cctx); 1297 if (error != 0) { 1298 DPRINTF(dev, "%s: ccp_sha error\n", __func__); 1299 return (error); 1300 } 1301 return (0); 1302 } 1303 1304 int __must_check 1305 ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1306 { 1307 struct ccp_completion_ctx ctx; 1308 1309 ctx.callback_fn = ccp_hmac_done; 1310 ctx.callback_arg = crp; 1311 ctx.session = s; 1312 1313 return (ccp_do_hmac(qp, s, crp, &ctx)); 1314 } 1315 1316 static void 1317 ccp_byteswap(char *data, size_t len) 1318 { 1319 size_t i; 1320 char t; 1321 1322 len--; 1323 for (i = 0; i < len; i++, len--) { 1324 t = data[i]; 1325 data[i] = data[len]; 1326 data[len] = t; 1327 } 1328 } 1329 1330 static void 1331 ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1332 int error) 1333 { 1334 struct cryptop *crp; 1335 1336 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv)); 1337 1338 crp = vcrp; 1339 1340 s->pending--; 1341 1342 if (error != 0) 1343 crp->crp_etype = error; 1344 1345 DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp); 1346 crypto_done(crp); 1347 } 1348 1349 static void 1350 ccp_collect_iv(struct cryptop *crp, const struct crypto_session_params *csp, 1351 char *iv) 1352 { 1353 1354 crypto_read_iv(crp, iv); 1355 1356 /* 1357 * Append an explicit counter of 1 for GCM. 1358 */ 1359 if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16) 1360 *(uint32_t *)&iv[12] = htobe32(1); 1361 1362 if (csp->csp_cipher_alg == CRYPTO_AES_XTS && 1363 csp->csp_ivlen < AES_BLOCK_LEN) 1364 memset(&iv[csp->csp_ivlen], 0, AES_BLOCK_LEN - csp->csp_ivlen); 1365 1366 /* Reverse order of IV material for HW */ 1367 INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__, iv, " ", 1368 csp->csp_ivlen); 1369 1370 /* 1371 * For unknown reasons, XTS mode expects the IV in the reverse byte 1372 * order to every other AES mode. 
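 * (Key material gets the same treatment; ccp_do_blkcipher() and ccp_gcm()
 * byte-reverse the key before loading it into the LSB.)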
1373 */ 1374 if (csp->csp_cipher_alg != CRYPTO_AES_XTS) 1375 ccp_byteswap(iv, AES_BLOCK_LEN); 1376 } 1377 1378 static int __must_check 1379 ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src, 1380 size_t len) 1381 { 1382 int error; 1383 1384 sglist_reset(qp->cq_sg_ulptx); 1385 error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len); 1386 if (error != 0) 1387 return (error); 1388 1389 error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len, 1390 false, NULL); 1391 return (error); 1392 } 1393 1394 static int __must_check 1395 ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, 1396 enum ccp_cipher_dir dir, const struct ccp_completion_ctx *cctx) 1397 { 1398 struct ccp_desc *desc; 1399 device_t dev; 1400 unsigned i; 1401 enum ccp_xts_unitsize usize; 1402 1403 /* IV and Key data are already loaded */ 1404 1405 dev = qp->cq_softc->dev; 1406 1407 for (i = 0; i < nitems(ccp_xts_unitsize_map); i++) 1408 if (ccp_xts_unitsize_map[i].cxu_size == 1409 crp->crp_payload_length) { 1410 usize = ccp_xts_unitsize_map[i].cxu_id; 1411 break; 1412 } 1413 if (i >= nitems(ccp_xts_unitsize_map)) 1414 return (EINVAL); 1415 1416 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1417 struct sglist_seg *seg; 1418 1419 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1420 1421 desc = &qp->desc_ring[qp->cq_tail]; 1422 desc->engine = CCP_ENGINE_XTS_AES; 1423 desc->som = (i == 0); 1424 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1); 1425 desc->ioc = (desc->eom && cctx != NULL); 1426 DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n", 1427 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom, 1428 (int)desc->ioc, (int)dir); 1429 1430 if (desc->ioc) 1431 memcpy(&qp->completions_ring[qp->cq_tail], cctx, 1432 sizeof(*cctx)); 1433 1434 desc->aes_xts.encrypt = dir; 1435 desc->aes_xts.type = s->blkcipher.cipher_type; 1436 desc->aes_xts.size = usize; 1437 1438 DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__, 1439 qp->cq_tail, (unsigned)desc->aes_xts.type, 1440 (unsigned)desc->aes_xts.size); 1441 1442 desc->length = seg->ss_len; 1443 desc->src_lo = (uint32_t)seg->ss_paddr; 1444 desc->src_hi = (seg->ss_paddr >> 32); 1445 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1446 1447 /* Crypt in-place */ 1448 desc->dst_lo = desc->src_lo; 1449 desc->dst_hi = desc->src_hi; 1450 desc->dst_mem = desc->src_mem; 1451 1452 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1453 desc->key_hi = 0; 1454 desc->key_mem = CCP_MEMTYPE_SB; 1455 1456 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1457 1458 qp->cq_tail = (qp->cq_tail + 1) % 1459 (1 << qp->cq_softc->ring_size_order); 1460 } 1461 return (0); 1462 } 1463 1464 static int __must_check 1465 ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s, 1466 struct cryptop *crp, const struct ccp_completion_ctx *cctx) 1467 { 1468 const struct crypto_session_params *csp; 1469 struct ccp_desc *desc; 1470 char *keydata; 1471 device_t dev; 1472 enum ccp_cipher_dir dir; 1473 int error, iv_len; 1474 size_t keydata_len; 1475 unsigned i, j; 1476 1477 dev = qp->cq_softc->dev; 1478 1479 if (s->blkcipher.key_len == 0 || crp->crp_payload_length == 0) { 1480 DPRINTF(dev, "%s: empty\n", __func__); 1481 return (EINVAL); 1482 } 1483 if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) { 1484 DPRINTF(dev, "%s: len modulo: %d\n", __func__, 1485 crp->crp_payload_length); 1486 return (EINVAL); 1487 } 1488 1489 /* 1490 * Individual segments must be multiples of AES block size for the HW 1491 * to process it. 
Non-compliant inputs aren't bogus, just not doable 1492 * on this hardware. 1493 */ 1494 for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++) 1495 if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) { 1496 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__, 1497 qp->cq_sg_crp->sg_segs[i].ss_len); 1498 return (EINVAL); 1499 } 1500 1501 /* Gather IV/nonce data */ 1502 csp = crypto_get_params(crp->crp_session); 1503 ccp_collect_iv(crp, csp, s->blkcipher.iv); 1504 iv_len = csp->csp_ivlen; 1505 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) 1506 iv_len = AES_BLOCK_LEN; 1507 1508 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1509 dir = CCP_CIPHER_DIR_ENCRYPT; 1510 else 1511 dir = CCP_CIPHER_DIR_DECRYPT; 1512 1513 /* Set up passthrough op(s) to copy IV into LSB */ 1514 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 1515 s->blkcipher.iv, iv_len); 1516 if (error != 0) 1517 return (error); 1518 1519 /* 1520 * Initialize keydata and keydata_len for GCC. The default case of the 1521 * following switch is impossible to reach, but GCC doesn't know that. 1522 */ 1523 keydata_len = 0; 1524 keydata = NULL; 1525 1526 switch (csp->csp_cipher_alg) { 1527 case CRYPTO_AES_XTS: 1528 for (j = 0; j < nitems(ccp_xts_unitsize_map); j++) 1529 if (ccp_xts_unitsize_map[j].cxu_size == 1530 crp->crp_payload_length) 1531 break; 1532 /* Input buffer must be a supported UnitSize */ 1533 if (j >= nitems(ccp_xts_unitsize_map)) { 1534 device_printf(dev, "%s: rejected block size: %u\n", 1535 __func__, crp->crp_payload_length); 1536 return (EOPNOTSUPP); 1537 } 1538 /* FALLTHROUGH */ 1539 case CRYPTO_AES_CBC: 1540 case CRYPTO_AES_ICM: 1541 keydata = s->blkcipher.enckey; 1542 keydata_len = s->blkcipher.key_len; 1543 break; 1544 } 1545 1546 INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len, 1547 keydata, " "); 1548 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) 1549 INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " "); 1550 1551 /* Reverse order of key material for HW */ 1552 ccp_byteswap(keydata, keydata_len); 1553 1554 /* Store key material into LSB to avoid page boundaries */ 1555 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) { 1556 /* 1557 * XTS mode uses 2 256-bit vectors for the primary key and the 1558 * tweak key. For 128-bit keys, the vectors are zero-padded. 1559 * 1560 * After byteswapping the combined OCF-provided K1:K2 vector 1561 * above, we need to reverse the order again so the hardware 1562 * gets the swapped keys in the order K1':K2'. 1563 */ 1564 error = ccp_do_pst_to_lsb(qp, 1565 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata, 1566 keydata_len / 2); 1567 if (error != 0) 1568 return (error); 1569 error = ccp_do_pst_to_lsb(qp, 1570 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), 1571 keydata + (keydata_len / 2), keydata_len / 2); 1572 1573 /* Zero-pad 128 bit keys */ 1574 if (keydata_len == 32) { 1575 if (error != 0) 1576 return (error); 1577 error = ccp_do_pst_to_lsb(qp, 1578 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) + 1579 keydata_len / 2, g_zeroes, keydata_len / 2); 1580 if (error != 0) 1581 return (error); 1582 error = ccp_do_pst_to_lsb(qp, 1583 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) + 1584 keydata_len / 2, g_zeroes, keydata_len / 2); 1585 } 1586 } else 1587 error = ccp_do_pst_to_lsb(qp, 1588 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata, 1589 keydata_len); 1590 if (error != 0) 1591 return (error); 1592 1593 /* 1594 * Point SGLs at the subset of cryptop buffer contents representing the 1595 * data. 
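 * Each resulting physical segment becomes one AES/XTS descriptor, so ring
 * space is checked against the segment count below before any descriptor is
 * written.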
1596 */ 1597 sglist_reset(qp->cq_sg_ulptx); 1598 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1599 crp->crp_payload_start, crp->crp_payload_length); 1600 if (error != 0) 1601 return (error); 1602 1603 INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__, 1604 (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " "); 1605 1606 DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail); 1607 1608 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg) 1609 return (EAGAIN); 1610 1611 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) 1612 return (ccp_do_xts(qp, s, crp, dir, cctx)); 1613 1614 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1615 struct sglist_seg *seg; 1616 1617 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1618 1619 desc = &qp->desc_ring[qp->cq_tail]; 1620 desc->engine = CCP_ENGINE_AES; 1621 desc->som = (i == 0); 1622 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1); 1623 desc->ioc = (desc->eom && cctx != NULL); 1624 DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n", 1625 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom, 1626 (int)desc->ioc, (int)dir); 1627 1628 if (desc->ioc) 1629 memcpy(&qp->completions_ring[qp->cq_tail], cctx, 1630 sizeof(*cctx)); 1631 1632 desc->aes.encrypt = dir; 1633 desc->aes.mode = s->blkcipher.cipher_mode; 1634 desc->aes.type = s->blkcipher.cipher_type; 1635 if (csp->csp_cipher_alg == CRYPTO_AES_ICM) 1636 /* 1637 * Size of CTR value in bits, - 1. ICM mode uses all 1638 * 128 bits as counter. 1639 */ 1640 desc->aes.size = 127; 1641 1642 DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__, 1643 qp->cq_tail, (unsigned)desc->aes.mode, 1644 (unsigned)desc->aes.type, (unsigned)desc->aes.size); 1645 1646 desc->length = seg->ss_len; 1647 desc->src_lo = (uint32_t)seg->ss_paddr; 1648 desc->src_hi = (seg->ss_paddr >> 32); 1649 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1650 1651 /* Crypt in-place */ 1652 desc->dst_lo = desc->src_lo; 1653 desc->dst_hi = desc->src_hi; 1654 desc->dst_mem = desc->src_mem; 1655 1656 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1657 desc->key_hi = 0; 1658 desc->key_mem = CCP_MEMTYPE_SB; 1659 1660 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1661 1662 qp->cq_tail = (qp->cq_tail + 1) % 1663 (1 << qp->cq_softc->ring_size_order); 1664 } 1665 return (0); 1666 } 1667 1668 int __must_check 1669 ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1670 { 1671 struct ccp_completion_ctx ctx; 1672 1673 ctx.callback_fn = ccp_blkcipher_done; 1674 ctx.session = s; 1675 ctx.callback_arg = crp; 1676 1677 return (ccp_do_blkcipher(qp, s, crp, &ctx)); 1678 } 1679 1680 static void 1681 ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1682 int error) 1683 { 1684 struct cryptop *crp; 1685 1686 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv)); 1687 1688 crp = vcrp; 1689 1690 ccp_do_hmac_done(qp, s, crp, error); 1691 } 1692 1693 int __must_check 1694 ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1695 { 1696 struct ccp_completion_ctx ctx; 1697 int error; 1698 1699 ctx.callback_fn = ccp_authenc_done; 1700 ctx.session = s; 1701 ctx.callback_arg = crp; 1702 1703 /* Perform first operation */ 1704 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1705 error = ccp_do_blkcipher(qp, s, crp, NULL); 1706 else 1707 error = ccp_do_hmac(qp, s, crp, NULL); 1708 if (error != 0) 1709 return (error); 1710 1711 /* Perform second operation */ 1712 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1713 error = ccp_do_hmac(qp, s, crp, &ctx); 1714 
else 1715 error = ccp_do_blkcipher(qp, s, crp, &ctx); 1716 return (error); 1717 } 1718 1719 static int __must_check 1720 ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s) 1721 { 1722 struct ccp_desc *desc; 1723 struct sglist_seg *seg; 1724 unsigned i; 1725 1726 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg) 1727 return (EAGAIN); 1728 1729 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1730 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1731 1732 desc = &qp->desc_ring[qp->cq_tail]; 1733 1734 desc->engine = CCP_ENGINE_AES; 1735 desc->aes.mode = CCP_AES_MODE_GHASH; 1736 desc->aes.type = s->blkcipher.cipher_type; 1737 desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD; 1738 1739 desc->som = (i == 0); 1740 desc->length = seg->ss_len; 1741 1742 desc->src_lo = (uint32_t)seg->ss_paddr; 1743 desc->src_hi = (seg->ss_paddr >> 32); 1744 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1745 1746 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1747 1748 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1749 desc->key_mem = CCP_MEMTYPE_SB; 1750 1751 qp->cq_tail = (qp->cq_tail + 1) % 1752 (1 << qp->cq_softc->ring_size_order); 1753 } 1754 return (0); 1755 } 1756 1757 static int __must_check 1758 ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s, 1759 enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom) 1760 { 1761 struct ccp_desc *desc; 1762 1763 if (ccp_queue_get_ring_space(qp) == 0) 1764 return (EAGAIN); 1765 1766 desc = &qp->desc_ring[qp->cq_tail]; 1767 1768 desc->engine = CCP_ENGINE_AES; 1769 desc->aes.mode = CCP_AES_MODE_GCTR; 1770 desc->aes.type = s->blkcipher.cipher_type; 1771 desc->aes.encrypt = dir; 1772 desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1; 1773 1774 desc->som = som; 1775 desc->eom = eom; 1776 1777 /* Trailing bytes will be masked off by aes.size above. 
*/ 1778 desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN); 1779 1780 desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr; 1781 desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32; 1782 desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM; 1783 1784 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1785 1786 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1787 desc->key_mem = CCP_MEMTYPE_SB; 1788 1789 qp->cq_tail = (qp->cq_tail + 1) % 1790 (1 << qp->cq_softc->ring_size_order); 1791 return (0); 1792 } 1793 1794 static int __must_check 1795 ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s) 1796 { 1797 struct ccp_desc *desc; 1798 1799 if (ccp_queue_get_ring_space(qp) == 0) 1800 return (EAGAIN); 1801 1802 desc = &qp->desc_ring[qp->cq_tail]; 1803 1804 desc->engine = CCP_ENGINE_AES; 1805 desc->aes.mode = CCP_AES_MODE_GHASH; 1806 desc->aes.type = s->blkcipher.cipher_type; 1807 desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL; 1808 1809 desc->length = GMAC_BLOCK_LEN; 1810 1811 desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN); 1812 desc->src_mem = CCP_MEMTYPE_SB; 1813 1814 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1815 1816 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1817 desc->key_mem = CCP_MEMTYPE_SB; 1818 1819 desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH); 1820 desc->dst_mem = CCP_MEMTYPE_SB; 1821 1822 qp->cq_tail = (qp->cq_tail + 1) % 1823 (1 << qp->cq_softc->ring_size_order); 1824 return (0); 1825 } 1826 1827 static void 1828 ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1829 int error) 1830 { 1831 char tag[GMAC_DIGEST_LEN]; 1832 struct cryptop *crp; 1833 1834 crp = vcrp; 1835 1836 s->pending--; 1837 1838 if (error != 0) { 1839 crp->crp_etype = error; 1840 goto out; 1841 } 1842 1843 /* Encrypt is done. Decrypt needs to verify tag. */ 1844 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1845 goto out; 1846 1847 /* Copy in message tag. 
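The GMAC computed by the engine was copied out of the LSB into s->gmac.final_block by the trailing pass-through queued in ccp_gcm(); it is compared against the provided tag just below.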
*/ 1848 crypto_copydata(crp, crp->crp_digest_start, s->gmac.hash_len, tag); 1849 1850 /* Verify tag against computed GMAC */ 1851 if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0) 1852 crp->crp_etype = EBADMSG; 1853 1854 out: 1855 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv)); 1856 explicit_bzero(&s->gmac.final_block, sizeof(s->gmac.final_block)); 1857 crypto_done(crp); 1858 } 1859 1860 int __must_check 1861 ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1862 { 1863 const struct crypto_session_params *csp; 1864 struct ccp_completion_ctx ctx; 1865 enum ccp_cipher_dir dir; 1866 device_t dev; 1867 unsigned i; 1868 int error; 1869 1870 if (s->blkcipher.key_len == 0) 1871 return (EINVAL); 1872 1873 dev = qp->cq_softc->dev; 1874 1875 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1876 dir = CCP_CIPHER_DIR_ENCRYPT; 1877 else 1878 dir = CCP_CIPHER_DIR_DECRYPT; 1879 1880 /* Zero initial GHASH portion of context */ 1881 memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv)); 1882 1883 /* Gather IV data */ 1884 csp = crypto_get_params(crp->crp_session); 1885 ccp_collect_iv(crp, csp, s->blkcipher.iv); 1886 1887 /* Reverse order of key material for HW */ 1888 ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len); 1889 1890 /* Prepare input buffer of concatenated lengths for final GHASH */ 1891 be64enc(s->gmac.final_block, (uint64_t)crp->crp_aad_length * 8); 1892 be64enc(&s->gmac.final_block[8], (uint64_t)crp->crp_payload_length * 8); 1893 1894 /* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */ 1895 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 1896 s->blkcipher.iv, 32); 1897 if (error != 0) 1898 return (error); 1899 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), 1900 s->blkcipher.enckey, s->blkcipher.key_len); 1901 if (error != 0) 1902 return (error); 1903 error = ccp_do_pst_to_lsb(qp, 1904 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block, 1905 GMAC_BLOCK_LEN); 1906 if (error != 0) 1907 return (error); 1908 1909 /* First step - compute GHASH over AAD */ 1910 if (crp->crp_aad_length != 0) { 1911 sglist_reset(qp->cq_sg_ulptx); 1912 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1913 crp->crp_aad_start, crp->crp_aad_length); 1914 if (error != 0) 1915 return (error); 1916 1917 /* This engine cannot process non-block multiple AAD data. */ 1918 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) 1919 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % 1920 GMAC_BLOCK_LEN) != 0) { 1921 DPRINTF(dev, "%s: AD seg modulo: %zu\n", 1922 __func__, 1923 qp->cq_sg_ulptx->sg_segs[i].ss_len); 1924 return (EINVAL); 1925 } 1926 1927 error = ccp_do_ghash_aad(qp, s); 1928 if (error != 0) 1929 return (error); 1930 } 1931 1932 /* Feed data piece by piece into GCTR */ 1933 sglist_reset(qp->cq_sg_ulptx); 1934 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1935 crp->crp_payload_start, crp->crp_payload_length); 1936 if (error != 0) 1937 return (error); 1938 1939 /* 1940 * All segments except the last must be even multiples of AES block 1941 * size for the HW to process it. Non-compliant inputs aren't bogus, 1942 * just not doable on this hardware. 1943 * 1944 * XXX: Well, the hardware will produce a valid tag for shorter final 1945 * segment inputs, but it will still write out a block-sized plaintext 1946 * or ciphertext chunk. For a typical CRP this tramples trailing data, 1947 * including the provided message tag. So, reject such inputs for now. 
1948 */ 1949 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) 1950 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) { 1951 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__, 1952 qp->cq_sg_ulptx->sg_segs[i].ss_len); 1953 return (EINVAL); 1954 } 1955 1956 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1957 struct sglist_seg *seg; 1958 1959 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1960 error = ccp_do_gctr(qp, s, dir, seg, 1961 (i == 0 && crp->crp_aad_length == 0), 1962 i == (qp->cq_sg_ulptx->sg_nseg - 1)); 1963 if (error != 0) 1964 return (error); 1965 } 1966 1967 /* Send just initial IV (not GHASH!) to LSB again */ 1968 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 1969 s->blkcipher.iv, AES_BLOCK_LEN); 1970 if (error != 0) 1971 return (error); 1972 1973 ctx.callback_fn = ccp_gcm_done; 1974 ctx.session = s; 1975 ctx.callback_arg = crp; 1976 1977 /* Compute final hash and copy result back */ 1978 error = ccp_do_ghash_final(qp, s); 1979 if (error != 0) 1980 return (error); 1981 1982 /* When encrypting, copy computed tag out to caller buffer. */ 1983 sglist_reset(qp->cq_sg_ulptx); 1984 if (dir == CCP_CIPHER_DIR_ENCRYPT) 1985 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1986 crp->crp_digest_start, s->gmac.hash_len); 1987 else 1988 /* 1989 * For decrypting, copy the computed tag out to our session 1990 * buffer to verify in our callback. 1991 */ 1992 error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block, 1993 s->gmac.hash_len); 1994 if (error != 0) 1995 return (error); 1996 error = ccp_passthrough_sgl(qp, 1997 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx, 1998 s->gmac.hash_len, true, &ctx); 1999 return (error); 2000 } 2001 2002 #define MAX_TRNG_RETRIES 10 2003 u_int 2004 random_ccp_read(void *v, u_int c) 2005 { 2006 uint32_t *buf; 2007 u_int i, j; 2008 2009 KASSERT(c % sizeof(*buf) == 0, ("%u not multiple of u_long", c)); 2010 2011 buf = v; 2012 for (i = c; i > 0; i -= sizeof(*buf)) { 2013 for (j = 0; j < MAX_TRNG_RETRIES; j++) { 2014 *buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET); 2015 if (*buf != 0) 2016 break; 2017 } 2018 if (j == MAX_TRNG_RETRIES) 2019 return (0); 2020 buf++; 2021 } 2022 return (c); 2023 2024 } 2025 2026 #ifdef DDB 2027 void 2028 db_ccp_show_hw(struct ccp_softc *sc) 2029 { 2030 2031 db_printf(" queue mask: 0x%x\n", 2032 ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET)); 2033 db_printf(" queue prio: 0x%x\n", 2034 ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET)); 2035 db_printf(" reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET)); 2036 db_printf(" trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET)); 2037 db_printf(" cmd timeout: 0x%x\n", 2038 ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET)); 2039 db_printf(" lsb public mask lo: 0x%x\n", 2040 ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET)); 2041 db_printf(" lsb public mask hi: 0x%x\n", 2042 ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET)); 2043 db_printf(" lsb private mask lo: 0x%x\n", 2044 ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET)); 2045 db_printf(" lsb private mask hi: 0x%x\n", 2046 ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET)); 2047 db_printf(" version: 0x%x\n", ccp_read_4(sc, VERSION_REG)); 2048 } 2049 2050 void 2051 db_ccp_show_queue_hw(struct ccp_queue *qp) 2052 { 2053 const struct ccp_error_code *ec; 2054 struct ccp_softc *sc; 2055 uint32_t status, error, esource, faultblock, headlo, qcontrol; 2056 unsigned q, i; 2057 2058 sc = qp->cq_softc; 2059 q = qp->cq_qindex; 2060 2061 qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE); 2062 db_printf(" qcontrol: 0x%x%s%s\n", qcontrol, 
2063 (qcontrol & CMD_Q_RUN) ? " RUN" : "", 2064 (qcontrol & CMD_Q_HALTED) ? " HALTED" : ""); 2065 db_printf(" tail_lo: 0x%x\n", 2066 ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE)); 2067 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE); 2068 db_printf(" head_lo: 0x%x\n", headlo); 2069 db_printf(" int enable: 0x%x\n", 2070 ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE)); 2071 db_printf(" interrupt status: 0x%x\n", 2072 ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE)); 2073 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE); 2074 db_printf(" status: 0x%x\n", status); 2075 db_printf(" int stats: 0x%x\n", 2076 ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE)); 2077 2078 error = status & STATUS_ERROR_MASK; 2079 if (error == 0) 2080 return; 2081 2082 esource = (status >> STATUS_ERRORSOURCE_SHIFT) & 2083 STATUS_ERRORSOURCE_MASK; 2084 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) & 2085 STATUS_VLSB_FAULTBLOCK_MASK; 2086 2087 ec = NULL; 2088 for (i = 0; i < nitems(ccp_error_codes); i++) 2089 if (ccp_error_codes[i].ce_code == error) 2090 break; 2091 if (i < nitems(ccp_error_codes)) 2092 ec = &ccp_error_codes[i]; 2093 2094 db_printf(" Error: %s (%u) Source: %u Faulting LSB block: %u\n", 2095 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource, 2096 faultblock); 2097 if (ec != NULL) 2098 db_printf(" Error description: %s\n", ec->ce_desc); 2099 2100 i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE; 2101 db_printf(" Bad descriptor idx: %u contents:\n %32D\n", i, 2102 (void *)&qp->desc_ring[i], " "); 2103 } 2104 #endif 2105