1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2017 Chelsio Communications, Inc. 5 * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org> 6 * All rights reserved. 7 * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org> 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_ddb.h" 35 36 #include <sys/param.h> 37 #include <sys/bus.h> 38 #include <sys/lock.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/mutex.h> 42 #include <sys/module.h> 43 #include <sys/rman.h> 44 #include <sys/sglist.h> 45 #include <sys/sysctl.h> 46 47 #ifdef DDB 48 #include <ddb/ddb.h> 49 #endif 50 51 #include <dev/pci/pcireg.h> 52 #include <dev/pci/pcivar.h> 53 54 #include <machine/bus.h> 55 #include <machine/resource.h> 56 #include <machine/vmparam.h> 57 58 #include <opencrypto/cryptodev.h> 59 #include <opencrypto/xform.h> 60 61 #include <vm/vm.h> 62 #include <vm/pmap.h> 63 64 #include "cryptodev_if.h" 65 66 #include "ccp.h" 67 #include "ccp_hardware.h" 68 #include "ccp_lsb.h" 69 70 CTASSERT(sizeof(struct ccp_desc) == 32); 71 72 static struct ccp_xts_unitsize_map_entry { 73 enum ccp_xts_unitsize cxu_id; 74 unsigned cxu_size; 75 } ccp_xts_unitsize_map[] = { 76 { CCP_XTS_AES_UNIT_SIZE_16, 16 }, 77 { CCP_XTS_AES_UNIT_SIZE_512, 512 }, 78 { CCP_XTS_AES_UNIT_SIZE_1024, 1024 }, 79 { CCP_XTS_AES_UNIT_SIZE_2048, 2048 }, 80 { CCP_XTS_AES_UNIT_SIZE_4096, 4096 }, 81 }; 82 83 SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 84 "ccp node"); 85 86 unsigned g_ccp_ring_order = 11; 87 SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order, 88 0, "Set CCP ring order. (1 << this) == ring size. 
Min: 6, Max: 16"); 89 90 /* 91 * Zero buffer, sufficient for padding LSB entries, that does not span a page 92 * boundary 93 */ 94 static const char g_zeroes[32] __aligned(32); 95 96 static inline uint32_t 97 ccp_read_4(struct ccp_softc *sc, uint32_t offset) 98 { 99 return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset)); 100 } 101 102 static inline void 103 ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value) 104 { 105 bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value); 106 } 107 108 static inline uint32_t 109 ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset) 110 { 111 /* 112 * Each queue gets its own 4kB register space. Queue 0 is at 0x1000. 113 */ 114 return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset)); 115 } 116 117 static inline void 118 ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset, 119 uint32_t value) 120 { 121 ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value); 122 } 123 124 void 125 ccp_queue_write_tail(struct ccp_queue *qp) 126 { 127 ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE, 128 ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail)); 129 } 130 131 /* 132 * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of 133 * that entry for the queue's private LSB region. 134 */ 135 static inline uint8_t 136 ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry) 137 { 138 return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry)); 139 } 140 141 /* 142 * Given a queue and a reserved LSB entry index, compute the LSB *address* of 143 * that entry for the queue's private LSB region. 
144 */ 145 static inline uint32_t 146 ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry) 147 { 148 return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE); 149 } 150 151 /* 152 * Some terminology: 153 * 154 * LSB - Local Storage Block 155 * ========================= 156 * 157 * 8 segments/regions, each containing 16 entries. 158 * 159 * Each entry contains 256 bits (32 bytes). 160 * 161 * Segments are virtually addressed in commands, but accesses cannot cross 162 * segment boundaries. Virtual map uses an identity mapping by default 163 * (virtual segment N corresponds to physical segment N). 164 * 165 * Access to a physical region can be restricted to any subset of all five 166 * queues. 167 * 168 * "Pass-through" mode 169 * =================== 170 * 171 * Pass-through is a generic DMA engine, much like ioat(4). Some nice 172 * features: 173 * 174 * - Supports byte-swapping for endian conversion (32- or 256-bit words) 175 * - AND, OR, XOR with fixed 256-bit mask 176 * - CRC32 of data (may be used in tandem with bswap, but not bit operations) 177 * - Read/write of LSB 178 * - Memset 179 * 180 * If bit manipulation mode is enabled, input must be a multiple of 256 bits 181 * (32 bytes). 182 * 183 * If byte-swapping is enabled, input must be a multiple of the word size. 184 * 185 * Zlib mode -- only usable from one queue at a time, single job at a time. 186 * ======================================================================== 187 * 188 * Only usable from private host, aka PSP? Not host processor? 189 * 190 * RNG. 191 * ==== 192 * 193 * Raw bits are conditioned with AES and fed through CTR_DRBG. Output goes in 194 * a ring buffer readable by software. 195 * 196 * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are 197 * implemented on the raw input stream and may be enabled to verify min-entropy 198 * of 0.5 bits per bit. 
199 */ 200 201 static void 202 ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 203 { 204 bus_addr_t *baddr; 205 206 KASSERT(error == 0, ("%s: error:%d", __func__, error)); 207 baddr = arg; 208 *baddr = segs->ds_addr; 209 } 210 211 static int 212 ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue) 213 { 214 struct ccp_softc *sc; 215 struct ccp_queue *qp; 216 void *desc; 217 size_t ringsz, num_descriptors; 218 int error; 219 220 desc = NULL; 221 sc = device_get_softc(dev); 222 qp = &sc->queues[queue]; 223 224 /* 225 * Don't bother allocating a ring for queues the host isn't allowed to 226 * drive. 227 */ 228 if ((sc->valid_queues & (1 << queue)) == 0) 229 return (0); 230 231 ccp_queue_decode_lsb_regions(sc, lsbmask, queue); 232 233 /* Ignore queues that do not have any LSB access. */ 234 if (qp->lsb_mask == 0) { 235 device_printf(dev, "Ignoring queue %u with no LSB access\n", 236 queue); 237 sc->valid_queues &= ~(1 << queue); 238 return (0); 239 } 240 241 num_descriptors = 1 << sc->ring_size_order; 242 ringsz = sizeof(struct ccp_desc) * num_descriptors; 243 244 /* 245 * "Queue_Size" is order - 1. 246 * 247 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits. 
248 */ 249 error = bus_dma_tag_create(bus_get_dma_tag(dev), 250 1 << (5 + sc->ring_size_order), 251 #if defined(__i386__) && !defined(PAE) 252 0, BUS_SPACE_MAXADDR, 253 #else 254 (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT, 255 #endif 256 BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1, 257 ringsz, 0, NULL, NULL, &qp->ring_desc_tag); 258 if (error != 0) 259 goto out; 260 261 error = bus_dmamem_alloc(qp->ring_desc_tag, &desc, 262 BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map); 263 if (error != 0) 264 goto out; 265 266 error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc, 267 ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK); 268 if (error != 0) 269 goto out; 270 271 qp->desc_ring = desc; 272 qp->completions_ring = malloc(num_descriptors * 273 sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK); 274 275 /* Zero control register; among other things, clears the RUN flag. */ 276 qp->qcontrol = 0; 277 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol); 278 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0); 279 280 /* Clear any leftover interrupt status flags */ 281 ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE, 282 ALL_INTERRUPTS); 283 284 qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT; 285 286 ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE, 287 (uint32_t)qp->desc_ring_bus_addr); 288 ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE, 289 (uint32_t)qp->desc_ring_bus_addr); 290 291 /* 292 * Enable completion interrupts, as well as error or administrative 293 * halt interrupts. We don't use administrative halts, but they 294 * shouldn't trip unless we do, so it ought to be harmless. 
295 */ 296 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 297 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED); 298 299 qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT; 300 qp->qcontrol |= CMD_Q_RUN; 301 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol); 302 303 out: 304 if (error != 0) { 305 if (qp->desc_ring != NULL) 306 bus_dmamap_unload(qp->ring_desc_tag, 307 qp->ring_desc_map); 308 if (desc != NULL) 309 bus_dmamem_free(qp->ring_desc_tag, desc, 310 qp->ring_desc_map); 311 if (qp->ring_desc_tag != NULL) 312 bus_dma_tag_destroy(qp->ring_desc_tag); 313 } 314 return (error); 315 } 316 317 static void 318 ccp_hw_detach_queue(device_t dev, unsigned queue) 319 { 320 struct ccp_softc *sc; 321 struct ccp_queue *qp; 322 323 sc = device_get_softc(dev); 324 qp = &sc->queues[queue]; 325 326 /* 327 * Don't bother allocating a ring for queues the host isn't allowed to 328 * drive. 329 */ 330 if ((sc->valid_queues & (1 << queue)) == 0) 331 return; 332 333 free(qp->completions_ring, M_CCP); 334 bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map); 335 bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map); 336 bus_dma_tag_destroy(qp->ring_desc_tag); 337 } 338 339 static int 340 ccp_map_pci_bar(device_t dev) 341 { 342 struct ccp_softc *sc; 343 344 sc = device_get_softc(dev); 345 346 sc->pci_resource_id = PCIR_BAR(2); 347 sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 348 &sc->pci_resource_id, RF_ACTIVE); 349 if (sc->pci_resource == NULL) { 350 device_printf(dev, "unable to allocate pci resource\n"); 351 return (ENODEV); 352 } 353 354 sc->pci_resource_id_msix = PCIR_BAR(5); 355 sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 356 &sc->pci_resource_id_msix, RF_ACTIVE); 357 if (sc->pci_resource_msix == NULL) { 358 device_printf(dev, "unable to allocate pci resource msix\n"); 359 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id, 360 sc->pci_resource); 361 return (ENODEV); 362 } 363 
364 sc->pci_bus_tag = rman_get_bustag(sc->pci_resource); 365 sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource); 366 return (0); 367 } 368 369 static void 370 ccp_unmap_pci_bar(device_t dev) 371 { 372 struct ccp_softc *sc; 373 374 sc = device_get_softc(dev); 375 376 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix, 377 sc->pci_resource_msix); 378 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id, 379 sc->pci_resource); 380 } 381 382 const static struct ccp_error_code { 383 uint8_t ce_code; 384 const char *ce_name; 385 int ce_errno; 386 const char *ce_desc; 387 } ccp_error_codes[] = { 388 { 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" }, 389 { 0x03, "ILLEGAL_FUNCTION_TYPE", EIO, 390 "A non-supported function type was specified" }, 391 { 0x04, "ILLEGAL_FUNCTION_MODE", EIO, 392 "A non-supported function mode was specified" }, 393 { 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO, 394 "A CMAC type was specified when ENCRYPT was not specified" }, 395 { 0x06, "ILLEGAL_FUNCTION_SIZE", EIO, 396 "A non-supported function size was specified.\n" 397 "AES-CFB: Size was not 127 or 7;\n" 398 "3DES-CFB: Size was not 7;\n" 399 "RSA: See supported size table (7.4.2);\n" 400 "ECC: Size was greater than 576 bits." 
}, 401 { 0x07, "Zlib_MISSING_INIT_EOM", EIO, 402 "Zlib command does not have INIT and EOM set" }, 403 { 0x08, "ILLEGAL_FUNCTION_RSVD", EIO, 404 "Reserved bits in a function specification were not 0" }, 405 { 0x09, "ILLEGAL_BUFFER_LENGTH", EIO, 406 "The buffer length specified was not correct for the selected engine" 407 }, 408 { 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n" 409 "Undefined VLSB segment mapping or\n" 410 "mapping to unsupported LSB segment id" }, 411 { 0x0B, "ILLEGAL_MEM_ADDR", EFAULT, 412 "The specified source/destination buffer access was illegal:\n" 413 "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n" 414 "Data buffer not completely contained within a single segment; or\n" 415 "Pointer with Fixed=1 is not 32-bit aligned; or\n" 416 "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory." 417 }, 418 { 0x0C, "ILLEGAL_MEM_SEL", EIO, 419 "A src_mem, dst_mem, or key_mem field was illegal:\n" 420 "A field was set to a reserved value; or\n" 421 "A public command attempted to reference AXI1 (local) or GART memory; or\n" 422 "A Zlib command attmpted to use the LSB." }, 423 { 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO, 424 "The specified context location was illegal:\n" 425 "Context located in a LSB location disallowed by the LSB protection masks; or\n" 426 "Context not completely contained within a single segment." }, 427 { 0x0E, "ILLEGAL_KEY_ADDR", EIO, 428 "The specified key location was illegal:\n" 429 "Key located in a LSB location disallowed by the LSB protection masks; or\n" 430 "Key not completely contained within a single segment." 
}, 431 { 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" }, 432 /* XXX Could fill out these descriptions too */ 433 { 0x13, "IDMA0_AXI_SLVERR", EIO, "" }, 434 { 0x14, "IDMA0_AXI_DECERR", EIO, "" }, 435 { 0x16, "IDMA1_AXI_SLVERR", EIO, "" }, 436 { 0x17, "IDMA1_AXI_DECERR", EIO, "" }, 437 { 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" }, 438 { 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" }, 439 { 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" }, 440 { 0x1D, "ZLIB_EXTRA_DATA", EIO, "" }, 441 { 0x1E, "ZLIB_BTYPE", EIO, "" }, 442 { 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" }, 443 { 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" }, 444 { 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" }, 445 { 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" }, 446 { 0x24, "ZLIB_LIMIT_REACHED", EIO, "" }, 447 { 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" }, 448 { 0x26, "ODMA0_AXI_SLVERR", EIO, "" }, 449 { 0x27, "ODMA0_AXI_DECERR", EIO, "" }, 450 { 0x29, "ODMA1_AXI_SLVERR", EIO, "" }, 451 { 0x2A, "ODMA1_AXI_DECERR", EIO, "" }, 452 { 0x2B, "LSB_PARITY_ERR", EIO, 453 "A read from the LSB encountered a parity error" }, 454 }; 455 456 static void 457 ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc) 458 { 459 struct ccp_completion_ctx *cctx; 460 const struct ccp_error_code *ec; 461 struct ccp_softc *sc; 462 uint32_t status, error, esource, faultblock; 463 unsigned q, idx; 464 int errno; 465 466 sc = qp->cq_softc; 467 q = qp->cq_qindex; 468 469 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE); 470 471 error = status & STATUS_ERROR_MASK; 472 473 /* Decode error status */ 474 ec = NULL; 475 for (idx = 0; idx < nitems(ccp_error_codes); idx++) 476 if (ccp_error_codes[idx].ce_code == error) { 477 ec = &ccp_error_codes[idx]; 478 break; 479 } 480 481 esource = (status >> STATUS_ERRORSOURCE_SHIFT) & 482 STATUS_ERRORSOURCE_MASK; 483 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) & 484 STATUS_VLSB_FAULTBLOCK_MASK; 485 device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n", 486 (ec 
!= NULL) ? ec->ce_name : "(reserved)", error, esource, 487 faultblock); 488 if (ec != NULL) 489 device_printf(sc->dev, "Error description: %s\n", ec->ce_desc); 490 491 /* TODO Could format the desc nicely here */ 492 idx = desc - qp->desc_ring; 493 DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx, 494 (const void *)desc, " "); 495 496 /* 497 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status, 498 * Zlib Decompress status may be interesting. 499 */ 500 501 while (true) { 502 /* Keep unused descriptors zero for next use. */ 503 memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx])); 504 505 cctx = &qp->completions_ring[idx]; 506 507 /* 508 * Restart procedure described in § 14.2.5. Could be used by HoC if we 509 * used that. 510 * 511 * Advance HEAD_LO past bad descriptor + any remaining in 512 * transaction manually, then restart queue. 513 */ 514 idx = (idx + 1) % (1 << sc->ring_size_order); 515 516 /* Callback function signals end of transaction */ 517 if (cctx->callback_fn != NULL) { 518 if (ec == NULL) 519 errno = EIO; 520 else 521 errno = ec->ce_errno; 522 /* TODO More specific error code */ 523 cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno); 524 cctx->callback_fn = NULL; 525 break; 526 } 527 } 528 529 qp->cq_head = idx; 530 qp->cq_waiting = false; 531 wakeup(&qp->cq_tail); 532 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head); 533 ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE, 534 (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE)); 535 ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol); 536 DPRINTF(sc->dev, "%s: Restarted queue\n", __func__); 537 } 538 539 static void 540 ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints) 541 { 542 struct ccp_completion_ctx *cctx; 543 struct ccp_softc *sc; 544 const struct ccp_desc *desc; 545 uint32_t headlo, idx; 546 unsigned q, completed; 547 548 sc = qp->cq_softc; 549 q = qp->cq_qindex; 550 551 mtx_lock(&qp->cq_lock); 552 553 /* 
554 * Hardware HEAD_LO points to the first incomplete descriptor. Process 555 * any submitted and completed descriptors, up to but not including 556 * HEAD_LO. 557 */ 558 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE); 559 idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE; 560 561 DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx, 562 qp->cq_head); 563 completed = 0; 564 while (qp->cq_head != idx) { 565 DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head); 566 567 cctx = &qp->completions_ring[qp->cq_head]; 568 if (cctx->callback_fn != NULL) { 569 cctx->callback_fn(qp, cctx->session, 570 cctx->callback_arg, 0); 571 cctx->callback_fn = NULL; 572 } 573 574 /* Keep unused descriptors zero for next use. */ 575 memset(&qp->desc_ring[qp->cq_head], 0, 576 sizeof(qp->desc_ring[qp->cq_head])); 577 578 qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order); 579 completed++; 580 } 581 if (completed > 0) { 582 qp->cq_waiting = false; 583 wakeup(&qp->cq_tail); 584 } 585 586 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head); 587 588 /* 589 * Desc points to the first incomplete descriptor, at the time we read 590 * HEAD_LO. If there was an error flagged in interrupt status, the HW 591 * will not proceed past the erroneous descriptor by itself. 592 */ 593 desc = &qp->desc_ring[idx]; 594 if ((ints & INT_ERROR) != 0) 595 ccp_intr_handle_error(qp, desc); 596 597 mtx_unlock(&qp->cq_lock); 598 } 599 600 static void 601 ccp_intr_handler(void *arg) 602 { 603 struct ccp_softc *sc = arg; 604 size_t i; 605 uint32_t ints; 606 607 DPRINTF(sc->dev, "%s: interrupt\n", __func__); 608 609 /* 610 * We get one global interrupt per PCI device, shared over all of 611 * its queues. Scan each valid queue on interrupt for flags indicating 612 * activity. 
613 */ 614 for (i = 0; i < nitems(sc->queues); i++) { 615 if ((sc->valid_queues & (1 << i)) == 0) 616 continue; 617 618 ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE); 619 if (ints == 0) 620 continue; 621 622 #if 0 623 DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__, 624 (unsigned)ints, i); 625 #endif 626 /* Write back 1s to clear interrupt status bits. */ 627 ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints); 628 629 /* 630 * If there was an error, we still need to run completions on 631 * any descriptors prior to the error. The completions handler 632 * invoked below will also handle the error descriptor. 633 */ 634 if ((ints & (INT_COMPLETION | INT_ERROR)) != 0) 635 ccp_intr_run_completions(&sc->queues[i], ints); 636 637 if ((ints & INT_QUEUE_STOPPED) != 0) 638 device_printf(sc->dev, "%s: queue %zu stopped\n", 639 __func__, i); 640 } 641 642 /* Re-enable interrupts after processing */ 643 for (i = 0; i < nitems(sc->queues); i++) { 644 if ((sc->valid_queues & (1 << i)) == 0) 645 continue; 646 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 647 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED); 648 } 649 } 650 651 static int 652 ccp_intr_filter(void *arg) 653 { 654 struct ccp_softc *sc = arg; 655 size_t i; 656 657 /* TODO: Split individual queues into separate taskqueues? 
*/ 658 for (i = 0; i < nitems(sc->queues); i++) { 659 if ((sc->valid_queues & (1 << i)) == 0) 660 continue; 661 662 /* Mask interrupt until task completes */ 663 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0); 664 } 665 666 return (FILTER_SCHEDULE_THREAD); 667 } 668 669 static int 670 ccp_setup_interrupts(struct ccp_softc *sc) 671 { 672 uint32_t nvec; 673 int rid, error, n, ridcopy; 674 675 n = pci_msix_count(sc->dev); 676 if (n < 1) { 677 device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n); 678 return (ENXIO); 679 } 680 681 nvec = n; 682 error = pci_alloc_msix(sc->dev, &nvec); 683 if (error != 0) { 684 device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__, 685 error); 686 return (error); 687 } 688 if (nvec < 1) { 689 device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n", 690 __func__); 691 return (ENXIO); 692 } 693 if (nvec > nitems(sc->intr_res)) { 694 device_printf(sc->dev, "%s: too many vectors: %u\n", __func__, 695 nvec); 696 nvec = nitems(sc->intr_res); 697 } 698 699 for (rid = 1; rid < 1 + nvec; rid++) { 700 ridcopy = rid; 701 sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev, 702 SYS_RES_IRQ, &ridcopy, RF_ACTIVE); 703 if (sc->intr_res[rid - 1] == NULL) { 704 device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n", 705 __func__); 706 return (ENXIO); 707 } 708 709 sc->intr_tag[rid - 1] = NULL; 710 error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1], 711 INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter, 712 ccp_intr_handler, sc, &sc->intr_tag[rid - 1]); 713 if (error != 0) 714 device_printf(sc->dev, "%s: setup_intr: %d\n", 715 __func__, error); 716 } 717 sc->intr_count = nvec; 718 719 return (error); 720 } 721 722 static void 723 ccp_release_interrupts(struct ccp_softc *sc) 724 { 725 unsigned i; 726 727 for (i = 0; i < sc->intr_count; i++) { 728 if (sc->intr_tag[i] != NULL) 729 bus_teardown_intr(sc->dev, sc->intr_res[i], 730 sc->intr_tag[i]); 731 if (sc->intr_res[i] != NULL) 732 bus_release_resource(sc->dev, SYS_RES_IRQ, 733 
rman_get_rid(sc->intr_res[i]), sc->intr_res[i]); 734 } 735 736 pci_release_msi(sc->dev); 737 } 738 739 int 740 ccp_hw_attach(device_t dev) 741 { 742 struct ccp_softc *sc; 743 uint64_t lsbmask; 744 uint32_t version, lsbmasklo, lsbmaskhi; 745 unsigned queue_idx, j; 746 int error; 747 bool bars_mapped, interrupts_setup; 748 749 queue_idx = 0; 750 bars_mapped = interrupts_setup = false; 751 sc = device_get_softc(dev); 752 753 error = ccp_map_pci_bar(dev); 754 if (error != 0) { 755 device_printf(dev, "%s: couldn't map BAR(s)\n", __func__); 756 goto out; 757 } 758 bars_mapped = true; 759 760 error = pci_enable_busmaster(dev); 761 if (error != 0) { 762 device_printf(dev, "%s: couldn't enable busmaster\n", 763 __func__); 764 goto out; 765 } 766 767 sc->ring_size_order = g_ccp_ring_order; 768 if (sc->ring_size_order < 6 || sc->ring_size_order > 16) { 769 device_printf(dev, "bogus hw.ccp.ring_order\n"); 770 error = EINVAL; 771 goto out; 772 } 773 sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET); 774 775 version = ccp_read_4(sc, VERSION_REG); 776 if ((version & VERSION_NUM_MASK) < 5) { 777 device_printf(dev, 778 "driver supports version 5 and later hardware\n"); 779 error = ENXIO; 780 goto out; 781 } 782 783 error = ccp_setup_interrupts(sc); 784 if (error != 0) 785 goto out; 786 interrupts_setup = true; 787 788 sc->hw_version = version & VERSION_NUM_MASK; 789 sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) & 790 VERSION_NUMVQM_MASK; 791 sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) & 792 VERSION_LSBSIZE_MASK; 793 sc->hw_features = version & VERSION_CAP_MASK; 794 795 /* 796 * Copy private LSB mask to public registers to enable access to LSB 797 * from all queues allowed by BIOS. 
798 */ 799 lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET); 800 lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET); 801 ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo); 802 ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi); 803 804 lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo; 805 806 for (; queue_idx < nitems(sc->queues); queue_idx++) { 807 error = ccp_hw_attach_queue(dev, lsbmask, queue_idx); 808 if (error != 0) { 809 device_printf(dev, "%s: couldn't attach queue %u\n", 810 __func__, queue_idx); 811 goto out; 812 } 813 } 814 ccp_assign_lsb_regions(sc, lsbmask); 815 816 out: 817 if (error != 0) { 818 if (interrupts_setup) 819 ccp_release_interrupts(sc); 820 for (j = 0; j < queue_idx; j++) 821 ccp_hw_detach_queue(dev, j); 822 if (sc->ring_size_order != 0) 823 pci_disable_busmaster(dev); 824 if (bars_mapped) 825 ccp_unmap_pci_bar(dev); 826 } 827 return (error); 828 } 829 830 void 831 ccp_hw_detach(device_t dev) 832 { 833 struct ccp_softc *sc; 834 unsigned i; 835 836 sc = device_get_softc(dev); 837 838 for (i = 0; i < nitems(sc->queues); i++) 839 ccp_hw_detach_queue(dev, i); 840 841 ccp_release_interrupts(sc); 842 pci_disable_busmaster(dev); 843 ccp_unmap_pci_bar(dev); 844 } 845 846 static int __must_check 847 ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst, 848 enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type, 849 bus_size_t len, enum ccp_passthru_byteswap swapmode, 850 enum ccp_passthru_bitwise bitmode, bool interrupt, 851 const struct ccp_completion_ctx *cctx) 852 { 853 struct ccp_desc *desc; 854 855 if (ccp_queue_get_ring_space(qp) == 0) 856 return (EAGAIN); 857 858 desc = &qp->desc_ring[qp->cq_tail]; 859 860 memset(desc, 0, sizeof(*desc)); 861 desc->engine = CCP_ENGINE_PASSTHRU; 862 863 desc->pt.ioc = interrupt; 864 desc->pt.byteswap = swapmode; 865 desc->pt.bitwise = bitmode; 866 desc->length = len; 867 868 desc->src_lo = (uint32_t)src; 869 desc->src_hi = src >> 32; 870 desc->src_mem = src_type; 871 872 
desc->dst_lo = (uint32_t)dst; 873 desc->dst_hi = dst >> 32; 874 desc->dst_mem = dst_type; 875 876 if (bitmode != CCP_PASSTHRU_BITWISE_NOOP) 877 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY); 878 879 if (cctx != NULL) 880 memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx)); 881 882 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); 883 return (0); 884 } 885 886 static int __must_check 887 ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb, 888 struct sglist *sgl, bus_size_t len, bool interrupt, 889 const struct ccp_completion_ctx *cctx) 890 { 891 struct sglist_seg *seg; 892 size_t i, remain, nb; 893 int error; 894 895 remain = len; 896 for (i = 0; i < sgl->sg_nseg && remain != 0; i++) { 897 seg = &sgl->sg_segs[i]; 898 /* crp lengths are int, so 32-bit min() is ok. */ 899 nb = min(remain, seg->ss_len); 900 901 if (tolsb) 902 error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB, 903 seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb, 904 CCP_PASSTHRU_BYTESWAP_NOOP, 905 CCP_PASSTHRU_BITWISE_NOOP, 906 (nb == remain) && interrupt, cctx); 907 else 908 error = ccp_passthrough(qp, seg->ss_paddr, 909 CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb, 910 CCP_PASSTHRU_BYTESWAP_NOOP, 911 CCP_PASSTHRU_BITWISE_NOOP, 912 (nb == remain) && interrupt, cctx); 913 if (error != 0) 914 return (error); 915 916 remain -= nb; 917 } 918 return (0); 919 } 920 921 /* 922 * Note that these vectors are in reverse of the usual order. 
 */
/*
 * Initial hash state for each supported SHA variant, stored as one
 * page-aligned object (SHA_H).  Every vector has eight words: 32-bit words for
 * SHA1/SHA224/SHA256 and 64-bit words for SHA384/SHA512.
 */
const struct SHA_vectors {
	uint32_t SHA1[8];
	uint32_t SHA224[8];
	uint32_t SHA256[8];
	uint64_t SHA384[8];
	uint64_t SHA512[8];
} SHA_H __aligned(PAGE_SIZE) = {
	/* SHA-1 has five state words; the last three slots are zero padding. */
	.SHA1 = {
		0xc3d2e1f0ul,
		0x10325476ul,
		0x98badcfeul,
		0xefcdab89ul,
		0x67452301ul,
		0,
		0,
		0,
	},
	.SHA224 = {
		0xbefa4fa4ul,
		0x64f98fa7ul,
		0x68581511ul,
		0xffc00b31ul,
		0xf70e5939ul,
		0x3070dd17ul,
		0x367cd507ul,
		0xc1059ed8ul,
	},
	.SHA256 = {
		0x5be0cd19ul,
		0x1f83d9abul,
		0x9b05688cul,
		0x510e527ful,
		0xa54ff53aul,
		0x3c6ef372ul,
		0xbb67ae85ul,
		0x6a09e667ul,
	},
	.SHA384 = {
		0x47b5481dbefa4fa4ull,
		0xdb0c2e0d64f98fa7ull,
		0x8eb44a8768581511ull,
		0x67332667ffc00b31ull,
		0x152fecd8f70e5939ull,
		0x9159015a3070dd17ull,
		0x629a292a367cd507ull,
		0xcbbb9d5dc1059ed8ull,
	},
	.SHA512 = {
		0x5be0cd19137e2179ull,
		0x1f83d9abfb41bd6bull,
		0x9b05688c2b3e6c1full,
		0x510e527fade682d1ull,
		0xa54ff53a5f1d36f1ull,
		0x3c6ef372fe94f82bull,
		0xbb67ae8584caa73bull,
		0x6a09e667f3bcc908ull,
	},
};
/*
 * Ensure vectors do not cross a page boundary.
 *
 * Disabled due to a new Clang error: "expression is not an integral constant
 * expression."  GCC (cross toolchain) seems to handle this assertion with
 * _Static_assert just fine.
988 */ 989 #if 0 990 CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H)); 991 #endif 992 993 const struct SHA_Defn { 994 enum sha_version version; 995 const void *H_vectors; 996 size_t H_size; 997 struct auth_hash *axf; 998 enum ccp_sha_type engine_type; 999 } SHA_definitions[] = { 1000 { 1001 .version = SHA1, 1002 .H_vectors = SHA_H.SHA1, 1003 .H_size = sizeof(SHA_H.SHA1), 1004 .axf = &auth_hash_hmac_sha1, 1005 .engine_type = CCP_SHA_TYPE_1, 1006 }, 1007 #if 0 1008 { 1009 .version = SHA2_224, 1010 .H_vectors = SHA_H.SHA224, 1011 .H_size = sizeof(SHA_H.SHA224), 1012 .axf = &auth_hash_hmac_sha2_224, 1013 .engine_type = CCP_SHA_TYPE_224, 1014 }, 1015 #endif 1016 { 1017 .version = SHA2_256, 1018 .H_vectors = SHA_H.SHA256, 1019 .H_size = sizeof(SHA_H.SHA256), 1020 .axf = &auth_hash_hmac_sha2_256, 1021 .engine_type = CCP_SHA_TYPE_256, 1022 }, 1023 { 1024 .version = SHA2_384, 1025 .H_vectors = SHA_H.SHA384, 1026 .H_size = sizeof(SHA_H.SHA384), 1027 .axf = &auth_hash_hmac_sha2_384, 1028 .engine_type = CCP_SHA_TYPE_384, 1029 }, 1030 { 1031 .version = SHA2_512, 1032 .H_vectors = SHA_H.SHA512, 1033 .H_size = sizeof(SHA_H.SHA512), 1034 .axf = &auth_hash_hmac_sha2_512, 1035 .engine_type = CCP_SHA_TYPE_512, 1036 }, 1037 }; 1038 1039 static int __must_check 1040 ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn, 1041 vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits) 1042 { 1043 struct ccp_desc *desc; 1044 1045 if (ccp_queue_get_ring_space(qp) == 0) 1046 return (EAGAIN); 1047 1048 desc = &qp->desc_ring[qp->cq_tail]; 1049 1050 memset(desc, 0, sizeof(*desc)); 1051 desc->engine = CCP_ENGINE_SHA; 1052 desc->som = start; 1053 desc->eom = end; 1054 1055 desc->sha.type = defn->engine_type; 1056 desc->length = len; 1057 1058 if (end) { 1059 desc->sha_len_lo = (uint32_t)msgbits; 1060 desc->sha_len_hi = msgbits >> 32; 1061 } 1062 1063 desc->src_lo = (uint32_t)addr; 1064 desc->src_hi = addr >> 32; 1065 desc->src_mem = 
CCP_MEMTYPE_SYSTEM; 1066 1067 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA); 1068 1069 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); 1070 return (0); 1071 } 1072 1073 static int __must_check 1074 ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src, 1075 struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx) 1076 { 1077 const struct SHA_Defn *defn; 1078 struct sglist_seg *seg; 1079 size_t i, msgsize, remaining, nb; 1080 uint32_t lsbaddr; 1081 int error; 1082 1083 for (i = 0; i < nitems(SHA_definitions); i++) 1084 if (SHA_definitions[i].version == version) 1085 break; 1086 if (i == nitems(SHA_definitions)) 1087 return (EINVAL); 1088 defn = &SHA_definitions[i]; 1089 1090 /* XXX validate input ??? */ 1091 1092 /* Load initial SHA state into LSB */ 1093 /* XXX ensure H_vectors don't span page boundaries */ 1094 error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA), 1095 CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors), 1096 CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE), 1097 CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false, 1098 NULL); 1099 if (error != 0) 1100 return (error); 1101 1102 /* Execute series of SHA updates on correctly sized buffers */ 1103 msgsize = 0; 1104 for (i = 0; i < sgl_src->sg_nseg; i++) { 1105 seg = &sgl_src->sg_segs[i]; 1106 msgsize += seg->ss_len; 1107 error = ccp_sha_single_desc(qp, defn, seg->ss_paddr, 1108 seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1, 1109 msgsize << 3); 1110 if (error != 0) 1111 return (error); 1112 } 1113 1114 /* Copy result out to sgl_dst */ 1115 remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE); 1116 lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA); 1117 for (i = 0; i < sgl_dst->sg_nseg; i++) { 1118 seg = &sgl_dst->sg_segs[i]; 1119 /* crp lengths are int, so 32-bit min() is ok. 
*/ 1120 nb = min(remaining, seg->ss_len); 1121 1122 error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM, 1123 lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP, 1124 CCP_PASSTHRU_BITWISE_NOOP, 1125 (cctx != NULL) ? (nb == remaining) : false, 1126 (nb == remaining) ? cctx : NULL); 1127 if (error != 0) 1128 return (error); 1129 1130 remaining -= nb; 1131 lsbaddr += nb; 1132 if (remaining == 0) 1133 break; 1134 } 1135 1136 return (0); 1137 } 1138 1139 static void 1140 byteswap256(uint64_t *buffer) 1141 { 1142 uint64_t t; 1143 1144 t = bswap64(buffer[3]); 1145 buffer[3] = bswap64(buffer[0]); 1146 buffer[0] = t; 1147 1148 t = bswap64(buffer[2]); 1149 buffer[2] = bswap64(buffer[1]); 1150 buffer[1] = t; 1151 } 1152 1153 /* 1154 * Translate CCP internal LSB hash format into a standard hash ouput. 1155 * 1156 * Manipulates input buffer with byteswap256 operation. 1157 */ 1158 static void 1159 ccp_sha_copy_result(char *output, char *buffer, enum sha_version version) 1160 { 1161 const struct SHA_Defn *defn; 1162 size_t i; 1163 1164 for (i = 0; i < nitems(SHA_definitions); i++) 1165 if (SHA_definitions[i].version == version) 1166 break; 1167 if (i == nitems(SHA_definitions)) 1168 panic("bogus sha version auth_mode %u\n", (unsigned)version); 1169 1170 defn = &SHA_definitions[i]; 1171 1172 /* Swap 256bit manually -- DMA engine can, but with limitations */ 1173 byteswap256((void *)buffer); 1174 if (defn->axf->hashsize > LSB_ENTRY_SIZE) 1175 byteswap256((void *)(buffer + LSB_ENTRY_SIZE)); 1176 1177 switch (defn->version) { 1178 case SHA1: 1179 memcpy(output, buffer + 12, defn->axf->hashsize); 1180 break; 1181 #if 0 1182 case SHA2_224: 1183 memcpy(output, buffer + XXX, defn->axf->hashsize); 1184 break; 1185 #endif 1186 case SHA2_256: 1187 memcpy(output, buffer, defn->axf->hashsize); 1188 break; 1189 case SHA2_384: 1190 memcpy(output, 1191 buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize, 1192 defn->axf->hashsize - LSB_ENTRY_SIZE); 1193 memcpy(output + 
defn->axf->hashsize - LSB_ENTRY_SIZE, buffer, 1194 LSB_ENTRY_SIZE); 1195 break; 1196 case SHA2_512: 1197 memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE); 1198 memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE); 1199 break; 1200 } 1201 } 1202 1203 static void 1204 ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s, 1205 struct cryptop *crp, int error) 1206 { 1207 char ihash[SHA2_512_HASH_LEN /* max hash len */]; 1208 union authctx auth_ctx; 1209 struct auth_hash *axf; 1210 1211 axf = s->hmac.auth_hash; 1212 1213 s->pending--; 1214 1215 if (error != 0) { 1216 crp->crp_etype = error; 1217 goto out; 1218 } 1219 1220 /* Do remaining outer hash over small inner hash in software */ 1221 axf->Init(&auth_ctx); 1222 axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize); 1223 ccp_sha_copy_result(ihash, s->hmac.res, s->hmac.auth_mode); 1224 #if 0 1225 INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__, 1226 (u_char *)ihash, " "); 1227 #endif 1228 axf->Update(&auth_ctx, ihash, axf->hashsize); 1229 axf->Final(s->hmac.res, &auth_ctx); 1230 1231 if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) { 1232 crypto_copydata(crp, crp->crp_digest_start, s->hmac.hash_len, 1233 ihash); 1234 if (timingsafe_bcmp(s->hmac.res, ihash, s->hmac.hash_len) != 0) 1235 crp->crp_etype = EBADMSG; 1236 } else 1237 crypto_copyback(crp, crp->crp_digest_start, s->hmac.hash_len, 1238 s->hmac.res); 1239 1240 /* Avoid leaking key material */ 1241 explicit_bzero(&auth_ctx, sizeof(auth_ctx)); 1242 explicit_bzero(s->hmac.res, sizeof(s->hmac.res)); 1243 1244 out: 1245 crypto_done(crp); 1246 } 1247 1248 static void 1249 ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1250 int error) 1251 { 1252 struct cryptop *crp; 1253 1254 crp = vcrp; 1255 ccp_do_hmac_done(qp, s, crp, error); 1256 } 1257 1258 static int __must_check 1259 ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, 1260 const struct ccp_completion_ctx *cctx) 1261 { 1262 device_t dev; 
1263 struct auth_hash *axf; 1264 int error; 1265 1266 dev = qp->cq_softc->dev; 1267 axf = s->hmac.auth_hash; 1268 1269 /* 1270 * Populate the SGL describing inside hash contents. We want to hash 1271 * the ipad (key XOR fixed bit pattern) concatenated with the user 1272 * data. 1273 */ 1274 sglist_reset(qp->cq_sg_ulptx); 1275 error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize); 1276 if (error != 0) 1277 return (error); 1278 if (crp->crp_aad_length != 0) { 1279 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1280 crp->crp_aad_start, crp->crp_aad_length); 1281 if (error != 0) 1282 return (error); 1283 } 1284 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1285 crp->crp_payload_start, crp->crp_payload_length); 1286 if (error != 0) { 1287 DPRINTF(dev, "%s: sglist too short\n", __func__); 1288 return (error); 1289 } 1290 /* Populate SGL for output -- use hmac.res buffer. */ 1291 sglist_reset(qp->cq_sg_dst); 1292 error = sglist_append(qp->cq_sg_dst, s->hmac.res, 1293 roundup2(axf->hashsize, LSB_ENTRY_SIZE)); 1294 if (error != 0) 1295 return (error); 1296 1297 error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst, 1298 cctx); 1299 if (error != 0) { 1300 DPRINTF(dev, "%s: ccp_sha error\n", __func__); 1301 return (error); 1302 } 1303 return (0); 1304 } 1305 1306 int __must_check 1307 ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1308 { 1309 struct ccp_completion_ctx ctx; 1310 1311 ctx.callback_fn = ccp_hmac_done; 1312 ctx.callback_arg = crp; 1313 ctx.session = s; 1314 1315 return (ccp_do_hmac(qp, s, crp, &ctx)); 1316 } 1317 1318 static void 1319 ccp_byteswap(char *data, size_t len) 1320 { 1321 size_t i; 1322 char t; 1323 1324 len--; 1325 for (i = 0; i < len; i++, len--) { 1326 t = data[i]; 1327 data[i] = data[len]; 1328 data[len] = t; 1329 } 1330 } 1331 1332 static void 1333 ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1334 int error) 1335 { 1336 struct 
cryptop *crp; 1337 1338 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv)); 1339 1340 crp = vcrp; 1341 1342 s->pending--; 1343 1344 if (error != 0) 1345 crp->crp_etype = error; 1346 1347 DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp); 1348 crypto_done(crp); 1349 } 1350 1351 static void 1352 ccp_collect_iv(struct cryptop *crp, const struct crypto_session_params *csp, 1353 char *iv) 1354 { 1355 1356 if (crp->crp_flags & CRYPTO_F_IV_GENERATE) { 1357 arc4rand(iv, csp->csp_ivlen, 0); 1358 crypto_copyback(crp, crp->crp_iv_start, csp->csp_ivlen, iv); 1359 } else if (crp->crp_flags & CRYPTO_F_IV_SEPARATE) 1360 memcpy(iv, crp->crp_iv, csp->csp_ivlen); 1361 else 1362 crypto_copydata(crp, crp->crp_iv_start, csp->csp_ivlen, iv); 1363 1364 /* 1365 * If the input IV is 12 bytes, append an explicit counter of 1. 1366 */ 1367 if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16 && 1368 csp->csp_ivlen == 12) 1369 *(uint32_t *)&iv[12] = htobe32(1); 1370 1371 if (csp->csp_cipher_alg == CRYPTO_AES_XTS && 1372 csp->csp_ivlen < AES_BLOCK_LEN) 1373 memset(&iv[csp->csp_ivlen], 0, AES_BLOCK_LEN - csp->csp_ivlen); 1374 1375 /* Reverse order of IV material for HW */ 1376 INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__, iv, " ", 1377 csp->csp_ivlen); 1378 1379 /* 1380 * For unknown reasons, XTS mode expects the IV in the reverse byte 1381 * order to every other AES mode. 
1382 */ 1383 if (csp->csp_cipher_alg != CRYPTO_AES_XTS) 1384 ccp_byteswap(iv, AES_BLOCK_LEN); 1385 } 1386 1387 static int __must_check 1388 ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src, 1389 size_t len) 1390 { 1391 int error; 1392 1393 sglist_reset(qp->cq_sg_ulptx); 1394 error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len); 1395 if (error != 0) 1396 return (error); 1397 1398 error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len, 1399 false, NULL); 1400 return (error); 1401 } 1402 1403 static int __must_check 1404 ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, 1405 enum ccp_cipher_dir dir, const struct ccp_completion_ctx *cctx) 1406 { 1407 struct ccp_desc *desc; 1408 device_t dev; 1409 unsigned i; 1410 enum ccp_xts_unitsize usize; 1411 1412 /* IV and Key data are already loaded */ 1413 1414 dev = qp->cq_softc->dev; 1415 1416 for (i = 0; i < nitems(ccp_xts_unitsize_map); i++) 1417 if (ccp_xts_unitsize_map[i].cxu_size == 1418 crp->crp_payload_length) { 1419 usize = ccp_xts_unitsize_map[i].cxu_id; 1420 break; 1421 } 1422 if (i >= nitems(ccp_xts_unitsize_map)) 1423 return (EINVAL); 1424 1425 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1426 struct sglist_seg *seg; 1427 1428 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1429 1430 desc = &qp->desc_ring[qp->cq_tail]; 1431 desc->engine = CCP_ENGINE_XTS_AES; 1432 desc->som = (i == 0); 1433 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1); 1434 desc->ioc = (desc->eom && cctx != NULL); 1435 DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n", 1436 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom, 1437 (int)desc->ioc, (int)dir); 1438 1439 if (desc->ioc) 1440 memcpy(&qp->completions_ring[qp->cq_tail], cctx, 1441 sizeof(*cctx)); 1442 1443 desc->aes_xts.encrypt = dir; 1444 desc->aes_xts.type = s->blkcipher.cipher_type; 1445 desc->aes_xts.size = usize; 1446 1447 DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__, 1448 qp->cq_tail, 
(unsigned)desc->aes_xts.type, 1449 (unsigned)desc->aes_xts.size); 1450 1451 desc->length = seg->ss_len; 1452 desc->src_lo = (uint32_t)seg->ss_paddr; 1453 desc->src_hi = (seg->ss_paddr >> 32); 1454 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1455 1456 /* Crypt in-place */ 1457 desc->dst_lo = desc->src_lo; 1458 desc->dst_hi = desc->src_hi; 1459 desc->dst_mem = desc->src_mem; 1460 1461 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1462 desc->key_hi = 0; 1463 desc->key_mem = CCP_MEMTYPE_SB; 1464 1465 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1466 1467 qp->cq_tail = (qp->cq_tail + 1) % 1468 (1 << qp->cq_softc->ring_size_order); 1469 } 1470 return (0); 1471 } 1472 1473 static int __must_check 1474 ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s, 1475 struct cryptop *crp, const struct ccp_completion_ctx *cctx) 1476 { 1477 const struct crypto_session_params *csp; 1478 struct ccp_desc *desc; 1479 char *keydata; 1480 device_t dev; 1481 enum ccp_cipher_dir dir; 1482 int error, iv_len; 1483 size_t keydata_len; 1484 unsigned i, j; 1485 1486 dev = qp->cq_softc->dev; 1487 1488 if (s->blkcipher.key_len == 0 || crp->crp_payload_length == 0) { 1489 DPRINTF(dev, "%s: empty\n", __func__); 1490 return (EINVAL); 1491 } 1492 if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) { 1493 DPRINTF(dev, "%s: len modulo: %d\n", __func__, 1494 crp->crp_payload_length); 1495 return (EINVAL); 1496 } 1497 1498 /* 1499 * Individual segments must be multiples of AES block size for the HW 1500 * to process it. Non-compliant inputs aren't bogus, just not doable 1501 * on this hardware. 
1502 */ 1503 for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++) 1504 if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) { 1505 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__, 1506 qp->cq_sg_crp->sg_segs[i].ss_len); 1507 return (EINVAL); 1508 } 1509 1510 /* Gather IV/nonce data */ 1511 csp = crypto_get_params(crp->crp_session); 1512 ccp_collect_iv(crp, csp, s->blkcipher.iv); 1513 iv_len = csp->csp_ivlen; 1514 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) 1515 iv_len = AES_BLOCK_LEN; 1516 1517 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1518 dir = CCP_CIPHER_DIR_ENCRYPT; 1519 else 1520 dir = CCP_CIPHER_DIR_DECRYPT; 1521 1522 /* Set up passthrough op(s) to copy IV into LSB */ 1523 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 1524 s->blkcipher.iv, iv_len); 1525 if (error != 0) 1526 return (error); 1527 1528 /* 1529 * Initialize keydata and keydata_len for GCC. The default case of the 1530 * following switch is impossible to reach, but GCC doesn't know that. 1531 */ 1532 keydata_len = 0; 1533 keydata = NULL; 1534 1535 switch (csp->csp_cipher_alg) { 1536 case CRYPTO_AES_XTS: 1537 for (j = 0; j < nitems(ccp_xts_unitsize_map); j++) 1538 if (ccp_xts_unitsize_map[j].cxu_size == 1539 crp->crp_payload_length) 1540 break; 1541 /* Input buffer must be a supported UnitSize */ 1542 if (j >= nitems(ccp_xts_unitsize_map)) { 1543 device_printf(dev, "%s: rejected block size: %u\n", 1544 __func__, crp->crp_payload_length); 1545 return (EOPNOTSUPP); 1546 } 1547 /* FALLTHROUGH */ 1548 case CRYPTO_AES_CBC: 1549 case CRYPTO_AES_ICM: 1550 keydata = s->blkcipher.enckey; 1551 keydata_len = s->blkcipher.key_len; 1552 break; 1553 } 1554 1555 INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len, 1556 keydata, " "); 1557 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) 1558 INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " "); 1559 1560 /* Reverse order of key material for HW */ 1561 ccp_byteswap(keydata, keydata_len); 1562 1563 /* Store key material 
into LSB to avoid page boundaries */ 1564 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) { 1565 /* 1566 * XTS mode uses 2 256-bit vectors for the primary key and the 1567 * tweak key. For 128-bit keys, the vectors are zero-padded. 1568 * 1569 * After byteswapping the combined OCF-provided K1:K2 vector 1570 * above, we need to reverse the order again so the hardware 1571 * gets the swapped keys in the order K1':K2'. 1572 */ 1573 error = ccp_do_pst_to_lsb(qp, 1574 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata, 1575 keydata_len / 2); 1576 if (error != 0) 1577 return (error); 1578 error = ccp_do_pst_to_lsb(qp, 1579 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), 1580 keydata + (keydata_len / 2), keydata_len / 2); 1581 1582 /* Zero-pad 128 bit keys */ 1583 if (keydata_len == 32) { 1584 if (error != 0) 1585 return (error); 1586 error = ccp_do_pst_to_lsb(qp, 1587 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) + 1588 keydata_len / 2, g_zeroes, keydata_len / 2); 1589 if (error != 0) 1590 return (error); 1591 error = ccp_do_pst_to_lsb(qp, 1592 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) + 1593 keydata_len / 2, g_zeroes, keydata_len / 2); 1594 } 1595 } else 1596 error = ccp_do_pst_to_lsb(qp, 1597 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata, 1598 keydata_len); 1599 if (error != 0) 1600 return (error); 1601 1602 /* 1603 * Point SGLs at the subset of cryptop buffer contents representing the 1604 * data. 
1605 */ 1606 sglist_reset(qp->cq_sg_ulptx); 1607 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1608 crp->crp_payload_start, crp->crp_payload_length); 1609 if (error != 0) 1610 return (error); 1611 1612 INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__, 1613 (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " "); 1614 1615 DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail); 1616 1617 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg) 1618 return (EAGAIN); 1619 1620 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) 1621 return (ccp_do_xts(qp, s, crp, dir, cctx)); 1622 1623 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1624 struct sglist_seg *seg; 1625 1626 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1627 1628 desc = &qp->desc_ring[qp->cq_tail]; 1629 desc->engine = CCP_ENGINE_AES; 1630 desc->som = (i == 0); 1631 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1); 1632 desc->ioc = (desc->eom && cctx != NULL); 1633 DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n", 1634 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom, 1635 (int)desc->ioc, (int)dir); 1636 1637 if (desc->ioc) 1638 memcpy(&qp->completions_ring[qp->cq_tail], cctx, 1639 sizeof(*cctx)); 1640 1641 desc->aes.encrypt = dir; 1642 desc->aes.mode = s->blkcipher.cipher_mode; 1643 desc->aes.type = s->blkcipher.cipher_type; 1644 if (csp->csp_cipher_alg == CRYPTO_AES_ICM) 1645 /* 1646 * Size of CTR value in bits, - 1. ICM mode uses all 1647 * 128 bits as counter. 
1648 */ 1649 desc->aes.size = 127; 1650 1651 DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__, 1652 qp->cq_tail, (unsigned)desc->aes.mode, 1653 (unsigned)desc->aes.type, (unsigned)desc->aes.size); 1654 1655 desc->length = seg->ss_len; 1656 desc->src_lo = (uint32_t)seg->ss_paddr; 1657 desc->src_hi = (seg->ss_paddr >> 32); 1658 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1659 1660 /* Crypt in-place */ 1661 desc->dst_lo = desc->src_lo; 1662 desc->dst_hi = desc->src_hi; 1663 desc->dst_mem = desc->src_mem; 1664 1665 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1666 desc->key_hi = 0; 1667 desc->key_mem = CCP_MEMTYPE_SB; 1668 1669 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1670 1671 qp->cq_tail = (qp->cq_tail + 1) % 1672 (1 << qp->cq_softc->ring_size_order); 1673 } 1674 return (0); 1675 } 1676 1677 int __must_check 1678 ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1679 { 1680 struct ccp_completion_ctx ctx; 1681 1682 ctx.callback_fn = ccp_blkcipher_done; 1683 ctx.session = s; 1684 ctx.callback_arg = crp; 1685 1686 return (ccp_do_blkcipher(qp, s, crp, &ctx)); 1687 } 1688 1689 static void 1690 ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1691 int error) 1692 { 1693 struct cryptop *crp; 1694 1695 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv)); 1696 1697 crp = vcrp; 1698 1699 ccp_do_hmac_done(qp, s, crp, error); 1700 } 1701 1702 int __must_check 1703 ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1704 { 1705 struct ccp_completion_ctx ctx; 1706 int error; 1707 1708 ctx.callback_fn = ccp_authenc_done; 1709 ctx.session = s; 1710 ctx.callback_arg = crp; 1711 1712 /* Perform first operation */ 1713 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1714 error = ccp_do_blkcipher(qp, s, crp, NULL); 1715 else 1716 error = ccp_do_hmac(qp, s, crp, NULL); 1717 if (error != 0) 1718 return (error); 1719 1720 /* Perform second operation */ 1721 if 
(CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1722 error = ccp_do_hmac(qp, s, crp, &ctx); 1723 else 1724 error = ccp_do_blkcipher(qp, s, crp, &ctx); 1725 return (error); 1726 } 1727 1728 static int __must_check 1729 ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s) 1730 { 1731 struct ccp_desc *desc; 1732 struct sglist_seg *seg; 1733 unsigned i; 1734 1735 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg) 1736 return (EAGAIN); 1737 1738 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1739 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1740 1741 desc = &qp->desc_ring[qp->cq_tail]; 1742 1743 desc->engine = CCP_ENGINE_AES; 1744 desc->aes.mode = CCP_AES_MODE_GHASH; 1745 desc->aes.type = s->blkcipher.cipher_type; 1746 desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD; 1747 1748 desc->som = (i == 0); 1749 desc->length = seg->ss_len; 1750 1751 desc->src_lo = (uint32_t)seg->ss_paddr; 1752 desc->src_hi = (seg->ss_paddr >> 32); 1753 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1754 1755 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1756 1757 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1758 desc->key_mem = CCP_MEMTYPE_SB; 1759 1760 qp->cq_tail = (qp->cq_tail + 1) % 1761 (1 << qp->cq_softc->ring_size_order); 1762 } 1763 return (0); 1764 } 1765 1766 static int __must_check 1767 ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s, 1768 enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom) 1769 { 1770 struct ccp_desc *desc; 1771 1772 if (ccp_queue_get_ring_space(qp) == 0) 1773 return (EAGAIN); 1774 1775 desc = &qp->desc_ring[qp->cq_tail]; 1776 1777 desc->engine = CCP_ENGINE_AES; 1778 desc->aes.mode = CCP_AES_MODE_GCTR; 1779 desc->aes.type = s->blkcipher.cipher_type; 1780 desc->aes.encrypt = dir; 1781 desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1; 1782 1783 desc->som = som; 1784 desc->eom = eom; 1785 1786 /* Trailing bytes will be masked off by aes.size above. 
*/ 1787 desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN); 1788 1789 desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr; 1790 desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32; 1791 desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM; 1792 1793 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1794 1795 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1796 desc->key_mem = CCP_MEMTYPE_SB; 1797 1798 qp->cq_tail = (qp->cq_tail + 1) % 1799 (1 << qp->cq_softc->ring_size_order); 1800 return (0); 1801 } 1802 1803 static int __must_check 1804 ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s) 1805 { 1806 struct ccp_desc *desc; 1807 1808 if (ccp_queue_get_ring_space(qp) == 0) 1809 return (EAGAIN); 1810 1811 desc = &qp->desc_ring[qp->cq_tail]; 1812 1813 desc->engine = CCP_ENGINE_AES; 1814 desc->aes.mode = CCP_AES_MODE_GHASH; 1815 desc->aes.type = s->blkcipher.cipher_type; 1816 desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL; 1817 1818 desc->length = GMAC_BLOCK_LEN; 1819 1820 desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN); 1821 desc->src_mem = CCP_MEMTYPE_SB; 1822 1823 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1824 1825 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1826 desc->key_mem = CCP_MEMTYPE_SB; 1827 1828 desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH); 1829 desc->dst_mem = CCP_MEMTYPE_SB; 1830 1831 qp->cq_tail = (qp->cq_tail + 1) % 1832 (1 << qp->cq_softc->ring_size_order); 1833 return (0); 1834 } 1835 1836 static void 1837 ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1838 int error) 1839 { 1840 char tag[GMAC_DIGEST_LEN]; 1841 struct cryptop *crp; 1842 1843 crp = vcrp; 1844 1845 s->pending--; 1846 1847 if (error != 0) { 1848 crp->crp_etype = error; 1849 goto out; 1850 } 1851 1852 /* Encrypt is done. Decrypt needs to verify tag. */ 1853 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1854 goto out; 1855 1856 /* Copy in message tag. 
*/ 1857 crypto_copydata(crp, crp->crp_digest_start, s->gmac.hash_len, tag); 1858 1859 /* Verify tag against computed GMAC */ 1860 if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0) 1861 crp->crp_etype = EBADMSG; 1862 1863 out: 1864 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv)); 1865 explicit_bzero(&s->gmac.final_block, sizeof(s->gmac.final_block)); 1866 crypto_done(crp); 1867 } 1868 1869 int __must_check 1870 ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1871 { 1872 const struct crypto_session_params *csp; 1873 struct ccp_completion_ctx ctx; 1874 enum ccp_cipher_dir dir; 1875 device_t dev; 1876 unsigned i; 1877 int error; 1878 1879 if (s->blkcipher.key_len == 0) 1880 return (EINVAL); 1881 1882 dev = qp->cq_softc->dev; 1883 1884 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) 1885 dir = CCP_CIPHER_DIR_ENCRYPT; 1886 else 1887 dir = CCP_CIPHER_DIR_DECRYPT; 1888 1889 /* Zero initial GHASH portion of context */ 1890 memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv)); 1891 1892 /* Gather IV data */ 1893 csp = crypto_get_params(crp->crp_session); 1894 ccp_collect_iv(crp, csp, s->blkcipher.iv); 1895 1896 /* Reverse order of key material for HW */ 1897 ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len); 1898 1899 /* Prepare input buffer of concatenated lengths for final GHASH */ 1900 be64enc(s->gmac.final_block, (uint64_t)crp->crp_aad_length * 8); 1901 be64enc(&s->gmac.final_block[8], (uint64_t)crp->crp_payload_length * 8); 1902 1903 /* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */ 1904 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 1905 s->blkcipher.iv, 32); 1906 if (error != 0) 1907 return (error); 1908 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), 1909 s->blkcipher.enckey, s->blkcipher.key_len); 1910 if (error != 0) 1911 return (error); 1912 error = ccp_do_pst_to_lsb(qp, 1913 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block, 
1914 GMAC_BLOCK_LEN); 1915 if (error != 0) 1916 return (error); 1917 1918 /* First step - compute GHASH over AAD */ 1919 if (crp->crp_aad_length != 0) { 1920 sglist_reset(qp->cq_sg_ulptx); 1921 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1922 crp->crp_aad_start, crp->crp_aad_length); 1923 if (error != 0) 1924 return (error); 1925 1926 /* This engine cannot process non-block multiple AAD data. */ 1927 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) 1928 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % 1929 GMAC_BLOCK_LEN) != 0) { 1930 DPRINTF(dev, "%s: AD seg modulo: %zu\n", 1931 __func__, 1932 qp->cq_sg_ulptx->sg_segs[i].ss_len); 1933 return (EINVAL); 1934 } 1935 1936 error = ccp_do_ghash_aad(qp, s); 1937 if (error != 0) 1938 return (error); 1939 } 1940 1941 /* Feed data piece by piece into GCTR */ 1942 sglist_reset(qp->cq_sg_ulptx); 1943 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1944 crp->crp_payload_start, crp->crp_payload_length); 1945 if (error != 0) 1946 return (error); 1947 1948 /* 1949 * All segments except the last must be even multiples of AES block 1950 * size for the HW to process it. Non-compliant inputs aren't bogus, 1951 * just not doable on this hardware. 1952 * 1953 * XXX: Well, the hardware will produce a valid tag for shorter final 1954 * segment inputs, but it will still write out a block-sized plaintext 1955 * or ciphertext chunk. For a typical CRP this tramples trailing data, 1956 * including the provided message tag. So, reject such inputs for now. 
1957 */ 1958 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) 1959 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) { 1960 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__, 1961 qp->cq_sg_ulptx->sg_segs[i].ss_len); 1962 return (EINVAL); 1963 } 1964 1965 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1966 struct sglist_seg *seg; 1967 1968 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1969 error = ccp_do_gctr(qp, s, dir, seg, 1970 (i == 0 && crp->crp_aad_length == 0), 1971 i == (qp->cq_sg_ulptx->sg_nseg - 1)); 1972 if (error != 0) 1973 return (error); 1974 } 1975 1976 /* Send just initial IV (not GHASH!) to LSB again */ 1977 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 1978 s->blkcipher.iv, AES_BLOCK_LEN); 1979 if (error != 0) 1980 return (error); 1981 1982 ctx.callback_fn = ccp_gcm_done; 1983 ctx.session = s; 1984 ctx.callback_arg = crp; 1985 1986 /* Compute final hash and copy result back */ 1987 error = ccp_do_ghash_final(qp, s); 1988 if (error != 0) 1989 return (error); 1990 1991 /* When encrypting, copy computed tag out to caller buffer. */ 1992 sglist_reset(qp->cq_sg_ulptx); 1993 if (dir == CCP_CIPHER_DIR_ENCRYPT) 1994 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1995 crp->crp_digest_start, s->gmac.hash_len); 1996 else 1997 /* 1998 * For decrypting, copy the computed tag out to our session 1999 * buffer to verify in our callback. 
2000 */ 2001 error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block, 2002 s->gmac.hash_len); 2003 if (error != 0) 2004 return (error); 2005 error = ccp_passthrough_sgl(qp, 2006 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx, 2007 s->gmac.hash_len, true, &ctx); 2008 return (error); 2009 } 2010 2011 #define MAX_TRNG_RETRIES 10 2012 u_int 2013 random_ccp_read(void *v, u_int c) 2014 { 2015 uint32_t *buf; 2016 u_int i, j; 2017 2018 KASSERT(c % sizeof(*buf) == 0, ("%u not multiple of u_long", c)); 2019 2020 buf = v; 2021 for (i = c; i > 0; i -= sizeof(*buf)) { 2022 for (j = 0; j < MAX_TRNG_RETRIES; j++) { 2023 *buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET); 2024 if (*buf != 0) 2025 break; 2026 } 2027 if (j == MAX_TRNG_RETRIES) 2028 return (0); 2029 buf++; 2030 } 2031 return (c); 2032 2033 } 2034 2035 #ifdef DDB 2036 void 2037 db_ccp_show_hw(struct ccp_softc *sc) 2038 { 2039 2040 db_printf(" queue mask: 0x%x\n", 2041 ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET)); 2042 db_printf(" queue prio: 0x%x\n", 2043 ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET)); 2044 db_printf(" reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET)); 2045 db_printf(" trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET)); 2046 db_printf(" cmd timeout: 0x%x\n", 2047 ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET)); 2048 db_printf(" lsb public mask lo: 0x%x\n", 2049 ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET)); 2050 db_printf(" lsb public mask hi: 0x%x\n", 2051 ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET)); 2052 db_printf(" lsb private mask lo: 0x%x\n", 2053 ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET)); 2054 db_printf(" lsb private mask hi: 0x%x\n", 2055 ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET)); 2056 db_printf(" version: 0x%x\n", ccp_read_4(sc, VERSION_REG)); 2057 } 2058 2059 void 2060 db_ccp_show_queue_hw(struct ccp_queue *qp) 2061 { 2062 const struct ccp_error_code *ec; 2063 struct ccp_softc *sc; 2064 uint32_t status, error, esource, faultblock, headlo, qcontrol; 2065 unsigned q, i; 2066 2067 sc = 
qp->cq_softc; 2068 q = qp->cq_qindex; 2069 2070 qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE); 2071 db_printf(" qcontrol: 0x%x%s%s\n", qcontrol, 2072 (qcontrol & CMD_Q_RUN) ? " RUN" : "", 2073 (qcontrol & CMD_Q_HALTED) ? " HALTED" : ""); 2074 db_printf(" tail_lo: 0x%x\n", 2075 ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE)); 2076 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE); 2077 db_printf(" head_lo: 0x%x\n", headlo); 2078 db_printf(" int enable: 0x%x\n", 2079 ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE)); 2080 db_printf(" interrupt status: 0x%x\n", 2081 ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE)); 2082 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE); 2083 db_printf(" status: 0x%x\n", status); 2084 db_printf(" int stats: 0x%x\n", 2085 ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE)); 2086 2087 error = status & STATUS_ERROR_MASK; 2088 if (error == 0) 2089 return; 2090 2091 esource = (status >> STATUS_ERRORSOURCE_SHIFT) & 2092 STATUS_ERRORSOURCE_MASK; 2093 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) & 2094 STATUS_VLSB_FAULTBLOCK_MASK; 2095 2096 ec = NULL; 2097 for (i = 0; i < nitems(ccp_error_codes); i++) 2098 if (ccp_error_codes[i].ce_code == error) 2099 break; 2100 if (i < nitems(ccp_error_codes)) 2101 ec = &ccp_error_codes[i]; 2102 2103 db_printf(" Error: %s (%u) Source: %u Faulting LSB block: %u\n", 2104 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource, 2105 faultblock); 2106 if (ec != NULL) 2107 db_printf(" Error description: %s\n", ec->ce_desc); 2108 2109 i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE; 2110 db_printf(" Bad descriptor idx: %u contents:\n %32D\n", i, 2111 (void *)&qp->desc_ring[i], " "); 2112 } 2113 #endif 2114