1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2017 Chelsio Communications, Inc. 5 * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org> 6 * All rights reserved. 7 * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org> 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_ddb.h" 35 36 #include <sys/types.h> 37 #include <sys/bus.h> 38 #include <sys/lock.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/mutex.h> 42 #include <sys/module.h> 43 #include <sys/rman.h> 44 #include <sys/sglist.h> 45 #include <sys/sysctl.h> 46 47 #ifdef DDB 48 #include <ddb/ddb.h> 49 #endif 50 51 #include <dev/pci/pcireg.h> 52 #include <dev/pci/pcivar.h> 53 54 #include <machine/bus.h> 55 #include <machine/resource.h> 56 #include <machine/vmparam.h> 57 58 #include <opencrypto/cryptodev.h> 59 #include <opencrypto/xform.h> 60 61 #include <vm/vm.h> 62 #include <vm/pmap.h> 63 64 #include "cryptodev_if.h" 65 66 #include "ccp.h" 67 #include "ccp_hardware.h" 68 #include "ccp_lsb.h" 69 70 CTASSERT(sizeof(struct ccp_desc) == 32); 71 72 static struct ccp_xts_unitsize_map_entry { 73 enum ccp_xts_unitsize cxu_id; 74 unsigned cxu_size; 75 } ccp_xts_unitsize_map[] = { 76 { CCP_XTS_AES_UNIT_SIZE_16, 16 }, 77 { CCP_XTS_AES_UNIT_SIZE_512, 512 }, 78 { CCP_XTS_AES_UNIT_SIZE_1024, 1024 }, 79 { CCP_XTS_AES_UNIT_SIZE_2048, 2048 }, 80 { CCP_XTS_AES_UNIT_SIZE_4096, 4096 }, 81 }; 82 83 SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD, 0, "ccp node"); 84 85 unsigned g_ccp_ring_order = 11; 86 SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order, 87 0, "Set CCP ring order. (1 << this) == ring size. Min: 6, Max: 16"); 88 89 /* 90 * Zero buffer, sufficient for padding LSB entries, that does not span a page 91 * boundary 92 */ 93 static const char g_zeroes[32] __aligned(32); 94 95 static inline uint32_t 96 ccp_read_4(struct ccp_softc *sc, uint32_t offset) 97 { 98 return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset)); 99 } 100 101 static inline void 102 ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value) 103 { 104 bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value); 105 } 106 107 static inline uint32_t 108 ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset) 109 { 110 /* 111 * Each queue gets its own 4kB register space. Queue 0 is at 0x1000. 112 */ 113 return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset)); 114 } 115 116 static inline void 117 ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset, 118 uint32_t value) 119 { 120 ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value); 121 } 122 123 void 124 ccp_queue_write_tail(struct ccp_queue *qp) 125 { 126 ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE, 127 ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail)); 128 } 129 130 /* 131 * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of 132 * that entry for the queue's private LSB region. 133 */ 134 static inline uint8_t 135 ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry) 136 { 137 return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry)); 138 } 139 140 /* 141 * Given a queue and a reserved LSB entry index, compute the LSB *address* of 142 * that entry for the queue's private LSB region. 143 */ 144 static inline uint32_t 145 ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry) 146 { 147 return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE); 148 } 149 150 /* 151 * Some terminology: 152 * 153 * LSB - Local Storage Block 154 * ========================= 155 * 156 * 8 segments/regions, each containing 16 entries. 157 * 158 * Each entry contains 256 bits (32 bytes). 159 * 160 * Segments are virtually addressed in commands, but accesses cannot cross 161 * segment boundaries. Virtual map uses an identity mapping by default 162 * (virtual segment N corresponds to physical segment N). 163 * 164 * Access to a physical region can be restricted to any subset of all five 165 * queues. 166 * 167 * "Pass-through" mode 168 * =================== 169 * 170 * Pass-through is a generic DMA engine, much like ioat(4). Some nice 171 * features: 172 * 173 * - Supports byte-swapping for endian conversion (32- or 256-bit words) 174 * - AND, OR, XOR with fixed 256-bit mask 175 * - CRC32 of data (may be used in tandem with bswap, but not bit operations) 176 * - Read/write of LSB 177 * - Memset 178 * 179 * If bit manipulation mode is enabled, input must be a multiple of 256 bits 180 * (32 bytes). 181 * 182 * If byte-swapping is enabled, input must be a multiple of the word size. 183 * 184 * Zlib mode -- only usable from one queue at a time, single job at a time. 185 * ======================================================================== 186 * 187 * Only usable from private host, aka PSP? Not host processor? 188 * 189 * RNG. 190 * ==== 191 * 192 * Raw bits are conditioned with AES and fed through CTR_DRBG. Output goes in 193 * a ring buffer readable by software. 194 * 195 * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are 196 * implemented on the raw input stream and may be enabled to verify min-entropy 197 * of 0.5 bits per bit. 198 */ 199 200 static void 201 ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 202 { 203 bus_addr_t *baddr; 204 205 KASSERT(error == 0, ("%s: error:%d", __func__, error)); 206 baddr = arg; 207 *baddr = segs->ds_addr; 208 } 209 210 static int 211 ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue) 212 { 213 struct ccp_softc *sc; 214 struct ccp_queue *qp; 215 void *desc; 216 size_t ringsz, num_descriptors; 217 int error; 218 219 desc = NULL; 220 sc = device_get_softc(dev); 221 qp = &sc->queues[queue]; 222 223 /* 224 * Don't bother allocating a ring for queues the host isn't allowed to 225 * drive. 226 */ 227 if ((sc->valid_queues & (1 << queue)) == 0) 228 return (0); 229 230 ccp_queue_decode_lsb_regions(sc, lsbmask, queue); 231 232 /* Ignore queues that do not have any LSB access. */ 233 if (qp->lsb_mask == 0) { 234 device_printf(dev, "Ignoring queue %u with no LSB access\n", 235 queue); 236 sc->valid_queues &= ~(1 << queue); 237 return (0); 238 } 239 240 num_descriptors = 1 << sc->ring_size_order; 241 ringsz = sizeof(struct ccp_desc) * num_descriptors; 242 243 /* 244 * "Queue_Size" is order - 1. 245 * 246 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits. 247 */ 248 error = bus_dma_tag_create(bus_get_dma_tag(dev), 249 1 << (5 + sc->ring_size_order), 250 #if defined(__i386__) && !defined(PAE) 251 0, BUS_SPACE_MAXADDR, 252 #else 253 (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT, 254 #endif 255 BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1, 256 ringsz, 0, NULL, NULL, &qp->ring_desc_tag); 257 if (error != 0) 258 goto out; 259 260 error = bus_dmamem_alloc(qp->ring_desc_tag, &desc, 261 BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map); 262 if (error != 0) 263 goto out; 264 265 error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc, 266 ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK); 267 if (error != 0) 268 goto out; 269 270 qp->desc_ring = desc; 271 qp->completions_ring = malloc(num_descriptors * 272 sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK); 273 274 /* Zero control register; among other things, clears the RUN flag. */ 275 qp->qcontrol = 0; 276 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol); 277 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0); 278 279 /* Clear any leftover interrupt status flags */ 280 ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE, 281 ALL_INTERRUPTS); 282 283 qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT; 284 285 ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE, 286 (uint32_t)qp->desc_ring_bus_addr); 287 ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE, 288 (uint32_t)qp->desc_ring_bus_addr); 289 290 /* 291 * Enable completion interrupts, as well as error or administrative 292 * halt interrupts. We don't use administrative halts, but they 293 * shouldn't trip unless we do, so it ought to be harmless. 294 */ 295 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 296 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED); 297 298 qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT; 299 qp->qcontrol |= CMD_Q_RUN; 300 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol); 301 302 out: 303 if (error != 0) { 304 if (qp->desc_ring != NULL) 305 bus_dmamap_unload(qp->ring_desc_tag, 306 qp->ring_desc_map); 307 if (desc != NULL) 308 bus_dmamem_free(qp->ring_desc_tag, desc, 309 qp->ring_desc_map); 310 if (qp->ring_desc_tag != NULL) 311 bus_dma_tag_destroy(qp->ring_desc_tag); 312 } 313 return (error); 314 } 315 316 static void 317 ccp_hw_detach_queue(device_t dev, unsigned queue) 318 { 319 struct ccp_softc *sc; 320 struct ccp_queue *qp; 321 322 sc = device_get_softc(dev); 323 qp = &sc->queues[queue]; 324 325 /* 326 * Don't bother allocating a ring for queues the host isn't allowed to 327 * drive. 328 */ 329 if ((sc->valid_queues & (1 << queue)) == 0) 330 return; 331 332 free(qp->completions_ring, M_CCP); 333 bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map); 334 bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map); 335 bus_dma_tag_destroy(qp->ring_desc_tag); 336 } 337 338 static int 339 ccp_map_pci_bar(device_t dev) 340 { 341 struct ccp_softc *sc; 342 343 sc = device_get_softc(dev); 344 345 sc->pci_resource_id = PCIR_BAR(2); 346 sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 347 &sc->pci_resource_id, RF_ACTIVE); 348 if (sc->pci_resource == NULL) { 349 device_printf(dev, "unable to allocate pci resource\n"); 350 return (ENODEV); 351 } 352 353 sc->pci_resource_id_msix = PCIR_BAR(5); 354 sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 355 &sc->pci_resource_id_msix, RF_ACTIVE); 356 if (sc->pci_resource_msix == NULL) { 357 device_printf(dev, "unable to allocate pci resource msix\n"); 358 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id, 359 sc->pci_resource); 360 return (ENODEV); 361 } 362 363 sc->pci_bus_tag = rman_get_bustag(sc->pci_resource); 364 sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource); 365 return (0); 366 } 367 368 static void 369 ccp_unmap_pci_bar(device_t dev) 370 { 371 struct ccp_softc *sc; 372 373 sc = device_get_softc(dev); 374 375 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix, 376 sc->pci_resource_msix); 377 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id, 378 sc->pci_resource); 379 } 380 381 const static struct ccp_error_code { 382 uint8_t ce_code; 383 const char *ce_name; 384 int ce_errno; 385 const char *ce_desc; 386 } ccp_error_codes[] = { 387 { 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" }, 388 { 0x03, "ILLEGAL_FUNCTION_TYPE", EIO, 389 "A non-supported function type was specified" }, 390 { 0x04, "ILLEGAL_FUNCTION_MODE", EIO, 391 "A non-supported function mode was specified" }, 392 { 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO, 393 "A CMAC type was specified when ENCRYPT was not specified" }, 394 { 0x06, "ILLEGAL_FUNCTION_SIZE", EIO, 395 "A non-supported function size was specified.\n" 396 "AES-CFB: Size was not 127 or 7;\n" 397 "3DES-CFB: Size was not 7;\n" 398 "RSA: See supported size table (7.4.2);\n" 399 "ECC: Size was greater than 576 bits." }, 400 { 0x07, "Zlib_MISSING_INIT_EOM", EIO, 401 "Zlib command does not have INIT and EOM set" }, 402 { 0x08, "ILLEGAL_FUNCTION_RSVD", EIO, 403 "Reserved bits in a function specification were not 0" }, 404 { 0x09, "ILLEGAL_BUFFER_LENGTH", EIO, 405 "The buffer length specified was not correct for the selected engine" 406 }, 407 { 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n" 408 "Undefined VLSB segment mapping or\n" 409 "mapping to unsupported LSB segment id" }, 410 { 0x0B, "ILLEGAL_MEM_ADDR", EFAULT, 411 "The specified source/destination buffer access was illegal:\n" 412 "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n" 413 "Data buffer not completely contained within a single segment; or\n" 414 "Pointer with Fixed=1 is not 32-bit aligned; or\n" 415 "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory." 416 }, 417 { 0x0C, "ILLEGAL_MEM_SEL", EIO, 418 "A src_mem, dst_mem, or key_mem field was illegal:\n" 419 "A field was set to a reserved value; or\n" 420 "A public command attempted to reference AXI1 (local) or GART memory; or\n" 421 "A Zlib command attmpted to use the LSB." }, 422 { 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO, 423 "The specified context location was illegal:\n" 424 "Context located in a LSB location disallowed by the LSB protection masks; or\n" 425 "Context not completely contained within a single segment." }, 426 { 0x0E, "ILLEGAL_KEY_ADDR", EIO, 427 "The specified key location was illegal:\n" 428 "Key located in a LSB location disallowed by the LSB protection masks; or\n" 429 "Key not completely contained within a single segment." }, 430 { 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" }, 431 /* XXX Could fill out these descriptions too */ 432 { 0x13, "IDMA0_AXI_SLVERR", EIO, "" }, 433 { 0x14, "IDMA0_AXI_DECERR", EIO, "" }, 434 { 0x16, "IDMA1_AXI_SLVERR", EIO, "" }, 435 { 0x17, "IDMA1_AXI_DECERR", EIO, "" }, 436 { 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" }, 437 { 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" }, 438 { 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" }, 439 { 0x1D, "ZLIB_EXTRA_DATA", EIO, "" }, 440 { 0x1E, "ZLIB_BTYPE", EIO, "" }, 441 { 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" }, 442 { 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" }, 443 { 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" }, 444 { 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" }, 445 { 0x24, "ZLIB_LIMIT_REACHED", EIO, "" }, 446 { 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" }, 447 { 0x26, "ODMA0_AXI_SLVERR", EIO, "" }, 448 { 0x27, "ODMA0_AXI_DECERR", EIO, "" }, 449 { 0x29, "ODMA1_AXI_SLVERR", EIO, "" }, 450 { 0x2A, "ODMA1_AXI_DECERR", EIO, "" }, 451 { 0x2B, "LSB_PARITY_ERR", EIO, 452 "A read from the LSB encountered a parity error" }, 453 }; 454 455 static void 456 ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc) 457 { 458 struct ccp_completion_ctx *cctx; 459 const struct ccp_error_code *ec; 460 struct ccp_softc *sc; 461 uint32_t status, error, esource, faultblock; 462 unsigned q, idx; 463 int errno; 464 465 sc = qp->cq_softc; 466 q = qp->cq_qindex; 467 468 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE); 469 470 error = status & STATUS_ERROR_MASK; 471 472 /* Decode error status */ 473 ec = NULL; 474 for (idx = 0; idx < nitems(ccp_error_codes); idx++) 475 if (ccp_error_codes[idx].ce_code == error) { 476 ec = &ccp_error_codes[idx]; 477 break; 478 } 479 480 esource = (status >> STATUS_ERRORSOURCE_SHIFT) & 481 STATUS_ERRORSOURCE_MASK; 482 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) & 483 STATUS_VLSB_FAULTBLOCK_MASK; 484 device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n", 485 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource, 486 faultblock); 487 if (ec != NULL) 488 device_printf(sc->dev, "Error description: %s\n", ec->ce_desc); 489 490 /* TODO Could format the desc nicely here */ 491 idx = desc - qp->desc_ring; 492 DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx, 493 (const void *)desc, " "); 494 495 /* 496 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status, 497 * Zlib Decompress status may be interesting. 498 */ 499 500 while (true) { 501 /* Keep unused descriptors zero for next use. */ 502 memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx])); 503 504 cctx = &qp->completions_ring[idx]; 505 506 /* 507 * Restart procedure described in § 14.2.5. Could be used by HoC if we 508 * used that. 509 * 510 * Advance HEAD_LO past bad descriptor + any remaining in 511 * transaction manually, then restart queue. 512 */ 513 idx = (idx + 1) % (1 << sc->ring_size_order); 514 515 /* Callback function signals end of transaction */ 516 if (cctx->callback_fn != NULL) { 517 if (ec == NULL) 518 errno = EIO; 519 else 520 errno = ec->ce_errno; 521 /* TODO More specific error code */ 522 cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno); 523 cctx->callback_fn = NULL; 524 break; 525 } 526 } 527 528 qp->cq_head = idx; 529 qp->cq_waiting = false; 530 wakeup(&qp->cq_tail); 531 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head); 532 ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE, 533 (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE)); 534 ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol); 535 DPRINTF(sc->dev, "%s: Restarted queue\n", __func__); 536 } 537 538 static void 539 ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints) 540 { 541 struct ccp_completion_ctx *cctx; 542 struct ccp_softc *sc; 543 const struct ccp_desc *desc; 544 uint32_t headlo, idx; 545 unsigned q, completed; 546 547 sc = qp->cq_softc; 548 q = qp->cq_qindex; 549 550 mtx_lock(&qp->cq_lock); 551 552 /* 553 * Hardware HEAD_LO points to the first incomplete descriptor. Process 554 * any submitted and completed descriptors, up to but not including 555 * HEAD_LO. 556 */ 557 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE); 558 idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE; 559 560 DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx, 561 qp->cq_head); 562 completed = 0; 563 while (qp->cq_head != idx) { 564 DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head); 565 566 cctx = &qp->completions_ring[qp->cq_head]; 567 if (cctx->callback_fn != NULL) { 568 cctx->callback_fn(qp, cctx->session, 569 cctx->callback_arg, 0); 570 cctx->callback_fn = NULL; 571 } 572 573 /* Keep unused descriptors zero for next use. */ 574 memset(&qp->desc_ring[qp->cq_head], 0, 575 sizeof(qp->desc_ring[qp->cq_head])); 576 577 qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order); 578 completed++; 579 } 580 if (completed > 0) { 581 qp->cq_waiting = false; 582 wakeup(&qp->cq_tail); 583 } 584 585 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head); 586 587 /* 588 * Desc points to the first incomplete descriptor, at the time we read 589 * HEAD_LO. If there was an error flagged in interrupt status, the HW 590 * will not proceed past the erroneous descriptor by itself. 591 */ 592 desc = &qp->desc_ring[idx]; 593 if ((ints & INT_ERROR) != 0) 594 ccp_intr_handle_error(qp, desc); 595 596 mtx_unlock(&qp->cq_lock); 597 } 598 599 static void 600 ccp_intr_handler(void *arg) 601 { 602 struct ccp_softc *sc = arg; 603 size_t i; 604 uint32_t ints; 605 606 DPRINTF(sc->dev, "%s: interrupt\n", __func__); 607 608 /* 609 * We get one global interrupt per PCI device, shared over all of 610 * its queues. Scan each valid queue on interrupt for flags indicating 611 * activity. 612 */ 613 for (i = 0; i < nitems(sc->queues); i++) { 614 if ((sc->valid_queues & (1 << i)) == 0) 615 continue; 616 617 ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE); 618 if (ints == 0) 619 continue; 620 621 #if 0 622 DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__, 623 (unsigned)ints, i); 624 #endif 625 /* Write back 1s to clear interrupt status bits. */ 626 ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints); 627 628 /* 629 * If there was an error, we still need to run completions on 630 * any descriptors prior to the error. The completions handler 631 * invoked below will also handle the error descriptor. 632 */ 633 if ((ints & (INT_COMPLETION | INT_ERROR)) != 0) 634 ccp_intr_run_completions(&sc->queues[i], ints); 635 636 if ((ints & INT_QUEUE_STOPPED) != 0) 637 device_printf(sc->dev, "%s: queue %zu stopped\n", 638 __func__, i); 639 } 640 641 /* Re-enable interrupts after processing */ 642 for (i = 0; i < nitems(sc->queues); i++) { 643 if ((sc->valid_queues & (1 << i)) == 0) 644 continue; 645 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 646 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED); 647 } 648 } 649 650 static int 651 ccp_intr_filter(void *arg) 652 { 653 struct ccp_softc *sc = arg; 654 size_t i; 655 656 /* TODO: Split individual queues into separate taskqueues? */ 657 for (i = 0; i < nitems(sc->queues); i++) { 658 if ((sc->valid_queues & (1 << i)) == 0) 659 continue; 660 661 /* Mask interrupt until task completes */ 662 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0); 663 } 664 665 return (FILTER_SCHEDULE_THREAD); 666 } 667 668 static int 669 ccp_setup_interrupts(struct ccp_softc *sc) 670 { 671 uint32_t nvec; 672 int rid, error, n, ridcopy; 673 674 n = pci_msix_count(sc->dev); 675 if (n < 1) { 676 device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n); 677 return (ENXIO); 678 } 679 680 nvec = n; 681 error = pci_alloc_msix(sc->dev, &nvec); 682 if (error != 0) { 683 device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__, 684 error); 685 return (error); 686 } 687 if (nvec < 1) { 688 device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n", 689 __func__); 690 return (ENXIO); 691 } 692 if (nvec > nitems(sc->intr_res)) { 693 device_printf(sc->dev, "%s: too many vectors: %u\n", __func__, 694 nvec); 695 nvec = nitems(sc->intr_res); 696 } 697 698 for (rid = 1; rid < 1 + nvec; rid++) { 699 ridcopy = rid; 700 sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev, 701 SYS_RES_IRQ, &ridcopy, RF_ACTIVE); 702 if (sc->intr_res[rid - 1] == NULL) { 703 device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n", 704 __func__); 705 return (ENXIO); 706 } 707 708 sc->intr_tag[rid - 1] = NULL; 709 error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1], 710 INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter, 711 ccp_intr_handler, sc, &sc->intr_tag[rid - 1]); 712 if (error != 0) 713 device_printf(sc->dev, "%s: setup_intr: %d\n", 714 __func__, error); 715 } 716 sc->intr_count = nvec; 717 718 return (error); 719 } 720 721 static void 722 ccp_release_interrupts(struct ccp_softc *sc) 723 { 724 unsigned i; 725 726 for (i = 0; i < sc->intr_count; i++) { 727 if (sc->intr_tag[i] != NULL) 728 bus_teardown_intr(sc->dev, sc->intr_res[i], 729 sc->intr_tag[i]); 730 if (sc->intr_res[i] != NULL) 731 bus_release_resource(sc->dev, SYS_RES_IRQ, 732 rman_get_rid(sc->intr_res[i]), sc->intr_res[i]); 733 } 734 735 pci_release_msi(sc->dev); 736 } 737 738 int 739 ccp_hw_attach(device_t dev) 740 { 741 struct ccp_softc *sc; 742 uint64_t lsbmask; 743 uint32_t version, lsbmasklo, lsbmaskhi; 744 unsigned queue_idx, j; 745 int error; 746 bool bars_mapped, interrupts_setup; 747 748 queue_idx = 0; 749 bars_mapped = interrupts_setup = false; 750 sc = device_get_softc(dev); 751 752 error = ccp_map_pci_bar(dev); 753 if (error != 0) { 754 device_printf(dev, "%s: couldn't map BAR(s)\n", __func__); 755 goto out; 756 } 757 bars_mapped = true; 758 759 error = pci_enable_busmaster(dev); 760 if (error != 0) { 761 device_printf(dev, "%s: couldn't enable busmaster\n", 762 __func__); 763 goto out; 764 } 765 766 sc->ring_size_order = g_ccp_ring_order; 767 if (sc->ring_size_order < 6 || sc->ring_size_order > 16) { 768 device_printf(dev, "bogus hw.ccp.ring_order\n"); 769 error = EINVAL; 770 goto out; 771 } 772 sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET); 773 774 version = ccp_read_4(sc, VERSION_REG); 775 if ((version & VERSION_NUM_MASK) < 5) { 776 device_printf(dev, 777 "driver supports version 5 and later hardware\n"); 778 error = ENXIO; 779 goto out; 780 } 781 782 error = ccp_setup_interrupts(sc); 783 if (error != 0) 784 goto out; 785 interrupts_setup = true; 786 787 sc->hw_version = version & VERSION_NUM_MASK; 788 sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) & 789 VERSION_NUMVQM_MASK; 790 sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) & 791 VERSION_LSBSIZE_MASK; 792 sc->hw_features = version & VERSION_CAP_MASK; 793 794 /* 795 * Copy private LSB mask to public registers to enable access to LSB 796 * from all queues allowed by BIOS. 797 */ 798 lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET); 799 lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET); 800 ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo); 801 ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi); 802 803 lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo; 804 805 for (; queue_idx < nitems(sc->queues); queue_idx++) { 806 error = ccp_hw_attach_queue(dev, lsbmask, queue_idx); 807 if (error != 0) { 808 device_printf(dev, "%s: couldn't attach queue %u\n", 809 __func__, queue_idx); 810 goto out; 811 } 812 } 813 ccp_assign_lsb_regions(sc, lsbmask); 814 815 out: 816 if (error != 0) { 817 if (interrupts_setup) 818 ccp_release_interrupts(sc); 819 for (j = 0; j < queue_idx; j++) 820 ccp_hw_detach_queue(dev, j); 821 if (sc->ring_size_order != 0) 822 pci_disable_busmaster(dev); 823 if (bars_mapped) 824 ccp_unmap_pci_bar(dev); 825 } 826 return (error); 827 } 828 829 void 830 ccp_hw_detach(device_t dev) 831 { 832 struct ccp_softc *sc; 833 unsigned i; 834 835 sc = device_get_softc(dev); 836 837 for (i = 0; i < nitems(sc->queues); i++) 838 ccp_hw_detach_queue(dev, i); 839 840 ccp_release_interrupts(sc); 841 pci_disable_busmaster(dev); 842 ccp_unmap_pci_bar(dev); 843 } 844 845 static int __must_check 846 ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst, 847 enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type, 848 bus_size_t len, enum ccp_passthru_byteswap swapmode, 849 enum ccp_passthru_bitwise bitmode, bool interrupt, 850 const struct ccp_completion_ctx *cctx) 851 { 852 struct ccp_desc *desc; 853 854 if (ccp_queue_get_ring_space(qp) == 0) 855 return (EAGAIN); 856 857 desc = &qp->desc_ring[qp->cq_tail]; 858 859 memset(desc, 0, sizeof(*desc)); 860 desc->engine = CCP_ENGINE_PASSTHRU; 861 862 desc->pt.ioc = interrupt; 863 desc->pt.byteswap = swapmode; 864 desc->pt.bitwise = bitmode; 865 desc->length = len; 866 867 desc->src_lo = (uint32_t)src; 868 desc->src_hi = src >> 32; 869 desc->src_mem = src_type; 870 871 desc->dst_lo = (uint32_t)dst; 872 desc->dst_hi = dst >> 32; 873 desc->dst_mem = dst_type; 874 875 if (bitmode != CCP_PASSTHRU_BITWISE_NOOP) 876 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY); 877 878 if (cctx != NULL) 879 memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx)); 880 881 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); 882 return (0); 883 } 884 885 static int __must_check 886 ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb, 887 struct sglist *sgl, bus_size_t len, bool interrupt, 888 const struct ccp_completion_ctx *cctx) 889 { 890 struct sglist_seg *seg; 891 size_t i, remain, nb; 892 int error; 893 894 remain = len; 895 for (i = 0; i < sgl->sg_nseg && remain != 0; i++) { 896 seg = &sgl->sg_segs[i]; 897 /* crd_len is int, so 32-bit min() is ok. */ 898 nb = min(remain, seg->ss_len); 899 900 if (tolsb) 901 error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB, 902 seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb, 903 CCP_PASSTHRU_BYTESWAP_NOOP, 904 CCP_PASSTHRU_BITWISE_NOOP, 905 (nb == remain) && interrupt, cctx); 906 else 907 error = ccp_passthrough(qp, seg->ss_paddr, 908 CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb, 909 CCP_PASSTHRU_BYTESWAP_NOOP, 910 CCP_PASSTHRU_BITWISE_NOOP, 911 (nb == remain) && interrupt, cctx); 912 if (error != 0) 913 return (error); 914 915 remain -= nb; 916 } 917 return (0); 918 } 919 920 /* 921 * Note that these vectors are in reverse of the usual order. 922 */ 923 const struct SHA_vectors { 924 uint32_t SHA1[8]; 925 uint32_t SHA224[8]; 926 uint32_t SHA256[8]; 927 uint64_t SHA384[8]; 928 uint64_t SHA512[8]; 929 } SHA_H __aligned(PAGE_SIZE) = { 930 .SHA1 = { 931 0xc3d2e1f0ul, 932 0x10325476ul, 933 0x98badcfeul, 934 0xefcdab89ul, 935 0x67452301ul, 936 0, 937 0, 938 0, 939 }, 940 .SHA224 = { 941 0xbefa4fa4ul, 942 0x64f98fa7ul, 943 0x68581511ul, 944 0xffc00b31ul, 945 0xf70e5939ul, 946 0x3070dd17ul, 947 0x367cd507ul, 948 0xc1059ed8ul, 949 }, 950 .SHA256 = { 951 0x5be0cd19ul, 952 0x1f83d9abul, 953 0x9b05688cul, 954 0x510e527ful, 955 0xa54ff53aul, 956 0x3c6ef372ul, 957 0xbb67ae85ul, 958 0x6a09e667ul, 959 }, 960 .SHA384 = { 961 0x47b5481dbefa4fa4ull, 962 0xdb0c2e0d64f98fa7ull, 963 0x8eb44a8768581511ull, 964 0x67332667ffc00b31ull, 965 0x152fecd8f70e5939ull, 966 0x9159015a3070dd17ull, 967 0x629a292a367cd507ull, 968 0xcbbb9d5dc1059ed8ull, 969 }, 970 .SHA512 = { 971 0x5be0cd19137e2179ull, 972 0x1f83d9abfb41bd6bull, 973 0x9b05688c2b3e6c1full, 974 0x510e527fade682d1ull, 975 0xa54ff53a5f1d36f1ull, 976 0x3c6ef372fe94f82bull, 977 0xbb67ae8584caa73bull, 978 0x6a09e667f3bcc908ull, 979 }, 980 }; 981 /* 982 * Ensure vectors do not cross a page boundary. 983 * 984 * Disabled due to a new Clang error: "expression is not an integral constant 985 * expression." GCC (cross toolchain) seems to handle this assertion with 986 * _Static_assert just fine. 987 */ 988 #if 0 989 CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H)); 990 #endif 991 992 const struct SHA_Defn { 993 enum sha_version version; 994 const void *H_vectors; 995 size_t H_size; 996 struct auth_hash *axf; 997 enum ccp_sha_type engine_type; 998 } SHA_definitions[] = { 999 { 1000 .version = SHA1, 1001 .H_vectors = SHA_H.SHA1, 1002 .H_size = sizeof(SHA_H.SHA1), 1003 .axf = &auth_hash_hmac_sha1, 1004 .engine_type = CCP_SHA_TYPE_1, 1005 }, 1006 #if 0 1007 { 1008 .version = SHA2_224, 1009 .H_vectors = SHA_H.SHA224, 1010 .H_size = sizeof(SHA_H.SHA224), 1011 .axf = &auth_hash_hmac_sha2_224, 1012 .engine_type = CCP_SHA_TYPE_224, 1013 }, 1014 #endif 1015 { 1016 .version = SHA2_256, 1017 .H_vectors = SHA_H.SHA256, 1018 .H_size = sizeof(SHA_H.SHA256), 1019 .axf = &auth_hash_hmac_sha2_256, 1020 .engine_type = CCP_SHA_TYPE_256, 1021 }, 1022 { 1023 .version = SHA2_384, 1024 .H_vectors = SHA_H.SHA384, 1025 .H_size = sizeof(SHA_H.SHA384), 1026 .axf = &auth_hash_hmac_sha2_384, 1027 .engine_type = CCP_SHA_TYPE_384, 1028 }, 1029 { 1030 .version = SHA2_512, 1031 .H_vectors = SHA_H.SHA512, 1032 .H_size = sizeof(SHA_H.SHA512), 1033 .axf = &auth_hash_hmac_sha2_512, 1034 .engine_type = CCP_SHA_TYPE_512, 1035 }, 1036 }; 1037 1038 static int __must_check 1039 ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn, 1040 vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits) 1041 { 1042 struct ccp_desc *desc; 1043 1044 if (ccp_queue_get_ring_space(qp) == 0) 1045 return (EAGAIN); 1046 1047 desc = &qp->desc_ring[qp->cq_tail]; 1048 1049 memset(desc, 0, sizeof(*desc)); 1050 desc->engine = CCP_ENGINE_SHA; 1051 desc->som = start; 1052 desc->eom = end; 1053 1054 desc->sha.type = defn->engine_type; 1055 desc->length = len; 1056 1057 if (end) { 1058 desc->sha_len_lo = (uint32_t)msgbits; 1059 desc->sha_len_hi = msgbits >> 32; 1060 } 1061 1062 desc->src_lo = (uint32_t)addr; 1063 desc->src_hi = addr >> 32; 1064 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1065 1066 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA); 1067 1068 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); 1069 return (0); 1070 } 1071 1072 static int __must_check 1073 ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src, 1074 struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx) 1075 { 1076 const struct SHA_Defn *defn; 1077 struct sglist_seg *seg; 1078 size_t i, msgsize, remaining, nb; 1079 uint32_t lsbaddr; 1080 int error; 1081 1082 for (i = 0; i < nitems(SHA_definitions); i++) 1083 if (SHA_definitions[i].version == version) 1084 break; 1085 if (i == nitems(SHA_definitions)) 1086 return (EINVAL); 1087 defn = &SHA_definitions[i]; 1088 1089 /* XXX validate input ??? */ 1090 1091 /* Load initial SHA state into LSB */ 1092 /* XXX ensure H_vectors don't span page boundaries */ 1093 error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA), 1094 CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors), 1095 CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE), 1096 CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false, 1097 NULL); 1098 if (error != 0) 1099 return (error); 1100 1101 /* Execute series of SHA updates on correctly sized buffers */ 1102 msgsize = 0; 1103 for (i = 0; i < sgl_src->sg_nseg; i++) { 1104 seg = &sgl_src->sg_segs[i]; 1105 msgsize += seg->ss_len; 1106 error = ccp_sha_single_desc(qp, defn, seg->ss_paddr, 1107 seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1, 1108 msgsize << 3); 1109 if (error != 0) 1110 return (error); 1111 } 1112 1113 /* Copy result out to sgl_dst */ 1114 remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE); 1115 lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA); 1116 for (i = 0; i < sgl_dst->sg_nseg; i++) { 1117 seg = &sgl_dst->sg_segs[i]; 1118 /* crd_len is int, so 32-bit min() is ok. */ 1119 nb = min(remaining, seg->ss_len); 1120 1121 error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM, 1122 lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP, 1123 CCP_PASSTHRU_BITWISE_NOOP, 1124 (cctx != NULL) ? (nb == remaining) : false, 1125 (nb == remaining) ? cctx : NULL); 1126 if (error != 0) 1127 return (error); 1128 1129 remaining -= nb; 1130 lsbaddr += nb; 1131 if (remaining == 0) 1132 break; 1133 } 1134 1135 return (0); 1136 } 1137 1138 static void 1139 byteswap256(uint64_t *buffer) 1140 { 1141 uint64_t t; 1142 1143 t = bswap64(buffer[3]); 1144 buffer[3] = bswap64(buffer[0]); 1145 buffer[0] = t; 1146 1147 t = bswap64(buffer[2]); 1148 buffer[2] = bswap64(buffer[1]); 1149 buffer[1] = t; 1150 } 1151 1152 /* 1153 * Translate CCP internal LSB hash format into a standard hash ouput. 1154 * 1155 * Manipulates input buffer with byteswap256 operation. 1156 */ 1157 static void 1158 ccp_sha_copy_result(char *output, char *buffer, enum sha_version version) 1159 { 1160 const struct SHA_Defn *defn; 1161 size_t i; 1162 1163 for (i = 0; i < nitems(SHA_definitions); i++) 1164 if (SHA_definitions[i].version == version) 1165 break; 1166 if (i == nitems(SHA_definitions)) 1167 panic("bogus sha version auth_mode %u\n", (unsigned)version); 1168 1169 defn = &SHA_definitions[i]; 1170 1171 /* Swap 256bit manually -- DMA engine can, but with limitations */ 1172 byteswap256((void *)buffer); 1173 if (defn->axf->hashsize > LSB_ENTRY_SIZE) 1174 byteswap256((void *)(buffer + LSB_ENTRY_SIZE)); 1175 1176 switch (defn->version) { 1177 case SHA1: 1178 memcpy(output, buffer + 12, defn->axf->hashsize); 1179 break; 1180 #if 0 1181 case SHA2_224: 1182 memcpy(output, buffer + XXX, defn->axf->hashsize); 1183 break; 1184 #endif 1185 case SHA2_256: 1186 memcpy(output, buffer, defn->axf->hashsize); 1187 break; 1188 case SHA2_384: 1189 memcpy(output, 1190 buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize, 1191 defn->axf->hashsize - LSB_ENTRY_SIZE); 1192 memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer, 1193 LSB_ENTRY_SIZE); 1194 break; 1195 case SHA2_512: 1196 memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE); 1197 memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE); 1198 break; 1199 } 1200 } 1201 1202 static void 1203 ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s, 1204 struct cryptop *crp, struct cryptodesc *crd, int error) 1205 { 1206 char ihash[SHA2_512_HASH_LEN /* max hash len */]; 1207 union authctx auth_ctx; 1208 struct auth_hash *axf; 1209 1210 axf = s->hmac.auth_hash; 1211 1212 s->pending--; 1213 1214 if (error != 0) { 1215 crp->crp_etype = error; 1216 goto out; 1217 } 1218 1219 /* Do remaining outer hash over small inner hash in software */ 1220 axf->Init(&auth_ctx); 1221 axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize); 1222 ccp_sha_copy_result(ihash, s->hmac.ipad, s->hmac.auth_mode); 1223 #if 0 1224 INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__, 1225 (u_char *)ihash, " "); 1226 #endif 1227 axf->Update(&auth_ctx, ihash, axf->hashsize); 1228 axf->Final(s->hmac.ipad, &auth_ctx); 1229 1230 crypto_copyback(crp->crp_flags, crp->crp_buf, crd->crd_inject, 1231 s->hmac.hash_len, s->hmac.ipad); 1232 1233 /* Avoid leaking key material */ 1234 explicit_bzero(&auth_ctx, sizeof(auth_ctx)); 1235 explicit_bzero(s->hmac.ipad, sizeof(s->hmac.ipad)); 1236 explicit_bzero(s->hmac.opad, sizeof(s->hmac.opad)); 1237 1238 out: 1239 crypto_done(crp); 1240 } 1241 1242 static void 1243 ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1244 int error) 1245 { 1246 struct cryptodesc *crd; 1247 struct cryptop *crp; 1248 1249 crp = vcrp; 1250 crd = crp->crp_desc; 1251 ccp_do_hmac_done(qp, s, crp, crd, error); 1252 } 1253 1254 static int __must_check 1255 ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, 1256 struct cryptodesc *crd, const struct ccp_completion_ctx *cctx) 1257 { 1258 device_t dev; 1259 struct auth_hash *axf; 1260 int error; 1261 1262 dev = qp->cq_softc->dev; 1263 axf = s->hmac.auth_hash; 1264 1265 /* 1266 * Populate the SGL describing inside hash contents. We want to hash 1267 * the ipad (key XOR fixed bit pattern) concatenated with the user 1268 * data. 1269 */ 1270 sglist_reset(qp->cq_sg_ulptx); 1271 error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize); 1272 if (error != 0) 1273 return (error); 1274 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1275 crd->crd_skip, crd->crd_len); 1276 if (error != 0) { 1277 DPRINTF(dev, "%s: sglist too short\n", __func__); 1278 return (error); 1279 } 1280 /* Populate SGL for output -- just reuse hmac.ipad buffer. */ 1281 sglist_reset(qp->cq_sg_dst); 1282 error = sglist_append(qp->cq_sg_dst, s->hmac.ipad, 1283 roundup2(axf->hashsize, LSB_ENTRY_SIZE)); 1284 if (error != 0) 1285 return (error); 1286 1287 error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst, 1288 cctx); 1289 if (error != 0) { 1290 DPRINTF(dev, "%s: ccp_sha error\n", __func__); 1291 return (error); 1292 } 1293 return (0); 1294 } 1295 1296 int __must_check 1297 ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1298 { 1299 struct ccp_completion_ctx ctx; 1300 struct cryptodesc *crd; 1301 1302 crd = crp->crp_desc; 1303 1304 ctx.callback_fn = ccp_hmac_done; 1305 ctx.callback_arg = crp; 1306 ctx.session = s; 1307 1308 return (ccp_do_hmac(qp, s, crp, crd, &ctx)); 1309 } 1310 1311 static void 1312 ccp_byteswap(char *data, size_t len) 1313 { 1314 size_t i; 1315 char t; 1316 1317 len--; 1318 for (i = 0; i < len; i++, len--) { 1319 t = data[i]; 1320 data[i] = data[len]; 1321 data[len] = t; 1322 } 1323 } 1324 1325 static void 1326 ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1327 int error) 1328 { 1329 struct cryptop *crp; 1330 1331 explicit_bzero(&s->blkcipher, sizeof(s->blkcipher)); 1332 1333 crp = vcrp; 1334 1335 s->pending--; 1336 1337 if (error != 0) 1338 crp->crp_etype = error; 1339 1340 DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp); 1341 crypto_done(crp); 1342 } 1343 1344 static void 1345 ccp_collect_iv(struct ccp_session *s, struct cryptop *crp, 1346 struct cryptodesc *crd) 1347 { 1348 1349 if (crd->crd_flags & CRD_F_ENCRYPT) { 1350 if (crd->crd_flags & CRD_F_IV_EXPLICIT) 1351 memcpy(s->blkcipher.iv, crd->crd_iv, 1352 s->blkcipher.iv_len); 1353 else 1354 arc4rand(s->blkcipher.iv, s->blkcipher.iv_len, 0); 1355 if ((crd->crd_flags & CRD_F_IV_PRESENT) == 0) 1356 crypto_copyback(crp->crp_flags, crp->crp_buf, 1357 crd->crd_inject, s->blkcipher.iv_len, 1358 s->blkcipher.iv); 1359 } else { 1360 if (crd->crd_flags & CRD_F_IV_EXPLICIT) 1361 memcpy(s->blkcipher.iv, crd->crd_iv, 1362 s->blkcipher.iv_len); 1363 else 1364 crypto_copydata(crp->crp_flags, crp->crp_buf, 1365 crd->crd_inject, s->blkcipher.iv_len, 1366 s->blkcipher.iv); 1367 } 1368 1369 /* 1370 * If the input IV is 12 bytes, append an explicit counter of 1. 1371 */ 1372 if (crd->crd_alg == CRYPTO_AES_NIST_GCM_16 && 1373 s->blkcipher.iv_len == 12) { 1374 *(uint32_t *)&s->blkcipher.iv[12] = htobe32(1); 1375 s->blkcipher.iv_len = AES_BLOCK_LEN; 1376 } 1377 1378 if (crd->crd_alg == CRYPTO_AES_XTS && s->blkcipher.iv_len != AES_BLOCK_LEN) { 1379 DPRINTF(NULL, "got ivlen != 16: %u\n", s->blkcipher.iv_len); 1380 if (s->blkcipher.iv_len < AES_BLOCK_LEN) 1381 memset(&s->blkcipher.iv[s->blkcipher.iv_len], 0, 1382 AES_BLOCK_LEN - s->blkcipher.iv_len); 1383 s->blkcipher.iv_len = AES_BLOCK_LEN; 1384 } 1385 1386 /* Reverse order of IV material for HW */ 1387 INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__, 1388 s->blkcipher.iv, " ", s->blkcipher.iv_len); 1389 1390 /* 1391 * For unknown reasons, XTS mode expects the IV in the reverse byte 1392 * order to every other AES mode. 1393 */ 1394 if (crd->crd_alg != CRYPTO_AES_XTS) 1395 ccp_byteswap(s->blkcipher.iv, s->blkcipher.iv_len); 1396 } 1397 1398 static int __must_check 1399 ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src, 1400 size_t len) 1401 { 1402 int error; 1403 1404 sglist_reset(qp->cq_sg_ulptx); 1405 error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len); 1406 if (error != 0) 1407 return (error); 1408 1409 error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len, 1410 false, NULL); 1411 return (error); 1412 } 1413 1414 static int __must_check 1415 ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, 1416 struct cryptodesc *crd, enum ccp_cipher_dir dir, 1417 const struct ccp_completion_ctx *cctx) 1418 { 1419 struct ccp_desc *desc; 1420 device_t dev; 1421 unsigned i; 1422 enum ccp_xts_unitsize usize; 1423 1424 /* IV and Key data are already loaded */ 1425 1426 dev = qp->cq_softc->dev; 1427 1428 for (i = 0; i < nitems(ccp_xts_unitsize_map); i++) 1429 if (ccp_xts_unitsize_map[i].cxu_size == crd->crd_len) { 1430 usize = ccp_xts_unitsize_map[i].cxu_id; 1431 break; 1432 } 1433 if (i >= nitems(ccp_xts_unitsize_map)) 1434 return (EINVAL); 1435 1436 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1437 struct sglist_seg *seg; 1438 1439 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1440 1441 desc = &qp->desc_ring[qp->cq_tail]; 1442 desc->engine = CCP_ENGINE_XTS_AES; 1443 desc->som = (i == 0); 1444 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1); 1445 desc->ioc = (desc->eom && cctx != NULL); 1446 DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n", 1447 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom, 1448 (int)desc->ioc, (int)dir); 1449 1450 if (desc->ioc) 1451 memcpy(&qp->completions_ring[qp->cq_tail], cctx, 1452 sizeof(*cctx)); 1453 1454 desc->aes_xts.encrypt = dir; 1455 desc->aes_xts.type = s->blkcipher.cipher_type; 1456 desc->aes_xts.size = usize; 1457 1458 DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__, 1459 qp->cq_tail, (unsigned)desc->aes_xts.type, 1460 (unsigned)desc->aes_xts.size); 1461 1462 desc->length = seg->ss_len; 1463 desc->src_lo = (uint32_t)seg->ss_paddr; 1464 desc->src_hi = (seg->ss_paddr >> 32); 1465 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1466 1467 /* Crypt in-place */ 1468 desc->dst_lo = desc->src_lo; 1469 desc->dst_hi = desc->src_hi; 1470 desc->dst_mem = desc->src_mem; 1471 1472 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1473 desc->key_hi = 0; 1474 desc->key_mem = CCP_MEMTYPE_SB; 1475 1476 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1477 1478 qp->cq_tail = (qp->cq_tail + 1) % 1479 (1 << qp->cq_softc->ring_size_order); 1480 } 1481 return (0); 1482 } 1483 1484 static int __must_check 1485 ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s, 1486 struct cryptop *crp, struct cryptodesc *crd, 1487 const struct ccp_completion_ctx *cctx) 1488 { 1489 struct ccp_desc *desc; 1490 char *keydata; 1491 device_t dev; 1492 enum ccp_cipher_dir dir; 1493 int error; 1494 size_t keydata_len; 1495 unsigned i, j; 1496 1497 dev = qp->cq_softc->dev; 1498 1499 if (s->blkcipher.key_len == 0 || crd->crd_len == 0) { 1500 DPRINTF(dev, "%s: empty\n", __func__); 1501 return (EINVAL); 1502 } 1503 if ((crd->crd_len % AES_BLOCK_LEN) != 0) { 1504 DPRINTF(dev, "%s: len modulo: %d\n", __func__, crd->crd_len); 1505 return (EINVAL); 1506 } 1507 1508 /* 1509 * Individual segments must be multiples of AES block size for the HW 1510 * to process it. Non-compliant inputs aren't bogus, just not doable 1511 * on this hardware. 1512 */ 1513 for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++) 1514 if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) { 1515 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__, 1516 qp->cq_sg_crp->sg_segs[i].ss_len); 1517 return (EINVAL); 1518 } 1519 1520 /* Gather IV/nonce data */ 1521 ccp_collect_iv(s, crp, crd); 1522 1523 if ((crd->crd_flags & CRD_F_ENCRYPT) != 0) 1524 dir = CCP_CIPHER_DIR_ENCRYPT; 1525 else 1526 dir = CCP_CIPHER_DIR_DECRYPT; 1527 1528 /* Set up passthrough op(s) to copy IV into LSB */ 1529 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 1530 s->blkcipher.iv, s->blkcipher.iv_len); 1531 if (error != 0) 1532 return (error); 1533 1534 /* 1535 * Initialize keydata and keydata_len for GCC. The default case of the 1536 * following switch is impossible to reach, but GCC doesn't know that. 1537 */ 1538 keydata_len = 0; 1539 keydata = NULL; 1540 1541 switch (crd->crd_alg) { 1542 case CRYPTO_AES_XTS: 1543 for (j = 0; j < nitems(ccp_xts_unitsize_map); j++) 1544 if (ccp_xts_unitsize_map[j].cxu_size == crd->crd_len) 1545 break; 1546 /* Input buffer must be a supported UnitSize */ 1547 if (j >= nitems(ccp_xts_unitsize_map)) { 1548 device_printf(dev, "%s: rejected block size: %u\n", 1549 __func__, crd->crd_len); 1550 return (EOPNOTSUPP); 1551 } 1552 /* FALLTHROUGH */ 1553 case CRYPTO_AES_CBC: 1554 case CRYPTO_AES_ICM: 1555 keydata = s->blkcipher.enckey; 1556 keydata_len = s->blkcipher.key_len; 1557 break; 1558 } 1559 1560 INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len, 1561 keydata, " "); 1562 if (crd->crd_alg == CRYPTO_AES_XTS) 1563 INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " "); 1564 1565 /* Reverse order of key material for HW */ 1566 ccp_byteswap(keydata, keydata_len); 1567 1568 /* Store key material into LSB to avoid page boundaries */ 1569 if (crd->crd_alg == CRYPTO_AES_XTS) { 1570 /* 1571 * XTS mode uses 2 256-bit vectors for the primary key and the 1572 * tweak key. For 128-bit keys, the vectors are zero-padded. 1573 * 1574 * After byteswapping the combined OCF-provided K1:K2 vector 1575 * above, we need to reverse the order again so the hardware 1576 * gets the swapped keys in the order K1':K2'. 1577 */ 1578 error = ccp_do_pst_to_lsb(qp, 1579 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata, 1580 keydata_len / 2); 1581 if (error != 0) 1582 return (error); 1583 error = ccp_do_pst_to_lsb(qp, 1584 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), 1585 keydata + (keydata_len / 2), keydata_len / 2); 1586 1587 /* Zero-pad 128 bit keys */ 1588 if (keydata_len == 32) { 1589 if (error != 0) 1590 return (error); 1591 error = ccp_do_pst_to_lsb(qp, 1592 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) + 1593 keydata_len / 2, g_zeroes, keydata_len / 2); 1594 if (error != 0) 1595 return (error); 1596 error = ccp_do_pst_to_lsb(qp, 1597 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) + 1598 keydata_len / 2, g_zeroes, keydata_len / 2); 1599 } 1600 } else 1601 error = ccp_do_pst_to_lsb(qp, 1602 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata, 1603 keydata_len); 1604 if (error != 0) 1605 return (error); 1606 1607 /* 1608 * Point SGLs at the subset of cryptop buffer contents representing the 1609 * data. 1610 */ 1611 sglist_reset(qp->cq_sg_ulptx); 1612 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1613 crd->crd_skip, crd->crd_len); 1614 if (error != 0) 1615 return (error); 1616 1617 INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__, 1618 (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " "); 1619 1620 DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail); 1621 1622 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg) 1623 return (EAGAIN); 1624 1625 if (crd->crd_alg == CRYPTO_AES_XTS) 1626 return (ccp_do_xts(qp, s, crp, crd, dir, cctx)); 1627 1628 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1629 struct sglist_seg *seg; 1630 1631 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1632 1633 desc = &qp->desc_ring[qp->cq_tail]; 1634 desc->engine = CCP_ENGINE_AES; 1635 desc->som = (i == 0); 1636 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1); 1637 desc->ioc = (desc->eom && cctx != NULL); 1638 DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n", 1639 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom, 1640 (int)desc->ioc, (int)dir); 1641 1642 if (desc->ioc) 1643 memcpy(&qp->completions_ring[qp->cq_tail], cctx, 1644 sizeof(*cctx)); 1645 1646 desc->aes.encrypt = dir; 1647 desc->aes.mode = s->blkcipher.cipher_mode; 1648 desc->aes.type = s->blkcipher.cipher_type; 1649 if (crd->crd_alg == CRYPTO_AES_ICM) 1650 /* 1651 * Size of CTR value in bits, - 1. ICM mode uses all 1652 * 128 bits as counter. 1653 */ 1654 desc->aes.size = 127; 1655 1656 DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__, 1657 qp->cq_tail, (unsigned)desc->aes.mode, 1658 (unsigned)desc->aes.type, (unsigned)desc->aes.size); 1659 1660 desc->length = seg->ss_len; 1661 desc->src_lo = (uint32_t)seg->ss_paddr; 1662 desc->src_hi = (seg->ss_paddr >> 32); 1663 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1664 1665 /* Crypt in-place */ 1666 desc->dst_lo = desc->src_lo; 1667 desc->dst_hi = desc->src_hi; 1668 desc->dst_mem = desc->src_mem; 1669 1670 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1671 desc->key_hi = 0; 1672 desc->key_mem = CCP_MEMTYPE_SB; 1673 1674 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1675 1676 qp->cq_tail = (qp->cq_tail + 1) % 1677 (1 << qp->cq_softc->ring_size_order); 1678 } 1679 return (0); 1680 } 1681 1682 int __must_check 1683 ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) 1684 { 1685 struct ccp_completion_ctx ctx; 1686 struct cryptodesc *crd; 1687 1688 crd = crp->crp_desc; 1689 1690 ctx.callback_fn = ccp_blkcipher_done; 1691 ctx.session = s; 1692 ctx.callback_arg = crp; 1693 1694 return (ccp_do_blkcipher(qp, s, crp, crd, &ctx)); 1695 } 1696 1697 static void 1698 ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1699 int error) 1700 { 1701 struct cryptodesc *crda; 1702 struct cryptop *crp; 1703 1704 explicit_bzero(&s->blkcipher, sizeof(s->blkcipher)); 1705 1706 crp = vcrp; 1707 if (s->cipher_first) 1708 crda = crp->crp_desc->crd_next; 1709 else 1710 crda = crp->crp_desc; 1711 1712 ccp_do_hmac_done(qp, s, crp, crda, error); 1713 } 1714 1715 int __must_check 1716 ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, 1717 struct cryptodesc *crda, struct cryptodesc *crde) 1718 { 1719 struct ccp_completion_ctx ctx; 1720 int error; 1721 1722 ctx.callback_fn = ccp_authenc_done; 1723 ctx.session = s; 1724 ctx.callback_arg = crp; 1725 1726 /* Perform first operation */ 1727 if (s->cipher_first) 1728 error = ccp_do_blkcipher(qp, s, crp, crde, NULL); 1729 else 1730 error = ccp_do_hmac(qp, s, crp, crda, NULL); 1731 if (error != 0) 1732 return (error); 1733 1734 /* Perform second operation */ 1735 if (s->cipher_first) 1736 error = ccp_do_hmac(qp, s, crp, crda, &ctx); 1737 else 1738 error = ccp_do_blkcipher(qp, s, crp, crde, &ctx); 1739 return (error); 1740 } 1741 1742 static int __must_check 1743 ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s) 1744 { 1745 struct ccp_desc *desc; 1746 struct sglist_seg *seg; 1747 unsigned i; 1748 1749 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg) 1750 return (EAGAIN); 1751 1752 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1753 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1754 1755 desc = &qp->desc_ring[qp->cq_tail]; 1756 1757 desc->engine = CCP_ENGINE_AES; 1758 desc->aes.mode = CCP_AES_MODE_GHASH; 1759 desc->aes.type = s->blkcipher.cipher_type; 1760 desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD; 1761 1762 desc->som = (i == 0); 1763 desc->length = seg->ss_len; 1764 1765 desc->src_lo = (uint32_t)seg->ss_paddr; 1766 desc->src_hi = (seg->ss_paddr >> 32); 1767 desc->src_mem = CCP_MEMTYPE_SYSTEM; 1768 1769 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1770 1771 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1772 desc->key_mem = CCP_MEMTYPE_SB; 1773 1774 qp->cq_tail = (qp->cq_tail + 1) % 1775 (1 << qp->cq_softc->ring_size_order); 1776 } 1777 return (0); 1778 } 1779 1780 static int __must_check 1781 ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s, 1782 enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom) 1783 { 1784 struct ccp_desc *desc; 1785 1786 if (ccp_queue_get_ring_space(qp) == 0) 1787 return (EAGAIN); 1788 1789 desc = &qp->desc_ring[qp->cq_tail]; 1790 1791 desc->engine = CCP_ENGINE_AES; 1792 desc->aes.mode = CCP_AES_MODE_GCTR; 1793 desc->aes.type = s->blkcipher.cipher_type; 1794 desc->aes.encrypt = dir; 1795 desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1; 1796 1797 desc->som = som; 1798 desc->eom = eom; 1799 1800 /* Trailing bytes will be masked off by aes.size above. */ 1801 desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN); 1802 1803 desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr; 1804 desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32; 1805 desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM; 1806 1807 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1808 1809 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1810 desc->key_mem = CCP_MEMTYPE_SB; 1811 1812 qp->cq_tail = (qp->cq_tail + 1) % 1813 (1 << qp->cq_softc->ring_size_order); 1814 return (0); 1815 } 1816 1817 static int __must_check 1818 ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s) 1819 { 1820 struct ccp_desc *desc; 1821 1822 if (ccp_queue_get_ring_space(qp) == 0) 1823 return (EAGAIN); 1824 1825 desc = &qp->desc_ring[qp->cq_tail]; 1826 1827 desc->engine = CCP_ENGINE_AES; 1828 desc->aes.mode = CCP_AES_MODE_GHASH; 1829 desc->aes.type = s->blkcipher.cipher_type; 1830 desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL; 1831 1832 desc->length = GMAC_BLOCK_LEN; 1833 1834 desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN); 1835 desc->src_mem = CCP_MEMTYPE_SB; 1836 1837 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); 1838 1839 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); 1840 desc->key_mem = CCP_MEMTYPE_SB; 1841 1842 desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH); 1843 desc->dst_mem = CCP_MEMTYPE_SB; 1844 1845 qp->cq_tail = (qp->cq_tail + 1) % 1846 (1 << qp->cq_softc->ring_size_order); 1847 return (0); 1848 } 1849 1850 static void 1851 ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, 1852 int error) 1853 { 1854 char tag[GMAC_DIGEST_LEN]; 1855 struct cryptodesc *crde, *crda; 1856 struct cryptop *crp; 1857 1858 crp = vcrp; 1859 if (s->cipher_first) { 1860 crde = crp->crp_desc; 1861 crda = crp->crp_desc->crd_next; 1862 } else { 1863 crde = crp->crp_desc->crd_next; 1864 crda = crp->crp_desc; 1865 } 1866 1867 s->pending--; 1868 1869 if (error != 0) { 1870 crp->crp_etype = error; 1871 goto out; 1872 } 1873 1874 /* Encrypt is done. Decrypt needs to verify tag. */ 1875 if ((crde->crd_flags & CRD_F_ENCRYPT) != 0) 1876 goto out; 1877 1878 /* Copy in message tag. */ 1879 crypto_copydata(crp->crp_flags, crp->crp_buf, crda->crd_inject, 1880 sizeof(tag), tag); 1881 1882 /* Verify tag against computed GMAC */ 1883 if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0) 1884 crp->crp_etype = EBADMSG; 1885 1886 out: 1887 explicit_bzero(&s->blkcipher, sizeof(s->blkcipher)); 1888 explicit_bzero(&s->gmac, sizeof(s->gmac)); 1889 crypto_done(crp); 1890 } 1891 1892 int __must_check 1893 ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, 1894 struct cryptodesc *crda, struct cryptodesc *crde) 1895 { 1896 struct ccp_completion_ctx ctx; 1897 enum ccp_cipher_dir dir; 1898 device_t dev; 1899 unsigned i; 1900 int error; 1901 1902 if (s->blkcipher.key_len == 0) 1903 return (EINVAL); 1904 1905 /* 1906 * AAD is only permitted before the cipher/plain text, not 1907 * after. 1908 */ 1909 if (crda->crd_len + crda->crd_skip > crde->crd_len + crde->crd_skip) 1910 return (EINVAL); 1911 1912 dev = qp->cq_softc->dev; 1913 1914 if ((crde->crd_flags & CRD_F_ENCRYPT) != 0) 1915 dir = CCP_CIPHER_DIR_ENCRYPT; 1916 else 1917 dir = CCP_CIPHER_DIR_DECRYPT; 1918 1919 /* Zero initial GHASH portion of context */ 1920 memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv)); 1921 1922 /* Gather IV data */ 1923 ccp_collect_iv(s, crp, crde); 1924 1925 /* Reverse order of key material for HW */ 1926 ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len); 1927 1928 /* Prepare input buffer of concatenated lengths for final GHASH */ 1929 be64enc(s->gmac.final_block, (uint64_t)crda->crd_len * 8); 1930 be64enc(&s->gmac.final_block[8], (uint64_t)crde->crd_len * 8); 1931 1932 /* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */ 1933 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 1934 s->blkcipher.iv, 32); 1935 if (error != 0) 1936 return (error); 1937 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), 1938 s->blkcipher.enckey, s->blkcipher.key_len); 1939 if (error != 0) 1940 return (error); 1941 error = ccp_do_pst_to_lsb(qp, 1942 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block, 1943 GMAC_BLOCK_LEN); 1944 if (error != 0) 1945 return (error); 1946 1947 /* First step - compute GHASH over AAD */ 1948 if (crda->crd_len != 0) { 1949 sglist_reset(qp->cq_sg_ulptx); 1950 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1951 crda->crd_skip, crda->crd_len); 1952 if (error != 0) 1953 return (error); 1954 1955 /* This engine cannot process non-block multiple AAD data. */ 1956 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) 1957 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % 1958 GMAC_BLOCK_LEN) != 0) { 1959 DPRINTF(dev, "%s: AD seg modulo: %zu\n", 1960 __func__, 1961 qp->cq_sg_ulptx->sg_segs[i].ss_len); 1962 return (EINVAL); 1963 } 1964 1965 error = ccp_do_ghash_aad(qp, s); 1966 if (error != 0) 1967 return (error); 1968 } 1969 1970 /* Feed data piece by piece into GCTR */ 1971 sglist_reset(qp->cq_sg_ulptx); 1972 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 1973 crde->crd_skip, crde->crd_len); 1974 if (error != 0) 1975 return (error); 1976 1977 /* 1978 * All segments except the last must be even multiples of AES block 1979 * size for the HW to process it. Non-compliant inputs aren't bogus, 1980 * just not doable on this hardware. 1981 * 1982 * XXX: Well, the hardware will produce a valid tag for shorter final 1983 * segment inputs, but it will still write out a block-sized plaintext 1984 * or ciphertext chunk. For a typical CRP this tramples trailing data, 1985 * including the provided message tag. So, reject such inputs for now. 1986 */ 1987 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) 1988 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) { 1989 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__, 1990 qp->cq_sg_ulptx->sg_segs[i].ss_len); 1991 return (EINVAL); 1992 } 1993 1994 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { 1995 struct sglist_seg *seg; 1996 1997 seg = &qp->cq_sg_ulptx->sg_segs[i]; 1998 error = ccp_do_gctr(qp, s, dir, seg, 1999 (i == 0 && crda->crd_len == 0), 2000 i == (qp->cq_sg_ulptx->sg_nseg - 1)); 2001 if (error != 0) 2002 return (error); 2003 } 2004 2005 /* Send just initial IV (not GHASH!) to LSB again */ 2006 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), 2007 s->blkcipher.iv, s->blkcipher.iv_len); 2008 if (error != 0) 2009 return (error); 2010 2011 ctx.callback_fn = ccp_gcm_done; 2012 ctx.session = s; 2013 ctx.callback_arg = crp; 2014 2015 /* Compute final hash and copy result back */ 2016 error = ccp_do_ghash_final(qp, s); 2017 if (error != 0) 2018 return (error); 2019 2020 /* When encrypting, copy computed tag out to caller buffer. */ 2021 sglist_reset(qp->cq_sg_ulptx); 2022 if (dir == CCP_CIPHER_DIR_ENCRYPT) 2023 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, 2024 crda->crd_inject, s->gmac.hash_len); 2025 else 2026 /* 2027 * For decrypting, copy the computed tag out to our session 2028 * buffer to verify in our callback. 2029 */ 2030 error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block, 2031 s->gmac.hash_len); 2032 if (error != 0) 2033 return (error); 2034 error = ccp_passthrough_sgl(qp, 2035 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx, 2036 s->gmac.hash_len, true, &ctx); 2037 return (error); 2038 } 2039 2040 #define MAX_TRNG_RETRIES 10 2041 u_int 2042 random_ccp_read(void *v, u_int c) 2043 { 2044 uint32_t *buf; 2045 u_int i, j; 2046 2047 KASSERT(c % sizeof(*buf) == 0, ("%u not multiple of u_long", c)); 2048 2049 buf = v; 2050 for (i = c; i > 0; i -= sizeof(*buf)) { 2051 for (j = 0; j < MAX_TRNG_RETRIES; j++) { 2052 *buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET); 2053 if (*buf != 0) 2054 break; 2055 } 2056 if (j == MAX_TRNG_RETRIES) 2057 return (0); 2058 buf++; 2059 } 2060 return (c); 2061 2062 } 2063 2064 #ifdef DDB 2065 void 2066 db_ccp_show_hw(struct ccp_softc *sc) 2067 { 2068 2069 db_printf(" queue mask: 0x%x\n", 2070 ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET)); 2071 db_printf(" queue prio: 0x%x\n", 2072 ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET)); 2073 db_printf(" reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET)); 2074 db_printf(" trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET)); 2075 db_printf(" cmd timeout: 0x%x\n", 2076 ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET)); 2077 db_printf(" lsb public mask lo: 0x%x\n", 2078 ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET)); 2079 db_printf(" lsb public mask hi: 0x%x\n", 2080 ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET)); 2081 db_printf(" lsb private mask lo: 0x%x\n", 2082 ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET)); 2083 db_printf(" lsb private mask hi: 0x%x\n", 2084 ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET)); 2085 db_printf(" version: 0x%x\n", ccp_read_4(sc, VERSION_REG)); 2086 } 2087 2088 void 2089 db_ccp_show_queue_hw(struct ccp_queue *qp) 2090 { 2091 const struct ccp_error_code *ec; 2092 struct ccp_softc *sc; 2093 uint32_t status, error, esource, faultblock, headlo, qcontrol; 2094 unsigned q, i; 2095 2096 sc = qp->cq_softc; 2097 q = qp->cq_qindex; 2098 2099 qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE); 2100 db_printf(" qcontrol: 0x%x%s%s\n", qcontrol, 2101 (qcontrol & CMD_Q_RUN) ? " RUN" : "", 2102 (qcontrol & CMD_Q_HALTED) ? " HALTED" : ""); 2103 db_printf(" tail_lo: 0x%x\n", 2104 ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE)); 2105 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE); 2106 db_printf(" head_lo: 0x%x\n", headlo); 2107 db_printf(" int enable: 0x%x\n", 2108 ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE)); 2109 db_printf(" interrupt status: 0x%x\n", 2110 ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE)); 2111 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE); 2112 db_printf(" status: 0x%x\n", status); 2113 db_printf(" int stats: 0x%x\n", 2114 ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE)); 2115 2116 error = status & STATUS_ERROR_MASK; 2117 if (error == 0) 2118 return; 2119 2120 esource = (status >> STATUS_ERRORSOURCE_SHIFT) & 2121 STATUS_ERRORSOURCE_MASK; 2122 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) & 2123 STATUS_VLSB_FAULTBLOCK_MASK; 2124 2125 ec = NULL; 2126 for (i = 0; i < nitems(ccp_error_codes); i++) 2127 if (ccp_error_codes[i].ce_code == error) 2128 break; 2129 if (i < nitems(ccp_error_codes)) 2130 ec = &ccp_error_codes[i]; 2131 2132 db_printf(" Error: %s (%u) Source: %u Faulting LSB block: %u\n", 2133 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource, 2134 faultblock); 2135 if (ec != NULL) 2136 db_printf(" Error description: %s\n", ec->ce_desc); 2137 2138 i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE; 2139 db_printf(" Bad descriptor idx: %u contents:\n %32D\n", i, 2140 (void *)&qp->desc_ring[i], " "); 2141 } 2142 #endif 2143