/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2022-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/endian.h>
#include <sys/gsb_crc32.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <assert.h>
#include <errno.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "libnvmf.h"
#include "internal.h"
#include "nvmf_tcp.h"

struct nvmf_tcp_qpair;

struct nvmf_tcp_command_buffer {
	struct nvmf_tcp_qpair *qp;

	void	*data;
	size_t	data_len;
	size_t	data_xfered;
	uint32_t data_offset;

	uint16_t cid;
	uint16_t ttag;

	LIST_ENTRY(nvmf_tcp_command_buffer) link;
};

LIST_HEAD(nvmf_tcp_command_buffer_list, nvmf_tcp_command_buffer);

struct nvmf_tcp_association {
	struct nvmf_association na;

	uint32_t ioccsz;
};

struct nvmf_tcp_rxpdu {
	struct nvme_tcp_common_pdu_hdr *hdr;
	uint32_t data_len;
};

struct nvmf_tcp_capsule {
	struct nvmf_capsule nc;

	struct nvmf_tcp_rxpdu rx_pdu;
	struct nvmf_tcp_command_buffer *cb;

	TAILQ_ENTRY(nvmf_tcp_capsule) link;
};

struct nvmf_tcp_qpair {
	struct nvmf_qpair qp;
	int s;

	uint8_t	txpda;
	uint8_t	rxpda;
	bool	header_digests;
	bool	data_digests;
	uint32_t maxr2t;
	uint32_t maxh2cdata;
	uint32_t max_icd;	/* Host only */
	uint16_t next_ttag;	/* Controller only */

	struct nvmf_tcp_command_buffer_list tx_buffers;
	struct nvmf_tcp_command_buffer_list rx_buffers;
	TAILQ_HEAD(, nvmf_tcp_capsule) rx_capsules;
};

#define	TASSOC(na)	((struct nvmf_tcp_association *)(na))
#define	TCAP(nc)	((struct nvmf_tcp_capsule *)(nc))
#define	CTCAP(nc)	((const struct nvmf_tcp_capsule *)(nc))
#define	TQP(qp)		((struct nvmf_tcp_qpair *)(qp))

static const char zero_padding[NVME_TCP_PDU_PDO_MAX_OFFSET];

static uint32_t
compute_digest(const void *buf, size_t len)
{
	return (calculate_crc32c(0xffffffff, buf, len) ^ 0xffffffff);
}
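
/*
 * Command buffers track the memory associated with an outstanding
 * data transfer.  Buffers on tx_buffers hold data this end still has
 * to send (in response to R2T on the host side), while buffers on
 * rx_buffers receive incoming H2C_DATA or C2H_DATA payloads.  Buffers
 * are matched by command ID and, for controller-requested transfers,
 * by transfer tag.
 */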
static struct nvmf_tcp_command_buffer *
tcp_alloc_command_buffer(struct nvmf_tcp_qpair *qp, void *data,
    uint32_t data_offset, size_t data_len, uint16_t cid, uint16_t ttag,
    bool receive)
{
	struct nvmf_tcp_command_buffer *cb;

	cb = malloc(sizeof(*cb));
	cb->qp = qp;
	cb->data = data;
	cb->data_offset = data_offset;
	cb->data_len = data_len;
	cb->data_xfered = 0;
	cb->cid = cid;
	cb->ttag = ttag;

	if (receive)
		LIST_INSERT_HEAD(&qp->rx_buffers, cb, link);
	else
		LIST_INSERT_HEAD(&qp->tx_buffers, cb, link);
	return (cb);
}

static struct nvmf_tcp_command_buffer *
tcp_find_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    bool receive)
{
	struct nvmf_tcp_command_buffer_list *list;
	struct nvmf_tcp_command_buffer *cb;

	list = receive ? &qp->rx_buffers : &qp->tx_buffers;
	LIST_FOREACH(cb, list, link) {
		if (cb->cid == cid && cb->ttag == ttag)
			return (cb);
	}
	return (NULL);
}

static void
tcp_purge_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    bool receive)
{
	struct nvmf_tcp_command_buffer *cb;

	cb = tcp_find_command_buffer(qp, cid, ttag, receive);
	if (cb != NULL)
		LIST_REMOVE(cb, link);
}

static void
tcp_free_command_buffer(struct nvmf_tcp_command_buffer *cb)
{
	LIST_REMOVE(cb, link);
	free(cb);
}

static int
nvmf_tcp_write_pdu(struct nvmf_tcp_qpair *qp, const void *pdu, size_t len)
{
	ssize_t nwritten;
	const char *cp;

	cp = pdu;
	while (len != 0) {
		nwritten = write(qp->s, cp, len);
		if (nwritten < 0)
			return (errno);
		len -= nwritten;
		cp += nwritten;
	}
	return (0);
}

static int
nvmf_tcp_write_pdu_iov(struct nvmf_tcp_qpair *qp, struct iovec *iov,
    u_int iovcnt, size_t len)
{
	ssize_t nwritten;

	for (;;) {
		nwritten = writev(qp->s, iov, iovcnt);
		if (nwritten < 0)
			return (errno);

		len -= nwritten;
		if (len == 0)
			return (0);

		while (iov->iov_len <= (size_t)nwritten) {
			nwritten -= iov->iov_len;
			iovcnt--;
			iov++;
		}

		iov->iov_base = (char *)iov->iov_base + nwritten;
		iov->iov_len -= nwritten;
	}
}
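
/*
 * Report a fatal protocol error to the other end by sending a
 * termination request PDU that echoes back up to 'hlen' bytes of the
 * offending PDU header, then close the socket.  The queue pair cannot
 * be used for further traffic afterwards.
 */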
static void
nvmf_tcp_report_error(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
    uint16_t fes, uint32_t fei, const void *rx_pdu, size_t pdu_len, u_int hlen)
{
	struct nvme_tcp_term_req_hdr hdr;
	struct iovec iov[2];

	if (hlen != 0) {
		if (hlen > NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE)
			hlen = NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
		if (hlen > pdu_len)
			hlen = pdu_len;
	}

	memset(&hdr, 0, sizeof(hdr));
	hdr.common.pdu_type = na->na_controller ?
	    NVME_TCP_PDU_TYPE_C2H_TERM_REQ : NVME_TCP_PDU_TYPE_H2C_TERM_REQ;
	hdr.common.hlen = sizeof(hdr);
	hdr.common.plen = htole32(sizeof(hdr) + hlen);
	hdr.fes = htole16(fes);
	le32enc(hdr.fei, fei);
	iov[0].iov_base = &hdr;
	iov[0].iov_len = sizeof(hdr);
	iov[1].iov_base = __DECONST(void *, rx_pdu);
	iov[1].iov_len = hlen;

	(void)nvmf_tcp_write_pdu_iov(qp, iov, nitems(iov), sizeof(hdr) + hlen);
	close(qp->s);
	qp->s = -1;
}

static int
nvmf_tcp_validate_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu,
    size_t pdu_len)
{
	const struct nvme_tcp_common_pdu_hdr *ch;
	uint32_t data_len, fei, plen;
	uint32_t digest, rx_digest;
	u_int hlen;
	int error;
	uint16_t fes;

	/* Determine how large of a PDU header to return for errors. */
	ch = pdu->hdr;
	hlen = ch->hlen;
	plen = le32toh(ch->plen);
	if (hlen < sizeof(*ch) || hlen > plen)
		hlen = sizeof(*ch);

	error = nvmf_tcp_validate_pdu_header(ch,
	    qp->qp.nq_association->na_controller, qp->header_digests,
	    qp->data_digests, qp->rxpda, &data_len, &fes, &fei);
	if (error != 0) {
		if (error == ECONNRESET) {
			close(qp->s);
			qp->s = -1;
		} else {
			nvmf_tcp_report_error(qp->qp.nq_association, qp,
			    fes, fei, ch, pdu_len, hlen);
		}
		return (error);
	}

	/* Check header digest if present. */
	if ((ch->flags & NVME_TCP_CH_FLAGS_HDGSTF) != 0) {
		digest = compute_digest(ch, ch->hlen);
		memcpy(&rx_digest, (const char *)ch + ch->hlen,
		    sizeof(rx_digest));
		if (digest != rx_digest) {
			printf("NVMe/TCP: Header digest mismatch\n");
			nvmf_tcp_report_error(qp->qp.nq_association, qp,
			    NVME_TCP_TERM_REQ_FES_HDGST_ERROR, rx_digest, ch,
			    pdu_len, hlen);
			return (EBADMSG);
		}
	}

	/* Check data digest if present. */
	if ((ch->flags & NVME_TCP_CH_FLAGS_DDGSTF) != 0) {
		digest = compute_digest((const char *)ch + ch->pdo, data_len);
		memcpy(&rx_digest, (const char *)ch + plen - sizeof(rx_digest),
		    sizeof(rx_digest));
		if (digest != rx_digest) {
			printf("NVMe/TCP: Data digest mismatch\n");
			return (EBADMSG);
		}
	}

	pdu->data_len = data_len;
	return (0);
}

/*
 * Read data from a socket, retrying until the data has been fully
 * read or an error occurs.
 */
static int
nvmf_tcp_read_buffer(int s, void *buf, size_t len)
{
	ssize_t nread;
	char *cp;

	cp = buf;
	while (len != 0) {
		nread = read(s, cp, len);
		if (nread < 0)
			return (errno);
		if (nread == 0)
			return (ECONNRESET);
		len -= nread;
		cp += nread;
	}
	return (0);
}
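
/*
 * Read a single PDU from the socket: the common header first, then
 * the remainder of the PDU once its length is known, and finally run
 * full validation including any digests.
 */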
static int
nvmf_tcp_read_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
{
	struct nvme_tcp_common_pdu_hdr ch;
	uint32_t plen;
	int error;

	memset(pdu, 0, sizeof(*pdu));
	error = nvmf_tcp_read_buffer(qp->s, &ch, sizeof(ch));
	if (error != 0)
		return (error);

	plen = le32toh(ch.plen);

	/*
	 * Validate a header with garbage lengths to trigger
	 * an error message without reading more.
	 */
	if (plen < sizeof(ch) || ch.hlen > plen) {
		pdu->hdr = &ch;
		error = nvmf_tcp_validate_pdu(qp, pdu, sizeof(ch));
		pdu->hdr = NULL;
		assert(error != 0);
		return (error);
	}

	/* Read the rest of the PDU. */
	pdu->hdr = malloc(plen);
	memcpy(pdu->hdr, &ch, sizeof(ch));
	error = nvmf_tcp_read_buffer(qp->s, pdu->hdr + 1, plen - sizeof(ch));
	if (error != 0)
		return (error);
	error = nvmf_tcp_validate_pdu(qp, pdu, plen);
	if (error != 0) {
		free(pdu->hdr);
		pdu->hdr = NULL;
	}
	return (error);
}

static void
nvmf_tcp_free_pdu(struct nvmf_tcp_rxpdu *pdu)
{
	free(pdu->hdr);
	pdu->hdr = NULL;
}

static int
nvmf_tcp_handle_term_req(struct nvmf_tcp_rxpdu *pdu)
{
	struct nvme_tcp_term_req_hdr *hdr;

	hdr = (void *)pdu->hdr;

	printf("NVMe/TCP: Received termination request: fes %#x fei %#x\n",
	    le16toh(hdr->fes), le32dec(hdr->fei));
	nvmf_tcp_free_pdu(pdu);
	return (ECONNRESET);
}

static int
nvmf_tcp_save_command_capsule(struct nvmf_tcp_qpair *qp,
    struct nvmf_tcp_rxpdu *pdu)
{
	struct nvme_tcp_cmd *cmd;
	struct nvmf_capsule *nc;
	struct nvmf_tcp_capsule *tc;

	cmd = (void *)pdu->hdr;

	nc = nvmf_allocate_command(&qp->qp, &cmd->ccsqe);
	if (nc == NULL)
		return (ENOMEM);

	tc = TCAP(nc);
	tc->rx_pdu = *pdu;

	TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
	return (0);
}

static int
nvmf_tcp_save_response_capsule(struct nvmf_tcp_qpair *qp,
    struct nvmf_tcp_rxpdu *pdu)
{
	struct nvme_tcp_rsp *rsp;
	struct nvmf_capsule *nc;
	struct nvmf_tcp_capsule *tc;

	rsp = (void *)pdu->hdr;

	nc = nvmf_allocate_response(&qp->qp, &rsp->rccqe);
	if (nc == NULL)
		return (ENOMEM);

	nc->nc_sqhd_valid = true;
	tc = TCAP(nc);
	tc->rx_pdu = *pdu;

	TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);

	/*
	 * Once the CQE has been received, no further transfers to the
	 * command buffer for the associated CID can occur.
	 */
	tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, true);
	tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, false);

	return (0);
}
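
/*
 * Wire layout produced by nvmf_tcp_construct_pdu() below (HDGST, PAD,
 * and DDGST are only present when the corresponding option is enabled
 * or padding is required):
 *
 *	CH + PSH | HDGST | PAD | DATA | DDGST
 *
 * 'hlen' covers CH + PSH, 'pdo' is the offset of DATA, and 'plen'
 * covers the entire PDU including digests.
 */
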
/*
 * Construct and send a PDU that contains an optional data payload.
 * This includes dealing with digests and the length fields in the
 * common header.
 */
static int
nvmf_tcp_construct_pdu(struct nvmf_tcp_qpair *qp, void *hdr, size_t hlen,
    void *data, uint32_t data_len)
{
	struct nvme_tcp_common_pdu_hdr *ch;
	struct iovec iov[5];
	u_int iovcnt;
	uint32_t header_digest, data_digest, pad, pdo, plen;

	plen = hlen;
	if (qp->header_digests)
		plen += sizeof(header_digest);
	if (data_len != 0) {
		pdo = roundup(plen, qp->txpda);
		pad = pdo - plen;
		plen = pdo + data_len;
		if (qp->data_digests)
			plen += sizeof(data_digest);
	} else {
		assert(data == NULL);
		pdo = 0;
		pad = 0;
	}

	ch = hdr;
	ch->hlen = hlen;
	if (qp->header_digests)
		ch->flags |= NVME_TCP_CH_FLAGS_HDGSTF;
	if (qp->data_digests && data_len != 0)
		ch->flags |= NVME_TCP_CH_FLAGS_DDGSTF;
	ch->pdo = pdo;
	ch->plen = htole32(plen);

	/* CH + PSH */
	iov[0].iov_base = hdr;
	iov[0].iov_len = hlen;
	iovcnt = 1;

	/* HDGST */
	if (qp->header_digests) {
		header_digest = compute_digest(hdr, hlen);
		iov[iovcnt].iov_base = &header_digest;
		iov[iovcnt].iov_len = sizeof(header_digest);
		iovcnt++;
	}

	if (pad != 0) {
		/* PAD */
		iov[iovcnt].iov_base = __DECONST(char *, zero_padding);
		iov[iovcnt].iov_len = pad;
		iovcnt++;
	}

	if (data_len != 0) {
		/* DATA */
		iov[iovcnt].iov_base = data;
		iov[iovcnt].iov_len = data_len;
		iovcnt++;

		/* DDGST */
		if (qp->data_digests) {
			data_digest = compute_digest(data, data_len);
			iov[iovcnt].iov_base = &data_digest;
			iov[iovcnt].iov_len = sizeof(data_digest);
			iovcnt++;
		}
	}

	return (nvmf_tcp_write_pdu_iov(qp, iov, iovcnt, plen));
}
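
/*
 * H2C_DATA (controller side): copy host data into the receive buffer
 * registered for the transfer tag, enforcing MAXH2CDATA, the buffer
 * bounds, and in-order delivery.
 */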
static int
nvmf_tcp_handle_h2c_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
{
	struct nvme_tcp_h2c_data_hdr *h2c;
	struct nvmf_tcp_command_buffer *cb;
	uint32_t data_len, data_offset;
	const char *icd;

	h2c = (void *)pdu->hdr;
	if (le32toh(h2c->datal) > qp->maxh2cdata) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED, 0,
		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	cb = tcp_find_command_buffer(qp, h2c->cccid, h2c->ttag, true);
	if (cb == NULL) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_h2c_data_hdr, ttag), pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_len = le32toh(h2c->datal);
	if (data_len != pdu->data_len) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_h2c_data_hdr, datal), pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_offset = le32toh(h2c->datao);
	if (data_offset < cb->data_offset ||
	    data_offset + data_len > cb->data_offset + cb->data_len) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	if (data_offset != cb->data_offset + cb->data_xfered) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	if ((cb->data_xfered + data_len == cb->data_len) !=
	    ((pdu->hdr->flags & NVME_TCP_H2C_DATA_FLAGS_LAST_PDU) != 0)) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	cb->data_xfered += data_len;
	data_offset -= cb->data_offset;
	icd = (const char *)pdu->hdr + pdu->hdr->pdo;
	memcpy((char *)cb->data + data_offset, icd, data_len);

	nvmf_tcp_free_pdu(pdu);
	return (0);
}
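
/*
 * C2H_DATA (host side): copy controller data into the host's receive
 * buffer.  If the SUCCESS flag is set, synthesize a completion for
 * the command since no CAPSULE_RESP PDU will follow.
 */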
static int
nvmf_tcp_handle_c2h_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
{
	struct nvme_tcp_c2h_data_hdr *c2h;
	struct nvmf_tcp_command_buffer *cb;
	uint32_t data_len, data_offset;
	const char *icd;

	c2h = (void *)pdu->hdr;

	cb = tcp_find_command_buffer(qp, c2h->cccid, 0, true);
	if (cb == NULL) {
		/*
		 * XXX: Could be PDU sequence error if cccid is for a
		 * command that doesn't use a command buffer.
		 */
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_c2h_data_hdr, cccid), pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_len = le32toh(c2h->datal);
	if (data_len != pdu->data_len) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_c2h_data_hdr, datal), pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_offset = le32toh(c2h->datao);
	if (data_offset < cb->data_offset ||
	    data_offset + data_len > cb->data_offset + cb->data_len) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	if (data_offset != cb->data_offset + cb->data_xfered) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	if ((cb->data_xfered + data_len == cb->data_len) !=
	    ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) != 0)) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	cb->data_xfered += data_len;
	data_offset -= cb->data_offset;
	icd = (const char *)pdu->hdr + pdu->hdr->pdo;
	memcpy((char *)cb->data + data_offset, icd, data_len);

	if ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_SUCCESS) != 0) {
		struct nvme_completion cqe;
		struct nvmf_tcp_capsule *tc;
		struct nvmf_capsule *nc;

		memset(&cqe, 0, sizeof(cqe));
		cqe.cid = cb->cid;

		nc = nvmf_allocate_response(&qp->qp, &cqe);
		if (nc == NULL) {
			nvmf_tcp_free_pdu(pdu);
			return (ENOMEM);
		}
		nc->nc_sqhd_valid = false;

		tc = TCAP(nc);
		TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
	}

	nvmf_tcp_free_pdu(pdu);
	return (0);
}

/* NB: cid and ttag are little-endian already. */
static int
tcp_send_h2c_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    uint32_t data_offset, void *buf, size_t len, bool last_pdu)
{
	struct nvme_tcp_h2c_data_hdr h2c;

	memset(&h2c, 0, sizeof(h2c));
	h2c.common.pdu_type = NVME_TCP_PDU_TYPE_H2C_DATA;
	if (last_pdu)
		h2c.common.flags |= NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
	h2c.cccid = cid;
	h2c.ttag = ttag;
	h2c.datao = htole32(data_offset);
	h2c.datal = htole32(len);

	return (nvmf_tcp_construct_pdu(qp, &h2c, sizeof(h2c), buf, len));
}

/* Sends one or more H2C_DATA PDUs, subject to MAXH2CDATA. */
static int
tcp_send_h2c_pdus(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    uint32_t data_offset, void *buf, size_t len, bool last_pdu)
{
	char *p;

	p = buf;
	while (len != 0) {
		size_t todo;
		int error;

		todo = len;
		if (todo > qp->maxh2cdata)
			todo = qp->maxh2cdata;
		error = tcp_send_h2c_pdu(qp, cid, ttag, data_offset, p, todo,
		    last_pdu && todo == len);
		if (error != 0)
			return (error);
		p += todo;
		len -= todo;
		data_offset += todo;
	}
	return (0);
}
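
/*
 * R2T (host side): the controller has requested a slice of a pending
 * transmit buffer; validate the requested range and send it as one or
 * more H2C_DATA PDUs.
 */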
static int
nvmf_tcp_handle_r2t(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
{
	struct nvmf_tcp_command_buffer *cb;
	struct nvme_tcp_r2t_hdr *r2t;
	uint32_t data_len, data_offset;
	int error;

	r2t = (void *)pdu->hdr;

	cb = tcp_find_command_buffer(qp, r2t->cccid, 0, false);
	if (cb == NULL) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_r2t_hdr, cccid), pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_offset = le32toh(r2t->r2to);
	if (data_offset != cb->data_xfered) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	/*
	 * XXX: The spec does not specify how to handle R2T transfers
	 * out of range of the original command.
	 */
	data_len = le32toh(r2t->r2tl);
	if (data_offset + data_len > cb->data_len) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	cb->data_xfered += data_len;

	/*
	 * Write out one or more H2C_DATA PDUs containing the
	 * requested data.
	 */
	error = tcp_send_h2c_pdus(qp, r2t->cccid, r2t->ttag,
	    data_offset, (char *)cb->data + data_offset, data_len, true);

	nvmf_tcp_free_pdu(pdu);
	return (error);
}
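
/*
 * Read and dispatch a single PDU.  Command and response capsules are
 * queued on rx_capsules for tcp_receive_capsule(); data transfer and
 * termination PDUs are handled here.
 */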
static int
nvmf_tcp_receive_pdu(struct nvmf_tcp_qpair *qp)
{
	struct nvmf_tcp_rxpdu pdu;
	int error;

	error = nvmf_tcp_read_pdu(qp, &pdu);
	if (error != 0)
		return (error);

	switch (pdu.hdr->pdu_type) {
	default:
		__unreachable();
		break;
	case NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
	case NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
		return (nvmf_tcp_handle_term_req(&pdu));
	case NVME_TCP_PDU_TYPE_CAPSULE_CMD:
		return (nvmf_tcp_save_command_capsule(qp, &pdu));
	case NVME_TCP_PDU_TYPE_CAPSULE_RESP:
		return (nvmf_tcp_save_response_capsule(qp, &pdu));
	case NVME_TCP_PDU_TYPE_H2C_DATA:
		return (nvmf_tcp_handle_h2c_data(qp, &pdu));
	case NVME_TCP_PDU_TYPE_C2H_DATA:
		return (nvmf_tcp_handle_c2h_data(qp, &pdu));
	case NVME_TCP_PDU_TYPE_R2T:
		return (nvmf_tcp_handle_r2t(qp, &pdu));
	}
}

static bool
nvmf_tcp_validate_ic_pdu(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
    const struct nvme_tcp_common_pdu_hdr *ch, size_t pdu_len)
{
	const struct nvme_tcp_ic_req *pdu;
	uint32_t plen;
	u_int hlen;

	/* Determine how large of a PDU header to return for errors. */
	hlen = ch->hlen;
	plen = le32toh(ch->plen);
	if (hlen < sizeof(*ch) || hlen > plen)
		hlen = sizeof(*ch);

	/*
	 * Errors must be reported for the lowest incorrect field
	 * first, so validate fields in order.
	 */

	/* Validate pdu_type. */

	/* Controllers only receive PDUs with a PDU direction of 0. */
	if (na->na_controller != ((ch->pdu_type & 0x01) == 0)) {
		na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len,
		    hlen);
		return (false);
	}

	switch (ch->pdu_type) {
	case NVME_TCP_PDU_TYPE_IC_REQ:
	case NVME_TCP_PDU_TYPE_IC_RESP:
		break;
	default:
		na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len,
		    hlen);
		return (false);
	}

	/* Validate flags. */
	if (ch->flags != 0) {
		na_error(na, "NVMe/TCP: Invalid PDU header flags %#x",
		    ch->flags);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 1, ch, pdu_len,
		    hlen);
		return (false);
	}

	/* Validate hlen. */
	if (ch->hlen != 128) {
		na_error(na, "NVMe/TCP: Invalid PDU header length %u",
		    ch->hlen);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 2, ch, pdu_len,
		    hlen);
		return (false);
	}

	/* Validate pdo. */
	if (ch->pdo != 0) {
		na_error(na, "NVMe/TCP: Invalid PDU data offset %u", ch->pdo);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 3, ch, pdu_len,
		    hlen);
		return (false);
	}

	/* Validate plen. */
	if (plen != 128) {
		na_error(na, "NVMe/TCP: Invalid PDU length %u", plen);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 4, ch, pdu_len,
		    hlen);
		return (false);
	}

	/* Validate fields common to both ICReq and ICResp. */
	pdu = (const struct nvme_tcp_ic_req *)ch;
	if (le16toh(pdu->pfv) != 0) {
		na_error(na, "NVMe/TCP: Unsupported PDU version %u",
		    le16toh(pdu->pfv));
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER,
		    8, ch, pdu_len, hlen);
		return (false);
	}

	if (pdu->hpda > NVME_TCP_HPDA_MAX) {
		na_error(na, "NVMe/TCP: Unsupported PDA %u", pdu->hpda);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 10, ch, pdu_len,
		    hlen);
		return (false);
	}

	if (pdu->dgst.bits.reserved != 0) {
		na_error(na, "NVMe/TCP: Invalid digest settings");
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 11, ch, pdu_len,
		    hlen);
		return (false);
	}

	return (true);
}

static bool
nvmf_tcp_read_ic_req(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
    struct nvme_tcp_ic_req *pdu)
{
	int error;

	error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu));
	if (error != 0) {
		na_error(na, "NVMe/TCP: Failed to read IC request: %s",
		    strerror(error));
		return (false);
	}

	return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu)));
}

static bool
nvmf_tcp_read_ic_resp(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
    struct nvme_tcp_ic_resp *pdu)
{
	int error;

	error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu));
	if (error != 0) {
		na_error(na, "NVMe/TCP: Failed to read IC response: %s",
		    strerror(error));
		return (false);
	}

	return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu)));
}

static struct nvmf_association *
tcp_allocate_association(bool controller,
    const struct nvmf_association_params *params)
{
	struct nvmf_tcp_association *ta;

	if (controller) {
		/* 7.4.10.3 */
		if (params->tcp.maxh2cdata < 4096 ||
		    params->tcp.maxh2cdata % 4 != 0)
			return (NULL);
	}

	ta = calloc(1, sizeof(*ta));

	return (&ta->na);
}

static void
tcp_update_association(struct nvmf_association *na,
    const struct nvme_controller_data *cdata)
{
	struct nvmf_tcp_association *ta = TASSOC(na);

	ta->ioccsz = le32toh(cdata->ioccsz);
}

static void
tcp_free_association(struct nvmf_association *na)
{
	free(na);
}
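
/*
 * Host side of connection establishment: send an ICReq, validate the
 * ICResp, and record the negotiated PDA, digest, MAXH2CDATA, and
 * in-capsule data limits on the queue pair.
 */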
static bool
tcp_connect(struct nvmf_tcp_qpair *qp, struct nvmf_association *na, bool admin)
{
	const struct nvmf_association_params *params = &na->na_params;
	struct nvmf_tcp_association *ta = TASSOC(na);
	struct nvme_tcp_ic_req ic_req;
	struct nvme_tcp_ic_resp ic_resp;
	uint32_t maxh2cdata;
	int error;

	if (!admin) {
		if (ta->ioccsz == 0) {
			na_error(na, "TCP I/O queues require cdata");
			return (false);
		}
		if (ta->ioccsz < 4) {
			na_error(na, "Invalid IOCCSZ %u", ta->ioccsz);
			return (false);
		}
	}

	memset(&ic_req, 0, sizeof(ic_req));
	ic_req.common.pdu_type = NVME_TCP_PDU_TYPE_IC_REQ;
	ic_req.common.hlen = sizeof(ic_req);
	ic_req.common.plen = htole32(sizeof(ic_req));
	ic_req.pfv = htole16(0);
	ic_req.hpda = params->tcp.pda;
	if (params->tcp.header_digests)
		ic_req.dgst.bits.hdgst_enable = 1;
	if (params->tcp.data_digests)
		ic_req.dgst.bits.ddgst_enable = 1;
	ic_req.maxr2t = htole32(params->tcp.maxr2t);

	error = nvmf_tcp_write_pdu(qp, &ic_req, sizeof(ic_req));
	if (error != 0) {
		na_error(na, "Failed to write IC request: %s",
		    strerror(error));
		return (false);
	}

	if (!nvmf_tcp_read_ic_resp(na, qp, &ic_resp))
		return (false);

	/* Ensure the controller didn't enable digests we didn't request. */
	if ((!params->tcp.header_digests &&
	    ic_resp.dgst.bits.hdgst_enable != 0) ||
	    (!params->tcp.data_digests &&
	    ic_resp.dgst.bits.ddgst_enable != 0)) {
		na_error(na, "Controller enabled unrequested digests");
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER,
		    11, &ic_resp, sizeof(ic_resp), sizeof(ic_resp));
		return (false);
	}

	/*
	 * XXX: Is there an upper-bound to enforce here?  Perhaps pick
	 * some large value and report larger values as an unsupported
	 * parameter?
	 */
	maxh2cdata = le32toh(ic_resp.maxh2cdata);
	if (maxh2cdata < 4096 || maxh2cdata % 4 != 0) {
		na_error(na, "Invalid MAXH2CDATA %u", maxh2cdata);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 12, &ic_resp,
		    sizeof(ic_resp), sizeof(ic_resp));
		return (false);
	}

	qp->rxpda = (params->tcp.pda + 1) * 4;
	qp->txpda = (ic_resp.cpda + 1) * 4;
	qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0;
	qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0;
	qp->maxr2t = params->tcp.maxr2t;
	qp->maxh2cdata = maxh2cdata;
	if (admin)
		/* 7.4.3 */
		qp->max_icd = 8192;
	else
		qp->max_icd = (ta->ioccsz - 4) * 16;

	return (true);
}
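
/*
 * Controller side of connection establishment: read and validate the
 * ICReq, then send an ICResp that only enables digests requested by
 * both sides.
 */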
static bool
tcp_accept(struct nvmf_tcp_qpair *qp, struct nvmf_association *na)
{
	const struct nvmf_association_params *params = &na->na_params;
	struct nvme_tcp_ic_req ic_req;
	struct nvme_tcp_ic_resp ic_resp;
	int error;

	if (!nvmf_tcp_read_ic_req(na, qp, &ic_req))
		return (false);

	memset(&ic_resp, 0, sizeof(ic_resp));
	ic_resp.common.pdu_type = NVME_TCP_PDU_TYPE_IC_RESP;
	ic_resp.common.hlen = sizeof(ic_resp);
	ic_resp.common.plen = htole32(sizeof(ic_resp));
	ic_resp.pfv = htole16(0);
	ic_resp.cpda = params->tcp.pda;
	if (params->tcp.header_digests && ic_req.dgst.bits.hdgst_enable != 0)
		ic_resp.dgst.bits.hdgst_enable = 1;
	if (params->tcp.data_digests && ic_req.dgst.bits.ddgst_enable != 0)
		ic_resp.dgst.bits.ddgst_enable = 1;
	ic_resp.maxh2cdata = htole32(params->tcp.maxh2cdata);

	error = nvmf_tcp_write_pdu(qp, &ic_resp, sizeof(ic_resp));
	if (error != 0) {
		na_error(na, "Failed to write IC response: %s",
		    strerror(error));
		return (false);
	}

	qp->rxpda = (params->tcp.pda + 1) * 4;
	qp->txpda = (ic_req.hpda + 1) * 4;
	qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0;
	qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0;
	qp->maxr2t = le32toh(ic_req.maxr2t);
	qp->maxh2cdata = params->tcp.maxh2cdata;
	qp->max_icd = 0;	/* XXX */
	return (true);
}

static struct nvmf_qpair *
tcp_allocate_qpair(struct nvmf_association *na,
    const struct nvmf_qpair_params *qparams)
{
	const struct nvmf_association_params *aparams = &na->na_params;
	struct nvmf_tcp_qpair *qp;
	bool connected;

	if (aparams->tcp.pda > NVME_TCP_CPDA_MAX) {
		na_error(na, "Invalid PDA");
		return (NULL);
	}

	qp = calloc(1, sizeof(*qp));
	qp->s = qparams->tcp.fd;
	LIST_INIT(&qp->rx_buffers);
	LIST_INIT(&qp->tx_buffers);
	TAILQ_INIT(&qp->rx_capsules);
	if (na->na_controller)
		connected = tcp_accept(qp, na);
	else
		connected = tcp_connect(qp, na, qparams->admin);
	if (!connected) {
		free(qp);
		return (NULL);
	}

	return (&qp->qp);
}

static void
tcp_free_qpair(struct nvmf_qpair *nq)
{
	struct nvmf_tcp_qpair *qp = TQP(nq);
	struct nvmf_tcp_capsule *ntc, *tc;
	struct nvmf_tcp_command_buffer *ncb, *cb;

	TAILQ_FOREACH_SAFE(tc, &qp->rx_capsules, link, ntc) {
		TAILQ_REMOVE(&qp->rx_capsules, tc, link);
		nvmf_free_capsule(&tc->nc);
	}
	LIST_FOREACH_SAFE(cb, &qp->rx_buffers, link, ncb) {
		tcp_free_command_buffer(cb);
	}
	LIST_FOREACH_SAFE(cb, &qp->tx_buffers, link, ncb) {
		tcp_free_command_buffer(cb);
	}
	free(qp);
}

static void
tcp_kernel_handoff_params(struct nvmf_qpair *nq, nvlist_t *nvl)
{
	struct nvmf_tcp_qpair *qp = TQP(nq);

	nvlist_add_number(nvl, "fd", qp->s);
	nvlist_add_number(nvl, "rxpda", qp->rxpda);
	nvlist_add_number(nvl, "txpda", qp->txpda);
	nvlist_add_bool(nvl, "header_digests", qp->header_digests);
	nvlist_add_bool(nvl, "data_digests", qp->data_digests);
	nvlist_add_number(nvl, "maxr2t", qp->maxr2t);
	nvlist_add_number(nvl, "maxh2cdata", qp->maxh2cdata);
	nvlist_add_number(nvl, "max_icd", qp->max_icd);
}

static int
tcp_populate_dle(struct nvmf_qpair *nq, struct nvme_discovery_log_entry *dle)
{
	struct nvmf_tcp_qpair *qp = TQP(nq);
	struct sockaddr_storage ss;
	socklen_t ss_len;

	ss_len = sizeof(ss);
	if (getpeername(qp->s, (struct sockaddr *)&ss, &ss_len) == -1)
		return (errno);

	if (getnameinfo((struct sockaddr *)&ss, ss_len, dle->traddr,
	    sizeof(dle->traddr), dle->trsvcid, sizeof(dle->trsvcid),
	    NI_NUMERICHOST | NI_NUMERICSERV) != 0)
		return (EINVAL);

	return (0);
}

static struct nvmf_capsule *
tcp_allocate_capsule(struct nvmf_qpair *qp __unused)
{
	struct nvmf_tcp_capsule *nc;

	nc = calloc(1, sizeof(*nc));
	return (&nc->nc);
}

static void
tcp_free_capsule(struct nvmf_capsule *nc)
{
	struct nvmf_tcp_capsule *tc = TCAP(nc);

	nvmf_tcp_free_pdu(&tc->rx_pdu);
	if (tc->cb != NULL)
		tcp_free_command_buffer(tc->cb);
	free(tc);
}
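
/*
 * Host: transmit a command capsule.  Payloads that fit within the
 * queue pair's in-capsule data limit are sent inline; otherwise the
 * SQE advertises a command buffer and the data is transferred later
 * via R2T/H2C_DATA (host writes) or C2H_DATA (host reads).
 */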
static int
tcp_transmit_command(struct nvmf_capsule *nc)
{
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	struct nvmf_tcp_capsule *tc = TCAP(nc);
	struct nvme_tcp_cmd cmd;
	struct nvme_sgl_descriptor *sgl;
	int error;
	bool use_icd;

	use_icd = false;
	if (nc->nc_data_len != 0 && nc->nc_send_data &&
	    nc->nc_data_len <= qp->max_icd)
		use_icd = true;

	memset(&cmd, 0, sizeof(cmd));
	cmd.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_CMD;
	cmd.ccsqe = nc->nc_sqe;

	/* Populate SGL in SQE. */
	sgl = &cmd.ccsqe.sgl;
	memset(sgl, 0, sizeof(*sgl));
	sgl->address = 0;
	sgl->length = htole32(nc->nc_data_len);
	if (use_icd) {
		/* Use in-capsule data. */
		sgl->type = NVME_SGL_TYPE_ICD;
	} else {
		/* Use a command buffer. */
		sgl->type = NVME_SGL_TYPE_COMMAND_BUFFER;
	}

	/* Send command capsule. */
	error = nvmf_tcp_construct_pdu(qp, &cmd, sizeof(cmd), use_icd ?
	    nc->nc_data : NULL, use_icd ? nc->nc_data_len : 0);
	if (error != 0)
		return (error);

	/*
	 * If data will be transferred using a command buffer, allocate a
	 * buffer structure and queue it.
	 */
	if (nc->nc_data_len != 0 && !use_icd)
		tc->cb = tcp_alloc_command_buffer(qp, nc->nc_data, 0,
		    nc->nc_data_len, cmd.ccsqe.cid, 0, !nc->nc_send_data);

	return (0);
}

static int
tcp_transmit_response(struct nvmf_capsule *nc)
{
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	struct nvme_tcp_rsp rsp;

	memset(&rsp, 0, sizeof(rsp));
	rsp.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_RESP;
	rsp.rccqe = nc->nc_cqe;

	return (nvmf_tcp_construct_pdu(qp, &rsp, sizeof(rsp), NULL, 0));
}

static int
tcp_transmit_capsule(struct nvmf_capsule *nc)
{
	if (nc->nc_qe_len == sizeof(struct nvme_command))
		return (tcp_transmit_command(nc));
	else
		return (tcp_transmit_response(nc));
}

static int
tcp_receive_capsule(struct nvmf_qpair *nq, struct nvmf_capsule **ncp)
{
	struct nvmf_tcp_qpair *qp = TQP(nq);
	struct nvmf_tcp_capsule *tc;
	int error;

	while (TAILQ_EMPTY(&qp->rx_capsules)) {
		error = nvmf_tcp_receive_pdu(qp);
		if (error != 0)
			return (error);
	}
	tc = TAILQ_FIRST(&qp->rx_capsules);
	TAILQ_REMOVE(&qp->rx_capsules, tc, link);
	*ncp = &tc->nc;
	return (0);
}

static uint8_t
tcp_validate_command_capsule(const struct nvmf_capsule *nc)
{
	const struct nvmf_tcp_capsule *tc = CTCAP(nc);
	const struct nvme_sgl_descriptor *sgl;

	assert(tc->rx_pdu.hdr != NULL);

	sgl = &nc->nc_sqe.sgl;
	switch (sgl->type) {
	case NVME_SGL_TYPE_ICD:
		if (tc->rx_pdu.data_len != le32toh(sgl->length)) {
			printf("NVMe/TCP: Command Capsule with mismatched ICD length\n");
			return (NVME_SC_DATA_SGL_LENGTH_INVALID);
		}
		break;
	case NVME_SGL_TYPE_COMMAND_BUFFER:
		if (tc->rx_pdu.data_len != 0) {
			printf("NVMe/TCP: Command Buffer SGL with ICD\n");
			return (NVME_SC_INVALID_FIELD);
		}
		break;
	default:
		printf("NVMe/TCP: Invalid SGL type in Command Capsule\n");
		return (NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID);
	}

	if (sgl->address != 0) {
		printf("NVMe/TCP: Invalid SGL offset in Command Capsule\n");
		return (NVME_SC_SGL_OFFSET_INVALID);
	}

	return (NVME_SC_SUCCESS);
}

static size_t
tcp_capsule_data_len(const struct nvmf_capsule *nc)
{
	assert(nc->nc_qe_len == sizeof(struct nvme_command));
	return (le32toh(nc->nc_sqe.sgl.length));
}

/* NB: cid and ttag are both little-endian already. */
static int
tcp_send_r2t(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    uint32_t data_offset, uint32_t data_len)
{
	struct nvme_tcp_r2t_hdr r2t;

	memset(&r2t, 0, sizeof(r2t));
	r2t.common.pdu_type = NVME_TCP_PDU_TYPE_R2T;
	r2t.cccid = cid;
	r2t.ttag = ttag;
	r2t.r2to = htole32(data_offset);
	r2t.r2tl = htole32(data_len);

	return (nvmf_tcp_construct_pdu(qp, &r2t, sizeof(r2t), NULL, 0));
}
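
/*
 * Controller: fetch host data for a command by issuing an R2T and
 * consuming the resulting H2C_DATA PDUs into the caller's buffer via
 * a temporary command buffer.
 */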
static int
tcp_receive_r2t_data(const struct nvmf_capsule *nc, uint32_t data_offset,
    void *buf, size_t len)
{
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	struct nvmf_tcp_command_buffer *cb;
	int error;
	uint16_t ttag;

	/*
	 * Don't bother byte-swapping ttag as it is just a cookie
	 * value returned by the other end as-is.
	 */
	ttag = qp->next_ttag++;

	error = tcp_send_r2t(qp, nc->nc_sqe.cid, ttag, data_offset, len);
	if (error != 0)
		return (error);

	cb = tcp_alloc_command_buffer(qp, buf, data_offset, len,
	    nc->nc_sqe.cid, ttag, true);

	/* Parse received PDUs until the data transfer is complete. */
	while (cb->data_xfered < cb->data_len) {
		error = nvmf_tcp_receive_pdu(qp);
		if (error != 0)
			break;
	}
	tcp_free_command_buffer(cb);
	return (error);
}

static int
tcp_receive_icd_data(const struct nvmf_capsule *nc, uint32_t data_offset,
    void *buf, size_t len)
{
	const struct nvmf_tcp_capsule *tc = CTCAP(nc);
	const char *icd;

	icd = (const char *)tc->rx_pdu.hdr + tc->rx_pdu.hdr->pdo + data_offset;
	memcpy(buf, icd, len);
	return (0);
}

static int
tcp_receive_controller_data(const struct nvmf_capsule *nc, uint32_t data_offset,
    void *buf, size_t len)
{
	struct nvmf_association *na = nc->nc_qpair->nq_association;
	const struct nvme_sgl_descriptor *sgl;
	size_t data_len;

	if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller)
		return (EINVAL);

	sgl = &nc->nc_sqe.sgl;
	data_len = le32toh(sgl->length);
	if (data_offset + len > data_len)
		return (EFBIG);

	if (sgl->type == NVME_SGL_TYPE_ICD)
		return (tcp_receive_icd_data(nc, data_offset, buf, len));
	else
		return (tcp_receive_r2t_data(nc, data_offset, buf, len));
}
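
/*
 * Controller: return data to the host as one or more C2H_DATA PDUs
 * in response to a read-like command.
 */
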
/* NB: cid is little-endian already. */
static int
tcp_send_c2h_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid,
    uint32_t data_offset, const void *buf, size_t len, bool last_pdu,
    bool success)
{
	struct nvme_tcp_c2h_data_hdr c2h;

	memset(&c2h, 0, sizeof(c2h));
	c2h.common.pdu_type = NVME_TCP_PDU_TYPE_C2H_DATA;
	if (last_pdu)
		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
	if (success)
		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
	c2h.cccid = cid;
	c2h.datao = htole32(data_offset);
	c2h.datal = htole32(len);

	return (nvmf_tcp_construct_pdu(qp, &c2h, sizeof(c2h),
	    __DECONST(void *, buf), len));
}

static int
tcp_send_controller_data(const struct nvmf_capsule *nc, const void *buf,
    size_t len)
{
	struct nvmf_association *na = nc->nc_qpair->nq_association;
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	const struct nvme_sgl_descriptor *sgl;
	const char *src;
	size_t todo;
	uint32_t data_len, data_offset;
	int error;
	bool last_pdu, send_success_flag;

	if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller)
		return (EINVAL);

	sgl = &nc->nc_sqe.sgl;
	data_len = le32toh(sgl->length);
	if (len != data_len) {
		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
		return (EFBIG);
	}

	if (sgl->type != NVME_SGL_TYPE_COMMAND_BUFFER) {
		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
		return (EINVAL);
	}

	/* Use the SUCCESS flag if SQ flow control is disabled. */
	send_success_flag = !qp->qp.nq_flow_control;

	/*
	 * Write out one or more C2H_DATA PDUs containing the data.
	 * Each PDU is arbitrarily capped at 256k.
	 */
	data_offset = 0;
	src = buf;
	while (len > 0) {
		if (len > 256 * 1024) {
			todo = 256 * 1024;
			last_pdu = false;
		} else {
			todo = len;
			last_pdu = true;
		}
		error = tcp_send_c2h_pdu(qp, nc->nc_sqe.cid, data_offset,
		    src, todo, last_pdu, last_pdu && send_success_flag);
		if (error != 0) {
			nvmf_send_generic_error(nc,
			    NVME_SC_TRANSIENT_TRANSPORT_ERROR);
			return (error);
		}
		data_offset += todo;
		src += todo;
		len -= todo;
	}
	if (!send_success_flag)
		nvmf_send_success(nc);
	return (0);
}

struct nvmf_transport_ops tcp_ops = {
	.allocate_association = tcp_allocate_association,
	.update_association = tcp_update_association,
	.free_association = tcp_free_association,
	.allocate_qpair = tcp_allocate_qpair,
	.free_qpair = tcp_free_qpair,
	.kernel_handoff_params = tcp_kernel_handoff_params,
	.populate_dle = tcp_populate_dle,
	.allocate_capsule = tcp_allocate_capsule,
	.free_capsule = tcp_free_capsule,
	.transmit_capsule = tcp_transmit_capsule,
	.receive_capsule = tcp_receive_capsule,
	.validate_command_capsule = tcp_validate_command_capsule,
	.capsule_data_len = tcp_capsule_data_len,
	.receive_controller_data = tcp_receive_controller_data,
	.send_controller_data = tcp_send_controller_data,
};