1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022-2024 Chelsio Communications, Inc. 5 * Written by: John Baldwin <jhb@FreeBSD.org> 6 */ 7 8 #include <sys/endian.h> 9 #include <sys/gsb_crc32.h> 10 #include <sys/queue.h> 11 #include <sys/uio.h> 12 #include <assert.h> 13 #include <errno.h> 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <string.h> 17 #include <unistd.h> 18 19 #include "libnvmf.h" 20 #include "internal.h" 21 #include "nvmf_tcp.h" 22 23 struct nvmf_tcp_qpair; 24 25 struct nvmf_tcp_command_buffer { 26 struct nvmf_tcp_qpair *qp; 27 28 void *data; 29 size_t data_len; 30 size_t data_xfered; 31 uint32_t data_offset; 32 33 uint16_t cid; 34 uint16_t ttag; 35 36 LIST_ENTRY(nvmf_tcp_command_buffer) link; 37 }; 38 39 LIST_HEAD(nvmf_tcp_command_buffer_list, nvmf_tcp_command_buffer); 40 41 struct nvmf_tcp_association { 42 struct nvmf_association na; 43 44 uint32_t ioccsz; 45 }; 46 47 struct nvmf_tcp_rxpdu { 48 struct nvme_tcp_common_pdu_hdr *hdr; 49 uint32_t data_len; 50 }; 51 52 struct nvmf_tcp_capsule { 53 struct nvmf_capsule nc; 54 55 struct nvmf_tcp_rxpdu rx_pdu; 56 struct nvmf_tcp_command_buffer *cb; 57 58 TAILQ_ENTRY(nvmf_tcp_capsule) link; 59 }; 60 61 struct nvmf_tcp_qpair { 62 struct nvmf_qpair qp; 63 int s; 64 65 uint8_t txpda; 66 uint8_t rxpda; 67 bool header_digests; 68 bool data_digests; 69 uint32_t maxr2t; 70 uint32_t maxh2cdata; 71 uint32_t max_icd; /* Host only */ 72 uint16_t next_ttag; /* Controller only */ 73 74 struct nvmf_tcp_command_buffer_list tx_buffers; 75 struct nvmf_tcp_command_buffer_list rx_buffers; 76 TAILQ_HEAD(, nvmf_tcp_capsule) rx_capsules; 77 }; 78 79 #define TASSOC(nc) ((struct nvmf_tcp_association *)(na)) 80 #define TCAP(nc) ((struct nvmf_tcp_capsule *)(nc)) 81 #define CTCAP(nc) ((const struct nvmf_tcp_capsule *)(nc)) 82 #define TQP(qp) ((struct nvmf_tcp_qpair *)(qp)) 83 84 static const char zero_padding[NVME_TCP_PDU_PDO_MAX_OFFSET]; 85 86 static uint32_t 87 compute_digest(const void *buf, size_t len) 88 { 89 return (calculate_crc32c(0xffffffff, buf, len) ^ 0xffffffff); 90 } 91 92 static struct nvmf_tcp_command_buffer * 93 tcp_alloc_command_buffer(struct nvmf_tcp_qpair *qp, void *data, 94 uint32_t data_offset, size_t data_len, uint16_t cid, uint16_t ttag, 95 bool receive) 96 { 97 struct nvmf_tcp_command_buffer *cb; 98 99 cb = malloc(sizeof(*cb)); 100 cb->qp = qp; 101 cb->data = data; 102 cb->data_offset = data_offset; 103 cb->data_len = data_len; 104 cb->data_xfered = 0; 105 cb->cid = cid; 106 cb->ttag = ttag; 107 108 if (receive) 109 LIST_INSERT_HEAD(&qp->rx_buffers, cb, link); 110 else 111 LIST_INSERT_HEAD(&qp->tx_buffers, cb, link); 112 return (cb); 113 } 114 115 static struct nvmf_tcp_command_buffer * 116 tcp_find_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 117 bool receive) 118 { 119 struct nvmf_tcp_command_buffer_list *list; 120 struct nvmf_tcp_command_buffer *cb; 121 122 list = receive ? &qp->rx_buffers : &qp->tx_buffers; 123 LIST_FOREACH(cb, list, link) { 124 if (cb->cid == cid && cb->ttag == ttag) 125 return (cb); 126 } 127 return (NULL); 128 } 129 130 static void 131 tcp_purge_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 132 bool receive) 133 { 134 struct nvmf_tcp_command_buffer *cb; 135 136 cb = tcp_find_command_buffer(qp, cid, ttag, receive); 137 if (cb != NULL) 138 LIST_REMOVE(cb, link); 139 } 140 141 static void 142 tcp_free_command_buffer(struct nvmf_tcp_command_buffer *cb) 143 { 144 LIST_REMOVE(cb, link); 145 free(cb); 146 } 147 148 static int 149 nvmf_tcp_write_pdu(struct nvmf_tcp_qpair *qp, const void *pdu, size_t len) 150 { 151 ssize_t nwritten; 152 const char *cp; 153 154 cp = pdu; 155 while (len != 0) { 156 nwritten = write(qp->s, cp, len); 157 if (nwritten < 0) 158 return (errno); 159 len -= nwritten; 160 cp += nwritten; 161 } 162 return (0); 163 } 164 165 static int 166 nvmf_tcp_write_pdu_iov(struct nvmf_tcp_qpair *qp, struct iovec *iov, 167 u_int iovcnt, size_t len) 168 { 169 ssize_t nwritten; 170 171 for (;;) { 172 nwritten = writev(qp->s, iov, iovcnt); 173 if (nwritten < 0) 174 return (errno); 175 176 len -= nwritten; 177 if (len == 0) 178 return (0); 179 180 while (iov->iov_len <= (size_t)nwritten) { 181 nwritten -= iov->iov_len; 182 iovcnt--; 183 iov++; 184 } 185 186 iov->iov_base = (char *)iov->iov_base + nwritten; 187 iov->iov_len -= nwritten; 188 } 189 } 190 191 static void 192 nvmf_tcp_report_error(struct nvmf_association *na, struct nvmf_tcp_qpair *qp, 193 uint16_t fes, uint32_t fei, const void *rx_pdu, size_t pdu_len, u_int hlen) 194 { 195 struct nvme_tcp_term_req_hdr hdr; 196 struct iovec iov[2]; 197 198 if (hlen != 0) { 199 if (hlen > NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) 200 hlen = NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 201 if (hlen > pdu_len) 202 hlen = pdu_len; 203 } 204 205 memset(&hdr, 0, sizeof(hdr)); 206 hdr.common.pdu_type = na->na_controller ? 207 NVME_TCP_PDU_TYPE_C2H_TERM_REQ : NVME_TCP_PDU_TYPE_H2C_TERM_REQ; 208 hdr.common.hlen = sizeof(hdr); 209 hdr.common.plen = sizeof(hdr) + hlen; 210 hdr.fes = htole16(fes); 211 le32enc(hdr.fei, fei); 212 iov[0].iov_base = &hdr; 213 iov[0].iov_len = sizeof(hdr); 214 iov[1].iov_base = __DECONST(void *, rx_pdu); 215 iov[1].iov_len = hlen; 216 217 (void)nvmf_tcp_write_pdu_iov(qp, iov, nitems(iov), sizeof(hdr) + hlen); 218 close(qp->s); 219 qp->s = -1; 220 } 221 222 static int 223 nvmf_tcp_validate_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu, 224 size_t pdu_len) 225 { 226 const struct nvme_tcp_common_pdu_hdr *ch; 227 uint32_t data_len, fei, plen; 228 uint32_t digest, rx_digest; 229 u_int hlen; 230 int error; 231 uint16_t fes; 232 233 /* Determine how large of a PDU header to return for errors. */ 234 ch = pdu->hdr; 235 hlen = ch->hlen; 236 plen = le32toh(ch->plen); 237 if (hlen < sizeof(*ch) || hlen > plen) 238 hlen = sizeof(*ch); 239 240 error = nvmf_tcp_validate_pdu_header(ch, 241 qp->qp.nq_association->na_controller, qp->header_digests, 242 qp->data_digests, qp->rxpda, &data_len, &fes, &fei); 243 if (error != 0) { 244 if (error == ECONNRESET) { 245 close(qp->s); 246 qp->s = -1; 247 } else { 248 nvmf_tcp_report_error(qp->qp.nq_association, qp, 249 fes, fei, ch, pdu_len, hlen); 250 } 251 return (error); 252 } 253 254 /* Check header digest if present. */ 255 if ((ch->flags & NVME_TCP_CH_FLAGS_HDGSTF) != 0) { 256 digest = compute_digest(ch, ch->hlen); 257 memcpy(&rx_digest, (const char *)ch + ch->hlen, 258 sizeof(rx_digest)); 259 if (digest != rx_digest) { 260 printf("NVMe/TCP: Header digest mismatch\n"); 261 nvmf_tcp_report_error(qp->qp.nq_association, qp, 262 NVME_TCP_TERM_REQ_FES_HDGST_ERROR, rx_digest, ch, 263 pdu_len, hlen); 264 return (EBADMSG); 265 } 266 } 267 268 /* Check data digest if present. */ 269 if ((ch->flags & NVME_TCP_CH_FLAGS_DDGSTF) != 0) { 270 digest = compute_digest((const char *)ch + ch->pdo, data_len); 271 memcpy(&rx_digest, (const char *)ch + plen - sizeof(rx_digest), 272 sizeof(rx_digest)); 273 if (digest != rx_digest) { 274 printf("NVMe/TCP: Data digest mismatch\n"); 275 return (EBADMSG); 276 } 277 } 278 279 pdu->data_len = data_len; 280 return (0); 281 } 282 283 /* 284 * Read data from a socket, retrying until the data has been fully 285 * read or an error occurs. 286 */ 287 static int 288 nvmf_tcp_read_buffer(int s, void *buf, size_t len) 289 { 290 ssize_t nread; 291 char *cp; 292 293 cp = buf; 294 while (len != 0) { 295 nread = read(s, cp, len); 296 if (nread < 0) 297 return (errno); 298 if (nread == 0) 299 return (ECONNRESET); 300 len -= nread; 301 cp += nread; 302 } 303 return (0); 304 } 305 306 static int 307 nvmf_tcp_read_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu) 308 { 309 struct nvme_tcp_common_pdu_hdr ch; 310 uint32_t plen; 311 int error; 312 313 memset(pdu, 0, sizeof(*pdu)); 314 error = nvmf_tcp_read_buffer(qp->s, &ch, sizeof(ch)); 315 if (error != 0) 316 return (error); 317 318 plen = le32toh(ch.plen); 319 320 /* 321 * Validate a header with garbage lengths to trigger 322 * an error message without reading more. 323 */ 324 if (plen < sizeof(ch) || ch.hlen > plen) { 325 pdu->hdr = &ch; 326 error = nvmf_tcp_validate_pdu(qp, pdu, sizeof(ch)); 327 pdu->hdr = NULL; 328 assert(error != 0); 329 return (error); 330 } 331 332 /* Read the rest of the PDU. */ 333 pdu->hdr = malloc(plen); 334 memcpy(pdu->hdr, &ch, sizeof(ch)); 335 error = nvmf_tcp_read_buffer(qp->s, pdu->hdr + 1, plen - sizeof(ch)); 336 if (error != 0) 337 return (error); 338 error = nvmf_tcp_validate_pdu(qp, pdu, plen); 339 if (error != 0) { 340 free(pdu->hdr); 341 pdu->hdr = NULL; 342 } 343 return (error); 344 } 345 346 static void 347 nvmf_tcp_free_pdu(struct nvmf_tcp_rxpdu *pdu) 348 { 349 free(pdu->hdr); 350 pdu->hdr = NULL; 351 } 352 353 static int 354 nvmf_tcp_handle_term_req(struct nvmf_tcp_rxpdu *pdu) 355 { 356 struct nvme_tcp_term_req_hdr *hdr; 357 358 hdr = (void *)pdu->hdr; 359 360 printf("NVMe/TCP: Received termination request: fes %#x fei %#x\n", 361 le16toh(hdr->fes), le32dec(hdr->fei)); 362 nvmf_tcp_free_pdu(pdu); 363 return (ECONNRESET); 364 } 365 366 static int 367 nvmf_tcp_save_command_capsule(struct nvmf_tcp_qpair *qp, 368 struct nvmf_tcp_rxpdu *pdu) 369 { 370 struct nvme_tcp_cmd *cmd; 371 struct nvmf_capsule *nc; 372 struct nvmf_tcp_capsule *tc; 373 374 cmd = (void *)pdu->hdr; 375 376 nc = nvmf_allocate_command(&qp->qp, &cmd->ccsqe); 377 if (nc == NULL) 378 return (ENOMEM); 379 380 tc = TCAP(nc); 381 tc->rx_pdu = *pdu; 382 383 TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link); 384 return (0); 385 } 386 387 static int 388 nvmf_tcp_save_response_capsule(struct nvmf_tcp_qpair *qp, 389 struct nvmf_tcp_rxpdu *pdu) 390 { 391 struct nvme_tcp_rsp *rsp; 392 struct nvmf_capsule *nc; 393 struct nvmf_tcp_capsule *tc; 394 395 rsp = (void *)pdu->hdr; 396 397 nc = nvmf_allocate_response(&qp->qp, &rsp->rccqe); 398 if (nc == NULL) 399 return (ENOMEM); 400 401 nc->nc_sqhd_valid = true; 402 tc = TCAP(nc); 403 tc->rx_pdu = *pdu; 404 405 TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link); 406 407 /* 408 * Once the CQE has been received, no further transfers to the 409 * command buffer for the associated CID can occur. 410 */ 411 tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, true); 412 tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, false); 413 414 return (0); 415 } 416 417 /* 418 * Construct and send a PDU that contains an optional data payload. 419 * This includes dealing with digests and the length fields in the 420 * common header. 421 */ 422 static int 423 nvmf_tcp_construct_pdu(struct nvmf_tcp_qpair *qp, void *hdr, size_t hlen, 424 void *data, uint32_t data_len) 425 { 426 struct nvme_tcp_common_pdu_hdr *ch; 427 struct iovec iov[5]; 428 u_int iovcnt; 429 uint32_t header_digest, data_digest, pad, pdo, plen; 430 431 plen = hlen; 432 if (qp->header_digests) 433 plen += sizeof(header_digest); 434 if (data_len != 0) { 435 pdo = roundup(plen, qp->txpda); 436 pad = pdo - plen; 437 plen = pdo + data_len; 438 if (qp->data_digests) 439 plen += sizeof(data_digest); 440 } else { 441 assert(data == NULL); 442 pdo = 0; 443 pad = 0; 444 } 445 446 ch = hdr; 447 ch->hlen = hlen; 448 if (qp->header_digests) 449 ch->flags |= NVME_TCP_CH_FLAGS_HDGSTF; 450 if (qp->data_digests && data_len != 0) 451 ch->flags |= NVME_TCP_CH_FLAGS_DDGSTF; 452 ch->pdo = pdo; 453 ch->plen = htole32(plen); 454 455 /* CH + PSH */ 456 iov[0].iov_base = hdr; 457 iov[0].iov_len = hlen; 458 iovcnt = 1; 459 460 /* HDGST */ 461 if (qp->header_digests) { 462 header_digest = compute_digest(hdr, hlen); 463 iov[iovcnt].iov_base = &header_digest; 464 iov[iovcnt].iov_len = sizeof(header_digest); 465 iovcnt++; 466 } 467 468 if (pad != 0) { 469 /* PAD */ 470 iov[iovcnt].iov_base = __DECONST(char *, zero_padding); 471 iov[iovcnt].iov_len = pad; 472 iovcnt++; 473 } 474 475 if (data_len != 0) { 476 /* DATA */ 477 iov[iovcnt].iov_base = data; 478 iov[iovcnt].iov_len = data_len; 479 iovcnt++; 480 481 /* DDGST */ 482 if (qp->data_digests) { 483 data_digest = compute_digest(data, data_len); 484 iov[iovcnt].iov_base = &data_digest; 485 iov[iovcnt].iov_len = sizeof(data_digest); 486 iovcnt++; 487 } 488 } 489 490 return (nvmf_tcp_write_pdu_iov(qp, iov, iovcnt, plen)); 491 } 492 493 static int 494 nvmf_tcp_handle_h2c_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu) 495 { 496 struct nvme_tcp_h2c_data_hdr *h2c; 497 struct nvmf_tcp_command_buffer *cb; 498 uint32_t data_len, data_offset; 499 const char *icd; 500 501 h2c = (void *)pdu->hdr; 502 if (le32toh(h2c->datal) > qp->maxh2cdata) { 503 nvmf_tcp_report_error(qp->qp.nq_association, qp, 504 NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED, 0, 505 pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen); 506 nvmf_tcp_free_pdu(pdu); 507 return (EBADMSG); 508 } 509 510 cb = tcp_find_command_buffer(qp, h2c->cccid, h2c->ttag, true); 511 if (cb == NULL) { 512 nvmf_tcp_report_error(qp->qp.nq_association, qp, 513 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 514 offsetof(struct nvme_tcp_h2c_data_hdr, ttag), pdu->hdr, 515 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 516 nvmf_tcp_free_pdu(pdu); 517 return (EBADMSG); 518 } 519 520 data_len = le32toh(h2c->datal); 521 if (data_len != pdu->data_len) { 522 nvmf_tcp_report_error(qp->qp.nq_association, qp, 523 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 524 offsetof(struct nvme_tcp_h2c_data_hdr, datal), pdu->hdr, 525 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 526 nvmf_tcp_free_pdu(pdu); 527 return (EBADMSG); 528 } 529 530 data_offset = le32toh(h2c->datao); 531 if (data_offset < cb->data_offset || 532 data_offset + data_len > cb->data_offset + cb->data_len) { 533 nvmf_tcp_report_error(qp->qp.nq_association, qp, 534 NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0, 535 pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen); 536 nvmf_tcp_free_pdu(pdu); 537 return (EBADMSG); 538 } 539 540 if (data_offset != cb->data_offset + cb->data_xfered) { 541 nvmf_tcp_report_error(qp->qp.nq_association, qp, 542 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 543 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 544 nvmf_tcp_free_pdu(pdu); 545 return (EBADMSG); 546 } 547 548 if ((cb->data_xfered + data_len == cb->data_len) != 549 ((pdu->hdr->flags & NVME_TCP_H2C_DATA_FLAGS_LAST_PDU) != 0)) { 550 nvmf_tcp_report_error(qp->qp.nq_association, qp, 551 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 552 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 553 nvmf_tcp_free_pdu(pdu); 554 return (EBADMSG); 555 } 556 557 cb->data_xfered += data_len; 558 data_offset -= cb->data_offset; 559 icd = (const char *)pdu->hdr + pdu->hdr->pdo; 560 memcpy((char *)cb->data + data_offset, icd, data_len); 561 562 nvmf_tcp_free_pdu(pdu); 563 return (0); 564 } 565 566 static int 567 nvmf_tcp_handle_c2h_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu) 568 { 569 struct nvme_tcp_c2h_data_hdr *c2h; 570 struct nvmf_tcp_command_buffer *cb; 571 uint32_t data_len, data_offset; 572 const char *icd; 573 574 c2h = (void *)pdu->hdr; 575 576 cb = tcp_find_command_buffer(qp, c2h->cccid, 0, true); 577 if (cb == NULL) { 578 /* 579 * XXX: Could be PDU sequence error if cccid is for a 580 * command that doesn't use a command buffer. 581 */ 582 nvmf_tcp_report_error(qp->qp.nq_association, qp, 583 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 584 offsetof(struct nvme_tcp_c2h_data_hdr, cccid), pdu->hdr, 585 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 586 nvmf_tcp_free_pdu(pdu); 587 return (EBADMSG); 588 } 589 590 data_len = le32toh(c2h->datal); 591 if (data_len != pdu->data_len) { 592 nvmf_tcp_report_error(qp->qp.nq_association, qp, 593 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 594 offsetof(struct nvme_tcp_c2h_data_hdr, datal), pdu->hdr, 595 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 596 nvmf_tcp_free_pdu(pdu); 597 return (EBADMSG); 598 } 599 600 data_offset = le32toh(c2h->datao); 601 if (data_offset < cb->data_offset || 602 data_offset + data_len > cb->data_offset + cb->data_len) { 603 nvmf_tcp_report_error(qp->qp.nq_association, qp, 604 NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0, 605 pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen); 606 nvmf_tcp_free_pdu(pdu); 607 return (EBADMSG); 608 } 609 610 if (data_offset != cb->data_offset + cb->data_xfered) { 611 nvmf_tcp_report_error(qp->qp.nq_association, qp, 612 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 613 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 614 nvmf_tcp_free_pdu(pdu); 615 return (EBADMSG); 616 } 617 618 if ((cb->data_xfered + data_len == cb->data_len) != 619 ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) != 0)) { 620 nvmf_tcp_report_error(qp->qp.nq_association, qp, 621 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 622 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 623 nvmf_tcp_free_pdu(pdu); 624 return (EBADMSG); 625 } 626 627 cb->data_xfered += data_len; 628 data_offset -= cb->data_offset; 629 icd = (const char *)pdu->hdr + pdu->hdr->pdo; 630 memcpy((char *)cb->data + data_offset, icd, data_len); 631 632 if ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_SUCCESS) != 0) { 633 struct nvme_completion cqe; 634 struct nvmf_tcp_capsule *tc; 635 struct nvmf_capsule *nc; 636 637 memset(&cqe, 0, sizeof(cqe)); 638 cqe.cid = cb->cid; 639 640 nc = nvmf_allocate_response(&qp->qp, &cqe); 641 if (nc == NULL) { 642 nvmf_tcp_free_pdu(pdu); 643 return (ENOMEM); 644 } 645 nc->nc_sqhd_valid = false; 646 647 tc = TCAP(nc); 648 TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link); 649 } 650 651 nvmf_tcp_free_pdu(pdu); 652 return (0); 653 } 654 655 /* NB: cid and ttag and little-endian already. */ 656 static int 657 tcp_send_h2c_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 658 uint32_t data_offset, void *buf, size_t len, bool last_pdu) 659 { 660 struct nvme_tcp_h2c_data_hdr h2c; 661 662 memset(&h2c, 0, sizeof(h2c)); 663 h2c.common.pdu_type = NVME_TCP_PDU_TYPE_H2C_DATA; 664 if (last_pdu) 665 h2c.common.flags |= NVME_TCP_H2C_DATA_FLAGS_LAST_PDU; 666 h2c.cccid = cid; 667 h2c.ttag = ttag; 668 h2c.datao = htole32(data_offset); 669 h2c.datal = htole32(len); 670 671 return (nvmf_tcp_construct_pdu(qp, &h2c, sizeof(h2c), buf, len)); 672 } 673 674 /* Sends one or more H2C_DATA PDUs, subject to MAXH2CDATA. */ 675 static int 676 tcp_send_h2c_pdus(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 677 uint32_t data_offset, void *buf, size_t len, bool last_pdu) 678 { 679 char *p; 680 681 p = buf; 682 while (len != 0) { 683 size_t todo; 684 int error; 685 686 todo = len; 687 if (todo > qp->maxh2cdata) 688 todo = qp->maxh2cdata; 689 error = tcp_send_h2c_pdu(qp, cid, ttag, data_offset, p, todo, 690 last_pdu && todo == len); 691 if (error != 0) 692 return (error); 693 p += todo; 694 len -= todo; 695 } 696 return (0); 697 } 698 699 static int 700 nvmf_tcp_handle_r2t(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu) 701 { 702 struct nvmf_tcp_command_buffer *cb; 703 struct nvme_tcp_r2t_hdr *r2t; 704 uint32_t data_len, data_offset; 705 int error; 706 707 r2t = (void *)pdu->hdr; 708 709 cb = tcp_find_command_buffer(qp, r2t->cccid, 0, false); 710 if (cb == NULL) { 711 nvmf_tcp_report_error(qp->qp.nq_association, qp, 712 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 713 offsetof(struct nvme_tcp_r2t_hdr, cccid), pdu->hdr, 714 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 715 nvmf_tcp_free_pdu(pdu); 716 return (EBADMSG); 717 } 718 719 data_offset = le32toh(r2t->r2to); 720 if (data_offset != cb->data_xfered) { 721 nvmf_tcp_report_error(qp->qp.nq_association, qp, 722 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 723 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 724 nvmf_tcp_free_pdu(pdu); 725 return (EBADMSG); 726 } 727 728 /* 729 * XXX: The spec does not specify how to handle R2T tranfers 730 * out of range of the original command. 731 */ 732 data_len = le32toh(r2t->r2tl); 733 if (data_offset + data_len > cb->data_len) { 734 nvmf_tcp_report_error(qp->qp.nq_association, qp, 735 NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0, 736 pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen); 737 nvmf_tcp_free_pdu(pdu); 738 return (EBADMSG); 739 } 740 741 cb->data_xfered += data_len; 742 743 /* 744 * Write out one or more H2C_DATA PDUs containing the 745 * requested data. 746 */ 747 error = tcp_send_h2c_pdus(qp, r2t->cccid, r2t->ttag, 748 data_offset, (char *)cb->data + data_offset, data_len, true); 749 750 nvmf_tcp_free_pdu(pdu); 751 return (error); 752 } 753 754 static int 755 nvmf_tcp_receive_pdu(struct nvmf_tcp_qpair *qp) 756 { 757 struct nvmf_tcp_rxpdu pdu; 758 int error; 759 760 error = nvmf_tcp_read_pdu(qp, &pdu); 761 if (error != 0) 762 return (error); 763 764 switch (pdu.hdr->pdu_type) { 765 default: 766 __unreachable(); 767 break; 768 case NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 769 case NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 770 return (nvmf_tcp_handle_term_req(&pdu)); 771 case NVME_TCP_PDU_TYPE_CAPSULE_CMD: 772 return (nvmf_tcp_save_command_capsule(qp, &pdu)); 773 case NVME_TCP_PDU_TYPE_CAPSULE_RESP: 774 return (nvmf_tcp_save_response_capsule(qp, &pdu)); 775 case NVME_TCP_PDU_TYPE_H2C_DATA: 776 return (nvmf_tcp_handle_h2c_data(qp, &pdu)); 777 case NVME_TCP_PDU_TYPE_C2H_DATA: 778 return (nvmf_tcp_handle_c2h_data(qp, &pdu)); 779 case NVME_TCP_PDU_TYPE_R2T: 780 return (nvmf_tcp_handle_r2t(qp, &pdu)); 781 } 782 } 783 784 static bool 785 nvmf_tcp_validate_ic_pdu(struct nvmf_association *na, struct nvmf_tcp_qpair *qp, 786 const struct nvme_tcp_common_pdu_hdr *ch, size_t pdu_len) 787 { 788 const struct nvme_tcp_ic_req *pdu; 789 uint32_t plen; 790 u_int hlen; 791 792 /* Determine how large of a PDU header to return for errors. */ 793 hlen = ch->hlen; 794 plen = le32toh(ch->plen); 795 if (hlen < sizeof(*ch) || hlen > plen) 796 hlen = sizeof(*ch); 797 798 /* 799 * Errors must be reported for the lowest incorrect field 800 * first, so validate fields in order. 801 */ 802 803 /* Validate pdu_type. */ 804 805 /* Controllers only receive PDUs with a PDU direction of 0. */ 806 if (na->na_controller != ((ch->pdu_type & 0x01) == 0)) { 807 na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type); 808 nvmf_tcp_report_error(na, qp, 809 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len, 810 hlen); 811 return (false); 812 } 813 814 switch (ch->pdu_type) { 815 case NVME_TCP_PDU_TYPE_IC_REQ: 816 case NVME_TCP_PDU_TYPE_IC_RESP: 817 break; 818 default: 819 na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type); 820 nvmf_tcp_report_error(na, qp, 821 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len, 822 hlen); 823 return (false); 824 } 825 826 /* Validate flags. */ 827 if (ch->flags != 0) { 828 na_error(na, "NVMe/TCP: Invalid PDU header flags %#x", 829 ch->flags); 830 nvmf_tcp_report_error(na, qp, 831 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 1, ch, pdu_len, 832 hlen); 833 return (false); 834 } 835 836 /* Validate hlen. */ 837 if (ch->hlen != 128) { 838 na_error(na, "NVMe/TCP: Invalid PDU header length %u", 839 ch->hlen); 840 nvmf_tcp_report_error(na, qp, 841 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 2, ch, pdu_len, 842 hlen); 843 return (false); 844 } 845 846 /* Validate pdo. */ 847 if (ch->pdo != 0) { 848 na_error(na, "NVMe/TCP: Invalid PDU data offset %u", ch->pdo); 849 nvmf_tcp_report_error(na, qp, 850 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 3, ch, pdu_len, 851 hlen); 852 return (false); 853 } 854 855 /* Validate plen. */ 856 if (plen != 128) { 857 na_error(na, "NVMe/TCP: Invalid PDU length %u", plen); 858 nvmf_tcp_report_error(na, qp, 859 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 4, ch, pdu_len, 860 hlen); 861 return (false); 862 } 863 864 /* Validate fields common to both ICReq and ICResp. */ 865 pdu = (const struct nvme_tcp_ic_req *)ch; 866 if (le16toh(pdu->pfv) != 0) { 867 na_error(na, "NVMe/TCP: Unsupported PDU version %u", 868 le16toh(pdu->pfv)); 869 nvmf_tcp_report_error(na, qp, 870 NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER, 871 8, ch, pdu_len, hlen); 872 return (false); 873 } 874 875 if (pdu->hpda > NVME_TCP_HPDA_MAX) { 876 na_error(na, "NVMe/TCP: Unsupported PDA %u", pdu->hpda); 877 nvmf_tcp_report_error(na, qp, 878 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 10, ch, pdu_len, 879 hlen); 880 return (false); 881 } 882 883 if (pdu->dgst.bits.reserved != 0) { 884 na_error(na, "NVMe/TCP: Invalid digest settings"); 885 nvmf_tcp_report_error(na, qp, 886 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 11, ch, pdu_len, 887 hlen); 888 return (false); 889 } 890 891 return (true); 892 } 893 894 static bool 895 nvmf_tcp_read_ic_req(struct nvmf_association *na, struct nvmf_tcp_qpair *qp, 896 struct nvme_tcp_ic_req *pdu) 897 { 898 int error; 899 900 error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu)); 901 if (error != 0) { 902 na_error(na, "NVMe/TCP: Failed to read IC request: %s", 903 strerror(error)); 904 return (false); 905 } 906 907 return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu))); 908 } 909 910 static bool 911 nvmf_tcp_read_ic_resp(struct nvmf_association *na, struct nvmf_tcp_qpair *qp, 912 struct nvme_tcp_ic_resp *pdu) 913 { 914 int error; 915 916 error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu)); 917 if (error != 0) { 918 na_error(na, "NVMe/TCP: Failed to read IC response: %s", 919 strerror(error)); 920 return (false); 921 } 922 923 return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu))); 924 } 925 926 static struct nvmf_association * 927 tcp_allocate_association(bool controller, 928 const struct nvmf_association_params *params) 929 { 930 struct nvmf_tcp_association *ta; 931 932 if (controller) { 933 /* 7.4.10.3 */ 934 if (params->tcp.maxh2cdata < 4096 || 935 params->tcp.maxh2cdata % 4 != 0) 936 return (NULL); 937 } 938 939 ta = calloc(1, sizeof(*ta)); 940 941 return (&ta->na); 942 } 943 944 static void 945 tcp_update_association(struct nvmf_association *na, 946 const struct nvme_controller_data *cdata) 947 { 948 struct nvmf_tcp_association *ta = TASSOC(na); 949 950 ta->ioccsz = le32toh(cdata->ioccsz); 951 } 952 953 static void 954 tcp_free_association(struct nvmf_association *na) 955 { 956 free(na); 957 } 958 959 static bool 960 tcp_connect(struct nvmf_tcp_qpair *qp, struct nvmf_association *na, bool admin) 961 { 962 const struct nvmf_association_params *params = &na->na_params; 963 struct nvmf_tcp_association *ta = TASSOC(na); 964 struct nvme_tcp_ic_req ic_req; 965 struct nvme_tcp_ic_resp ic_resp; 966 uint32_t maxh2cdata; 967 int error; 968 969 if (!admin) { 970 if (ta->ioccsz == 0) { 971 na_error(na, "TCP I/O queues require cdata"); 972 return (false); 973 } 974 if (ta->ioccsz < 4) { 975 na_error(na, "Invalid IOCCSZ %u", ta->ioccsz); 976 return (false); 977 } 978 } 979 980 memset(&ic_req, 0, sizeof(ic_req)); 981 ic_req.common.pdu_type = NVME_TCP_PDU_TYPE_IC_REQ; 982 ic_req.common.hlen = sizeof(ic_req); 983 ic_req.common.plen = htole32(sizeof(ic_req)); 984 ic_req.pfv = htole16(0); 985 ic_req.hpda = params->tcp.pda; 986 if (params->tcp.header_digests) 987 ic_req.dgst.bits.hdgst_enable = 1; 988 if (params->tcp.data_digests) 989 ic_req.dgst.bits.ddgst_enable = 1; 990 ic_req.maxr2t = htole32(params->tcp.maxr2t); 991 992 error = nvmf_tcp_write_pdu(qp, &ic_req, sizeof(ic_req)); 993 if (error != 0) { 994 na_error(na, "Failed to write IC request: %s", strerror(error)); 995 return (false); 996 } 997 998 if (!nvmf_tcp_read_ic_resp(na, qp, &ic_resp)) 999 return (false); 1000 1001 /* Ensure the controller didn't enable digests we didn't request. */ 1002 if ((!params->tcp.header_digests && 1003 ic_resp.dgst.bits.hdgst_enable != 0) || 1004 (!params->tcp.data_digests && 1005 ic_resp.dgst.bits.ddgst_enable != 0)) { 1006 na_error(na, "Controller enabled unrequested digests"); 1007 nvmf_tcp_report_error(na, qp, 1008 NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER, 1009 11, &ic_resp, sizeof(ic_resp), sizeof(ic_resp)); 1010 return (false); 1011 } 1012 1013 /* 1014 * XXX: Is there an upper-bound to enforce here? Perhaps pick 1015 * some large value and report larger values as an unsupported 1016 * parameter? 1017 */ 1018 maxh2cdata = le32toh(ic_resp.maxh2cdata); 1019 if (maxh2cdata < 4096 || maxh2cdata % 4 != 0) { 1020 na_error(na, "Invalid MAXH2CDATA %u", maxh2cdata); 1021 nvmf_tcp_report_error(na, qp, 1022 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 12, &ic_resp, 1023 sizeof(ic_resp), sizeof(ic_resp)); 1024 return (false); 1025 } 1026 1027 qp->rxpda = (params->tcp.pda + 1) * 4; 1028 qp->txpda = (ic_resp.cpda + 1) * 4; 1029 qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0; 1030 qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0; 1031 qp->maxr2t = params->tcp.maxr2t; 1032 qp->maxh2cdata = maxh2cdata; 1033 if (admin) 1034 /* 7.4.3 */ 1035 qp->max_icd = 8192; 1036 else 1037 qp->max_icd = (ta->ioccsz - 4) * 16; 1038 1039 return (0); 1040 } 1041 1042 static bool 1043 tcp_accept(struct nvmf_tcp_qpair *qp, struct nvmf_association *na) 1044 { 1045 const struct nvmf_association_params *params = &na->na_params; 1046 struct nvme_tcp_ic_req ic_req; 1047 struct nvme_tcp_ic_resp ic_resp; 1048 int error; 1049 1050 if (!nvmf_tcp_read_ic_req(na, qp, &ic_req)) 1051 return (false); 1052 1053 memset(&ic_resp, 0, sizeof(ic_resp)); 1054 ic_resp.common.pdu_type = NVME_TCP_PDU_TYPE_IC_RESP; 1055 ic_resp.common.hlen = sizeof(ic_req); 1056 ic_resp.common.plen = htole32(sizeof(ic_req)); 1057 ic_resp.pfv = htole16(0); 1058 ic_resp.cpda = params->tcp.pda; 1059 if (params->tcp.header_digests && ic_req.dgst.bits.hdgst_enable != 0) 1060 ic_resp.dgst.bits.hdgst_enable = 1; 1061 if (params->tcp.data_digests && ic_req.dgst.bits.ddgst_enable != 0) 1062 ic_resp.dgst.bits.ddgst_enable = 1; 1063 ic_resp.maxh2cdata = htole32(params->tcp.maxh2cdata); 1064 1065 error = nvmf_tcp_write_pdu(qp, &ic_resp, sizeof(ic_resp)); 1066 if (error != 0) { 1067 na_error(na, "Failed to write IC response: %s", 1068 strerror(error)); 1069 return (false); 1070 } 1071 1072 qp->rxpda = (params->tcp.pda + 1) * 4; 1073 qp->txpda = (ic_req.hpda + 1) * 4; 1074 qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0; 1075 qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0; 1076 qp->maxr2t = le32toh(ic_req.maxr2t); 1077 qp->maxh2cdata = params->tcp.maxh2cdata; 1078 qp->max_icd = 0; /* XXX */ 1079 return (0); 1080 } 1081 1082 static struct nvmf_qpair * 1083 tcp_allocate_qpair(struct nvmf_association *na, 1084 const struct nvmf_qpair_params *qparams) 1085 { 1086 const struct nvmf_association_params *aparams = &na->na_params; 1087 struct nvmf_tcp_qpair *qp; 1088 int error; 1089 1090 if (aparams->tcp.pda > NVME_TCP_CPDA_MAX) { 1091 na_error(na, "Invalid PDA"); 1092 return (NULL); 1093 } 1094 1095 qp = calloc(1, sizeof(*qp)); 1096 qp->s = qparams->tcp.fd; 1097 LIST_INIT(&qp->rx_buffers); 1098 LIST_INIT(&qp->tx_buffers); 1099 TAILQ_INIT(&qp->rx_capsules); 1100 if (na->na_controller) 1101 error = tcp_accept(qp, na); 1102 else 1103 error = tcp_connect(qp, na, qparams->admin); 1104 if (error != 0) { 1105 free(qp); 1106 return (NULL); 1107 } 1108 1109 return (&qp->qp); 1110 } 1111 1112 static void 1113 tcp_free_qpair(struct nvmf_qpair *nq) 1114 { 1115 struct nvmf_tcp_qpair *qp = TQP(nq); 1116 struct nvmf_tcp_capsule *ntc, *tc; 1117 struct nvmf_tcp_command_buffer *ncb, *cb; 1118 1119 TAILQ_FOREACH_SAFE(tc, &qp->rx_capsules, link, ntc) { 1120 TAILQ_REMOVE(&qp->rx_capsules, tc, link); 1121 nvmf_free_capsule(&tc->nc); 1122 } 1123 LIST_FOREACH_SAFE(cb, &qp->rx_buffers, link, ncb) { 1124 tcp_free_command_buffer(cb); 1125 } 1126 LIST_FOREACH_SAFE(cb, &qp->tx_buffers, link, ncb) { 1127 tcp_free_command_buffer(cb); 1128 } 1129 free(qp); 1130 } 1131 1132 static void 1133 tcp_kernel_handoff_params(struct nvmf_qpair *nq, nvlist_t *nvl) 1134 { 1135 struct nvmf_tcp_qpair *qp = TQP(nq); 1136 1137 nvlist_add_number(nvl, "fd", qp->s); 1138 nvlist_add_number(nvl, "rxpda", qp->rxpda); 1139 nvlist_add_number(nvl, "txpda", qp->txpda); 1140 nvlist_add_bool(nvl, "header_digests", qp->header_digests); 1141 nvlist_add_bool(nvl, "data_digests", qp->data_digests); 1142 nvlist_add_number(nvl, "maxr2t", qp->maxr2t); 1143 nvlist_add_number(nvl, "maxh2cdata", qp->maxh2cdata); 1144 nvlist_add_number(nvl, "max_icd", qp->max_icd); 1145 } 1146 1147 static struct nvmf_capsule * 1148 tcp_allocate_capsule(struct nvmf_qpair *qp __unused) 1149 { 1150 struct nvmf_tcp_capsule *nc; 1151 1152 nc = calloc(1, sizeof(*nc)); 1153 return (&nc->nc); 1154 } 1155 1156 static void 1157 tcp_free_capsule(struct nvmf_capsule *nc) 1158 { 1159 struct nvmf_tcp_capsule *tc = TCAP(nc); 1160 1161 nvmf_tcp_free_pdu(&tc->rx_pdu); 1162 if (tc->cb != NULL) 1163 tcp_free_command_buffer(tc->cb); 1164 free(tc); 1165 } 1166 1167 static int 1168 tcp_transmit_command(struct nvmf_capsule *nc) 1169 { 1170 struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair); 1171 struct nvmf_tcp_capsule *tc = TCAP(nc); 1172 struct nvme_tcp_cmd cmd; 1173 struct nvme_sgl_descriptor *sgl; 1174 int error; 1175 bool use_icd; 1176 1177 use_icd = false; 1178 if (nc->nc_data_len != 0 && nc->nc_send_data && 1179 nc->nc_data_len <= qp->max_icd) 1180 use_icd = true; 1181 1182 memset(&cmd, 0, sizeof(cmd)); 1183 cmd.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_CMD; 1184 cmd.ccsqe = nc->nc_sqe; 1185 1186 /* Populate SGL in SQE. */ 1187 sgl = &cmd.ccsqe.sgl; 1188 memset(sgl, 0, sizeof(*sgl)); 1189 sgl->address = 0; 1190 sgl->length = htole32(nc->nc_data_len); 1191 if (use_icd) { 1192 /* Use in-capsule data. */ 1193 sgl->type = NVME_SGL_TYPE_ICD; 1194 } else { 1195 /* Use a command buffer. */ 1196 sgl->type = NVME_SGL_TYPE_COMMAND_BUFFER; 1197 } 1198 1199 /* Send command capsule. */ 1200 error = nvmf_tcp_construct_pdu(qp, &cmd, sizeof(cmd), use_icd ? 1201 nc->nc_data : NULL, use_icd ? nc->nc_data_len : 0); 1202 if (error != 0) 1203 return (error); 1204 1205 /* 1206 * If data will be transferred using a command buffer, allocate a 1207 * buffer structure and queue it. 1208 */ 1209 if (nc->nc_data_len != 0 && !use_icd) 1210 tc->cb = tcp_alloc_command_buffer(qp, nc->nc_data, 0, 1211 nc->nc_data_len, cmd.ccsqe.cid, 0, !nc->nc_send_data); 1212 1213 return (0); 1214 } 1215 1216 static int 1217 tcp_transmit_response(struct nvmf_capsule *nc) 1218 { 1219 struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair); 1220 struct nvme_tcp_rsp rsp; 1221 1222 memset(&rsp, 0, sizeof(rsp)); 1223 rsp.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_RESP; 1224 rsp.rccqe = nc->nc_cqe; 1225 1226 return (nvmf_tcp_construct_pdu(qp, &rsp, sizeof(rsp), NULL, 0)); 1227 } 1228 1229 static int 1230 tcp_transmit_capsule(struct nvmf_capsule *nc) 1231 { 1232 if (nc->nc_qe_len == sizeof(struct nvme_command)) 1233 return (tcp_transmit_command(nc)); 1234 else 1235 return (tcp_transmit_response(nc)); 1236 } 1237 1238 static int 1239 tcp_receive_capsule(struct nvmf_qpair *nq, struct nvmf_capsule **ncp) 1240 { 1241 struct nvmf_tcp_qpair *qp = TQP(nq); 1242 struct nvmf_tcp_capsule *tc; 1243 int error; 1244 1245 while (TAILQ_EMPTY(&qp->rx_capsules)) { 1246 error = nvmf_tcp_receive_pdu(qp); 1247 if (error != 0) 1248 return (error); 1249 } 1250 tc = TAILQ_FIRST(&qp->rx_capsules); 1251 TAILQ_REMOVE(&qp->rx_capsules, tc, link); 1252 *ncp = &tc->nc; 1253 return (0); 1254 } 1255 1256 static uint8_t 1257 tcp_validate_command_capsule(const struct nvmf_capsule *nc) 1258 { 1259 const struct nvmf_tcp_capsule *tc = CTCAP(nc); 1260 const struct nvme_sgl_descriptor *sgl; 1261 1262 assert(tc->rx_pdu.hdr != NULL); 1263 1264 sgl = &nc->nc_sqe.sgl; 1265 switch (sgl->type) { 1266 case NVME_SGL_TYPE_ICD: 1267 if (tc->rx_pdu.data_len != le32toh(sgl->length)) { 1268 printf("NVMe/TCP: Command Capsule with mismatched ICD length\n"); 1269 return (NVME_SC_DATA_SGL_LENGTH_INVALID); 1270 } 1271 break; 1272 case NVME_SGL_TYPE_COMMAND_BUFFER: 1273 if (tc->rx_pdu.data_len != 0) { 1274 printf("NVMe/TCP: Command Buffer SGL with ICD\n"); 1275 return (NVME_SC_INVALID_FIELD); 1276 } 1277 break; 1278 default: 1279 printf("NVMe/TCP: Invalid SGL type in Command Capsule\n"); 1280 return (NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID); 1281 } 1282 1283 if (sgl->address != 0) { 1284 printf("NVMe/TCP: Invalid SGL offset in Command Capsule\n"); 1285 return (NVME_SC_SGL_OFFSET_INVALID); 1286 } 1287 1288 return (NVME_SC_SUCCESS); 1289 } 1290 1291 static size_t 1292 tcp_capsule_data_len(const struct nvmf_capsule *nc) 1293 { 1294 assert(nc->nc_qe_len == sizeof(struct nvme_command)); 1295 return (le32toh(nc->nc_sqe.sgl.length)); 1296 } 1297 1298 /* NB: cid and ttag are both little-endian already. */ 1299 static int 1300 tcp_send_r2t(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 1301 uint32_t data_offset, uint32_t data_len) 1302 { 1303 struct nvme_tcp_r2t_hdr r2t; 1304 1305 memset(&r2t, 0, sizeof(r2t)); 1306 r2t.common.pdu_type = NVME_TCP_PDU_TYPE_R2T; 1307 r2t.cccid = cid; 1308 r2t.ttag = ttag; 1309 r2t.r2to = htole32(data_offset); 1310 r2t.r2tl = htole32(data_len); 1311 1312 return (nvmf_tcp_construct_pdu(qp, &r2t, sizeof(r2t), NULL, 0)); 1313 } 1314 1315 static int 1316 tcp_receive_r2t_data(const struct nvmf_capsule *nc, uint32_t data_offset, 1317 void *buf, size_t len) 1318 { 1319 struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair); 1320 struct nvmf_tcp_command_buffer *cb; 1321 int error; 1322 uint16_t ttag; 1323 1324 /* 1325 * Don't bother byte-swapping ttag as it is just a cookie 1326 * value returned by the other end as-is. 1327 */ 1328 ttag = qp->next_ttag++; 1329 1330 error = tcp_send_r2t(qp, nc->nc_sqe.cid, ttag, data_offset, len); 1331 if (error != 0) 1332 return (error); 1333 1334 cb = tcp_alloc_command_buffer(qp, buf, data_offset, len, 1335 nc->nc_sqe.cid, ttag, true); 1336 1337 /* Parse received PDUs until the data transfer is complete. */ 1338 while (cb->data_xfered < cb->data_len) { 1339 error = nvmf_tcp_receive_pdu(qp); 1340 if (error != 0) 1341 break; 1342 } 1343 tcp_free_command_buffer(cb); 1344 return (error); 1345 } 1346 1347 static int 1348 tcp_receive_icd_data(const struct nvmf_capsule *nc, uint32_t data_offset, 1349 void *buf, size_t len) 1350 { 1351 const struct nvmf_tcp_capsule *tc = CTCAP(nc); 1352 const char *icd; 1353 1354 icd = (const char *)tc->rx_pdu.hdr + tc->rx_pdu.hdr->pdo + data_offset; 1355 memcpy(buf, icd, len); 1356 return (0); 1357 } 1358 1359 static int 1360 tcp_receive_controller_data(const struct nvmf_capsule *nc, uint32_t data_offset, 1361 void *buf, size_t len) 1362 { 1363 struct nvmf_association *na = nc->nc_qpair->nq_association; 1364 const struct nvme_sgl_descriptor *sgl; 1365 size_t data_len; 1366 1367 if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller) 1368 return (EINVAL); 1369 1370 sgl = &nc->nc_sqe.sgl; 1371 data_len = le32toh(sgl->length); 1372 if (data_offset + len > data_len) 1373 return (EFBIG); 1374 1375 if (sgl->type == NVME_SGL_TYPE_ICD) 1376 return (tcp_receive_icd_data(nc, data_offset, buf, len)); 1377 else 1378 return (tcp_receive_r2t_data(nc, data_offset, buf, len)); 1379 } 1380 1381 /* NB: cid is little-endian already. */ 1382 static int 1383 tcp_send_c2h_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid, 1384 uint32_t data_offset, const void *buf, size_t len, bool last_pdu, 1385 bool success) 1386 { 1387 struct nvme_tcp_c2h_data_hdr c2h; 1388 1389 memset(&c2h, 0, sizeof(c2h)); 1390 c2h.common.pdu_type = NVME_TCP_PDU_TYPE_C2H_DATA; 1391 if (last_pdu) 1392 c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_LAST_PDU; 1393 if (success) 1394 c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_SUCCESS; 1395 c2h.cccid = cid; 1396 c2h.datao = htole32(data_offset); 1397 c2h.datal = htole32(len); 1398 1399 return (nvmf_tcp_construct_pdu(qp, &c2h, sizeof(c2h), 1400 __DECONST(void *, buf), len)); 1401 } 1402 1403 static int 1404 tcp_send_controller_data(const struct nvmf_capsule *nc, const void *buf, 1405 size_t len) 1406 { 1407 struct nvmf_association *na = nc->nc_qpair->nq_association; 1408 struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair); 1409 const struct nvme_sgl_descriptor *sgl; 1410 const char *src; 1411 size_t todo; 1412 uint32_t data_len, data_offset; 1413 int error; 1414 bool last_pdu, send_success_flag; 1415 1416 if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller) 1417 return (EINVAL); 1418 1419 sgl = &nc->nc_sqe.sgl; 1420 data_len = le32toh(sgl->length); 1421 if (len != data_len) { 1422 nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD); 1423 return (EFBIG); 1424 } 1425 1426 if (sgl->type != NVME_SGL_TYPE_COMMAND_BUFFER) { 1427 nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD); 1428 return (EINVAL); 1429 } 1430 1431 /* Use the SUCCESS flag if SQ flow control is disabled. */ 1432 send_success_flag = !qp->qp.nq_flow_control; 1433 1434 /* 1435 * Write out one or more C2H_DATA PDUs containing the data. 1436 * Each PDU is arbitrarily capped at 256k. 1437 */ 1438 data_offset = 0; 1439 src = buf; 1440 while (len > 0) { 1441 if (len > 256 * 1024) { 1442 todo = 256 * 1024; 1443 last_pdu = false; 1444 } else { 1445 todo = len; 1446 last_pdu = true; 1447 } 1448 error = tcp_send_c2h_pdu(qp, nc->nc_sqe.cid, data_offset, 1449 src, todo, last_pdu, last_pdu && send_success_flag); 1450 if (error != 0) { 1451 nvmf_send_generic_error(nc, 1452 NVME_SC_TRANSIENT_TRANSPORT_ERROR); 1453 return (error); 1454 } 1455 data_offset += todo; 1456 src += todo; 1457 len -= todo; 1458 } 1459 if (!send_success_flag) 1460 nvmf_send_success(nc); 1461 return (0); 1462 } 1463 1464 struct nvmf_transport_ops tcp_ops = { 1465 .allocate_association = tcp_allocate_association, 1466 .update_association = tcp_update_association, 1467 .free_association = tcp_free_association, 1468 .allocate_qpair = tcp_allocate_qpair, 1469 .free_qpair = tcp_free_qpair, 1470 .kernel_handoff_params = tcp_kernel_handoff_params, 1471 .allocate_capsule = tcp_allocate_capsule, 1472 .free_capsule = tcp_free_capsule, 1473 .transmit_capsule = tcp_transmit_capsule, 1474 .receive_capsule = tcp_receive_capsule, 1475 .validate_command_capsule = tcp_validate_command_capsule, 1476 .capsule_data_len = tcp_capsule_data_len, 1477 .receive_controller_data = tcp_receive_controller_data, 1478 .send_controller_data = tcp_send_controller_data, 1479 }; 1480