1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022-2024 Chelsio Communications, Inc. 5 * Written by: John Baldwin <jhb@FreeBSD.org> 6 */ 7 8 #include <sys/endian.h> 9 #include <sys/gsb_crc32.h> 10 #include <sys/queue.h> 11 #include <sys/uio.h> 12 #include <assert.h> 13 #include <errno.h> 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <string.h> 17 #include <unistd.h> 18 19 #include "libnvmf.h" 20 #include "internal.h" 21 #include "nvmf_tcp.h" 22 23 struct nvmf_tcp_qpair; 24 25 struct nvmf_tcp_command_buffer { 26 struct nvmf_tcp_qpair *qp; 27 28 void *data; 29 size_t data_len; 30 size_t data_xfered; 31 uint32_t data_offset; 32 33 uint16_t cid; 34 uint16_t ttag; 35 36 LIST_ENTRY(nvmf_tcp_command_buffer) link; 37 }; 38 39 LIST_HEAD(nvmf_tcp_command_buffer_list, nvmf_tcp_command_buffer); 40 41 struct nvmf_tcp_association { 42 struct nvmf_association na; 43 44 uint32_t ioccsz; 45 }; 46 47 struct nvmf_tcp_rxpdu { 48 struct nvme_tcp_common_pdu_hdr *hdr; 49 uint32_t data_len; 50 }; 51 52 struct nvmf_tcp_capsule { 53 struct nvmf_capsule nc; 54 55 struct nvmf_tcp_rxpdu rx_pdu; 56 struct nvmf_tcp_command_buffer *cb; 57 58 TAILQ_ENTRY(nvmf_tcp_capsule) link; 59 }; 60 61 struct nvmf_tcp_qpair { 62 struct nvmf_qpair qp; 63 int s; 64 65 uint8_t txpda; 66 uint8_t rxpda; 67 bool header_digests; 68 bool data_digests; 69 uint32_t maxr2t; 70 uint32_t maxh2cdata; 71 uint32_t max_icd; /* Host only */ 72 uint16_t next_ttag; /* Controller only */ 73 74 struct nvmf_tcp_command_buffer_list tx_buffers; 75 struct nvmf_tcp_command_buffer_list rx_buffers; 76 TAILQ_HEAD(, nvmf_tcp_capsule) rx_capsules; 77 }; 78 79 #define TASSOC(nc) ((struct nvmf_tcp_association *)(na)) 80 #define TCAP(nc) ((struct nvmf_tcp_capsule *)(nc)) 81 #define CTCAP(nc) ((const struct nvmf_tcp_capsule *)(nc)) 82 #define TQP(qp) ((struct nvmf_tcp_qpair *)(qp)) 83 84 static const char zero_padding[NVME_TCP_PDU_PDO_MAX_OFFSET]; 85 86 static uint32_t 87 compute_digest(const void *buf, size_t 
len) 88 { 89 return (calculate_crc32c(0xffffffff, buf, len) ^ 0xffffffff); 90 } 91 92 static struct nvmf_tcp_command_buffer * 93 tcp_alloc_command_buffer(struct nvmf_tcp_qpair *qp, void *data, 94 uint32_t data_offset, size_t data_len, uint16_t cid, uint16_t ttag, 95 bool receive) 96 { 97 struct nvmf_tcp_command_buffer *cb; 98 99 cb = malloc(sizeof(*cb)); 100 cb->qp = qp; 101 cb->data = data; 102 cb->data_offset = data_offset; 103 cb->data_len = data_len; 104 cb->data_xfered = 0; 105 cb->cid = cid; 106 cb->ttag = ttag; 107 108 if (receive) 109 LIST_INSERT_HEAD(&qp->rx_buffers, cb, link); 110 else 111 LIST_INSERT_HEAD(&qp->tx_buffers, cb, link); 112 return (cb); 113 } 114 115 static struct nvmf_tcp_command_buffer * 116 tcp_find_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 117 bool receive) 118 { 119 struct nvmf_tcp_command_buffer_list *list; 120 struct nvmf_tcp_command_buffer *cb; 121 122 list = receive ? &qp->rx_buffers : &qp->tx_buffers; 123 LIST_FOREACH(cb, list, link) { 124 if (cb->cid == cid && cb->ttag == ttag) 125 return (cb); 126 } 127 return (NULL); 128 } 129 130 static void 131 tcp_purge_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 132 bool receive) 133 { 134 struct nvmf_tcp_command_buffer *cb; 135 136 cb = tcp_find_command_buffer(qp, cid, ttag, receive); 137 if (cb != NULL) 138 LIST_REMOVE(cb, link); 139 } 140 141 static void 142 tcp_free_command_buffer(struct nvmf_tcp_command_buffer *cb) 143 { 144 LIST_REMOVE(cb, link); 145 free(cb); 146 } 147 148 static int 149 nvmf_tcp_write_pdu(struct nvmf_tcp_qpair *qp, const void *pdu, size_t len) 150 { 151 ssize_t nwritten; 152 const char *cp; 153 154 cp = pdu; 155 while (len != 0) { 156 nwritten = write(qp->s, cp, len); 157 if (nwritten < 0) 158 return (errno); 159 len -= nwritten; 160 cp += nwritten; 161 } 162 return (0); 163 } 164 165 static int 166 nvmf_tcp_write_pdu_iov(struct nvmf_tcp_qpair *qp, struct iovec *iov, 167 u_int iovcnt, size_t len) 168 { 169 
ssize_t nwritten; 170 171 for (;;) { 172 nwritten = writev(qp->s, iov, iovcnt); 173 if (nwritten < 0) 174 return (errno); 175 176 len -= nwritten; 177 if (len == 0) 178 return (0); 179 180 while (iov->iov_len <= (size_t)nwritten) { 181 nwritten -= iov->iov_len; 182 iovcnt--; 183 iov++; 184 } 185 186 iov->iov_base = (char *)iov->iov_base + nwritten; 187 iov->iov_len -= nwritten; 188 } 189 } 190 191 static void 192 nvmf_tcp_report_error(struct nvmf_association *na, struct nvmf_tcp_qpair *qp, 193 uint16_t fes, uint32_t fei, const void *rx_pdu, size_t pdu_len, u_int hlen) 194 { 195 struct nvme_tcp_term_req_hdr hdr; 196 struct iovec iov[2]; 197 198 if (hlen != 0) { 199 if (hlen > NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) 200 hlen = NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 201 if (hlen > pdu_len) 202 hlen = pdu_len; 203 } 204 205 memset(&hdr, 0, sizeof(hdr)); 206 hdr.common.pdu_type = na->na_controller ? 207 NVME_TCP_PDU_TYPE_C2H_TERM_REQ : NVME_TCP_PDU_TYPE_H2C_TERM_REQ; 208 hdr.common.hlen = sizeof(hdr); 209 hdr.common.plen = sizeof(hdr) + hlen; 210 hdr.fes = htole16(fes); 211 le32enc(hdr.fei, fei); 212 iov[0].iov_base = &hdr; 213 iov[0].iov_len = sizeof(hdr); 214 iov[1].iov_base = __DECONST(void *, rx_pdu); 215 iov[1].iov_len = hlen; 216 217 (void)nvmf_tcp_write_pdu_iov(qp, iov, nitems(iov), sizeof(hdr) + hlen); 218 close(qp->s); 219 qp->s = -1; 220 } 221 222 static int 223 nvmf_tcp_validate_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu, 224 size_t pdu_len) 225 { 226 const struct nvme_tcp_common_pdu_hdr *ch; 227 uint32_t data_len, fei, plen; 228 uint32_t digest, rx_digest; 229 u_int hlen; 230 int error; 231 uint16_t fes; 232 233 /* Determine how large of a PDU header to return for errors. 
*/ 234 ch = pdu->hdr; 235 hlen = ch->hlen; 236 plen = le32toh(ch->plen); 237 if (hlen < sizeof(*ch) || hlen > plen) 238 hlen = sizeof(*ch); 239 240 error = nvmf_tcp_validate_pdu_header(ch, 241 qp->qp.nq_association->na_controller, qp->header_digests, 242 qp->data_digests, qp->rxpda, &data_len, &fes, &fei); 243 if (error != 0) { 244 if (error == ECONNRESET) { 245 close(qp->s); 246 qp->s = -1; 247 } else { 248 nvmf_tcp_report_error(qp->qp.nq_association, qp, 249 fes, fei, ch, pdu_len, hlen); 250 } 251 return (error); 252 } 253 254 /* Check header digest if present. */ 255 if ((ch->flags & NVME_TCP_CH_FLAGS_HDGSTF) != 0) { 256 digest = compute_digest(ch, ch->hlen); 257 memcpy(&rx_digest, (const char *)ch + ch->hlen, 258 sizeof(rx_digest)); 259 if (digest != rx_digest) { 260 printf("NVMe/TCP: Header digest mismatch\n"); 261 nvmf_tcp_report_error(qp->qp.nq_association, qp, 262 NVME_TCP_TERM_REQ_FES_HDGST_ERROR, rx_digest, ch, 263 pdu_len, hlen); 264 return (EBADMSG); 265 } 266 } 267 268 /* Check data digest if present. */ 269 if ((ch->flags & NVME_TCP_CH_FLAGS_DDGSTF) != 0) { 270 digest = compute_digest((const char *)ch + ch->pdo, data_len); 271 memcpy(&rx_digest, (const char *)ch + plen - sizeof(rx_digest), 272 sizeof(rx_digest)); 273 if (digest != rx_digest) { 274 printf("NVMe/TCP: Data digest mismatch\n"); 275 return (EBADMSG); 276 } 277 } 278 279 pdu->data_len = data_len; 280 return (0); 281 } 282 283 /* 284 * Read data from a socket, retrying until the data has been fully 285 * read or an error occurs. 
286 */ 287 static int 288 nvmf_tcp_read_buffer(int s, void *buf, size_t len) 289 { 290 ssize_t nread; 291 char *cp; 292 293 cp = buf; 294 while (len != 0) { 295 nread = read(s, cp, len); 296 if (nread < 0) 297 return (errno); 298 if (nread == 0) 299 return (ECONNRESET); 300 len -= nread; 301 cp += nread; 302 } 303 return (0); 304 } 305 306 static int 307 nvmf_tcp_read_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu) 308 { 309 struct nvme_tcp_common_pdu_hdr ch; 310 uint32_t plen; 311 int error; 312 313 memset(pdu, 0, sizeof(*pdu)); 314 error = nvmf_tcp_read_buffer(qp->s, &ch, sizeof(ch)); 315 if (error != 0) 316 return (error); 317 318 plen = le32toh(ch.plen); 319 320 /* 321 * Validate a header with garbage lengths to trigger 322 * an error message without reading more. 323 */ 324 if (plen < sizeof(ch) || ch.hlen > plen) { 325 pdu->hdr = &ch; 326 error = nvmf_tcp_validate_pdu(qp, pdu, sizeof(ch)); 327 pdu->hdr = NULL; 328 assert(error != 0); 329 return (error); 330 } 331 332 /* Read the rest of the PDU. 
*/ 333 pdu->hdr = malloc(plen); 334 memcpy(pdu->hdr, &ch, sizeof(ch)); 335 error = nvmf_tcp_read_buffer(qp->s, pdu->hdr + 1, plen - sizeof(ch)); 336 if (error != 0) 337 return (error); 338 error = nvmf_tcp_validate_pdu(qp, pdu, plen); 339 if (error != 0) { 340 free(pdu->hdr); 341 pdu->hdr = NULL; 342 } 343 return (error); 344 } 345 346 static void 347 nvmf_tcp_free_pdu(struct nvmf_tcp_rxpdu *pdu) 348 { 349 free(pdu->hdr); 350 pdu->hdr = NULL; 351 } 352 353 static int 354 nvmf_tcp_handle_term_req(struct nvmf_tcp_rxpdu *pdu) 355 { 356 struct nvme_tcp_term_req_hdr *hdr; 357 358 hdr = (void *)pdu->hdr; 359 360 printf("NVMe/TCP: Received termination request: fes %#x fei %#x\n", 361 le16toh(hdr->fes), le32dec(hdr->fei)); 362 nvmf_tcp_free_pdu(pdu); 363 return (ECONNRESET); 364 } 365 366 static int 367 nvmf_tcp_save_command_capsule(struct nvmf_tcp_qpair *qp, 368 struct nvmf_tcp_rxpdu *pdu) 369 { 370 struct nvme_tcp_cmd *cmd; 371 struct nvmf_capsule *nc; 372 struct nvmf_tcp_capsule *tc; 373 374 cmd = (void *)pdu->hdr; 375 376 nc = nvmf_allocate_command(&qp->qp, &cmd->ccsqe); 377 if (nc == NULL) 378 return (ENOMEM); 379 380 tc = TCAP(nc); 381 tc->rx_pdu = *pdu; 382 383 TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link); 384 return (0); 385 } 386 387 static int 388 nvmf_tcp_save_response_capsule(struct nvmf_tcp_qpair *qp, 389 struct nvmf_tcp_rxpdu *pdu) 390 { 391 struct nvme_tcp_rsp *rsp; 392 struct nvmf_capsule *nc; 393 struct nvmf_tcp_capsule *tc; 394 395 rsp = (void *)pdu->hdr; 396 397 nc = nvmf_allocate_response(&qp->qp, &rsp->rccqe); 398 if (nc == NULL) 399 return (ENOMEM); 400 401 nc->nc_sqhd_valid = true; 402 tc = TCAP(nc); 403 tc->rx_pdu = *pdu; 404 405 TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link); 406 407 /* 408 * Once the CQE has been received, no further transfers to the 409 * command buffer for the associated CID can occur. 
410 */ 411 tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, true); 412 tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, false); 413 414 return (0); 415 } 416 417 /* 418 * Construct and send a PDU that contains an optional data payload. 419 * This includes dealing with digests and the length fields in the 420 * common header. 421 */ 422 static int 423 nvmf_tcp_construct_pdu(struct nvmf_tcp_qpair *qp, void *hdr, size_t hlen, 424 void *data, uint32_t data_len) 425 { 426 struct nvme_tcp_common_pdu_hdr *ch; 427 struct iovec iov[5]; 428 u_int iovcnt; 429 uint32_t header_digest, data_digest, pad, pdo, plen; 430 431 plen = hlen; 432 if (qp->header_digests) 433 plen += sizeof(header_digest); 434 if (data_len != 0) { 435 pdo = roundup2(plen, qp->txpda); 436 pad = pdo - plen; 437 plen = pdo + data_len; 438 if (qp->data_digests) 439 plen += sizeof(data_digest); 440 } else { 441 assert(data == NULL); 442 pdo = 0; 443 pad = 0; 444 } 445 446 ch = hdr; 447 ch->hlen = hlen; 448 if (qp->header_digests) 449 ch->flags |= NVME_TCP_CH_FLAGS_HDGSTF; 450 if (qp->data_digests && data_len != 0) 451 ch->flags |= NVME_TCP_CH_FLAGS_DDGSTF; 452 ch->pdo = pdo; 453 ch->plen = htole32(plen); 454 455 /* CH + PSH */ 456 iov[0].iov_base = hdr; 457 iov[0].iov_len = hlen; 458 iovcnt = 1; 459 460 /* HDGST */ 461 if (qp->header_digests) { 462 header_digest = compute_digest(hdr, hlen); 463 iov[iovcnt].iov_base = &header_digest; 464 iov[iovcnt].iov_len = sizeof(header_digest); 465 iovcnt++; 466 } 467 468 if (pad != 0) { 469 /* PAD */ 470 iov[iovcnt].iov_base = __DECONST(char *, zero_padding); 471 iov[iovcnt].iov_len = pad; 472 iovcnt++; 473 } 474 475 if (data_len != 0) { 476 /* DATA */ 477 iov[iovcnt].iov_base = data; 478 iov[iovcnt].iov_len = data_len; 479 iovcnt++; 480 481 /* DDGST */ 482 if (qp->data_digests) { 483 data_digest = compute_digest(data, data_len); 484 iov[iovcnt].iov_base = &data_digest; 485 iov[iovcnt].iov_len = sizeof(data_digest); 486 iovcnt++; 487 } 488 } 489 490 return 
(nvmf_tcp_write_pdu_iov(qp, iov, iovcnt, plen)); 491 } 492 493 static int 494 nvmf_tcp_handle_h2c_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu) 495 { 496 struct nvme_tcp_h2c_data_hdr *h2c; 497 struct nvmf_tcp_command_buffer *cb; 498 uint32_t data_len, data_offset; 499 const char *icd; 500 501 h2c = (void *)pdu->hdr; 502 if (le32toh(h2c->datal) > qp->maxh2cdata) { 503 nvmf_tcp_report_error(qp->qp.nq_association, qp, 504 NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED, 0, 505 pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen); 506 nvmf_tcp_free_pdu(pdu); 507 return (EBADMSG); 508 } 509 510 cb = tcp_find_command_buffer(qp, h2c->cccid, h2c->ttag, true); 511 if (cb == NULL) { 512 nvmf_tcp_report_error(qp->qp.nq_association, qp, 513 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 514 offsetof(struct nvme_tcp_h2c_data_hdr, ttag), pdu->hdr, 515 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 516 nvmf_tcp_free_pdu(pdu); 517 return (EBADMSG); 518 } 519 520 data_len = le32toh(h2c->datal); 521 if (data_len != pdu->data_len) { 522 nvmf_tcp_report_error(qp->qp.nq_association, qp, 523 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 524 offsetof(struct nvme_tcp_h2c_data_hdr, datal), pdu->hdr, 525 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 526 nvmf_tcp_free_pdu(pdu); 527 return (EBADMSG); 528 } 529 530 data_offset = le32toh(h2c->datao); 531 if (data_offset < cb->data_offset || 532 data_offset + data_len > cb->data_offset + cb->data_len) { 533 nvmf_tcp_report_error(qp->qp.nq_association, qp, 534 NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0, 535 pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen); 536 nvmf_tcp_free_pdu(pdu); 537 return (EBADMSG); 538 } 539 540 if (data_offset != cb->data_offset + cb->data_xfered) { 541 nvmf_tcp_report_error(qp->qp.nq_association, qp, 542 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 543 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 544 nvmf_tcp_free_pdu(pdu); 545 return (EBADMSG); 546 } 547 548 if ((cb->data_xfered + data_len == 
cb->data_len) != 549 ((pdu->hdr->flags & NVME_TCP_H2C_DATA_FLAGS_LAST_PDU) != 0)) { 550 nvmf_tcp_report_error(qp->qp.nq_association, qp, 551 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 552 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 553 nvmf_tcp_free_pdu(pdu); 554 return (EBADMSG); 555 } 556 557 cb->data_xfered += data_len; 558 data_offset -= cb->data_offset; 559 icd = (const char *)pdu->hdr + pdu->hdr->pdo; 560 memcpy((char *)cb->data + data_offset, icd, data_len); 561 562 nvmf_tcp_free_pdu(pdu); 563 return (0); 564 } 565 566 static int 567 nvmf_tcp_handle_c2h_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu) 568 { 569 struct nvme_tcp_c2h_data_hdr *c2h; 570 struct nvmf_tcp_command_buffer *cb; 571 uint32_t data_len, data_offset; 572 const char *icd; 573 574 c2h = (void *)pdu->hdr; 575 576 cb = tcp_find_command_buffer(qp, c2h->cccid, 0, true); 577 if (cb == NULL) { 578 /* 579 * XXX: Could be PDU sequence error if cccid is for a 580 * command that doesn't use a command buffer. 
581 */ 582 nvmf_tcp_report_error(qp->qp.nq_association, qp, 583 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 584 offsetof(struct nvme_tcp_c2h_data_hdr, cccid), pdu->hdr, 585 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 586 nvmf_tcp_free_pdu(pdu); 587 return (EBADMSG); 588 } 589 590 data_len = le32toh(c2h->datal); 591 if (data_len != pdu->data_len) { 592 nvmf_tcp_report_error(qp->qp.nq_association, qp, 593 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 594 offsetof(struct nvme_tcp_c2h_data_hdr, datal), pdu->hdr, 595 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 596 nvmf_tcp_free_pdu(pdu); 597 return (EBADMSG); 598 } 599 600 data_offset = le32toh(c2h->datao); 601 if (data_offset < cb->data_offset || 602 data_offset + data_len > cb->data_offset + cb->data_len) { 603 nvmf_tcp_report_error(qp->qp.nq_association, qp, 604 NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0, 605 pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen); 606 nvmf_tcp_free_pdu(pdu); 607 return (EBADMSG); 608 } 609 610 if (data_offset != cb->data_offset + cb->data_xfered) { 611 nvmf_tcp_report_error(qp->qp.nq_association, qp, 612 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 613 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 614 nvmf_tcp_free_pdu(pdu); 615 return (EBADMSG); 616 } 617 618 if ((cb->data_xfered + data_len == cb->data_len) != 619 ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) != 0)) { 620 nvmf_tcp_report_error(qp->qp.nq_association, qp, 621 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 622 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 623 nvmf_tcp_free_pdu(pdu); 624 return (EBADMSG); 625 } 626 627 cb->data_xfered += data_len; 628 data_offset -= cb->data_offset; 629 icd = (const char *)pdu->hdr + pdu->hdr->pdo; 630 memcpy((char *)cb->data + data_offset, icd, data_len); 631 632 if ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_SUCCESS) != 0) { 633 struct nvme_completion cqe; 634 struct nvmf_tcp_capsule *tc; 635 struct nvmf_capsule *nc; 636 637 memset(&cqe, 0, sizeof(cqe)); 638 cqe.cid = 
cb->cid; 639 640 nc = nvmf_allocate_response(&qp->qp, &cqe); 641 if (nc == NULL) { 642 nvmf_tcp_free_pdu(pdu); 643 return (ENOMEM); 644 } 645 nc->nc_sqhd_valid = false; 646 647 tc = TCAP(nc); 648 TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link); 649 } 650 651 nvmf_tcp_free_pdu(pdu); 652 return (0); 653 } 654 655 /* NB: cid and ttag and little-endian already. */ 656 static int 657 tcp_send_h2c_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 658 uint32_t data_offset, void *buf, size_t len, bool last_pdu) 659 { 660 struct nvme_tcp_h2c_data_hdr h2c; 661 662 memset(&h2c, 0, sizeof(h2c)); 663 h2c.common.pdu_type = NVME_TCP_PDU_TYPE_H2C_DATA; 664 if (last_pdu) 665 h2c.common.flags |= NVME_TCP_H2C_DATA_FLAGS_LAST_PDU; 666 h2c.cccid = cid; 667 h2c.ttag = ttag; 668 h2c.datao = htole32(data_offset); 669 h2c.datal = htole32(len); 670 671 return (nvmf_tcp_construct_pdu(qp, &h2c, sizeof(h2c), buf, len)); 672 } 673 674 /* Sends one or more H2C_DATA PDUs, subject to MAXH2CDATA. */ 675 static int 676 tcp_send_h2c_pdus(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 677 uint32_t data_offset, void *buf, size_t len, bool last_pdu) 678 { 679 char *p; 680 681 p = buf; 682 while (len != 0) { 683 size_t todo; 684 int error; 685 686 todo = len; 687 if (todo > qp->maxh2cdata) 688 todo = qp->maxh2cdata; 689 error = tcp_send_h2c_pdu(qp, cid, ttag, data_offset, p, todo, 690 last_pdu && todo == len); 691 if (error != 0) 692 return (error); 693 p += todo; 694 len -= todo; 695 } 696 return (0); 697 } 698 699 static int 700 nvmf_tcp_handle_r2t(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu) 701 { 702 struct nvmf_tcp_command_buffer *cb; 703 struct nvme_tcp_r2t_hdr *r2t; 704 uint32_t data_len, data_offset; 705 int error; 706 707 r2t = (void *)pdu->hdr; 708 709 cb = tcp_find_command_buffer(qp, r2t->cccid, 0, false); 710 if (cb == NULL) { 711 nvmf_tcp_report_error(qp->qp.nq_association, qp, 712 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 713 offsetof(struct nvme_tcp_r2t_hdr, 
cccid), pdu->hdr, 714 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 715 nvmf_tcp_free_pdu(pdu); 716 return (EBADMSG); 717 } 718 719 data_offset = le32toh(r2t->r2to); 720 if (data_offset != cb->data_xfered) { 721 nvmf_tcp_report_error(qp->qp.nq_association, qp, 722 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 723 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 724 nvmf_tcp_free_pdu(pdu); 725 return (EBADMSG); 726 } 727 728 /* 729 * XXX: The spec does not specify how to handle R2T tranfers 730 * out of range of the original command. 731 */ 732 data_len = le32toh(r2t->r2tl); 733 if (data_offset + data_len > cb->data_len) { 734 nvmf_tcp_report_error(qp->qp.nq_association, qp, 735 NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0, 736 pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen); 737 nvmf_tcp_free_pdu(pdu); 738 return (EBADMSG); 739 } 740 741 cb->data_xfered += data_len; 742 743 /* 744 * Write out one or more H2C_DATA PDUs containing the 745 * requested data. 746 */ 747 error = tcp_send_h2c_pdus(qp, r2t->cccid, r2t->ttag, 748 data_offset, (char *)cb->data + data_offset, data_len, true); 749 750 nvmf_tcp_free_pdu(pdu); 751 return (error); 752 } 753 754 static int 755 nvmf_tcp_receive_pdu(struct nvmf_tcp_qpair *qp) 756 { 757 struct nvmf_tcp_rxpdu pdu; 758 int error; 759 760 error = nvmf_tcp_read_pdu(qp, &pdu); 761 if (error != 0) 762 return (error); 763 764 switch (pdu.hdr->pdu_type) { 765 default: 766 __unreachable(); 767 break; 768 case NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 769 case NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 770 return (nvmf_tcp_handle_term_req(&pdu)); 771 case NVME_TCP_PDU_TYPE_CAPSULE_CMD: 772 return (nvmf_tcp_save_command_capsule(qp, &pdu)); 773 case NVME_TCP_PDU_TYPE_CAPSULE_RESP: 774 return (nvmf_tcp_save_response_capsule(qp, &pdu)); 775 case NVME_TCP_PDU_TYPE_H2C_DATA: 776 return (nvmf_tcp_handle_h2c_data(qp, &pdu)); 777 case NVME_TCP_PDU_TYPE_C2H_DATA: 778 return (nvmf_tcp_handle_c2h_data(qp, &pdu)); 779 case NVME_TCP_PDU_TYPE_R2T: 780 return 
(nvmf_tcp_handle_r2t(qp, &pdu)); 781 } 782 } 783 784 static bool 785 nvmf_tcp_validate_ic_pdu(struct nvmf_association *na, struct nvmf_tcp_qpair *qp, 786 const struct nvme_tcp_common_pdu_hdr *ch, size_t pdu_len) 787 { 788 const struct nvme_tcp_ic_req *pdu; 789 uint32_t plen; 790 u_int hlen; 791 792 /* Determine how large of a PDU header to return for errors. */ 793 hlen = ch->hlen; 794 plen = le32toh(ch->plen); 795 if (hlen < sizeof(*ch) || hlen > plen) 796 hlen = sizeof(*ch); 797 798 /* 799 * Errors must be reported for the lowest incorrect field 800 * first, so validate fields in order. 801 */ 802 803 /* Validate pdu_type. */ 804 805 /* Controllers only receive PDUs with a PDU direction of 0. */ 806 if (na->na_controller != ((ch->pdu_type & 0x01) == 0)) { 807 na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type); 808 nvmf_tcp_report_error(na, qp, 809 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len, 810 hlen); 811 return (false); 812 } 813 814 switch (ch->pdu_type) { 815 case NVME_TCP_PDU_TYPE_IC_REQ: 816 case NVME_TCP_PDU_TYPE_IC_RESP: 817 break; 818 default: 819 na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type); 820 nvmf_tcp_report_error(na, qp, 821 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len, 822 hlen); 823 return (false); 824 } 825 826 /* Validate flags. */ 827 if (ch->flags != 0) { 828 na_error(na, "NVMe/TCP: Invalid PDU header flags %#x", 829 ch->flags); 830 nvmf_tcp_report_error(na, qp, 831 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 1, ch, pdu_len, 832 hlen); 833 return (false); 834 } 835 836 /* Validate hlen. */ 837 if (ch->hlen != 128) { 838 na_error(na, "NVMe/TCP: Invalid PDU header length %u", 839 ch->hlen); 840 nvmf_tcp_report_error(na, qp, 841 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 2, ch, pdu_len, 842 hlen); 843 return (false); 844 } 845 846 /* Validate pdo. 
*/ 847 if (ch->pdo != 0) { 848 na_error(na, "NVMe/TCP: Invalid PDU data offset %u", ch->pdo); 849 nvmf_tcp_report_error(na, qp, 850 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 3, ch, pdu_len, 851 hlen); 852 return (false); 853 } 854 855 /* Validate plen. */ 856 if (plen != 128) { 857 na_error(na, "NVMe/TCP: Invalid PDU length %u", plen); 858 nvmf_tcp_report_error(na, qp, 859 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 4, ch, pdu_len, 860 hlen); 861 return (false); 862 } 863 864 /* Validate fields common to both ICReq and ICResp. */ 865 pdu = (const struct nvme_tcp_ic_req *)ch; 866 if (le16toh(pdu->pfv) != 0) { 867 na_error(na, "NVMe/TCP: Unsupported PDU version %u", 868 le16toh(pdu->pfv)); 869 nvmf_tcp_report_error(na, qp, 870 NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER, 871 8, ch, pdu_len, hlen); 872 return (false); 873 } 874 875 if (pdu->hpda > NVME_TCP_HPDA_MAX) { 876 na_error(na, "NVMe/TCP: Unsupported PDA %u", pdu->hpda); 877 nvmf_tcp_report_error(na, qp, 878 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 10, ch, pdu_len, 879 hlen); 880 return (false); 881 } 882 883 if (pdu->dgst.bits.reserved != 0) { 884 na_error(na, "NVMe/TCP: Invalid digest settings"); 885 nvmf_tcp_report_error(na, qp, 886 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 11, ch, pdu_len, 887 hlen); 888 return (false); 889 } 890 891 return (true); 892 } 893 894 static bool 895 nvmf_tcp_read_ic_req(struct nvmf_association *na, struct nvmf_tcp_qpair *qp, 896 struct nvme_tcp_ic_req *pdu) 897 { 898 int error; 899 900 error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu)); 901 if (error != 0) { 902 na_error(na, "NVMe/TCP: Failed to read IC request: %s", 903 strerror(error)); 904 return (false); 905 } 906 907 return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu))); 908 } 909 910 static bool 911 nvmf_tcp_read_ic_resp(struct nvmf_association *na, struct nvmf_tcp_qpair *qp, 912 struct nvme_tcp_ic_resp *pdu) 913 { 914 int error; 915 916 error = nvmf_tcp_read_buffer(qp->s, pdu, 
sizeof(*pdu)); 917 if (error != 0) { 918 na_error(na, "NVMe/TCP: Failed to read IC response: %s", 919 strerror(error)); 920 return (false); 921 } 922 923 return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu))); 924 } 925 926 static struct nvmf_association * 927 tcp_allocate_association(bool controller __unused, 928 const struct nvmf_association_params *params __unused) 929 { 930 struct nvmf_tcp_association *ta; 931 932 ta = calloc(1, sizeof(*ta)); 933 934 return (&ta->na); 935 } 936 937 static void 938 tcp_update_association(struct nvmf_association *na, 939 const struct nvme_controller_data *cdata) 940 { 941 struct nvmf_tcp_association *ta = TASSOC(na); 942 943 ta->ioccsz = le32toh(cdata->ioccsz); 944 } 945 946 static void 947 tcp_free_association(struct nvmf_association *na) 948 { 949 free(na); 950 } 951 952 static bool 953 tcp_connect(struct nvmf_tcp_qpair *qp, struct nvmf_association *na, bool admin) 954 { 955 const struct nvmf_association_params *params = &na->na_params; 956 struct nvmf_tcp_association *ta = TASSOC(na); 957 struct nvme_tcp_ic_req ic_req; 958 struct nvme_tcp_ic_resp ic_resp; 959 uint32_t maxh2cdata; 960 int error; 961 962 if (!admin) { 963 if (ta->ioccsz == 0) { 964 na_error(na, "TCP I/O queues require cdata"); 965 return (false); 966 } 967 if (ta->ioccsz < 4) { 968 na_error(na, "Invalid IOCCSZ %u", ta->ioccsz); 969 return (false); 970 } 971 } 972 973 memset(&ic_req, 0, sizeof(ic_req)); 974 ic_req.common.pdu_type = NVME_TCP_PDU_TYPE_IC_REQ; 975 ic_req.common.hlen = sizeof(ic_req); 976 ic_req.common.plen = htole32(sizeof(ic_req)); 977 ic_req.pfv = htole16(0); 978 ic_req.hpda = params->tcp.pda; 979 if (params->tcp.header_digests) 980 ic_req.dgst.bits.hdgst_enable = 1; 981 if (params->tcp.data_digests) 982 ic_req.dgst.bits.ddgst_enable = 1; 983 ic_req.maxr2t = htole32(params->tcp.maxr2t); 984 985 error = nvmf_tcp_write_pdu(qp, &ic_req, sizeof(ic_req)); 986 if (error != 0) { 987 na_error(na, "Failed to write IC request: %s", 
strerror(error)); 988 return (false); 989 } 990 991 if (!nvmf_tcp_read_ic_resp(na, qp, &ic_resp)) 992 return (false); 993 994 /* Ensure the controller didn't enable digests we didn't request. */ 995 if ((!params->tcp.header_digests && 996 ic_resp.dgst.bits.hdgst_enable != 0) || 997 (!params->tcp.data_digests && 998 ic_resp.dgst.bits.ddgst_enable != 0)) { 999 na_error(na, "Controller enabled unrequested digests"); 1000 nvmf_tcp_report_error(na, qp, 1001 NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER, 1002 11, &ic_resp, sizeof(ic_resp), sizeof(ic_resp)); 1003 return (false); 1004 } 1005 1006 /* 1007 * XXX: Is there an upper-bound to enforce here? Perhaps pick 1008 * some large value and report larger values as an unsupported 1009 * parameter? 1010 */ 1011 maxh2cdata = le32toh(ic_resp.maxh2cdata); 1012 if (maxh2cdata < 4096 || maxh2cdata % 4 != 0) { 1013 na_error(na, "Invalid MAXH2CDATA %u", maxh2cdata); 1014 nvmf_tcp_report_error(na, qp, 1015 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 12, &ic_resp, 1016 sizeof(ic_resp), sizeof(ic_resp)); 1017 return (false); 1018 } 1019 1020 qp->txpda = (params->tcp.pda + 1) * 4; 1021 qp->rxpda = (ic_resp.cpda + 1) * 4; 1022 qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0; 1023 qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0; 1024 qp->maxr2t = params->tcp.maxr2t; 1025 qp->maxh2cdata = maxh2cdata; 1026 if (admin) 1027 /* 7.4.3 */ 1028 qp->max_icd = 8192; 1029 else 1030 qp->max_icd = (ta->ioccsz - 4) * 16; 1031 1032 return (0); 1033 } 1034 1035 static bool 1036 tcp_accept(struct nvmf_tcp_qpair *qp, struct nvmf_association *na) 1037 { 1038 const struct nvmf_association_params *params = &na->na_params; 1039 struct nvme_tcp_ic_req ic_req; 1040 struct nvme_tcp_ic_resp ic_resp; 1041 int error; 1042 1043 if (!nvmf_tcp_read_ic_req(na, qp, &ic_req)) 1044 return (false); 1045 1046 memset(&ic_resp, 0, sizeof(ic_resp)); 1047 ic_resp.common.pdu_type = NVME_TCP_PDU_TYPE_IC_RESP; 1048 ic_resp.common.hlen = sizeof(ic_req); 
1049 ic_resp.common.plen = htole32(sizeof(ic_req)); 1050 ic_resp.pfv = htole16(0); 1051 ic_resp.cpda = params->tcp.pda; 1052 if (params->tcp.header_digests && ic_req.dgst.bits.hdgst_enable != 0) 1053 ic_resp.dgst.bits.hdgst_enable = 1; 1054 if (params->tcp.data_digests && ic_req.dgst.bits.ddgst_enable != 0) 1055 ic_resp.dgst.bits.ddgst_enable = 1; 1056 ic_resp.maxh2cdata = htole32(params->tcp.maxh2cdata); 1057 1058 error = nvmf_tcp_write_pdu(qp, &ic_resp, sizeof(ic_resp)); 1059 if (error != 0) { 1060 na_error(na, "Failed to write IC response: %s", 1061 strerror(error)); 1062 return (false); 1063 } 1064 1065 qp->txpda = (params->tcp.pda + 1) * 4; 1066 qp->rxpda = (ic_req.hpda + 1) * 4; 1067 qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0; 1068 qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0; 1069 qp->maxr2t = le32toh(ic_req.maxr2t); 1070 qp->maxh2cdata = params->tcp.maxh2cdata; 1071 qp->max_icd = 0; /* XXX */ 1072 return (0); 1073 } 1074 1075 static struct nvmf_qpair * 1076 tcp_allocate_qpair(struct nvmf_association *na, 1077 const struct nvmf_qpair_params *qparams) 1078 { 1079 const struct nvmf_association_params *aparams = &na->na_params; 1080 struct nvmf_tcp_qpair *qp; 1081 int error; 1082 1083 if (aparams->tcp.pda > NVME_TCP_CPDA_MAX) { 1084 na_error(na, "Invalid PDA"); 1085 return (NULL); 1086 } 1087 1088 qp = calloc(1, sizeof(*qp)); 1089 qp->s = qparams->tcp.fd; 1090 LIST_INIT(&qp->rx_buffers); 1091 LIST_INIT(&qp->tx_buffers); 1092 TAILQ_INIT(&qp->rx_capsules); 1093 if (na->na_controller) 1094 error = tcp_accept(qp, na); 1095 else 1096 error = tcp_connect(qp, na, qparams->admin); 1097 if (error != 0) { 1098 free(qp); 1099 return (NULL); 1100 } 1101 1102 return (&qp->qp); 1103 } 1104 1105 static void 1106 tcp_free_qpair(struct nvmf_qpair *nq) 1107 { 1108 struct nvmf_tcp_qpair *qp = TQP(nq); 1109 struct nvmf_tcp_capsule *ntc, *tc; 1110 struct nvmf_tcp_command_buffer *ncb, *cb; 1111 1112 TAILQ_FOREACH_SAFE(tc, &qp->rx_capsules, link, ntc) { 1113 
TAILQ_REMOVE(&qp->rx_capsules, tc, link); 1114 nvmf_free_capsule(&tc->nc); 1115 } 1116 LIST_FOREACH_SAFE(cb, &qp->rx_buffers, link, ncb) { 1117 tcp_free_command_buffer(cb); 1118 } 1119 LIST_FOREACH_SAFE(cb, &qp->tx_buffers, link, ncb) { 1120 tcp_free_command_buffer(cb); 1121 } 1122 free(qp); 1123 } 1124 1125 static int 1126 tcp_kernel_handoff_params(struct nvmf_qpair *nq, 1127 struct nvmf_handoff_qpair_params *qparams) 1128 { 1129 struct nvmf_tcp_qpair *qp = TQP(nq); 1130 1131 qparams->tcp.fd = qp->s; 1132 qparams->tcp.rxpda = qp->rxpda; 1133 qparams->tcp.txpda = qp->txpda; 1134 qparams->tcp.header_digests = qp->header_digests; 1135 qparams->tcp.data_digests = qp->data_digests; 1136 qparams->tcp.maxr2t = qp->maxr2t; 1137 qparams->tcp.maxh2cdata = qp->maxh2cdata; 1138 qparams->tcp.max_icd = qp->max_icd; 1139 1140 return (0); 1141 } 1142 1143 static struct nvmf_capsule * 1144 tcp_allocate_capsule(struct nvmf_qpair *qp __unused) 1145 { 1146 struct nvmf_tcp_capsule *nc; 1147 1148 nc = calloc(1, sizeof(*nc)); 1149 return (&nc->nc); 1150 } 1151 1152 static void 1153 tcp_free_capsule(struct nvmf_capsule *nc) 1154 { 1155 struct nvmf_tcp_capsule *tc = TCAP(nc); 1156 1157 nvmf_tcp_free_pdu(&tc->rx_pdu); 1158 if (tc->cb != NULL) 1159 tcp_free_command_buffer(tc->cb); 1160 free(tc); 1161 } 1162 1163 static int 1164 tcp_transmit_command(struct nvmf_capsule *nc) 1165 { 1166 struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair); 1167 struct nvmf_tcp_capsule *tc = TCAP(nc); 1168 struct nvme_tcp_cmd cmd; 1169 struct nvme_sgl_descriptor *sgl; 1170 int error; 1171 bool use_icd; 1172 1173 use_icd = false; 1174 if (nc->nc_data_len != 0 && nc->nc_send_data && 1175 nc->nc_data_len <= qp->max_icd) 1176 use_icd = true; 1177 1178 memset(&cmd, 0, sizeof(cmd)); 1179 cmd.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_CMD; 1180 cmd.ccsqe = nc->nc_sqe; 1181 1182 /* Populate SGL in SQE. 
*/ 1183 sgl = &cmd.ccsqe.sgl; 1184 memset(sgl, 0, sizeof(*sgl)); 1185 sgl->address = 0; 1186 sgl->length = htole32(nc->nc_data_len); 1187 if (use_icd) { 1188 /* Use in-capsule data. */ 1189 sgl->type = NVME_SGL_TYPE_ICD; 1190 } else { 1191 /* Use a command buffer. */ 1192 sgl->type = NVME_SGL_TYPE_COMMAND_BUFFER; 1193 } 1194 1195 /* Send command capsule. */ 1196 error = nvmf_tcp_construct_pdu(qp, &cmd, sizeof(cmd), use_icd ? 1197 nc->nc_data : NULL, use_icd ? nc->nc_data_len : 0); 1198 if (error != 0) 1199 return (error); 1200 1201 /* 1202 * If data will be transferred using a command buffer, allocate a 1203 * buffer structure and queue it. 1204 */ 1205 if (nc->nc_data_len != 0 && !use_icd) 1206 tc->cb = tcp_alloc_command_buffer(qp, nc->nc_data, 0, 1207 nc->nc_data_len, cmd.ccsqe.cid, 0, !nc->nc_send_data); 1208 1209 return (0); 1210 } 1211 1212 static int 1213 tcp_transmit_response(struct nvmf_capsule *nc) 1214 { 1215 struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair); 1216 struct nvme_tcp_rsp rsp; 1217 1218 memset(&rsp, 0, sizeof(rsp)); 1219 rsp.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_RESP; 1220 rsp.rccqe = nc->nc_cqe; 1221 1222 return (nvmf_tcp_construct_pdu(qp, &rsp, sizeof(rsp), NULL, 0)); 1223 } 1224 1225 static int 1226 tcp_transmit_capsule(struct nvmf_capsule *nc) 1227 { 1228 if (nc->nc_qe_len == sizeof(struct nvme_command)) 1229 return (tcp_transmit_command(nc)); 1230 else 1231 return (tcp_transmit_response(nc)); 1232 } 1233 1234 static int 1235 tcp_receive_capsule(struct nvmf_qpair *nq, struct nvmf_capsule **ncp) 1236 { 1237 struct nvmf_tcp_qpair *qp = TQP(nq); 1238 struct nvmf_tcp_capsule *tc; 1239 int error; 1240 1241 while (TAILQ_EMPTY(&qp->rx_capsules)) { 1242 error = nvmf_tcp_receive_pdu(qp); 1243 if (error != 0) 1244 return (error); 1245 } 1246 tc = TAILQ_FIRST(&qp->rx_capsules); 1247 TAILQ_REMOVE(&qp->rx_capsules, tc, link); 1248 *ncp = &tc->nc; 1249 return (0); 1250 } 1251 1252 static uint8_t 1253 tcp_validate_command_capsule(const struct 
nvmf_capsule *nc)
{
	const struct nvmf_tcp_capsule *tc = CTCAP(nc);
	const struct nvme_sgl_descriptor *sgl;

	/* A received command capsule always has its PDU attached. */
	assert(tc->rx_pdu.hdr != NULL);

	/*
	 * Cross-check the SQE's SGL descriptor against the data actually
	 * received with the PDU and return an NVMe status code.
	 */
	sgl = &nc->nc_sqe.sgl;
	switch (sgl->type) {
	case NVME_SGL_TYPE_ICD:
		/* In-capsule data must match the advertised SGL length. */
		if (tc->rx_pdu.data_len != le32toh(sgl->length)) {
			printf("NVMe/TCP: Command Capsule with mismatched ICD length\n");
			return (NVME_SC_DATA_SGL_LENGTH_INVALID);
		}
		break;
	case NVME_SGL_TYPE_COMMAND_BUFFER:
		/* Command-buffer transfers must carry no in-capsule data. */
		if (tc->rx_pdu.data_len != 0) {
			printf("NVMe/TCP: Command Buffer SGL with ICD\n");
			return (NVME_SC_INVALID_FIELD);
		}
		break;
	default:
		printf("NVMe/TCP: Invalid SGL type in Command Capsule\n");
		return (NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID);
	}

	/* Only offset 0 is supported for either SGL type. */
	if (sgl->address != 0) {
		printf("NVMe/TCP: Invalid SGL offset in Command Capsule\n");
		return (NVME_SC_SGL_OFFSET_INVALID);
	}

	return (NVME_SC_SUCCESS);
}

/* Length of the data transfer described by a command capsule's SGL. */
static size_t
tcp_capsule_data_len(const struct nvmf_capsule *nc)
{
	assert(nc->nc_qe_len == sizeof(struct nvme_command));
	return (le32toh(nc->nc_sqe.sgl.length));
}

/* NB: cid and ttag are both little-endian already.
 */
static int
tcp_send_r2t(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    uint32_t data_offset, uint32_t data_len)
{
	struct nvme_tcp_r2t_hdr r2t;

	/* Build a Ready-To-Transfer PDU asking the host for more data. */
	memset(&r2t, 0, sizeof(r2t));
	r2t.common.pdu_type = NVME_TCP_PDU_TYPE_R2T;
	r2t.cccid = cid;
	r2t.ttag = ttag;
	r2t.r2to = htole32(data_offset);
	r2t.r2tl = htole32(data_len);

	return (nvmf_tcp_construct_pdu(qp, &r2t, sizeof(r2t), NULL, 0));
}

/*
 * Controller-side receive of command-buffer data: send an R2T for the
 * requested range and then consume PDUs into 'buf' until the transfer
 * completes or a receive error occurs.
 */
static int
tcp_receive_r2t_data(const struct nvmf_capsule *nc, uint32_t data_offset,
    void *buf, size_t len)
{
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	struct nvmf_tcp_command_buffer *cb;
	int error;
	uint16_t ttag;

	/*
	 * Don't bother byte-swapping ttag as it is just a cookie
	 * value returned by the other end as-is.
	 */
	ttag = qp->next_ttag++;

	error = tcp_send_r2t(qp, nc->nc_sqe.cid, ttag, data_offset, len);
	if (error != 0)
		return (error);

	/* Queue a command buffer to receive the incoming data. */
	cb = tcp_alloc_command_buffer(qp, buf, data_offset, len,
	    nc->nc_sqe.cid, ttag, true);

	/* Parse received PDUs until the data transfer is complete.
*/ 1334 while (cb->data_xfered < cb->data_len) { 1335 error = nvmf_tcp_receive_pdu(qp); 1336 if (error != 0) 1337 break; 1338 } 1339 tcp_free_command_buffer(cb); 1340 return (error); 1341 } 1342 1343 static int 1344 tcp_receive_icd_data(const struct nvmf_capsule *nc, uint32_t data_offset, 1345 void *buf, size_t len) 1346 { 1347 const struct nvmf_tcp_capsule *tc = CTCAP(nc); 1348 const char *icd; 1349 1350 icd = (const char *)tc->rx_pdu.hdr + tc->rx_pdu.hdr->pdo + data_offset; 1351 memcpy(buf, icd, len); 1352 return (0); 1353 } 1354 1355 static int 1356 tcp_receive_controller_data(const struct nvmf_capsule *nc, uint32_t data_offset, 1357 void *buf, size_t len) 1358 { 1359 struct nvmf_association *na = nc->nc_qpair->nq_association; 1360 const struct nvme_sgl_descriptor *sgl; 1361 size_t data_len; 1362 1363 if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller) 1364 return (EINVAL); 1365 1366 sgl = &nc->nc_sqe.sgl; 1367 data_len = le32toh(sgl->length); 1368 if (data_offset + len > data_len) 1369 return (EFBIG); 1370 1371 if (sgl->type == NVME_SGL_TYPE_ICD) 1372 return (tcp_receive_icd_data(nc, data_offset, buf, len)); 1373 else 1374 return (tcp_receive_r2t_data(nc, data_offset, buf, len)); 1375 } 1376 1377 /* NB: cid is little-endian already. 
 */
static int
tcp_send_c2h_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid,
    uint32_t data_offset, const void *buf, size_t len, bool last_pdu,
    bool success)
{
	struct nvme_tcp_c2h_data_hdr c2h;

	/* Build one Controller-to-Host data PDU for [offset, offset + len). */
	memset(&c2h, 0, sizeof(c2h));
	c2h.common.pdu_type = NVME_TCP_PDU_TYPE_C2H_DATA;
	if (last_pdu)
		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
	if (success)
		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
	c2h.cccid = cid;
	c2h.datao = htole32(data_offset);
	c2h.datal = htole32(len);

	/* __DECONST: construct_pdu takes a void * but only reads the data. */
	return (nvmf_tcp_construct_pdu(qp, &c2h, sizeof(c2h),
	    __DECONST(void *, buf), len));
}

/*
 * Controller-side transmit of command data back to the host as one or
 * more C2H_DATA PDUs.  'len' must exactly match the transfer length in
 * the command's SGL; on failure an error completion is sent.
 */
static int
tcp_send_controller_data(const struct nvmf_capsule *nc, const void *buf,
    size_t len)
{
	struct nvmf_association *na = nc->nc_qpair->nq_association;
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	const struct nvme_sgl_descriptor *sgl;
	const char *src;
	size_t todo;
	uint32_t data_len, data_offset;
	int error;
	bool last_pdu, send_success_flag;

	if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller)
		return (EINVAL);

	sgl = &nc->nc_sqe.sgl;
	data_len = le32toh(sgl->length);
	/* Partial transfers are not supported; report the error to the host. */
	if (len != data_len) {
		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
		return (EFBIG);
	}

	/* C2H data can only satisfy a command-buffer SGL. */
	if (sgl->type != NVME_SGL_TYPE_COMMAND_BUFFER) {
		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
		return (EINVAL);
	}

	/* Use the SUCCESS flag if SQ flow control is disabled. */
	send_success_flag = !qp->qp.nq_flow_control;

	/*
	 * Write out one or more C2H_DATA PDUs containing the data.
	 * Each PDU is arbitrarily capped at 256k.
	 */
	data_offset = 0;
	src = buf;
	while (len > 0) {
		/* Cap each PDU at 256k; the final PDU gets the LAST flag. */
		if (len > 256 * 1024) {
			todo = 256 * 1024;
			last_pdu = false;
		} else {
			todo = len;
			last_pdu = true;
		}
		/* The SUCCESS flag may only be set on the last PDU. */
		error = tcp_send_c2h_pdu(qp, nc->nc_sqe.cid, data_offset,
		    src, todo, last_pdu, last_pdu && send_success_flag);
		if (error != 0) {
			nvmf_send_generic_error(nc,
			    NVME_SC_TRANSIENT_TRANSPORT_ERROR);
			return (error);
		}
		data_offset += todo;
		src += todo;
		len -= todo;
	}
	/*
	 * Without the SUCCESS flag on the final data PDU, a separate
	 * response capsule must complete the command.
	 */
	if (!send_success_flag)
		nvmf_send_success(nc);
	return (0);
}

/* NVMe/TCP transport method table registered with the libnvmf core. */
struct nvmf_transport_ops tcp_ops = {
	.allocate_association = tcp_allocate_association,
	.update_association = tcp_update_association,
	.free_association = tcp_free_association,
	.allocate_qpair = tcp_allocate_qpair,
	.free_qpair = tcp_free_qpair,
	.kernel_handoff_params = tcp_kernel_handoff_params,
	.allocate_capsule = tcp_allocate_capsule,
	.free_capsule = tcp_free_capsule,
	.transmit_capsule = tcp_transmit_capsule,
	.receive_capsule = tcp_receive_capsule,
	.validate_command_capsule = tcp_validate_command_capsule,
	.capsule_data_len = tcp_capsule_data_len,
	.receive_controller_data = tcp_receive_controller_data,
	.send_controller_data = tcp_send_controller_data,
};