1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 /* 33 * Software implementation of iSCSI Common Layer kobj(9) interface. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/param.h> 40 #include <sys/capsicum.h> 41 #include <sys/condvar.h> 42 #include <sys/conf.h> 43 #include <sys/gsb_crc32.h> 44 #include <sys/file.h> 45 #include <sys/kernel.h> 46 #include <sys/kthread.h> 47 #include <sys/lock.h> 48 #include <sys/mbuf.h> 49 #include <sys/mutex.h> 50 #include <sys/module.h> 51 #include <sys/protosw.h> 52 #include <sys/socket.h> 53 #include <sys/socketvar.h> 54 #include <sys/sysctl.h> 55 #include <sys/systm.h> 56 #include <sys/sx.h> 57 #include <sys/uio.h> 58 #include <vm/uma.h> 59 #include <netinet/in.h> 60 #include <netinet/tcp.h> 61 62 #include <dev/iscsi/icl.h> 63 #include <dev/iscsi/iscsi_proto.h> 64 #include <icl_conn_if.h> 65 66 struct icl_soft_pdu { 67 struct icl_pdu ip; 68 69 /* soft specific stuff goes here. */ 70 u_int ref_cnt; 71 icl_pdu_cb cb; 72 int error; 73 }; 74 75 static int coalesce = 1; 76 SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN, 77 &coalesce, 0, "Try to coalesce PDUs before sending"); 78 static int partial_receive_len = 128 * 1024; 79 SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, 80 &partial_receive_len, 0, "Minimum read size for partially received " 81 "data segment"); 82 static int sendspace = 1048576; 83 SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN, 84 &sendspace, 0, "Default send socket buffer size"); 85 static int recvspace = 1048576; 86 SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN, 87 &recvspace, 0, "Default receive socket buffer size"); 88 89 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend"); 90 static uma_zone_t icl_soft_pdu_zone; 91 92 static volatile u_int icl_ncons; 93 94 #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) 95 #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) 96 #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) 97 #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) 98 99 STAILQ_HEAD(icl_pdu_stailq, icl_pdu); 100 101 static icl_conn_new_pdu_t icl_soft_conn_new_pdu; 102 static icl_conn_pdu_free_t icl_soft_conn_pdu_free; 103 static icl_conn_pdu_data_segment_length_t 104 icl_soft_conn_pdu_data_segment_length; 105 static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data; 106 static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data; 107 static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue; 108 static icl_conn_pdu_queue_cb_t icl_soft_conn_pdu_queue_cb; 109 static icl_conn_handoff_t icl_soft_conn_handoff; 110 static icl_conn_free_t icl_soft_conn_free; 111 static icl_conn_close_t icl_soft_conn_close; 112 static icl_conn_task_setup_t icl_soft_conn_task_setup; 113 static icl_conn_task_done_t icl_soft_conn_task_done; 114 static icl_conn_transfer_setup_t icl_soft_conn_transfer_setup; 115 static icl_conn_transfer_done_t icl_soft_conn_transfer_done; 116 #ifdef ICL_KERNEL_PROXY 117 static icl_conn_connect_t icl_soft_conn_connect; 118 #endif 119 120 static kobj_method_t icl_soft_methods[] = { 121 KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu), 122 KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free), 123 KOBJMETHOD(icl_conn_pdu_data_segment_length, 124 icl_soft_conn_pdu_data_segment_length), 125 KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data), 126 KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data), 127 KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue), 128 KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb), 129 KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff), 130 KOBJMETHOD(icl_conn_free, icl_soft_conn_free), 131 KOBJMETHOD(icl_conn_close, icl_soft_conn_close), 132 KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup), 133 KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done), 134 KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup), 135 KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done), 136 #ifdef ICL_KERNEL_PROXY 137 KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect), 138 #endif 139 { 0, 0 } 140 }; 141 142 DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn)); 143 144 static void 145 icl_conn_fail(struct icl_conn *ic) 146 { 147 if (ic->ic_socket == NULL) 148 return; 149 150 /* 151 * XXX 152 */ 153 ic->ic_socket->so_error = EDOOFUS; 154 (ic->ic_error)(ic); 155 } 156 157 static struct mbuf * 158 icl_conn_receive(struct icl_conn *ic, size_t len) 159 { 160 struct uio uio; 161 struct socket *so; 162 struct mbuf *m; 163 int error, flags; 164 165 so = ic->ic_socket; 166 167 memset(&uio, 0, sizeof(uio)); 168 uio.uio_resid = len; 169 170 flags = MSG_DONTWAIT; 171 error = soreceive(so, NULL, &uio, &m, NULL, &flags); 172 if (error != 0) { 173 ICL_DEBUG("soreceive error %d", error); 174 return (NULL); 175 } 176 if (uio.uio_resid != 0) { 177 m_freem(m); 178 ICL_DEBUG("short read"); 179 return (NULL); 180 } 181 182 return (m); 183 } 184 185 static int 186 icl_conn_receive_buf(struct icl_conn *ic, void *buf, size_t len) 187 { 188 struct iovec iov[1]; 189 struct uio uio; 190 struct socket *so; 191 int error, flags; 192 193 so = ic->ic_socket; 194 195 memset(&uio, 0, sizeof(uio)); 196 iov[0].iov_base = buf; 197 iov[0].iov_len = len; 198 uio.uio_iov = iov; 199 uio.uio_iovcnt = 1; 200 uio.uio_offset = 0; 201 uio.uio_resid = len; 202 uio.uio_segflg = UIO_SYSSPACE; 203 uio.uio_rw = UIO_READ; 204 205 flags = MSG_DONTWAIT; 206 error = soreceive(so, NULL, &uio, NULL, NULL, &flags); 207 if (error != 0) { 208 ICL_DEBUG("soreceive error %d", error); 209 return (-1); 210 } 211 if (uio.uio_resid != 0) { 212 ICL_DEBUG("short read"); 213 return (-1); 214 } 215 216 return (0); 217 } 218 219 static void 220 icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) 221 { 222 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 223 224 KASSERT(isp->ref_cnt == 0, ("freeing active PDU")); 225 m_freem(ip->ip_bhs_mbuf); 226 m_freem(ip->ip_ahs_mbuf); 227 m_freem(ip->ip_data_mbuf); 228 uma_zfree(icl_soft_pdu_zone, isp); 229 #ifdef DIAGNOSTIC 230 refcount_release(&ic->ic_outstanding_pdus); 231 #endif 232 } 233 234 static void 235 icl_soft_pdu_call_cb(struct icl_pdu *ip) 236 { 237 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 238 239 if (isp->cb != NULL) 240 isp->cb(ip, isp->error); 241 #ifdef DIAGNOSTIC 242 refcount_release(&ip->ip_conn->ic_outstanding_pdus); 243 #endif 244 uma_zfree(icl_soft_pdu_zone, isp); 245 } 246 247 static void 248 icl_soft_pdu_done(struct icl_pdu *ip, int error) 249 { 250 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 251 252 if (error != 0) 253 isp->error = error; 254 255 m_freem(ip->ip_bhs_mbuf); 256 ip->ip_bhs_mbuf = NULL; 257 m_freem(ip->ip_ahs_mbuf); 258 ip->ip_ahs_mbuf = NULL; 259 m_freem(ip->ip_data_mbuf); 260 ip->ip_data_mbuf = NULL; 261 262 if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) 263 icl_soft_pdu_call_cb(ip); 264 } 265 266 static void 267 icl_soft_mbuf_done(struct mbuf *mb) 268 { 269 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1; 270 271 icl_soft_pdu_call_cb(&isp->ip); 272 } 273 274 /* 275 * Allocate icl_pdu with empty BHS to fill up by the caller. 276 */ 277 struct icl_pdu * 278 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) 279 { 280 struct icl_soft_pdu *isp; 281 struct icl_pdu *ip; 282 283 #ifdef DIAGNOSTIC 284 refcount_acquire(&ic->ic_outstanding_pdus); 285 #endif 286 isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO); 287 if (isp == NULL) { 288 ICL_WARN("failed to allocate soft PDU"); 289 #ifdef DIAGNOSTIC 290 refcount_release(&ic->ic_outstanding_pdus); 291 #endif 292 return (NULL); 293 } 294 ip = &isp->ip; 295 ip->ip_conn = ic; 296 297 CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN); 298 ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA); 299 if (ip->ip_bhs_mbuf == NULL) { 300 ICL_WARN("failed to allocate BHS mbuf"); 301 icl_soft_conn_pdu_free(ic, ip); 302 return (NULL); 303 } 304 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); 305 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); 306 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); 307 308 return (ip); 309 } 310 311 static int 312 icl_pdu_ahs_length(const struct icl_pdu *request) 313 { 314 315 return (request->ip_bhs->bhs_total_ahs_len * 4); 316 } 317 318 static size_t 319 icl_pdu_data_segment_length(const struct icl_pdu *request) 320 { 321 uint32_t len = 0; 322 323 len += request->ip_bhs->bhs_data_segment_len[0]; 324 len <<= 8; 325 len += request->ip_bhs->bhs_data_segment_len[1]; 326 len <<= 8; 327 len += request->ip_bhs->bhs_data_segment_len[2]; 328 329 return (len); 330 } 331 332 size_t 333 icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, 334 const struct icl_pdu *request) 335 { 336 337 return (icl_pdu_data_segment_length(request)); 338 } 339 340 static void 341 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) 342 { 343 344 response->ip_bhs->bhs_data_segment_len[2] = len; 345 response->ip_bhs->bhs_data_segment_len[1] = len >> 8; 346 response->ip_bhs->bhs_data_segment_len[0] = len >> 16; 347 } 348 349 static size_t 350 icl_pdu_padding(const struct icl_pdu *ip) 351 { 352 353 if ((ip->ip_data_len % 4) != 0) 354 return (4 - (ip->ip_data_len % 4)); 355 356 return (0); 357 } 358 359 static size_t 360 icl_pdu_size(const struct icl_pdu *response) 361 { 362 size_t len; 363 364 KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); 365 366 len = sizeof(struct iscsi_bhs) + response->ip_data_len + 367 icl_pdu_padding(response); 368 if (response->ip_conn->ic_header_crc32c) 369 len += ISCSI_HEADER_DIGEST_SIZE; 370 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) 371 len += ISCSI_DATA_DIGEST_SIZE; 372 373 return (len); 374 } 375 376 static int 377 icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep) 378 { 379 380 if (icl_conn_receive_buf(request->ip_conn, 381 request->ip_bhs, sizeof(struct iscsi_bhs))) { 382 ICL_DEBUG("failed to receive BHS"); 383 return (-1); 384 } 385 386 *availablep -= sizeof(struct iscsi_bhs); 387 return (0); 388 } 389 390 static int 391 icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep) 392 { 393 394 request->ip_ahs_len = icl_pdu_ahs_length(request); 395 if (request->ip_ahs_len == 0) 396 return (0); 397 398 request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn, 399 request->ip_ahs_len); 400 if (request->ip_ahs_mbuf == NULL) { 401 ICL_DEBUG("failed to receive AHS"); 402 return (-1); 403 } 404 405 *availablep -= request->ip_ahs_len; 406 return (0); 407 } 408 409 static uint32_t 410 icl_mbuf_to_crc32c(const struct mbuf *m0) 411 { 412 uint32_t digest = 0xffffffff; 413 const struct mbuf *m; 414 415 for (m = m0; m != NULL; m = m->m_next) 416 digest = calculate_crc32c(digest, 417 mtod(m, const void *), m->m_len); 418 419 digest = digest ^ 0xffffffff; 420 421 return (digest); 422 } 423 424 static int 425 icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep) 426 { 427 uint32_t received_digest, valid_digest; 428 429 if (request->ip_conn->ic_header_crc32c == false) 430 return (0); 431 432 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); 433 if (icl_conn_receive_buf(request->ip_conn, 434 &received_digest, ISCSI_HEADER_DIGEST_SIZE)) { 435 ICL_DEBUG("failed to receive header digest"); 436 return (-1); 437 } 438 *availablep -= ISCSI_HEADER_DIGEST_SIZE; 439 440 /* Temporary attach AHS to BHS to calculate header digest. */ 441 request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf; 442 valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 443 request->ip_bhs_mbuf->m_next = NULL; 444 if (received_digest != valid_digest) { 445 ICL_WARN("header digest check failed; got 0x%x, " 446 "should be 0x%x", received_digest, valid_digest); 447 return (-1); 448 } 449 450 return (0); 451 } 452 453 /* 454 * Return the number of bytes that should be waiting in the receive socket 455 * before icl_pdu_receive_data_segment() gets called. 456 */ 457 static size_t 458 icl_pdu_data_segment_receive_len(const struct icl_pdu *request) 459 { 460 size_t len; 461 462 len = icl_pdu_data_segment_length(request); 463 if (len == 0) 464 return (0); 465 466 /* 467 * Account for the parts of data segment already read from 468 * the socket buffer. 469 */ 470 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 471 len -= request->ip_data_len; 472 473 /* 474 * Don't always wait for the full data segment to be delivered 475 * to the socket; this might badly affect performance due to 476 * TCP window scaling. 477 */ 478 if (len > partial_receive_len) { 479 #if 0 480 ICL_DEBUG("need %zd bytes of data, limiting to %zd", 481 len, partial_receive_len)); 482 #endif 483 len = partial_receive_len; 484 485 return (len); 486 } 487 488 /* 489 * Account for padding. Note that due to the way code is written, 490 * the icl_pdu_receive_data_segment() must always receive padding 491 * along with the last part of data segment, because it would be 492 * impossible to tell whether we've already received the full data 493 * segment including padding, or without it. 494 */ 495 if ((len % 4) != 0) 496 len += 4 - (len % 4); 497 498 #if 0 499 ICL_DEBUG("need %zd bytes of data", len)); 500 #endif 501 502 return (len); 503 } 504 505 static int 506 icl_pdu_receive_data_segment(struct icl_pdu *request, 507 size_t *availablep, bool *more_neededp) 508 { 509 struct icl_conn *ic; 510 size_t len, padding = 0; 511 struct mbuf *m; 512 513 ic = request->ip_conn; 514 515 *more_neededp = false; 516 ic->ic_receive_len = 0; 517 518 len = icl_pdu_data_segment_length(request); 519 if (len == 0) 520 return (0); 521 522 if ((len % 4) != 0) 523 padding = 4 - (len % 4); 524 525 /* 526 * Account for already received parts of data segment. 527 */ 528 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 529 len -= request->ip_data_len; 530 531 if (len + padding > *availablep) { 532 /* 533 * Not enough data in the socket buffer. Receive as much 534 * as we can. Don't receive padding, since, obviously, it's 535 * not the end of data segment yet. 536 */ 537 #if 0 538 ICL_DEBUG("limited from %zd to %zd", 539 len + padding, *availablep - padding)); 540 #endif 541 len = *availablep - padding; 542 *more_neededp = true; 543 padding = 0; 544 } 545 546 /* 547 * Must not try to receive padding without at least one byte 548 * of actual data segment. 549 */ 550 if (len > 0) { 551 m = icl_conn_receive(request->ip_conn, len + padding); 552 if (m == NULL) { 553 ICL_DEBUG("failed to receive data segment"); 554 return (-1); 555 } 556 557 if (request->ip_data_mbuf == NULL) 558 request->ip_data_mbuf = m; 559 else 560 m_cat(request->ip_data_mbuf, m); 561 562 request->ip_data_len += len; 563 *availablep -= len + padding; 564 } else 565 ICL_DEBUG("len 0"); 566 567 if (*more_neededp) 568 ic->ic_receive_len = 569 icl_pdu_data_segment_receive_len(request); 570 571 return (0); 572 } 573 574 static int 575 icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep) 576 { 577 uint32_t received_digest, valid_digest; 578 579 if (request->ip_conn->ic_data_crc32c == false) 580 return (0); 581 582 if (request->ip_data_len == 0) 583 return (0); 584 585 CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); 586 if (icl_conn_receive_buf(request->ip_conn, 587 &received_digest, ISCSI_DATA_DIGEST_SIZE)) { 588 ICL_DEBUG("failed to receive data digest"); 589 return (-1); 590 } 591 *availablep -= ISCSI_DATA_DIGEST_SIZE; 592 593 /* 594 * Note that ip_data_mbuf also contains padding; since digest 595 * calculation is supposed to include that, we iterate over 596 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. 597 */ 598 valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 599 if (received_digest != valid_digest) { 600 ICL_WARN("data digest check failed; got 0x%x, " 601 "should be 0x%x", received_digest, valid_digest); 602 return (-1); 603 } 604 605 return (0); 606 } 607 608 /* 609 * Somewhat contrary to the name, this attempts to receive only one 610 * "part" of PDU at a time; call it repeatedly until it returns non-NULL. 611 */ 612 static struct icl_pdu * 613 icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep) 614 { 615 struct icl_pdu *request; 616 struct socket *so; 617 size_t len; 618 int error; 619 bool more_needed; 620 621 so = ic->ic_socket; 622 623 if (ic->ic_receive_state == ICL_CONN_STATE_BHS) { 624 KASSERT(ic->ic_receive_pdu == NULL, 625 ("ic->ic_receive_pdu != NULL")); 626 request = icl_soft_conn_new_pdu(ic, M_NOWAIT); 627 if (request == NULL) { 628 ICL_DEBUG("failed to allocate PDU; " 629 "dropping connection"); 630 icl_conn_fail(ic); 631 return (NULL); 632 } 633 ic->ic_receive_pdu = request; 634 } else { 635 KASSERT(ic->ic_receive_pdu != NULL, 636 ("ic->ic_receive_pdu == NULL")); 637 request = ic->ic_receive_pdu; 638 } 639 640 if (*availablep < ic->ic_receive_len) { 641 #if 0 642 ICL_DEBUG("not enough data; need %zd, " 643 "have %zd", ic->ic_receive_len, *availablep); 644 #endif 645 return (NULL); 646 } 647 648 switch (ic->ic_receive_state) { 649 case ICL_CONN_STATE_BHS: 650 //ICL_DEBUG("receiving BHS"); 651 error = icl_pdu_receive_bhs(request, availablep); 652 if (error != 0) { 653 ICL_DEBUG("failed to receive BHS; " 654 "dropping connection"); 655 break; 656 } 657 658 /* 659 * We don't enforce any limit for AHS length; 660 * its length is stored in 8 bit field. 661 */ 662 663 len = icl_pdu_data_segment_length(request); 664 if (len > ic->ic_max_data_segment_length) { 665 ICL_WARN("received data segment " 666 "length %zd is larger than negotiated " 667 "MaxDataSegmentLength %zd; " 668 "dropping connection", 669 len, ic->ic_max_data_segment_length); 670 error = EINVAL; 671 break; 672 } 673 674 ic->ic_receive_state = ICL_CONN_STATE_AHS; 675 ic->ic_receive_len = icl_pdu_ahs_length(request); 676 break; 677 678 case ICL_CONN_STATE_AHS: 679 //ICL_DEBUG("receiving AHS"); 680 error = icl_pdu_receive_ahs(request, availablep); 681 if (error != 0) { 682 ICL_DEBUG("failed to receive AHS; " 683 "dropping connection"); 684 break; 685 } 686 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST; 687 if (ic->ic_header_crc32c == false) 688 ic->ic_receive_len = 0; 689 else 690 ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE; 691 break; 692 693 case ICL_CONN_STATE_HEADER_DIGEST: 694 //ICL_DEBUG("receiving header digest"); 695 error = icl_pdu_check_header_digest(request, availablep); 696 if (error != 0) { 697 ICL_DEBUG("header digest failed; " 698 "dropping connection"); 699 break; 700 } 701 702 ic->ic_receive_state = ICL_CONN_STATE_DATA; 703 ic->ic_receive_len = 704 icl_pdu_data_segment_receive_len(request); 705 break; 706 707 case ICL_CONN_STATE_DATA: 708 //ICL_DEBUG("receiving data segment"); 709 error = icl_pdu_receive_data_segment(request, availablep, 710 &more_needed); 711 if (error != 0) { 712 ICL_DEBUG("failed to receive data segment;" 713 "dropping connection"); 714 break; 715 } 716 717 if (more_needed) 718 break; 719 720 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST; 721 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) 722 ic->ic_receive_len = 0; 723 else 724 ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE; 725 break; 726 727 case ICL_CONN_STATE_DATA_DIGEST: 728 //ICL_DEBUG("receiving data digest"); 729 error = icl_pdu_check_data_digest(request, availablep); 730 if (error != 0) { 731 ICL_DEBUG("data digest failed; " 732 "dropping connection"); 733 break; 734 } 735 736 /* 737 * We've received complete PDU; reset the receive state machine 738 * and return the PDU. 739 */ 740 ic->ic_receive_state = ICL_CONN_STATE_BHS; 741 ic->ic_receive_len = sizeof(struct iscsi_bhs); 742 ic->ic_receive_pdu = NULL; 743 return (request); 744 745 default: 746 panic("invalid ic_receive_state %d\n", ic->ic_receive_state); 747 } 748 749 if (error != 0) { 750 /* 751 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu 752 * and will get freed in icl_soft_conn_close(). 753 */ 754 icl_conn_fail(ic); 755 } 756 757 return (NULL); 758 } 759 760 static void 761 icl_conn_receive_pdus(struct icl_conn *ic, size_t available) 762 { 763 struct icl_pdu *response; 764 struct socket *so; 765 766 so = ic->ic_socket; 767 768 /* 769 * This can never happen; we're careful to only mess with ic->ic_socket 770 * pointer when the send/receive threads are not running. 771 */ 772 KASSERT(so != NULL, ("NULL socket")); 773 774 for (;;) { 775 if (ic->ic_disconnecting) 776 return; 777 778 if (so->so_error != 0) { 779 ICL_DEBUG("connection error %d; " 780 "dropping connection", so->so_error); 781 icl_conn_fail(ic); 782 return; 783 } 784 785 /* 786 * Loop until we have a complete PDU or there is not enough 787 * data in the socket buffer. 788 */ 789 if (available < ic->ic_receive_len) { 790 #if 0 791 ICL_DEBUG("not enough data; have %zd, " 792 "need %zd", available, 793 ic->ic_receive_len); 794 #endif 795 return; 796 } 797 798 response = icl_conn_receive_pdu(ic, &available); 799 if (response == NULL) 800 continue; 801 802 if (response->ip_ahs_len > 0) { 803 ICL_WARN("received PDU with unsupported " 804 "AHS; opcode 0x%x; dropping connection", 805 response->ip_bhs->bhs_opcode); 806 icl_soft_conn_pdu_free(ic, response); 807 icl_conn_fail(ic); 808 return; 809 } 810 811 (ic->ic_receive)(response); 812 } 813 } 814 815 static void 816 icl_receive_thread(void *arg) 817 { 818 struct icl_conn *ic; 819 size_t available; 820 struct socket *so; 821 822 ic = arg; 823 so = ic->ic_socket; 824 825 for (;;) { 826 if (ic->ic_disconnecting) { 827 //ICL_DEBUG("terminating"); 828 break; 829 } 830 831 /* 832 * Set the low watermark, to be checked by 833 * soreadable() in icl_soupcall_receive() 834 * to avoid unnecessary wakeups until there 835 * is enough data received to read the PDU. 836 */ 837 SOCKBUF_LOCK(&so->so_rcv); 838 available = sbavail(&so->so_rcv); 839 if (available < ic->ic_receive_len) { 840 so->so_rcv.sb_lowat = ic->ic_receive_len; 841 cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); 842 } else 843 so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; 844 SOCKBUF_UNLOCK(&so->so_rcv); 845 846 icl_conn_receive_pdus(ic, available); 847 } 848 849 ICL_CONN_LOCK(ic); 850 ic->ic_receive_running = false; 851 cv_signal(&ic->ic_send_cv); 852 ICL_CONN_UNLOCK(ic); 853 kthread_exit(); 854 } 855 856 static int 857 icl_soupcall_receive(struct socket *so, void *arg, int waitflag) 858 { 859 struct icl_conn *ic; 860 861 if (!soreadable(so)) 862 return (SU_OK); 863 864 ic = arg; 865 cv_signal(&ic->ic_receive_cv); 866 return (SU_OK); 867 } 868 869 static int 870 icl_pdu_finalize(struct icl_pdu *request) 871 { 872 size_t padding, pdu_len; 873 uint32_t digest, zero = 0; 874 int ok; 875 struct icl_conn *ic; 876 877 ic = request->ip_conn; 878 879 icl_pdu_set_data_segment_length(request, request->ip_data_len); 880 881 pdu_len = icl_pdu_size(request); 882 883 if (ic->ic_header_crc32c) { 884 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 885 ok = m_append(request->ip_bhs_mbuf, sizeof(digest), 886 (void *)&digest); 887 if (ok != 1) { 888 ICL_WARN("failed to append header digest"); 889 return (1); 890 } 891 } 892 893 if (request->ip_data_len != 0) { 894 padding = icl_pdu_padding(request); 895 if (padding > 0) { 896 ok = m_append(request->ip_data_mbuf, padding, 897 (void *)&zero); 898 if (ok != 1) { 899 ICL_WARN("failed to append padding"); 900 return (1); 901 } 902 } 903 904 if (ic->ic_data_crc32c) { 905 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 906 907 ok = m_append(request->ip_data_mbuf, sizeof(digest), 908 (void *)&digest); 909 if (ok != 1) { 910 ICL_WARN("failed to append data digest"); 911 return (1); 912 } 913 } 914 915 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); 916 request->ip_data_mbuf = NULL; 917 } 918 919 request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; 920 921 return (0); 922 } 923 924 static void 925 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue) 926 { 927 struct icl_pdu *request, *request2; 928 struct socket *so; 929 long available, size, size2; 930 int coalesced, error; 931 932 ICL_CONN_LOCK_ASSERT_NOT(ic); 933 934 so = ic->ic_socket; 935 936 SOCKBUF_LOCK(&so->so_snd); 937 /* 938 * Check how much space do we have for transmit. We can't just 939 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, 940 * as it always frees the mbuf chain passed to it, even in case 941 * of error. 942 */ 943 available = sbspace(&so->so_snd); 944 945 /* 946 * Notify the socket upcall that we don't need wakeups 947 * for the time being. 948 */ 949 so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; 950 SOCKBUF_UNLOCK(&so->so_snd); 951 952 while (!STAILQ_EMPTY(queue)) { 953 request = STAILQ_FIRST(queue); 954 size = icl_pdu_size(request); 955 if (available < size) { 956 /* 957 * Set the low watermark, to be checked by 958 * sowriteable() in icl_soupcall_send() 959 * to avoid unnecessary wakeups until there 960 * is enough space for the PDU to fit. 961 */ 962 SOCKBUF_LOCK(&so->so_snd); 963 available = sbspace(&so->so_snd); 964 if (available < size) { 965 #if 1 966 ICL_DEBUG("no space to send; " 967 "have %ld, need %ld", 968 available, size); 969 #endif 970 so->so_snd.sb_lowat = max(size, 971 so->so_snd.sb_hiwat / 8); 972 SOCKBUF_UNLOCK(&so->so_snd); 973 return; 974 } 975 SOCKBUF_UNLOCK(&so->so_snd); 976 } 977 STAILQ_REMOVE_HEAD(queue, ip_next); 978 error = icl_pdu_finalize(request); 979 if (error != 0) { 980 ICL_DEBUG("failed to finalize PDU; " 981 "dropping connection"); 982 icl_soft_pdu_done(request, EIO); 983 icl_conn_fail(ic); 984 return; 985 } 986 if (coalesce) { 987 coalesced = 1; 988 for (;;) { 989 request2 = STAILQ_FIRST(queue); 990 if (request2 == NULL) 991 break; 992 size2 = icl_pdu_size(request2); 993 if (available < size + size2) 994 break; 995 STAILQ_REMOVE_HEAD(queue, ip_next); 996 error = icl_pdu_finalize(request2); 997 if (error != 0) { 998 ICL_DEBUG("failed to finalize PDU; " 999 "dropping connection"); 1000 icl_soft_pdu_done(request, EIO); 1001 icl_soft_pdu_done(request2, EIO); 1002 icl_conn_fail(ic); 1003 return; 1004 } 1005 m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf); 1006 request2->ip_bhs_mbuf = NULL; 1007 request->ip_bhs_mbuf->m_pkthdr.len += size2; 1008 size += size2; 1009 STAILQ_REMOVE_AFTER(queue, request, ip_next); 1010 icl_soft_pdu_done(request2, 0); 1011 coalesced++; 1012 } 1013 #if 0 1014 if (coalesced > 1) { 1015 ICL_DEBUG("coalesced %d PDUs into %ld bytes", 1016 coalesced, size); 1017 } 1018 #endif 1019 } 1020 available -= size; 1021 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, 1022 NULL, MSG_DONTWAIT, curthread); 1023 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */ 1024 if (error != 0) { 1025 ICL_DEBUG("failed to send PDU, error %d; " 1026 "dropping connection", error); 1027 icl_soft_pdu_done(request, error); 1028 icl_conn_fail(ic); 1029 return; 1030 } 1031 icl_soft_pdu_done(request, 0); 1032 } 1033 } 1034 1035 static void 1036 icl_send_thread(void *arg) 1037 { 1038 struct icl_conn *ic; 1039 struct icl_pdu_stailq queue; 1040 1041 ic = arg; 1042 1043 STAILQ_INIT(&queue); 1044 1045 ICL_CONN_LOCK(ic); 1046 for (;;) { 1047 for (;;) { 1048 /* 1049 * If the local queue is empty, populate it from 1050 * the main one. This way the icl_conn_send_pdus() 1051 * can go through all the queued PDUs without holding 1052 * any locks. 1053 */ 1054 if (STAILQ_EMPTY(&queue)) 1055 STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu); 1056 1057 ic->ic_check_send_space = false; 1058 ICL_CONN_UNLOCK(ic); 1059 icl_conn_send_pdus(ic, &queue); 1060 ICL_CONN_LOCK(ic); 1061 1062 /* 1063 * The icl_soupcall_send() was called since the last 1064 * call to sbspace(); go around; 1065 */ 1066 if (ic->ic_check_send_space) 1067 continue; 1068 1069 /* 1070 * Local queue is empty, but we still have PDUs 1071 * in the main one; go around. 1072 */ 1073 if (STAILQ_EMPTY(&queue) && 1074 !STAILQ_EMPTY(&ic->ic_to_send)) 1075 continue; 1076 1077 /* 1078 * There might be some stuff in the local queue, 1079 * which didn't get sent due to not having enough send 1080 * space. Wait for socket upcall. 1081 */ 1082 break; 1083 } 1084 1085 if (ic->ic_disconnecting) { 1086 //ICL_DEBUG("terminating"); 1087 break; 1088 } 1089 1090 cv_wait(&ic->ic_send_cv, ic->ic_lock); 1091 } 1092 1093 /* 1094 * We're exiting; move PDUs back to the main queue, so they can 1095 * get freed properly. At this point ordering doesn't matter. 1096 */ 1097 STAILQ_CONCAT(&ic->ic_to_send, &queue); 1098 1099 ic->ic_send_running = false; 1100 cv_signal(&ic->ic_send_cv); 1101 ICL_CONN_UNLOCK(ic); 1102 kthread_exit(); 1103 } 1104 1105 static int 1106 icl_soupcall_send(struct socket *so, void *arg, int waitflag) 1107 { 1108 struct icl_conn *ic; 1109 1110 if (!sowriteable(so)) 1111 return (SU_OK); 1112 1113 ic = arg; 1114 1115 ICL_CONN_LOCK(ic); 1116 ic->ic_check_send_space = true; 1117 ICL_CONN_UNLOCK(ic); 1118 1119 cv_signal(&ic->ic_send_cv); 1120 1121 return (SU_OK); 1122 } 1123 1124 static int 1125 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, 1126 const void *addr, size_t len, int flags) 1127 { 1128 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; 1129 struct mbuf *mb, *newmb; 1130 size_t copylen, off = 0; 1131 1132 KASSERT(len > 0, ("len == 0")); 1133 1134 if (flags & ICL_NOCOPY) { 1135 newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA); 1136 if (newmb == NULL) { 1137 ICL_WARN("failed to allocate mbuf"); 1138 return (ENOMEM); 1139 } 1140 1141 newmb->m_flags |= M_RDONLY; 1142 m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt, 1143 icl_soft_mbuf_done, isp, NULL); 1144 newmb->m_len = len; 1145 } else { 1146 newmb = m_getm2(NULL, len, flags, MT_DATA, 0); 1147 if (newmb == NULL) { 1148 ICL_WARN("failed to allocate mbuf for %zd bytes", len); 1149 return (ENOMEM); 1150 } 1151 1152 for (mb = newmb; mb != NULL; mb = mb->m_next) { 1153 copylen = min(M_TRAILINGSPACE(mb), len - off); 1154 memcpy(mtod(mb, char *), (const char *)addr + off, copylen); 1155 mb->m_len = copylen; 1156 off += copylen; 1157 } 1158 KASSERT(off == len, ("%s: off != len", __func__)); 1159 } 1160 1161 if (request->ip_data_mbuf == NULL) { 1162 request->ip_data_mbuf = newmb; 1163 request->ip_data_len = len; 1164 } else { 1165 m_cat(request->ip_data_mbuf, newmb); 1166 request->ip_data_len += len; 1167 } 1168 1169 return (0); 1170 } 1171 1172 void 1173 icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, 1174 size_t off, void *addr, size_t len) 1175 { 1176 1177 m_copydata(ip->ip_data_mbuf, off, len, addr); 1178 } 1179 1180 static void 1181 icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) 1182 { 1183 1184 icl_soft_conn_pdu_queue_cb(ic, ip, NULL); 1185 } 1186 1187 static void 1188 icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip, 1189 icl_pdu_cb cb) 1190 { 1191 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 1192 1193 ICL_CONN_LOCK_ASSERT(ic); 1194 isp->ref_cnt++; 1195 isp->cb = cb; 1196 1197 if (ic->ic_disconnecting || ic->ic_socket == NULL) { 1198 ICL_DEBUG("icl_pdu_queue on closed connection"); 1199 icl_soft_pdu_done(ip, ENOTCONN); 1200 return; 1201 } 1202 1203 if (!STAILQ_EMPTY(&ic->ic_to_send)) { 1204 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1205 /* 1206 * If the queue is not empty, someone else had already 1207 * signaled the send thread; no need to do that again, 1208 * just return. 1209 */ 1210 return; 1211 } 1212 1213 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1214 cv_signal(&ic->ic_send_cv); 1215 } 1216 1217 static struct icl_conn * 1218 icl_soft_new_conn(const char *name, struct mtx *lock) 1219 { 1220 struct icl_conn *ic; 1221 1222 refcount_acquire(&icl_ncons); 1223 1224 ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO); 1225 1226 STAILQ_INIT(&ic->ic_to_send); 1227 ic->ic_lock = lock; 1228 cv_init(&ic->ic_send_cv, "icl_tx"); 1229 cv_init(&ic->ic_receive_cv, "icl_rx"); 1230 #ifdef DIAGNOSTIC 1231 refcount_init(&ic->ic_outstanding_pdus, 0); 1232 #endif 1233 ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; 1234 ic->ic_name = name; 1235 ic->ic_offload = "None"; 1236 ic->ic_unmapped = false; 1237 1238 return (ic); 1239 } 1240 1241 void 1242 icl_soft_conn_free(struct icl_conn *ic) 1243 { 1244 1245 #ifdef DIAGNOSTIC 1246 KASSERT(ic->ic_outstanding_pdus == 0, 1247 ("destroying session with %d outstanding PDUs", 1248 ic->ic_outstanding_pdus)); 1249 #endif 1250 cv_destroy(&ic->ic_send_cv); 1251 cv_destroy(&ic->ic_receive_cv); 1252 kobj_delete((struct kobj *)ic, M_ICL_SOFT); 1253 refcount_release(&icl_ncons); 1254 } 1255 1256 static int 1257 icl_conn_start(struct icl_conn *ic) 1258 { 1259 size_t minspace; 1260 struct sockopt opt; 1261 int error, one = 1; 1262 1263 ICL_CONN_LOCK(ic); 1264 1265 /* 1266 * XXX: Ugly hack. 1267 */ 1268 if (ic->ic_socket == NULL) { 1269 ICL_CONN_UNLOCK(ic); 1270 return (EINVAL); 1271 } 1272 1273 ic->ic_receive_state = ICL_CONN_STATE_BHS; 1274 ic->ic_receive_len = sizeof(struct iscsi_bhs); 1275 ic->ic_disconnecting = false; 1276 1277 ICL_CONN_UNLOCK(ic); 1278 1279 /* 1280 * For sendspace, this is required because the current code cannot 1281 * send a PDU in pieces; thus, the minimum buffer size is equal 1282 * to the maximum PDU size. "+4" is to account for possible padding. 1283 * 1284 * What we should actually do here is to use autoscaling, but set 1285 * some minimal buffer size to "minspace". I don't know a way to do 1286 * that, though. 1287 */ 1288 minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + 1289 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1290 if (sendspace < minspace) { 1291 ICL_WARN("kern.icl.sendspace too low; must be at least %zd", 1292 minspace); 1293 sendspace = minspace; 1294 } 1295 if (recvspace < minspace) { 1296 ICL_WARN("kern.icl.recvspace too low; must be at least %zd", 1297 minspace); 1298 recvspace = minspace; 1299 } 1300 1301 error = soreserve(ic->ic_socket, sendspace, recvspace); 1302 if (error != 0) { 1303 ICL_WARN("soreserve failed with error %d", error); 1304 icl_soft_conn_close(ic); 1305 return (error); 1306 } 1307 ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; 1308 ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; 1309 1310 /* 1311 * Disable Nagle. 1312 */ 1313 bzero(&opt, sizeof(opt)); 1314 opt.sopt_dir = SOPT_SET; 1315 opt.sopt_level = IPPROTO_TCP; 1316 opt.sopt_name = TCP_NODELAY; 1317 opt.sopt_val = &one; 1318 opt.sopt_valsize = sizeof(one); 1319 error = sosetopt(ic->ic_socket, &opt); 1320 if (error != 0) { 1321 ICL_WARN("disabling TCP_NODELAY failed with error %d", error); 1322 icl_soft_conn_close(ic); 1323 return (error); 1324 } 1325 1326 /* 1327 * Register socket upcall, to get notified about incoming PDUs 1328 * and free space to send outgoing ones. 1329 */ 1330 SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1331 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); 1332 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1333 SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1334 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); 1335 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1336 1337 /* 1338 * Start threads. 1339 */ 1340 ICL_CONN_LOCK(ic); 1341 ic->ic_send_running = ic->ic_receive_running = true; 1342 ICL_CONN_UNLOCK(ic); 1343 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", 1344 ic->ic_name); 1345 if (error != 0) { 1346 ICL_WARN("kthread_add(9) failed with error %d", error); 1347 ICL_CONN_LOCK(ic); 1348 ic->ic_send_running = ic->ic_receive_running = false; 1349 cv_signal(&ic->ic_send_cv); 1350 ICL_CONN_UNLOCK(ic); 1351 icl_soft_conn_close(ic); 1352 return (error); 1353 } 1354 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", 1355 ic->ic_name); 1356 if (error != 0) { 1357 ICL_WARN("kthread_add(9) failed with error %d", error); 1358 ICL_CONN_LOCK(ic); 1359 ic->ic_receive_running = false; 1360 cv_signal(&ic->ic_send_cv); 1361 ICL_CONN_UNLOCK(ic); 1362 icl_soft_conn_close(ic); 1363 return (error); 1364 } 1365 1366 return (0); 1367 } 1368 1369 int 1370 icl_soft_conn_handoff(struct icl_conn *ic, int fd) 1371 { 1372 struct file *fp; 1373 struct socket *so; 1374 cap_rights_t rights; 1375 int error; 1376 1377 ICL_CONN_LOCK_ASSERT_NOT(ic); 1378 1379 #ifdef ICL_KERNEL_PROXY 1380 /* 1381 * We're transitioning to Full Feature phase, and we don't 1382 * really care. 1383 */ 1384 if (fd == 0) { 1385 ICL_CONN_LOCK(ic); 1386 if (ic->ic_socket == NULL) { 1387 ICL_CONN_UNLOCK(ic); 1388 ICL_WARN("proxy handoff without connect"); 1389 return (EINVAL); 1390 } 1391 ICL_CONN_UNLOCK(ic); 1392 return (0); 1393 } 1394 #endif 1395 1396 /* 1397 * Steal the socket from userland. 1398 */ 1399 error = fget(curthread, fd, 1400 cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp); 1401 if (error != 0) 1402 return (error); 1403 if (fp->f_type != DTYPE_SOCKET) { 1404 fdrop(fp, curthread); 1405 return (EINVAL); 1406 } 1407 so = fp->f_data; 1408 if (so->so_type != SOCK_STREAM) { 1409 fdrop(fp, curthread); 1410 return (EINVAL); 1411 } 1412 1413 ICL_CONN_LOCK(ic); 1414 1415 if (ic->ic_socket != NULL) { 1416 ICL_CONN_UNLOCK(ic); 1417 fdrop(fp, curthread); 1418 return (EBUSY); 1419 } 1420 1421 ic->ic_socket = fp->f_data; 1422 fp->f_ops = &badfileops; 1423 fp->f_data = NULL; 1424 fdrop(fp, curthread); 1425 ICL_CONN_UNLOCK(ic); 1426 1427 error = icl_conn_start(ic); 1428 1429 return (error); 1430 } 1431 1432 void 1433 icl_soft_conn_close(struct icl_conn *ic) 1434 { 1435 struct icl_pdu *pdu; 1436 struct socket *so; 1437 1438 ICL_CONN_LOCK(ic); 1439 1440 /* 1441 * Wake up the threads, so they can properly terminate. 1442 */ 1443 ic->ic_disconnecting = true; 1444 while (ic->ic_receive_running || ic->ic_send_running) { 1445 cv_signal(&ic->ic_receive_cv); 1446 cv_signal(&ic->ic_send_cv); 1447 cv_wait(&ic->ic_send_cv, ic->ic_lock); 1448 } 1449 1450 /* Some other thread could close the connection same time. */ 1451 so = ic->ic_socket; 1452 if (so == NULL) { 1453 ICL_CONN_UNLOCK(ic); 1454 return; 1455 } 1456 ic->ic_socket = NULL; 1457 1458 /* 1459 * Deregister socket upcalls. 1460 */ 1461 ICL_CONN_UNLOCK(ic); 1462 SOCKBUF_LOCK(&so->so_snd); 1463 if (so->so_snd.sb_upcall != NULL) 1464 soupcall_clear(so, SO_SND); 1465 SOCKBUF_UNLOCK(&so->so_snd); 1466 SOCKBUF_LOCK(&so->so_rcv); 1467 if (so->so_rcv.sb_upcall != NULL) 1468 soupcall_clear(so, SO_RCV); 1469 SOCKBUF_UNLOCK(&so->so_rcv); 1470 soclose(so); 1471 ICL_CONN_LOCK(ic); 1472 1473 if (ic->ic_receive_pdu != NULL) { 1474 //ICL_DEBUG("freeing partially received PDU"); 1475 icl_soft_conn_pdu_free(ic, ic->ic_receive_pdu); 1476 ic->ic_receive_pdu = NULL; 1477 } 1478 1479 /* 1480 * Remove any outstanding PDUs from the send queue. 1481 */ 1482 while (!STAILQ_EMPTY(&ic->ic_to_send)) { 1483 pdu = STAILQ_FIRST(&ic->ic_to_send); 1484 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); 1485 icl_soft_pdu_done(pdu, ENOTCONN); 1486 } 1487 1488 KASSERT(STAILQ_EMPTY(&ic->ic_to_send), 1489 ("destroying session with non-empty send queue")); 1490 ICL_CONN_UNLOCK(ic); 1491 } 1492 1493 int 1494 icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, 1495 struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) 1496 { 1497 1498 return (0); 1499 } 1500 1501 void 1502 icl_soft_conn_task_done(struct icl_conn *ic, void *prv) 1503 { 1504 } 1505 1506 int 1507 icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, 1508 uint32_t *transfer_tag, void **prvp) 1509 { 1510 1511 return (0); 1512 } 1513 1514 void 1515 icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv) 1516 { 1517 } 1518 1519 static int 1520 icl_soft_limits(struct icl_drv_limits *idl) 1521 { 1522 1523 idl->idl_max_recv_data_segment_length = 128 * 1024; 1524 idl->idl_max_send_data_segment_length = 128 * 1024; 1525 idl->idl_max_burst_length = 262144; 1526 idl->idl_first_burst_length = idl->idl_max_burst_length; 1527 1528 return (0); 1529 } 1530 1531 #ifdef ICL_KERNEL_PROXY 1532 int 1533 icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype, 1534 int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) 1535 { 1536 1537 return (icl_soft_proxy_connect(ic, domain, socktype, protocol, 1538 from_sa, to_sa)); 1539 } 1540 1541 int 1542 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so) 1543 { 1544 int error; 1545 1546 ICL_CONN_LOCK_ASSERT_NOT(ic); 1547 1548 if (so->so_type != SOCK_STREAM) 1549 return (EINVAL); 1550 1551 ICL_CONN_LOCK(ic); 1552 if (ic->ic_socket != NULL) { 1553 ICL_CONN_UNLOCK(ic); 1554 return (EBUSY); 1555 } 1556 ic->ic_socket = so; 1557 ICL_CONN_UNLOCK(ic); 1558 1559 error = icl_conn_start(ic); 1560 1561 return (error); 1562 } 1563 #endif /* ICL_KERNEL_PROXY */ 1564 1565 static int 1566 icl_soft_load(void) 1567 { 1568 int error; 1569 1570 icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu", 1571 sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL, 1572 UMA_ALIGN_PTR, 0); 1573 refcount_init(&icl_ncons, 0); 1574 1575 /* 1576 * The reason we call this "none" is that to the user, 1577 * it's known as "offload driver"; "offload driver: soft" 1578 * doesn't make much sense. 1579 */ 1580 error = icl_register("none", false, 0, 1581 icl_soft_limits, icl_soft_new_conn); 1582 KASSERT(error == 0, ("failed to register")); 1583 1584 #if defined(ICL_KERNEL_PROXY) && 0 1585 /* 1586 * Debugging aid for kernel proxy functionality. 1587 */ 1588 error = icl_register("proxytest", true, 0, 1589 icl_soft_limits, icl_soft_new_conn); 1590 KASSERT(error == 0, ("failed to register")); 1591 #endif 1592 1593 return (error); 1594 } 1595 1596 static int 1597 icl_soft_unload(void) 1598 { 1599 1600 if (icl_ncons != 0) 1601 return (EBUSY); 1602 1603 icl_unregister("none", false); 1604 #if defined(ICL_KERNEL_PROXY) && 0 1605 icl_unregister("proxytest", true); 1606 #endif 1607 1608 uma_zdestroy(icl_soft_pdu_zone); 1609 1610 return (0); 1611 } 1612 1613 static int 1614 icl_soft_modevent(module_t mod, int what, void *arg) 1615 { 1616 1617 switch (what) { 1618 case MOD_LOAD: 1619 return (icl_soft_load()); 1620 case MOD_UNLOAD: 1621 return (icl_soft_unload()); 1622 default: 1623 return (EINVAL); 1624 } 1625 } 1626 1627 moduledata_t icl_soft_data = { 1628 "icl_soft", 1629 icl_soft_modevent, 1630 0 1631 }; 1632 1633 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); 1634 MODULE_DEPEND(icl_soft, icl, 1, 1, 1); 1635 MODULE_VERSION(icl_soft, 1); 1636