1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * This software was developed by Edward Tomasz Napierala under sponsorship 8 * from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 */ 32 33 /* 34 * Software implementation of iSCSI Common Layer kobj(9) interface. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/capsicum.h> 42 #include <sys/condvar.h> 43 #include <sys/conf.h> 44 #include <sys/gsb_crc32.h> 45 #include <sys/file.h> 46 #include <sys/kernel.h> 47 #include <sys/kthread.h> 48 #include <sys/lock.h> 49 #include <sys/mbuf.h> 50 #include <sys/mutex.h> 51 #include <sys/module.h> 52 #include <sys/protosw.h> 53 #include <sys/socket.h> 54 #include <sys/socketvar.h> 55 #include <sys/sysctl.h> 56 #include <sys/systm.h> 57 #include <sys/sx.h> 58 #include <sys/uio.h> 59 #include <vm/uma.h> 60 #include <netinet/in.h> 61 #include <netinet/tcp.h> 62 63 #include <dev/iscsi/icl.h> 64 #include <dev/iscsi/iscsi_proto.h> 65 #include <icl_conn_if.h> 66 67 struct icl_soft_pdu { 68 struct icl_pdu ip; 69 70 /* soft specific stuff goes here. */ 71 u_int ref_cnt; 72 icl_pdu_cb cb; 73 int error; 74 }; 75 76 static int coalesce = 1; 77 SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN, 78 &coalesce, 0, "Try to coalesce PDUs before sending"); 79 static int partial_receive_len = 128 * 1024; 80 SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, 81 &partial_receive_len, 0, "Minimum read size for partially received " 82 "data segment"); 83 static int sendspace = 1048576; 84 SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN, 85 &sendspace, 0, "Default send socket buffer size"); 86 static int recvspace = 1048576; 87 SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN, 88 &recvspace, 0, "Default receive socket buffer size"); 89 90 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend"); 91 static uma_zone_t icl_soft_pdu_zone; 92 93 static volatile u_int icl_ncons; 94 95 #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) 96 #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) 97 #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) 98 #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) 99 100 STAILQ_HEAD(icl_pdu_stailq, icl_pdu); 101 102 static icl_conn_new_pdu_t icl_soft_conn_new_pdu; 103 static icl_conn_pdu_free_t icl_soft_conn_pdu_free; 104 static icl_conn_pdu_data_segment_length_t 105 icl_soft_conn_pdu_data_segment_length; 106 static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data; 107 static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data; 108 static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue; 109 static icl_conn_pdu_queue_cb_t icl_soft_conn_pdu_queue_cb; 110 static icl_conn_handoff_t icl_soft_conn_handoff; 111 static icl_conn_free_t icl_soft_conn_free; 112 static icl_conn_close_t icl_soft_conn_close; 113 static icl_conn_task_setup_t icl_soft_conn_task_setup; 114 static icl_conn_task_done_t icl_soft_conn_task_done; 115 static icl_conn_transfer_setup_t icl_soft_conn_transfer_setup; 116 static icl_conn_transfer_done_t icl_soft_conn_transfer_done; 117 #ifdef ICL_KERNEL_PROXY 118 static icl_conn_connect_t icl_soft_conn_connect; 119 #endif 120 121 static kobj_method_t icl_soft_methods[] = { 122 KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu), 123 KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free), 124 KOBJMETHOD(icl_conn_pdu_data_segment_length, 125 icl_soft_conn_pdu_data_segment_length), 126 KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data), 127 KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data), 128 KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue), 129 KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb), 130 KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff), 131 KOBJMETHOD(icl_conn_free, icl_soft_conn_free), 132 KOBJMETHOD(icl_conn_close, icl_soft_conn_close), 133 KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup), 134 KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done), 135 KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup), 136 KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done), 137 #ifdef ICL_KERNEL_PROXY 138 KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect), 139 #endif 140 { 0, 0 } 141 }; 142 143 DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn)); 144 145 static void 146 icl_conn_fail(struct icl_conn *ic) 147 { 148 if (ic->ic_socket == NULL) 149 return; 150 151 /* 152 * XXX 153 */ 154 ic->ic_socket->so_error = EDOOFUS; 155 (ic->ic_error)(ic); 156 } 157 158 static struct mbuf * 159 icl_conn_receive(struct icl_conn *ic, size_t len) 160 { 161 struct uio uio; 162 struct socket *so; 163 struct mbuf *m; 164 int error, flags; 165 166 so = ic->ic_socket; 167 168 memset(&uio, 0, sizeof(uio)); 169 uio.uio_resid = len; 170 171 flags = MSG_DONTWAIT; 172 error = soreceive(so, NULL, &uio, &m, NULL, &flags); 173 if (error != 0) { 174 ICL_DEBUG("soreceive error %d", error); 175 return (NULL); 176 } 177 if (uio.uio_resid != 0) { 178 m_freem(m); 179 ICL_DEBUG("short read"); 180 return (NULL); 181 } 182 183 return (m); 184 } 185 186 static int 187 icl_conn_receive_buf(struct icl_conn *ic, void *buf, size_t len) 188 { 189 struct iovec iov[1]; 190 struct uio uio; 191 struct socket *so; 192 int error, flags; 193 194 so = ic->ic_socket; 195 196 memset(&uio, 0, sizeof(uio)); 197 iov[0].iov_base = buf; 198 iov[0].iov_len = len; 199 uio.uio_iov = iov; 200 uio.uio_iovcnt = 1; 201 uio.uio_offset = 0; 202 uio.uio_resid = len; 203 uio.uio_segflg = UIO_SYSSPACE; 204 uio.uio_rw = UIO_READ; 205 206 flags = MSG_DONTWAIT; 207 error = soreceive(so, NULL, &uio, NULL, NULL, &flags); 208 if (error != 0) { 209 ICL_DEBUG("soreceive error %d", error); 210 return (-1); 211 } 212 if (uio.uio_resid != 0) { 213 ICL_DEBUG("short read"); 214 return (-1); 215 } 216 217 return (0); 218 } 219 220 static void 221 icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) 222 { 223 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 224 225 KASSERT(isp->ref_cnt == 0, ("freeing active PDU")); 226 m_freem(ip->ip_bhs_mbuf); 227 m_freem(ip->ip_ahs_mbuf); 228 m_freem(ip->ip_data_mbuf); 229 uma_zfree(icl_soft_pdu_zone, isp); 230 #ifdef DIAGNOSTIC 231 refcount_release(&ic->ic_outstanding_pdus); 232 #endif 233 } 234 235 static void 236 icl_soft_pdu_call_cb(struct icl_pdu *ip) 237 { 238 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 239 240 if (isp->cb != NULL) 241 isp->cb(ip, isp->error); 242 #ifdef DIAGNOSTIC 243 refcount_release(&ip->ip_conn->ic_outstanding_pdus); 244 #endif 245 uma_zfree(icl_soft_pdu_zone, isp); 246 } 247 248 static void 249 icl_soft_pdu_done(struct icl_pdu *ip, int error) 250 { 251 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 252 253 if (error != 0) 254 isp->error = error; 255 256 m_freem(ip->ip_bhs_mbuf); 257 ip->ip_bhs_mbuf = NULL; 258 m_freem(ip->ip_ahs_mbuf); 259 ip->ip_ahs_mbuf = NULL; 260 m_freem(ip->ip_data_mbuf); 261 ip->ip_data_mbuf = NULL; 262 263 if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) 264 icl_soft_pdu_call_cb(ip); 265 } 266 267 static void 268 icl_soft_mbuf_done(struct mbuf *mb) 269 { 270 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1; 271 272 icl_soft_pdu_call_cb(&isp->ip); 273 } 274 275 /* 276 * Allocate icl_pdu with empty BHS to fill up by the caller. 277 */ 278 struct icl_pdu * 279 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) 280 { 281 struct icl_soft_pdu *isp; 282 struct icl_pdu *ip; 283 284 #ifdef DIAGNOSTIC 285 refcount_acquire(&ic->ic_outstanding_pdus); 286 #endif 287 isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO); 288 if (isp == NULL) { 289 ICL_WARN("failed to allocate soft PDU"); 290 #ifdef DIAGNOSTIC 291 refcount_release(&ic->ic_outstanding_pdus); 292 #endif 293 return (NULL); 294 } 295 ip = &isp->ip; 296 ip->ip_conn = ic; 297 298 CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN); 299 ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA); 300 if (ip->ip_bhs_mbuf == NULL) { 301 ICL_WARN("failed to allocate BHS mbuf"); 302 icl_soft_conn_pdu_free(ic, ip); 303 return (NULL); 304 } 305 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); 306 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); 307 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); 308 309 return (ip); 310 } 311 312 static int 313 icl_pdu_ahs_length(const struct icl_pdu *request) 314 { 315 316 return (request->ip_bhs->bhs_total_ahs_len * 4); 317 } 318 319 static size_t 320 icl_pdu_data_segment_length(const struct icl_pdu *request) 321 { 322 uint32_t len = 0; 323 324 len += request->ip_bhs->bhs_data_segment_len[0]; 325 len <<= 8; 326 len += request->ip_bhs->bhs_data_segment_len[1]; 327 len <<= 8; 328 len += request->ip_bhs->bhs_data_segment_len[2]; 329 330 return (len); 331 } 332 333 size_t 334 icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, 335 const struct icl_pdu *request) 336 { 337 338 return (icl_pdu_data_segment_length(request)); 339 } 340 341 static void 342 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) 343 { 344 345 response->ip_bhs->bhs_data_segment_len[2] = len; 346 response->ip_bhs->bhs_data_segment_len[1] = len >> 8; 347 response->ip_bhs->bhs_data_segment_len[0] = len >> 16; 348 } 349 350 static size_t 351 icl_pdu_padding(const struct icl_pdu *ip) 352 { 353 354 if ((ip->ip_data_len % 4) != 0) 355 return (4 - (ip->ip_data_len % 4)); 356 357 return (0); 358 } 359 360 static size_t 361 icl_pdu_size(const struct icl_pdu *response) 362 { 363 size_t len; 364 365 KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); 366 367 len = sizeof(struct iscsi_bhs) + response->ip_data_len + 368 icl_pdu_padding(response); 369 if (response->ip_conn->ic_header_crc32c) 370 len += ISCSI_HEADER_DIGEST_SIZE; 371 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) 372 len += ISCSI_DATA_DIGEST_SIZE; 373 374 return (len); 375 } 376 377 static int 378 icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep) 379 { 380 381 if (icl_conn_receive_buf(request->ip_conn, 382 request->ip_bhs, sizeof(struct iscsi_bhs))) { 383 ICL_DEBUG("failed to receive BHS"); 384 return (-1); 385 } 386 387 *availablep -= sizeof(struct iscsi_bhs); 388 return (0); 389 } 390 391 static int 392 icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep) 393 { 394 395 request->ip_ahs_len = icl_pdu_ahs_length(request); 396 if (request->ip_ahs_len == 0) 397 return (0); 398 399 request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn, 400 request->ip_ahs_len); 401 if (request->ip_ahs_mbuf == NULL) { 402 ICL_DEBUG("failed to receive AHS"); 403 return (-1); 404 } 405 406 *availablep -= request->ip_ahs_len; 407 return (0); 408 } 409 410 static uint32_t 411 icl_mbuf_to_crc32c(const struct mbuf *m0) 412 { 413 uint32_t digest = 0xffffffff; 414 const struct mbuf *m; 415 416 for (m = m0; m != NULL; m = m->m_next) 417 digest = calculate_crc32c(digest, 418 mtod(m, const void *), m->m_len); 419 420 digest = digest ^ 0xffffffff; 421 422 return (digest); 423 } 424 425 static int 426 icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep) 427 { 428 uint32_t received_digest, valid_digest; 429 430 if (request->ip_conn->ic_header_crc32c == false) 431 return (0); 432 433 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); 434 if (icl_conn_receive_buf(request->ip_conn, 435 &received_digest, ISCSI_HEADER_DIGEST_SIZE)) { 436 ICL_DEBUG("failed to receive header digest"); 437 return (-1); 438 } 439 *availablep -= ISCSI_HEADER_DIGEST_SIZE; 440 441 /* Temporary attach AHS to BHS to calculate header digest. */ 442 request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf; 443 valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 444 request->ip_bhs_mbuf->m_next = NULL; 445 if (received_digest != valid_digest) { 446 ICL_WARN("header digest check failed; got 0x%x, " 447 "should be 0x%x", received_digest, valid_digest); 448 return (-1); 449 } 450 451 return (0); 452 } 453 454 /* 455 * Return the number of bytes that should be waiting in the receive socket 456 * before icl_pdu_receive_data_segment() gets called. 457 */ 458 static size_t 459 icl_pdu_data_segment_receive_len(const struct icl_pdu *request) 460 { 461 size_t len; 462 463 len = icl_pdu_data_segment_length(request); 464 if (len == 0) 465 return (0); 466 467 /* 468 * Account for the parts of data segment already read from 469 * the socket buffer. 470 */ 471 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 472 len -= request->ip_data_len; 473 474 /* 475 * Don't always wait for the full data segment to be delivered 476 * to the socket; this might badly affect performance due to 477 * TCP window scaling. 478 */ 479 if (len > partial_receive_len) { 480 #if 0 481 ICL_DEBUG("need %zd bytes of data, limiting to %zd", 482 len, partial_receive_len)); 483 #endif 484 len = partial_receive_len; 485 486 return (len); 487 } 488 489 /* 490 * Account for padding. Note that due to the way code is written, 491 * the icl_pdu_receive_data_segment() must always receive padding 492 * along with the last part of data segment, because it would be 493 * impossible to tell whether we've already received the full data 494 * segment including padding, or without it. 495 */ 496 if ((len % 4) != 0) 497 len += 4 - (len % 4); 498 499 #if 0 500 ICL_DEBUG("need %zd bytes of data", len)); 501 #endif 502 503 return (len); 504 } 505 506 static int 507 icl_pdu_receive_data_segment(struct icl_pdu *request, 508 size_t *availablep, bool *more_neededp) 509 { 510 struct icl_conn *ic; 511 size_t len, padding = 0; 512 struct mbuf *m; 513 514 ic = request->ip_conn; 515 516 *more_neededp = false; 517 ic->ic_receive_len = 0; 518 519 len = icl_pdu_data_segment_length(request); 520 if (len == 0) 521 return (0); 522 523 if ((len % 4) != 0) 524 padding = 4 - (len % 4); 525 526 /* 527 * Account for already received parts of data segment. 528 */ 529 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 530 len -= request->ip_data_len; 531 532 if (len + padding > *availablep) { 533 /* 534 * Not enough data in the socket buffer. Receive as much 535 * as we can. Don't receive padding, since, obviously, it's 536 * not the end of data segment yet. 537 */ 538 #if 0 539 ICL_DEBUG("limited from %zd to %zd", 540 len + padding, *availablep - padding)); 541 #endif 542 len = *availablep - padding; 543 *more_neededp = true; 544 padding = 0; 545 } 546 547 /* 548 * Must not try to receive padding without at least one byte 549 * of actual data segment. 550 */ 551 if (len > 0) { 552 m = icl_conn_receive(request->ip_conn, len + padding); 553 if (m == NULL) { 554 ICL_DEBUG("failed to receive data segment"); 555 return (-1); 556 } 557 558 if (request->ip_data_mbuf == NULL) 559 request->ip_data_mbuf = m; 560 else 561 m_cat(request->ip_data_mbuf, m); 562 563 request->ip_data_len += len; 564 *availablep -= len + padding; 565 } else 566 ICL_DEBUG("len 0"); 567 568 if (*more_neededp) 569 ic->ic_receive_len = 570 icl_pdu_data_segment_receive_len(request); 571 572 return (0); 573 } 574 575 static int 576 icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep) 577 { 578 uint32_t received_digest, valid_digest; 579 580 if (request->ip_conn->ic_data_crc32c == false) 581 return (0); 582 583 if (request->ip_data_len == 0) 584 return (0); 585 586 CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); 587 if (icl_conn_receive_buf(request->ip_conn, 588 &received_digest, ISCSI_DATA_DIGEST_SIZE)) { 589 ICL_DEBUG("failed to receive data digest"); 590 return (-1); 591 } 592 *availablep -= ISCSI_DATA_DIGEST_SIZE; 593 594 /* 595 * Note that ip_data_mbuf also contains padding; since digest 596 * calculation is supposed to include that, we iterate over 597 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. 598 */ 599 valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 600 if (received_digest != valid_digest) { 601 ICL_WARN("data digest check failed; got 0x%x, " 602 "should be 0x%x", received_digest, valid_digest); 603 return (-1); 604 } 605 606 return (0); 607 } 608 609 /* 610 * Somewhat contrary to the name, this attempts to receive only one 611 * "part" of PDU at a time; call it repeatedly until it returns non-NULL. 612 */ 613 static struct icl_pdu * 614 icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep) 615 { 616 struct icl_pdu *request; 617 struct socket *so; 618 size_t len; 619 int error; 620 bool more_needed; 621 622 so = ic->ic_socket; 623 624 if (ic->ic_receive_state == ICL_CONN_STATE_BHS) { 625 KASSERT(ic->ic_receive_pdu == NULL, 626 ("ic->ic_receive_pdu != NULL")); 627 request = icl_soft_conn_new_pdu(ic, M_NOWAIT); 628 if (request == NULL) { 629 ICL_DEBUG("failed to allocate PDU; " 630 "dropping connection"); 631 icl_conn_fail(ic); 632 return (NULL); 633 } 634 ic->ic_receive_pdu = request; 635 } else { 636 KASSERT(ic->ic_receive_pdu != NULL, 637 ("ic->ic_receive_pdu == NULL")); 638 request = ic->ic_receive_pdu; 639 } 640 641 if (*availablep < ic->ic_receive_len) { 642 #if 0 643 ICL_DEBUG("not enough data; need %zd, " 644 "have %zd", ic->ic_receive_len, *availablep); 645 #endif 646 return (NULL); 647 } 648 649 switch (ic->ic_receive_state) { 650 case ICL_CONN_STATE_BHS: 651 //ICL_DEBUG("receiving BHS"); 652 error = icl_pdu_receive_bhs(request, availablep); 653 if (error != 0) { 654 ICL_DEBUG("failed to receive BHS; " 655 "dropping connection"); 656 break; 657 } 658 659 /* 660 * We don't enforce any limit for AHS length; 661 * its length is stored in 8 bit field. 662 */ 663 664 len = icl_pdu_data_segment_length(request); 665 if (len > ic->ic_max_data_segment_length) { 666 ICL_WARN("received data segment " 667 "length %zd is larger than negotiated " 668 "MaxDataSegmentLength %zd; " 669 "dropping connection", 670 len, ic->ic_max_data_segment_length); 671 error = EINVAL; 672 break; 673 } 674 675 ic->ic_receive_state = ICL_CONN_STATE_AHS; 676 ic->ic_receive_len = icl_pdu_ahs_length(request); 677 break; 678 679 case ICL_CONN_STATE_AHS: 680 //ICL_DEBUG("receiving AHS"); 681 error = icl_pdu_receive_ahs(request, availablep); 682 if (error != 0) { 683 ICL_DEBUG("failed to receive AHS; " 684 "dropping connection"); 685 break; 686 } 687 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST; 688 if (ic->ic_header_crc32c == false) 689 ic->ic_receive_len = 0; 690 else 691 ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE; 692 break; 693 694 case ICL_CONN_STATE_HEADER_DIGEST: 695 //ICL_DEBUG("receiving header digest"); 696 error = icl_pdu_check_header_digest(request, availablep); 697 if (error != 0) { 698 ICL_DEBUG("header digest failed; " 699 "dropping connection"); 700 break; 701 } 702 703 ic->ic_receive_state = ICL_CONN_STATE_DATA; 704 ic->ic_receive_len = 705 icl_pdu_data_segment_receive_len(request); 706 break; 707 708 case ICL_CONN_STATE_DATA: 709 //ICL_DEBUG("receiving data segment"); 710 error = icl_pdu_receive_data_segment(request, availablep, 711 &more_needed); 712 if (error != 0) { 713 ICL_DEBUG("failed to receive data segment;" 714 "dropping connection"); 715 break; 716 } 717 718 if (more_needed) 719 break; 720 721 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST; 722 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) 723 ic->ic_receive_len = 0; 724 else 725 ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE; 726 break; 727 728 case ICL_CONN_STATE_DATA_DIGEST: 729 //ICL_DEBUG("receiving data digest"); 730 error = icl_pdu_check_data_digest(request, availablep); 731 if (error != 0) { 732 ICL_DEBUG("data digest failed; " 733 "dropping connection"); 734 break; 735 } 736 737 /* 738 * We've received complete PDU; reset the receive state machine 739 * and return the PDU. 740 */ 741 ic->ic_receive_state = ICL_CONN_STATE_BHS; 742 ic->ic_receive_len = sizeof(struct iscsi_bhs); 743 ic->ic_receive_pdu = NULL; 744 return (request); 745 746 default: 747 panic("invalid ic_receive_state %d\n", ic->ic_receive_state); 748 } 749 750 if (error != 0) { 751 /* 752 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu 753 * and will get freed in icl_soft_conn_close(). 754 */ 755 icl_conn_fail(ic); 756 } 757 758 return (NULL); 759 } 760 761 static void 762 icl_conn_receive_pdus(struct icl_conn *ic, size_t available) 763 { 764 struct icl_pdu *response; 765 struct socket *so; 766 767 so = ic->ic_socket; 768 769 /* 770 * This can never happen; we're careful to only mess with ic->ic_socket 771 * pointer when the send/receive threads are not running. 772 */ 773 KASSERT(so != NULL, ("NULL socket")); 774 775 for (;;) { 776 if (ic->ic_disconnecting) 777 return; 778 779 if (so->so_error != 0) { 780 ICL_DEBUG("connection error %d; " 781 "dropping connection", so->so_error); 782 icl_conn_fail(ic); 783 return; 784 } 785 786 /* 787 * Loop until we have a complete PDU or there is not enough 788 * data in the socket buffer. 789 */ 790 if (available < ic->ic_receive_len) { 791 #if 0 792 ICL_DEBUG("not enough data; have %zd, " 793 "need %zd", available, 794 ic->ic_receive_len); 795 #endif 796 return; 797 } 798 799 response = icl_conn_receive_pdu(ic, &available); 800 if (response == NULL) 801 continue; 802 803 if (response->ip_ahs_len > 0) { 804 ICL_WARN("received PDU with unsupported " 805 "AHS; opcode 0x%x; dropping connection", 806 response->ip_bhs->bhs_opcode); 807 icl_soft_conn_pdu_free(ic, response); 808 icl_conn_fail(ic); 809 return; 810 } 811 812 (ic->ic_receive)(response); 813 } 814 } 815 816 static void 817 icl_receive_thread(void *arg) 818 { 819 struct icl_conn *ic; 820 size_t available; 821 struct socket *so; 822 823 ic = arg; 824 so = ic->ic_socket; 825 826 for (;;) { 827 if (ic->ic_disconnecting) { 828 //ICL_DEBUG("terminating"); 829 break; 830 } 831 832 /* 833 * Set the low watermark, to be checked by 834 * soreadable() in icl_soupcall_receive() 835 * to avoid unnecessary wakeups until there 836 * is enough data received to read the PDU. 837 */ 838 SOCKBUF_LOCK(&so->so_rcv); 839 available = sbavail(&so->so_rcv); 840 if (available < ic->ic_receive_len) { 841 so->so_rcv.sb_lowat = ic->ic_receive_len; 842 cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); 843 } else 844 so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; 845 SOCKBUF_UNLOCK(&so->so_rcv); 846 847 icl_conn_receive_pdus(ic, available); 848 } 849 850 ICL_CONN_LOCK(ic); 851 ic->ic_receive_running = false; 852 cv_signal(&ic->ic_send_cv); 853 ICL_CONN_UNLOCK(ic); 854 kthread_exit(); 855 } 856 857 static int 858 icl_soupcall_receive(struct socket *so, void *arg, int waitflag) 859 { 860 struct icl_conn *ic; 861 862 if (!soreadable(so)) 863 return (SU_OK); 864 865 ic = arg; 866 cv_signal(&ic->ic_receive_cv); 867 return (SU_OK); 868 } 869 870 static int 871 icl_pdu_finalize(struct icl_pdu *request) 872 { 873 size_t padding, pdu_len; 874 uint32_t digest, zero = 0; 875 int ok; 876 struct icl_conn *ic; 877 878 ic = request->ip_conn; 879 880 icl_pdu_set_data_segment_length(request, request->ip_data_len); 881 882 pdu_len = icl_pdu_size(request); 883 884 if (ic->ic_header_crc32c) { 885 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 886 ok = m_append(request->ip_bhs_mbuf, sizeof(digest), 887 (void *)&digest); 888 if (ok != 1) { 889 ICL_WARN("failed to append header digest"); 890 return (1); 891 } 892 } 893 894 if (request->ip_data_len != 0) { 895 padding = icl_pdu_padding(request); 896 if (padding > 0) { 897 ok = m_append(request->ip_data_mbuf, padding, 898 (void *)&zero); 899 if (ok != 1) { 900 ICL_WARN("failed to append padding"); 901 return (1); 902 } 903 } 904 905 if (ic->ic_data_crc32c) { 906 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 907 908 ok = m_append(request->ip_data_mbuf, sizeof(digest), 909 (void *)&digest); 910 if (ok != 1) { 911 ICL_WARN("failed to append data digest"); 912 return (1); 913 } 914 } 915 916 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); 917 request->ip_data_mbuf = NULL; 918 } 919 920 request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; 921 922 return (0); 923 } 924 925 static void 926 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue) 927 { 928 struct icl_pdu *request, *request2; 929 struct socket *so; 930 long available, size, size2; 931 int coalesced, error; 932 933 ICL_CONN_LOCK_ASSERT_NOT(ic); 934 935 so = ic->ic_socket; 936 937 SOCKBUF_LOCK(&so->so_snd); 938 /* 939 * Check how much space do we have for transmit. We can't just 940 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, 941 * as it always frees the mbuf chain passed to it, even in case 942 * of error. 943 */ 944 available = sbspace(&so->so_snd); 945 946 /* 947 * Notify the socket upcall that we don't need wakeups 948 * for the time being. 949 */ 950 so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; 951 SOCKBUF_UNLOCK(&so->so_snd); 952 953 while (!STAILQ_EMPTY(queue)) { 954 request = STAILQ_FIRST(queue); 955 size = icl_pdu_size(request); 956 if (available < size) { 957 /* 958 * Set the low watermark, to be checked by 959 * sowriteable() in icl_soupcall_send() 960 * to avoid unnecessary wakeups until there 961 * is enough space for the PDU to fit. 962 */ 963 SOCKBUF_LOCK(&so->so_snd); 964 available = sbspace(&so->so_snd); 965 if (available < size) { 966 #if 1 967 ICL_DEBUG("no space to send; " 968 "have %ld, need %ld", 969 available, size); 970 #endif 971 so->so_snd.sb_lowat = max(size, 972 so->so_snd.sb_hiwat / 8); 973 SOCKBUF_UNLOCK(&so->so_snd); 974 return; 975 } 976 SOCKBUF_UNLOCK(&so->so_snd); 977 } 978 STAILQ_REMOVE_HEAD(queue, ip_next); 979 error = icl_pdu_finalize(request); 980 if (error != 0) { 981 ICL_DEBUG("failed to finalize PDU; " 982 "dropping connection"); 983 icl_soft_pdu_done(request, EIO); 984 icl_conn_fail(ic); 985 return; 986 } 987 if (coalesce) { 988 coalesced = 1; 989 for (;;) { 990 request2 = STAILQ_FIRST(queue); 991 if (request2 == NULL) 992 break; 993 size2 = icl_pdu_size(request2); 994 if (available < size + size2) 995 break; 996 STAILQ_REMOVE_HEAD(queue, ip_next); 997 error = icl_pdu_finalize(request2); 998 if (error != 0) { 999 ICL_DEBUG("failed to finalize PDU; " 1000 "dropping connection"); 1001 icl_soft_pdu_done(request, EIO); 1002 icl_soft_pdu_done(request2, EIO); 1003 icl_conn_fail(ic); 1004 return; 1005 } 1006 m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf); 1007 request2->ip_bhs_mbuf = NULL; 1008 request->ip_bhs_mbuf->m_pkthdr.len += size2; 1009 size += size2; 1010 STAILQ_REMOVE_AFTER(queue, request, ip_next); 1011 icl_soft_pdu_done(request2, 0); 1012 coalesced++; 1013 } 1014 #if 0 1015 if (coalesced > 1) { 1016 ICL_DEBUG("coalesced %d PDUs into %ld bytes", 1017 coalesced, size); 1018 } 1019 #endif 1020 } 1021 available -= size; 1022 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, 1023 NULL, MSG_DONTWAIT, curthread); 1024 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */ 1025 if (error != 0) { 1026 ICL_DEBUG("failed to send PDU, error %d; " 1027 "dropping connection", error); 1028 icl_soft_pdu_done(request, error); 1029 icl_conn_fail(ic); 1030 return; 1031 } 1032 icl_soft_pdu_done(request, 0); 1033 } 1034 } 1035 1036 static void 1037 icl_send_thread(void *arg) 1038 { 1039 struct icl_conn *ic; 1040 struct icl_pdu_stailq queue; 1041 1042 ic = arg; 1043 1044 STAILQ_INIT(&queue); 1045 1046 ICL_CONN_LOCK(ic); 1047 for (;;) { 1048 for (;;) { 1049 /* 1050 * If the local queue is empty, populate it from 1051 * the main one. This way the icl_conn_send_pdus() 1052 * can go through all the queued PDUs without holding 1053 * any locks. 1054 */ 1055 if (STAILQ_EMPTY(&queue)) 1056 STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu); 1057 1058 ic->ic_check_send_space = false; 1059 ICL_CONN_UNLOCK(ic); 1060 icl_conn_send_pdus(ic, &queue); 1061 ICL_CONN_LOCK(ic); 1062 1063 /* 1064 * The icl_soupcall_send() was called since the last 1065 * call to sbspace(); go around; 1066 */ 1067 if (ic->ic_check_send_space) 1068 continue; 1069 1070 /* 1071 * Local queue is empty, but we still have PDUs 1072 * in the main one; go around. 1073 */ 1074 if (STAILQ_EMPTY(&queue) && 1075 !STAILQ_EMPTY(&ic->ic_to_send)) 1076 continue; 1077 1078 /* 1079 * There might be some stuff in the local queue, 1080 * which didn't get sent due to not having enough send 1081 * space. Wait for socket upcall. 1082 */ 1083 break; 1084 } 1085 1086 if (ic->ic_disconnecting) { 1087 //ICL_DEBUG("terminating"); 1088 break; 1089 } 1090 1091 cv_wait(&ic->ic_send_cv, ic->ic_lock); 1092 } 1093 1094 /* 1095 * We're exiting; move PDUs back to the main queue, so they can 1096 * get freed properly. At this point ordering doesn't matter. 1097 */ 1098 STAILQ_CONCAT(&ic->ic_to_send, &queue); 1099 1100 ic->ic_send_running = false; 1101 cv_signal(&ic->ic_send_cv); 1102 ICL_CONN_UNLOCK(ic); 1103 kthread_exit(); 1104 } 1105 1106 static int 1107 icl_soupcall_send(struct socket *so, void *arg, int waitflag) 1108 { 1109 struct icl_conn *ic; 1110 1111 if (!sowriteable(so)) 1112 return (SU_OK); 1113 1114 ic = arg; 1115 1116 ICL_CONN_LOCK(ic); 1117 ic->ic_check_send_space = true; 1118 ICL_CONN_UNLOCK(ic); 1119 1120 cv_signal(&ic->ic_send_cv); 1121 1122 return (SU_OK); 1123 } 1124 1125 static int 1126 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, 1127 const void *addr, size_t len, int flags) 1128 { 1129 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; 1130 struct mbuf *mb, *newmb; 1131 size_t copylen, off = 0; 1132 1133 KASSERT(len > 0, ("len == 0")); 1134 1135 if (flags & ICL_NOCOPY) { 1136 newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA); 1137 if (newmb == NULL) { 1138 ICL_WARN("failed to allocate mbuf"); 1139 return (ENOMEM); 1140 } 1141 1142 newmb->m_flags |= M_RDONLY; 1143 m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt, 1144 icl_soft_mbuf_done, isp, NULL); 1145 newmb->m_len = len; 1146 } else { 1147 newmb = m_getm2(NULL, len, flags, MT_DATA, 0); 1148 if (newmb == NULL) { 1149 ICL_WARN("failed to allocate mbuf for %zd bytes", len); 1150 return (ENOMEM); 1151 } 1152 1153 for (mb = newmb; mb != NULL; mb = mb->m_next) { 1154 copylen = min(M_TRAILINGSPACE(mb), len - off); 1155 memcpy(mtod(mb, char *), (const char *)addr + off, copylen); 1156 mb->m_len = copylen; 1157 off += copylen; 1158 } 1159 KASSERT(off == len, ("%s: off != len", __func__)); 1160 } 1161 1162 if (request->ip_data_mbuf == NULL) { 1163 request->ip_data_mbuf = newmb; 1164 request->ip_data_len = len; 1165 } else { 1166 m_cat(request->ip_data_mbuf, newmb); 1167 request->ip_data_len += len; 1168 } 1169 1170 return (0); 1171 } 1172 1173 void 1174 icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, 1175 size_t off, void *addr, size_t len) 1176 { 1177 1178 m_copydata(ip->ip_data_mbuf, off, len, addr); 1179 } 1180 1181 static void 1182 icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) 1183 { 1184 1185 icl_soft_conn_pdu_queue_cb(ic, ip, NULL); 1186 } 1187 1188 static void 1189 icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip, 1190 icl_pdu_cb cb) 1191 { 1192 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 1193 1194 ICL_CONN_LOCK_ASSERT(ic); 1195 isp->ref_cnt++; 1196 isp->cb = cb; 1197 1198 if (ic->ic_disconnecting || ic->ic_socket == NULL) { 1199 ICL_DEBUG("icl_pdu_queue on closed connection"); 1200 icl_soft_pdu_done(ip, ENOTCONN); 1201 return; 1202 } 1203 1204 if (!STAILQ_EMPTY(&ic->ic_to_send)) { 1205 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1206 /* 1207 * If the queue is not empty, someone else had already 1208 * signaled the send thread; no need to do that again, 1209 * just return. 1210 */ 1211 return; 1212 } 1213 1214 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1215 cv_signal(&ic->ic_send_cv); 1216 } 1217 1218 static struct icl_conn * 1219 icl_soft_new_conn(const char *name, struct mtx *lock) 1220 { 1221 struct icl_conn *ic; 1222 1223 refcount_acquire(&icl_ncons); 1224 1225 ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO); 1226 1227 STAILQ_INIT(&ic->ic_to_send); 1228 ic->ic_lock = lock; 1229 cv_init(&ic->ic_send_cv, "icl_tx"); 1230 cv_init(&ic->ic_receive_cv, "icl_rx"); 1231 #ifdef DIAGNOSTIC 1232 refcount_init(&ic->ic_outstanding_pdus, 0); 1233 #endif 1234 ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; 1235 ic->ic_name = name; 1236 ic->ic_offload = "None"; 1237 ic->ic_unmapped = false; 1238 1239 return (ic); 1240 } 1241 1242 void 1243 icl_soft_conn_free(struct icl_conn *ic) 1244 { 1245 1246 #ifdef DIAGNOSTIC 1247 KASSERT(ic->ic_outstanding_pdus == 0, 1248 ("destroying session with %d outstanding PDUs", 1249 ic->ic_outstanding_pdus)); 1250 #endif 1251 cv_destroy(&ic->ic_send_cv); 1252 cv_destroy(&ic->ic_receive_cv); 1253 kobj_delete((struct kobj *)ic, M_ICL_SOFT); 1254 refcount_release(&icl_ncons); 1255 } 1256 1257 static int 1258 icl_conn_start(struct icl_conn *ic) 1259 { 1260 size_t minspace; 1261 struct sockopt opt; 1262 int error, one = 1; 1263 1264 ICL_CONN_LOCK(ic); 1265 1266 /* 1267 * XXX: Ugly hack. 1268 */ 1269 if (ic->ic_socket == NULL) { 1270 ICL_CONN_UNLOCK(ic); 1271 return (EINVAL); 1272 } 1273 1274 ic->ic_receive_state = ICL_CONN_STATE_BHS; 1275 ic->ic_receive_len = sizeof(struct iscsi_bhs); 1276 ic->ic_disconnecting = false; 1277 1278 ICL_CONN_UNLOCK(ic); 1279 1280 /* 1281 * For sendspace, this is required because the current code cannot 1282 * send a PDU in pieces; thus, the minimum buffer size is equal 1283 * to the maximum PDU size. "+4" is to account for possible padding. 1284 * 1285 * What we should actually do here is to use autoscaling, but set 1286 * some minimal buffer size to "minspace". I don't know a way to do 1287 * that, though. 1288 */ 1289 minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + 1290 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1291 if (sendspace < minspace) { 1292 ICL_WARN("kern.icl.sendspace too low; must be at least %zd", 1293 minspace); 1294 sendspace = minspace; 1295 } 1296 if (recvspace < minspace) { 1297 ICL_WARN("kern.icl.recvspace too low; must be at least %zd", 1298 minspace); 1299 recvspace = minspace; 1300 } 1301 1302 error = soreserve(ic->ic_socket, sendspace, recvspace); 1303 if (error != 0) { 1304 ICL_WARN("soreserve failed with error %d", error); 1305 icl_soft_conn_close(ic); 1306 return (error); 1307 } 1308 ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; 1309 ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; 1310 1311 /* 1312 * Disable Nagle. 1313 */ 1314 bzero(&opt, sizeof(opt)); 1315 opt.sopt_dir = SOPT_SET; 1316 opt.sopt_level = IPPROTO_TCP; 1317 opt.sopt_name = TCP_NODELAY; 1318 opt.sopt_val = &one; 1319 opt.sopt_valsize = sizeof(one); 1320 error = sosetopt(ic->ic_socket, &opt); 1321 if (error != 0) { 1322 ICL_WARN("disabling TCP_NODELAY failed with error %d", error); 1323 icl_soft_conn_close(ic); 1324 return (error); 1325 } 1326 1327 /* 1328 * Register socket upcall, to get notified about incoming PDUs 1329 * and free space to send outgoing ones. 1330 */ 1331 SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1332 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); 1333 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1334 SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1335 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); 1336 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1337 1338 /* 1339 * Start threads. 1340 */ 1341 ICL_CONN_LOCK(ic); 1342 ic->ic_send_running = ic->ic_receive_running = true; 1343 ICL_CONN_UNLOCK(ic); 1344 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", 1345 ic->ic_name); 1346 if (error != 0) { 1347 ICL_WARN("kthread_add(9) failed with error %d", error); 1348 ICL_CONN_LOCK(ic); 1349 ic->ic_send_running = ic->ic_receive_running = false; 1350 cv_signal(&ic->ic_send_cv); 1351 ICL_CONN_UNLOCK(ic); 1352 icl_soft_conn_close(ic); 1353 return (error); 1354 } 1355 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", 1356 ic->ic_name); 1357 if (error != 0) { 1358 ICL_WARN("kthread_add(9) failed with error %d", error); 1359 ICL_CONN_LOCK(ic); 1360 ic->ic_receive_running = false; 1361 cv_signal(&ic->ic_send_cv); 1362 ICL_CONN_UNLOCK(ic); 1363 icl_soft_conn_close(ic); 1364 return (error); 1365 } 1366 1367 return (0); 1368 } 1369 1370 int 1371 icl_soft_conn_handoff(struct icl_conn *ic, int fd) 1372 { 1373 struct file *fp; 1374 struct socket *so; 1375 cap_rights_t rights; 1376 int error; 1377 1378 ICL_CONN_LOCK_ASSERT_NOT(ic); 1379 1380 #ifdef ICL_KERNEL_PROXY 1381 /* 1382 * We're transitioning to Full Feature phase, and we don't 1383 * really care. 1384 */ 1385 if (fd == 0) { 1386 ICL_CONN_LOCK(ic); 1387 if (ic->ic_socket == NULL) { 1388 ICL_CONN_UNLOCK(ic); 1389 ICL_WARN("proxy handoff without connect"); 1390 return (EINVAL); 1391 } 1392 ICL_CONN_UNLOCK(ic); 1393 return (0); 1394 } 1395 #endif 1396 1397 /* 1398 * Steal the socket from userland. 1399 */ 1400 error = fget(curthread, fd, 1401 cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); 1402 if (error != 0) 1403 return (error); 1404 if (fp->f_type != DTYPE_SOCKET) { 1405 fdrop(fp, curthread); 1406 return (EINVAL); 1407 } 1408 so = fp->f_data; 1409 if (so->so_type != SOCK_STREAM) { 1410 fdrop(fp, curthread); 1411 return (EINVAL); 1412 } 1413 1414 ICL_CONN_LOCK(ic); 1415 1416 if (ic->ic_socket != NULL) { 1417 ICL_CONN_UNLOCK(ic); 1418 fdrop(fp, curthread); 1419 return (EBUSY); 1420 } 1421 1422 ic->ic_socket = fp->f_data; 1423 fp->f_ops = &badfileops; 1424 fp->f_data = NULL; 1425 fdrop(fp, curthread); 1426 ICL_CONN_UNLOCK(ic); 1427 1428 error = icl_conn_start(ic); 1429 1430 return (error); 1431 } 1432 1433 void 1434 icl_soft_conn_close(struct icl_conn *ic) 1435 { 1436 struct icl_pdu *pdu; 1437 struct socket *so; 1438 1439 ICL_CONN_LOCK(ic); 1440 1441 /* 1442 * Wake up the threads, so they can properly terminate. 1443 */ 1444 ic->ic_disconnecting = true; 1445 while (ic->ic_receive_running || ic->ic_send_running) { 1446 cv_signal(&ic->ic_receive_cv); 1447 cv_signal(&ic->ic_send_cv); 1448 cv_wait(&ic->ic_send_cv, ic->ic_lock); 1449 } 1450 1451 /* Some other thread could close the connection same time. */ 1452 so = ic->ic_socket; 1453 if (so == NULL) { 1454 ICL_CONN_UNLOCK(ic); 1455 return; 1456 } 1457 ic->ic_socket = NULL; 1458 1459 /* 1460 * Deregister socket upcalls. 1461 */ 1462 ICL_CONN_UNLOCK(ic); 1463 SOCKBUF_LOCK(&so->so_snd); 1464 if (so->so_snd.sb_upcall != NULL) 1465 soupcall_clear(so, SO_SND); 1466 SOCKBUF_UNLOCK(&so->so_snd); 1467 SOCKBUF_LOCK(&so->so_rcv); 1468 if (so->so_rcv.sb_upcall != NULL) 1469 soupcall_clear(so, SO_RCV); 1470 SOCKBUF_UNLOCK(&so->so_rcv); 1471 soclose(so); 1472 ICL_CONN_LOCK(ic); 1473 1474 if (ic->ic_receive_pdu != NULL) { 1475 //ICL_DEBUG("freeing partially received PDU"); 1476 icl_soft_conn_pdu_free(ic, ic->ic_receive_pdu); 1477 ic->ic_receive_pdu = NULL; 1478 } 1479 1480 /* 1481 * Remove any outstanding PDUs from the send queue. 1482 */ 1483 while (!STAILQ_EMPTY(&ic->ic_to_send)) { 1484 pdu = STAILQ_FIRST(&ic->ic_to_send); 1485 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); 1486 icl_soft_pdu_done(pdu, ENOTCONN); 1487 } 1488 1489 KASSERT(STAILQ_EMPTY(&ic->ic_to_send), 1490 ("destroying session with non-empty send queue")); 1491 ICL_CONN_UNLOCK(ic); 1492 } 1493 1494 int 1495 icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, 1496 struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) 1497 { 1498 1499 return (0); 1500 } 1501 1502 void 1503 icl_soft_conn_task_done(struct icl_conn *ic, void *prv) 1504 { 1505 } 1506 1507 int 1508 icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, 1509 uint32_t *transfer_tag, void **prvp) 1510 { 1511 1512 return (0); 1513 } 1514 1515 void 1516 icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv) 1517 { 1518 } 1519 1520 static int 1521 icl_soft_limits(struct icl_drv_limits *idl) 1522 { 1523 1524 idl->idl_max_recv_data_segment_length = 128 * 1024; 1525 idl->idl_max_send_data_segment_length = 128 * 1024; 1526 idl->idl_max_burst_length = 262144; 1527 idl->idl_first_burst_length = 65536; 1528 1529 return (0); 1530 } 1531 1532 #ifdef ICL_KERNEL_PROXY 1533 int 1534 icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype, 1535 int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) 1536 { 1537 1538 return (icl_soft_proxy_connect(ic, domain, socktype, protocol, 1539 from_sa, to_sa)); 1540 } 1541 1542 int 1543 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so) 1544 { 1545 int error; 1546 1547 ICL_CONN_LOCK_ASSERT_NOT(ic); 1548 1549 if (so->so_type != SOCK_STREAM) 1550 return (EINVAL); 1551 1552 ICL_CONN_LOCK(ic); 1553 if (ic->ic_socket != NULL) { 1554 ICL_CONN_UNLOCK(ic); 1555 return (EBUSY); 1556 } 1557 ic->ic_socket = so; 1558 ICL_CONN_UNLOCK(ic); 1559 1560 error = icl_conn_start(ic); 1561 1562 return (error); 1563 } 1564 #endif /* ICL_KERNEL_PROXY */ 1565 1566 static int 1567 icl_soft_load(void) 1568 { 1569 int error; 1570 1571 icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu", 1572 sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL, 1573 UMA_ALIGN_PTR, 0); 1574 refcount_init(&icl_ncons, 0); 1575 1576 /* 1577 * The reason we call this "none" is that to the user, 1578 * it's known as "offload driver"; "offload driver: soft" 1579 * doesn't make much sense. 1580 */ 1581 error = icl_register("none", false, 0, 1582 icl_soft_limits, icl_soft_new_conn); 1583 KASSERT(error == 0, ("failed to register")); 1584 1585 #if defined(ICL_KERNEL_PROXY) && 0 1586 /* 1587 * Debugging aid for kernel proxy functionality. 1588 */ 1589 error = icl_register("proxytest", true, 0, 1590 icl_soft_limits, icl_soft_new_conn); 1591 KASSERT(error == 0, ("failed to register")); 1592 #endif 1593 1594 return (error); 1595 } 1596 1597 static int 1598 icl_soft_unload(void) 1599 { 1600 1601 if (icl_ncons != 0) 1602 return (EBUSY); 1603 1604 icl_unregister("none", false); 1605 #if defined(ICL_KERNEL_PROXY) && 0 1606 icl_unregister("proxytest", true); 1607 #endif 1608 1609 uma_zdestroy(icl_soft_pdu_zone); 1610 1611 return (0); 1612 } 1613 1614 static int 1615 icl_soft_modevent(module_t mod, int what, void *arg) 1616 { 1617 1618 switch (what) { 1619 case MOD_LOAD: 1620 return (icl_soft_load()); 1621 case MOD_UNLOAD: 1622 return (icl_soft_unload()); 1623 default: 1624 return (EINVAL); 1625 } 1626 } 1627 1628 moduledata_t icl_soft_data = { 1629 "icl_soft", 1630 icl_soft_modevent, 1631 0 1632 }; 1633 1634 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); 1635 MODULE_DEPEND(icl_soft, icl, 1, 1, 1); 1636 MODULE_VERSION(icl_soft, 1); 1637