1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * This software was developed by Edward Tomasz Napierala under sponsorship 8 * from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 */ 32 33 /* 34 * Software implementation of iSCSI Common Layer kobj(9) interface. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/gsb_crc32.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/uio.h>
#include <vm/uma.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <icl_conn_if.h>

/*
 * Software PDU: the generic icl_pdu plus state private to this backend.
 * The embedded icl_pdu must stay the first member so the two structures
 * can be cast back and forth.  Allocated from icl_soft_pdu_zone.
 */
struct icl_soft_pdu {
	struct icl_pdu ip;

	/* soft specific stuff goes here. */
	u_int ref_cnt;		/* outstanding refs, e.g. ICL_NOCOPY mbufs in flight */
	icl_pdu_cb cb;		/* completion callback, invoked on final release */
	int error;		/* first error recorded, passed to the callback */
};

/* Merge several small queued PDUs into a single sosend() where possible. */
static int coalesce = 1;
SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &coalesce, 0, "Try to coalesce PDUs before sending");
/*
 * Cap on how much of a large data segment we wait for before reading,
 * so big segments are consumed piecewise rather than waiting for the
 * whole segment to accumulate in the socket buffer.
 */
static int partial_receive_len = 128 * 1024;
SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
    &partial_receive_len, 0, "Minimum read size for partially received "
    "data segment");
static int sendspace = 1048576;
SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1048576;
SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");

static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend");
static uma_zone_t icl_soft_pdu_zone;

/* Count of existing connections (refcount-style; not read in this chunk). */
static volatile u_int icl_ncons;

/*
 * Convenience wrappers around the connection lock supplied by the owner.
 * NOTE(review): X is deliberately used as a plain identifier here; callers
 * in this file only pass simple variables, so the missing parentheses
 * around X are harmless in practice.
 */
#define ICL_CONN_LOCK(X)		mtx_lock(X->ic_lock)
#define ICL_CONN_UNLOCK(X)		mtx_unlock(X->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)		mtx_assert(X->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert(X->ic_lock, MA_NOTOWNED)

STAILQ_HEAD(icl_pdu_stailq, icl_pdu);

static icl_conn_new_pdu_t	icl_soft_conn_new_pdu;
static icl_conn_pdu_free_t	icl_soft_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t
				icl_soft_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t	icl_soft_conn_pdu_append_data;
static icl_conn_pdu_get_data_t	icl_soft_conn_pdu_get_data;
static icl_conn_pdu_queue_t	icl_soft_conn_pdu_queue;
static icl_conn_pdu_queue_cb_t	icl_soft_conn_pdu_queue_cb;
static icl_conn_handoff_t	icl_soft_conn_handoff;
static icl_conn_free_t		icl_soft_conn_free;
static icl_conn_close_t		icl_soft_conn_close;
static icl_conn_task_setup_t	icl_soft_conn_task_setup;
static icl_conn_task_done_t	icl_soft_conn_task_done;
static icl_conn_transfer_setup_t	icl_soft_conn_transfer_setup;
static icl_conn_transfer_done_t	icl_soft_conn_transfer_done;
#ifdef ICL_KERNEL_PROXY
static icl_conn_connect_t	icl_soft_conn_connect;
#endif

/* kobj(9) dispatch table implementing the ICL connection interface. */
static kobj_method_t icl_soft_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length,
	    icl_soft_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
	KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
	KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb),
	KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
	KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
	KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
	KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done),
	KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup),
	KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done),
#ifdef ICL_KERNEL_PROXY
	KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect),
#endif
	{ 0, 0 }
};

DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn));

/*
 * Fail the connection: flag the socket with an error and notify the
 * owner via its ic_error callback.  No-op if the socket is already gone.
 */
static void
icl_conn_fail(struct icl_conn *ic)
{
	if (ic->ic_socket == NULL)
		return;

	/*
	 * XXX: EDOOFUS is a placeholder; no specific errno is propagated
	 * to the peer here.
	 */
	ic->ic_socket->so_error = EDOOFUS;
	(ic->ic_error)(ic);
}

/*
 * Read exactly 'len' bytes from the connection's socket into a freshly
 * allocated mbuf chain.  Non-blocking (MSG_DONTWAIT); the caller is
 * expected to have verified that enough data is already present in the
 * socket buffer.  Returns NULL on soreceive() error or short read.
 */
static struct mbuf *
icl_conn_receive(struct icl_conn *ic, size_t len)
{
	struct uio uio;
	struct socket *so;
	struct mbuf *m;
	int error, flags;

	so = ic->ic_socket;

	memset(&uio, 0, sizeof(uio));
	uio.uio_resid = len;

	flags = MSG_DONTWAIT;
	error = soreceive(so, NULL, &uio, &m, NULL, &flags);
	if (error != 0) {
		ICL_DEBUG("soreceive error %d", error);
		return (NULL);
	}
	if (uio.uio_resid != 0) {
		m_freem(m);
		ICL_DEBUG("short read");
		return (NULL);
	}

	return (m);
}

/*
 * Like icl_conn_receive(), but copy exactly 'len' bytes into the
 * caller-supplied buffer instead of returning an mbuf chain.
 * Returns 0 on success, -1 on error or short read.
 */
static int
icl_conn_receive_buf(struct icl_conn *ic, void *buf, size_t len)
{
	struct iovec iov[1];
	struct uio uio;
	struct socket *so;
	int error, flags;

	so = ic->ic_socket;

	memset(&uio, 0, sizeof(uio));
	iov[0].iov_base = buf;
	iov[0].iov_len = len;
	uio.uio_iov = iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = len;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;

	flags = MSG_DONTWAIT;
	error = soreceive(so, NULL, &uio, NULL, NULL, &flags);
	if (error != 0) {
		ICL_DEBUG("soreceive error %d", error);
		return (-1);
	}
	if (uio.uio_resid != 0) {
		ICL_DEBUG("short read");
		return (-1);
	}

	return (0);
}

/*
 * Release a PDU that holds no outstanding references: free its mbufs and
 * return the wrapper to the zone.
 */
static void
icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;

	KASSERT(isp->ref_cnt == 0, ("freeing active PDU"));
	m_freem(ip->ip_bhs_mbuf);
	m_freem(ip->ip_ahs_mbuf);
	m_freem(ip->ip_data_mbuf);
	uma_zfree(icl_soft_pdu_zone, isp);
#ifdef DIAGNOSTIC
	refcount_release(&ic->ic_outstanding_pdus);
#endif
}

/*
 * Final-release path: run the completion callback (if any) with the
 * recorded error and free the PDU wrapper.  The mbufs are expected to
 * have been freed (or handed off) already.
 */
static void
icl_soft_pdu_call_cb(struct icl_pdu *ip)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;

	if (isp->cb != NULL)
		isp->cb(ip, isp->error);
#ifdef DIAGNOSTIC
	refcount_release(&ip->ip_conn->ic_outstanding_pdus);
#endif
	uma_zfree(icl_soft_pdu_zone, isp);
}

/*
 * Drop one reference on a queued/sent PDU, recording the first error.
 * Frees the mbufs immediately; the wrapper itself (and the callback) is
 * only released once the last reference — possibly held by an external
 * ICL_NOCOPY mbuf, see icl_soft_mbuf_done() — goes away.
 */
static void
icl_soft_pdu_done(struct icl_pdu *ip, int error)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;

	if (error != 0)
		isp->error = error;

	m_freem(ip->ip_bhs_mbuf);
	ip->ip_bhs_mbuf = NULL;
	m_freem(ip->ip_ahs_mbuf);
	ip->ip_ahs_mbuf = NULL;
	m_freem(ip->ip_data_mbuf);
	ip->ip_data_mbuf = NULL;

	/* fetchadd returns the old value; 1 means we dropped the last ref. */
	if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1)
		icl_soft_pdu_call_cb(ip);
}

/*
 * m_extaddref() free routine for ICL_NOCOPY data mbufs: the embedded
 * ref_cnt reached zero, so the PDU can complete.
 */
static void
icl_soft_mbuf_done(struct mbuf *mb)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1;

	icl_soft_pdu_call_cb(&isp->ip);
}

/*
 * Allocate icl_pdu with empty BHS to fill up by the caller.
 */
struct icl_pdu *
icl_soft_conn_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_soft_pdu *isp;
	struct icl_pdu *ip;

#ifdef DIAGNOSTIC
	refcount_acquire(&ic->ic_outstanding_pdus);
#endif
	isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO);
	if (isp == NULL) {
		ICL_WARN("failed to allocate soft PDU");
#ifdef DIAGNOSTIC
		refcount_release(&ic->ic_outstanding_pdus);
#endif
		return (NULL);
	}
	ip = &isp->ip;
	ip->ip_conn = ic;

	/* The BHS must fit in a single mbuf so ip_bhs can point into it. */
	CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN);
	ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA);
	if (ip->ip_bhs_mbuf == NULL) {
		ICL_WARN("failed to allocate BHS mbuf");
		icl_soft_conn_pdu_free(ic, ip);
		return (NULL);
	}
	ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
	memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
	ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);

	return (ip);
}

/* AHS length in bytes; the BHS field counts four-byte words. */
static int
icl_pdu_ahs_length(const struct icl_pdu *request)
{

	return (request->ip_bhs->bhs_total_ahs_len * 4);
}

/* Decode the 24-bit big-endian DataSegmentLength field from the BHS. */
static size_t
icl_pdu_data_segment_length(const struct icl_pdu *request)
{
	uint32_t len = 0;

	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
}

/* kobj method wrapper around icl_pdu_data_segment_length(). */
size_t
icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{

	return (icl_pdu_data_segment_length(request));
}

/* Encode 'len' into the 24-bit big-endian DataSegmentLength BHS field. */
static void
icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
{

	response->ip_bhs->bhs_data_segment_len[2] = len;
	response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
	response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
}

/* Bytes of padding needed to round the data segment up to 4 bytes. */
static size_t
icl_pdu_padding(const struct icl_pdu *ip)
{

	if ((ip->ip_data_len % 4) != 0)
		return (4 - (ip->ip_data_len % 4));

	return (0);
}

/*
 * Total on-the-wire size of the PDU: BHS + padded data segment plus
 * header/data digests if negotiated.  AHS is not supported on send.
 */
static size_t
icl_pdu_size(const struct icl_pdu *response)
{
	size_t len;

	KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));

	len = sizeof(struct iscsi_bhs) + response->ip_data_len +
	    icl_pdu_padding(response);
	if (response->ip_conn->ic_header_crc32c)
		len += ISCSI_HEADER_DIGEST_SIZE;
	if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
		len += ISCSI_DATA_DIGEST_SIZE;

	return (len);
}

/* Receive the 48-byte BHS into the preallocated buffer; adjust *availablep. */
static int
icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
{

	if (icl_conn_receive_buf(request->ip_conn,
	    request->ip_bhs, sizeof(struct iscsi_bhs))) {
		ICL_DEBUG("failed to receive BHS");
		return (-1);
	}

	*availablep -= sizeof(struct iscsi_bhs);
	return (0);
}

/*
 * Receive the AHS (if the BHS announced one) into an mbuf chain.
 * The AHS is kept only so the header digest can cover it; it is
 * otherwise unsupported and rejected by the caller.
 */
static int
icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
{

	request->ip_ahs_len = icl_pdu_ahs_length(request);
	if (request->ip_ahs_len == 0)
		return (0);

	request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
	    request->ip_ahs_len);
	if (request->ip_ahs_mbuf == NULL) {
		ICL_DEBUG("failed to receive AHS");
		return (-1);
	}

	*availablep -= request->ip_ahs_len;
	return (0);
}

/* CRC32C over a whole mbuf chain, with the standard pre/post inversion. */
static uint32_t
icl_mbuf_to_crc32c(const struct mbuf *m0)
{
	uint32_t digest = 0xffffffff;
	const struct mbuf *m;

	for (m = m0; m != NULL; m = m->m_next)
		digest = calculate_crc32c(digest,
		    mtod(m, const void *), m->m_len);

	digest = digest ^ 0xffffffff;

	return (digest);
}

/*
 * Receive and verify the header digest, if negotiated.  The digest
 * covers BHS plus AHS, so the AHS chain is linked in temporarily.
 * Returns 0 on success or when digests are disabled; -1 on mismatch
 * or receive failure.
 */
static int
icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
{
	uint32_t received_digest, valid_digest;

	if (request->ip_conn->ic_header_crc32c == false)
		return (0);

	CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
	if (icl_conn_receive_buf(request->ip_conn,
	    &received_digest, ISCSI_HEADER_DIGEST_SIZE)) {
		ICL_DEBUG("failed to receive header digest");
		return (-1);
	}
	*availablep -= ISCSI_HEADER_DIGEST_SIZE;

	/* Temporary attach AHS to BHS to calculate header digest. */
	request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf;
	valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
	request->ip_bhs_mbuf->m_next = NULL;
	if (received_digest != valid_digest) {
		ICL_WARN("header digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}

/*
 * Return the number of bytes that should be waiting in the receive socket
 * before icl_pdu_receive_data_segment() gets called.
 */
static size_t
icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
{
	size_t len;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	/*
	 * Account for the parts of data segment already read from
	 * the socket buffer.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	/*
	 * Don't always wait for the full data segment to be delivered
	 * to the socket; this might badly affect performance due to
	 * TCP window scaling.
	 */
	if (len > partial_receive_len) {
#if 0
		/* NOTE(review): dead code; the stray ')' predates this review. */
		ICL_DEBUG("need %zd bytes of data, limiting to %zd",
		    len, partial_receive_len));
#endif
		len = partial_receive_len;

		return (len);
	}

	/*
	 * Account for padding.  Note that due to the way code is written,
	 * the icl_pdu_receive_data_segment() must always receive padding
	 * along with the last part of data segment, because it would be
	 * impossible to tell whether we've already received the full data
	 * segment including padding, or without it.
	 */
	if ((len % 4) != 0)
		len += 4 - (len % 4);

#if 0
	ICL_DEBUG("need %zd bytes of data", len));
#endif

	return (len);
}

/*
 * Receive (a part of) the data segment into ip_data_mbuf, appending to
 * whatever was received on previous calls.  Sets *more_neededp and the
 * connection's ic_receive_len when the segment is not yet complete.
 * Returns 0 on success, -1 on receive failure.
 */
static int
icl_pdu_receive_data_segment(struct icl_pdu *request,
    size_t *availablep, bool *more_neededp)
{
	struct icl_conn *ic;
	size_t len, padding = 0;
	struct mbuf *m;

	ic = request->ip_conn;

	*more_neededp = false;
	ic->ic_receive_len = 0;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	if ((len % 4) != 0)
		padding = 4 - (len % 4);

	/*
	 * Account for already received parts of data segment.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	if (len + padding > *availablep) {
		/*
		 * Not enough data in the socket buffer.  Receive as much
		 * as we can.  Don't receive padding, since, obviously, it's
		 * not the end of data segment yet.
		 */
#if 0
		ICL_DEBUG("limited from %zd to %zd",
		    len + padding, *availablep - padding));
#endif
		len = *availablep - padding;
		*more_neededp = true;
		padding = 0;
	}

	/*
	 * Must not try to receive padding without at least one byte
	 * of actual data segment.
	 */
	if (len > 0) {
		m = icl_conn_receive(request->ip_conn, len + padding);
		if (m == NULL) {
			ICL_DEBUG("failed to receive data segment");
			return (-1);
		}

		if (request->ip_data_mbuf == NULL)
			request->ip_data_mbuf = m;
		else
			m_cat(request->ip_data_mbuf, m);

		/* ip_data_len tracks payload only; padding stays uncounted. */
		request->ip_data_len += len;
		*availablep -= len + padding;
	} else
		ICL_DEBUG("len 0");

	if (*more_neededp)
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);

	return (0);
}

/*
 * Receive and verify the data digest, if negotiated and a data segment
 * is present.  Returns 0 on success or when not applicable; -1 on
 * mismatch or receive failure.
 */
static int
icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
{
	uint32_t received_digest, valid_digest;

	if (request->ip_conn->ic_data_crc32c == false)
		return (0);

	if (request->ip_data_len == 0)
		return (0);

	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
	if (icl_conn_receive_buf(request->ip_conn,
	    &received_digest, ISCSI_DATA_DIGEST_SIZE)) {
		ICL_DEBUG("failed to receive data digest");
		return (-1);
	}
	*availablep -= ISCSI_DATA_DIGEST_SIZE;

	/*
	 * Note that ip_data_mbuf also contains padding; since digest
	 * calculation is supposed to include that, we iterate over
	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
	 */
	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
	if (received_digest != valid_digest) {
		ICL_WARN("data digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}

/*
 * Somewhat contrary to the name, this attempts to receive only one
 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
612 */ 613 static struct icl_pdu * 614 icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep) 615 { 616 struct icl_pdu *request; 617 struct socket *so; 618 size_t len; 619 int error; 620 bool more_needed; 621 622 so = ic->ic_socket; 623 624 if (ic->ic_receive_state == ICL_CONN_STATE_BHS) { 625 KASSERT(ic->ic_receive_pdu == NULL, 626 ("ic->ic_receive_pdu != NULL")); 627 request = icl_soft_conn_new_pdu(ic, M_NOWAIT); 628 if (request == NULL) { 629 ICL_DEBUG("failed to allocate PDU; " 630 "dropping connection"); 631 icl_conn_fail(ic); 632 return (NULL); 633 } 634 ic->ic_receive_pdu = request; 635 } else { 636 KASSERT(ic->ic_receive_pdu != NULL, 637 ("ic->ic_receive_pdu == NULL")); 638 request = ic->ic_receive_pdu; 639 } 640 641 if (*availablep < ic->ic_receive_len) { 642 #if 0 643 ICL_DEBUG("not enough data; need %zd, " 644 "have %zd", ic->ic_receive_len, *availablep); 645 #endif 646 return (NULL); 647 } 648 649 switch (ic->ic_receive_state) { 650 case ICL_CONN_STATE_BHS: 651 //ICL_DEBUG("receiving BHS"); 652 error = icl_pdu_receive_bhs(request, availablep); 653 if (error != 0) { 654 ICL_DEBUG("failed to receive BHS; " 655 "dropping connection"); 656 break; 657 } 658 659 /* 660 * We don't enforce any limit for AHS length; 661 * its length is stored in 8 bit field. 
662 */ 663 664 len = icl_pdu_data_segment_length(request); 665 if (len > ic->ic_max_data_segment_length) { 666 ICL_WARN("received data segment " 667 "length %zd is larger than negotiated " 668 "MaxDataSegmentLength %zd; " 669 "dropping connection", 670 len, ic->ic_max_data_segment_length); 671 error = EINVAL; 672 break; 673 } 674 675 ic->ic_receive_state = ICL_CONN_STATE_AHS; 676 ic->ic_receive_len = icl_pdu_ahs_length(request); 677 break; 678 679 case ICL_CONN_STATE_AHS: 680 //ICL_DEBUG("receiving AHS"); 681 error = icl_pdu_receive_ahs(request, availablep); 682 if (error != 0) { 683 ICL_DEBUG("failed to receive AHS; " 684 "dropping connection"); 685 break; 686 } 687 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST; 688 if (ic->ic_header_crc32c == false) 689 ic->ic_receive_len = 0; 690 else 691 ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE; 692 break; 693 694 case ICL_CONN_STATE_HEADER_DIGEST: 695 //ICL_DEBUG("receiving header digest"); 696 error = icl_pdu_check_header_digest(request, availablep); 697 if (error != 0) { 698 ICL_DEBUG("header digest failed; " 699 "dropping connection"); 700 break; 701 } 702 703 ic->ic_receive_state = ICL_CONN_STATE_DATA; 704 ic->ic_receive_len = 705 icl_pdu_data_segment_receive_len(request); 706 break; 707 708 case ICL_CONN_STATE_DATA: 709 //ICL_DEBUG("receiving data segment"); 710 error = icl_pdu_receive_data_segment(request, availablep, 711 &more_needed); 712 if (error != 0) { 713 ICL_DEBUG("failed to receive data segment;" 714 "dropping connection"); 715 break; 716 } 717 718 if (more_needed) 719 break; 720 721 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST; 722 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) 723 ic->ic_receive_len = 0; 724 else 725 ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE; 726 break; 727 728 case ICL_CONN_STATE_DATA_DIGEST: 729 //ICL_DEBUG("receiving data digest"); 730 error = icl_pdu_check_data_digest(request, availablep); 731 if (error != 0) { 732 ICL_DEBUG("data digest failed; " 733 
"dropping connection"); 734 break; 735 } 736 737 /* 738 * We've received complete PDU; reset the receive state machine 739 * and return the PDU. 740 */ 741 ic->ic_receive_state = ICL_CONN_STATE_BHS; 742 ic->ic_receive_len = sizeof(struct iscsi_bhs); 743 ic->ic_receive_pdu = NULL; 744 return (request); 745 746 default: 747 panic("invalid ic_receive_state %d\n", ic->ic_receive_state); 748 } 749 750 if (error != 0) { 751 /* 752 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu 753 * and will get freed in icl_soft_conn_close(). 754 */ 755 icl_conn_fail(ic); 756 } 757 758 return (NULL); 759 } 760 761 static void 762 icl_conn_receive_pdus(struct icl_conn *ic, size_t available) 763 { 764 struct icl_pdu *response; 765 struct socket *so; 766 767 so = ic->ic_socket; 768 769 /* 770 * This can never happen; we're careful to only mess with ic->ic_socket 771 * pointer when the send/receive threads are not running. 772 */ 773 KASSERT(so != NULL, ("NULL socket")); 774 775 for (;;) { 776 if (ic->ic_disconnecting) 777 return; 778 779 if (so->so_error != 0) { 780 ICL_DEBUG("connection error %d; " 781 "dropping connection", so->so_error); 782 icl_conn_fail(ic); 783 return; 784 } 785 786 /* 787 * Loop until we have a complete PDU or there is not enough 788 * data in the socket buffer. 
789 */ 790 if (available < ic->ic_receive_len) { 791 #if 0 792 ICL_DEBUG("not enough data; have %zd, " 793 "need %zd", available, 794 ic->ic_receive_len); 795 #endif 796 return; 797 } 798 799 response = icl_conn_receive_pdu(ic, &available); 800 if (response == NULL) 801 continue; 802 803 if (response->ip_ahs_len > 0) { 804 ICL_WARN("received PDU with unsupported " 805 "AHS; opcode 0x%x; dropping connection", 806 response->ip_bhs->bhs_opcode); 807 icl_soft_conn_pdu_free(ic, response); 808 icl_conn_fail(ic); 809 return; 810 } 811 812 (ic->ic_receive)(response); 813 } 814 } 815 816 static void 817 icl_receive_thread(void *arg) 818 { 819 struct icl_conn *ic; 820 size_t available; 821 struct socket *so; 822 823 ic = arg; 824 so = ic->ic_socket; 825 826 for (;;) { 827 if (ic->ic_disconnecting) { 828 //ICL_DEBUG("terminating"); 829 break; 830 } 831 832 /* 833 * Set the low watermark, to be checked by 834 * soreadable() in icl_soupcall_receive() 835 * to avoid unnecessary wakeups until there 836 * is enough data received to read the PDU. 
837 */ 838 SOCKBUF_LOCK(&so->so_rcv); 839 available = sbavail(&so->so_rcv); 840 if (available < ic->ic_receive_len) { 841 so->so_rcv.sb_lowat = ic->ic_receive_len; 842 cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); 843 } else 844 so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; 845 SOCKBUF_UNLOCK(&so->so_rcv); 846 847 icl_conn_receive_pdus(ic, available); 848 } 849 850 ICL_CONN_LOCK(ic); 851 ic->ic_receive_running = false; 852 cv_signal(&ic->ic_send_cv); 853 ICL_CONN_UNLOCK(ic); 854 kthread_exit(); 855 } 856 857 static int 858 icl_soupcall_receive(struct socket *so, void *arg, int waitflag) 859 { 860 struct icl_conn *ic; 861 862 if (!soreadable(so)) 863 return (SU_OK); 864 865 ic = arg; 866 cv_signal(&ic->ic_receive_cv); 867 return (SU_OK); 868 } 869 870 static int 871 icl_pdu_finalize(struct icl_pdu *request) 872 { 873 size_t padding, pdu_len; 874 uint32_t digest, zero = 0; 875 int ok; 876 struct icl_conn *ic; 877 878 ic = request->ip_conn; 879 880 icl_pdu_set_data_segment_length(request, request->ip_data_len); 881 882 pdu_len = icl_pdu_size(request); 883 884 if (ic->ic_header_crc32c) { 885 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 886 ok = m_append(request->ip_bhs_mbuf, sizeof(digest), 887 (void *)&digest); 888 if (ok != 1) { 889 ICL_WARN("failed to append header digest"); 890 return (1); 891 } 892 } 893 894 if (request->ip_data_len != 0) { 895 padding = icl_pdu_padding(request); 896 if (padding > 0) { 897 ok = m_append(request->ip_data_mbuf, padding, 898 (void *)&zero); 899 if (ok != 1) { 900 ICL_WARN("failed to append padding"); 901 return (1); 902 } 903 } 904 905 if (ic->ic_data_crc32c) { 906 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 907 908 ok = m_append(request->ip_data_mbuf, sizeof(digest), 909 (void *)&digest); 910 if (ok != 1) { 911 ICL_WARN("failed to append data digest"); 912 return (1); 913 } 914 } 915 916 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); 917 request->ip_data_mbuf = NULL; 918 } 919 920 
request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; 921 922 return (0); 923 } 924 925 static void 926 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue) 927 { 928 struct icl_pdu *request, *request2; 929 struct socket *so; 930 long available, size, size2; 931 int coalesced, error; 932 933 ICL_CONN_LOCK_ASSERT_NOT(ic); 934 935 so = ic->ic_socket; 936 937 SOCKBUF_LOCK(&so->so_snd); 938 /* 939 * Check how much space do we have for transmit. We can't just 940 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, 941 * as it always frees the mbuf chain passed to it, even in case 942 * of error. 943 */ 944 available = sbspace(&so->so_snd); 945 946 /* 947 * Notify the socket upcall that we don't need wakeups 948 * for the time being. 949 */ 950 so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; 951 SOCKBUF_UNLOCK(&so->so_snd); 952 953 while (!STAILQ_EMPTY(queue)) { 954 request = STAILQ_FIRST(queue); 955 size = icl_pdu_size(request); 956 if (available < size) { 957 958 /* 959 * Set the low watermark, to be checked by 960 * sowriteable() in icl_soupcall_send() 961 * to avoid unnecessary wakeups until there 962 * is enough space for the PDU to fit. 
963 */ 964 SOCKBUF_LOCK(&so->so_snd); 965 available = sbspace(&so->so_snd); 966 if (available < size) { 967 #if 1 968 ICL_DEBUG("no space to send; " 969 "have %ld, need %ld", 970 available, size); 971 #endif 972 so->so_snd.sb_lowat = max(size, 973 so->so_snd.sb_hiwat / 8); 974 SOCKBUF_UNLOCK(&so->so_snd); 975 return; 976 } 977 SOCKBUF_UNLOCK(&so->so_snd); 978 } 979 STAILQ_REMOVE_HEAD(queue, ip_next); 980 error = icl_pdu_finalize(request); 981 if (error != 0) { 982 ICL_DEBUG("failed to finalize PDU; " 983 "dropping connection"); 984 icl_soft_pdu_done(request, EIO); 985 icl_conn_fail(ic); 986 return; 987 } 988 if (coalesce) { 989 coalesced = 1; 990 for (;;) { 991 request2 = STAILQ_FIRST(queue); 992 if (request2 == NULL) 993 break; 994 size2 = icl_pdu_size(request2); 995 if (available < size + size2) 996 break; 997 STAILQ_REMOVE_HEAD(queue, ip_next); 998 error = icl_pdu_finalize(request2); 999 if (error != 0) { 1000 ICL_DEBUG("failed to finalize PDU; " 1001 "dropping connection"); 1002 icl_soft_pdu_done(request, EIO); 1003 icl_soft_pdu_done(request2, EIO); 1004 icl_conn_fail(ic); 1005 return; 1006 } 1007 m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf); 1008 request2->ip_bhs_mbuf = NULL; 1009 request->ip_bhs_mbuf->m_pkthdr.len += size2; 1010 size += size2; 1011 STAILQ_REMOVE_AFTER(queue, request, ip_next); 1012 icl_soft_pdu_done(request2, 0); 1013 coalesced++; 1014 } 1015 #if 0 1016 if (coalesced > 1) { 1017 ICL_DEBUG("coalesced %d PDUs into %ld bytes", 1018 coalesced, size); 1019 } 1020 #endif 1021 } 1022 available -= size; 1023 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, 1024 NULL, MSG_DONTWAIT, curthread); 1025 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. 
*/ 1026 if (error != 0) { 1027 ICL_DEBUG("failed to send PDU, error %d; " 1028 "dropping connection", error); 1029 icl_soft_pdu_done(request, error); 1030 icl_conn_fail(ic); 1031 return; 1032 } 1033 icl_soft_pdu_done(request, 0); 1034 } 1035 } 1036 1037 static void 1038 icl_send_thread(void *arg) 1039 { 1040 struct icl_conn *ic; 1041 struct icl_pdu_stailq queue; 1042 1043 ic = arg; 1044 1045 STAILQ_INIT(&queue); 1046 1047 ICL_CONN_LOCK(ic); 1048 for (;;) { 1049 for (;;) { 1050 /* 1051 * If the local queue is empty, populate it from 1052 * the main one. This way the icl_conn_send_pdus() 1053 * can go through all the queued PDUs without holding 1054 * any locks. 1055 */ 1056 if (STAILQ_EMPTY(&queue)) 1057 STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu); 1058 1059 ic->ic_check_send_space = false; 1060 ICL_CONN_UNLOCK(ic); 1061 icl_conn_send_pdus(ic, &queue); 1062 ICL_CONN_LOCK(ic); 1063 1064 /* 1065 * The icl_soupcall_send() was called since the last 1066 * call to sbspace(); go around; 1067 */ 1068 if (ic->ic_check_send_space) 1069 continue; 1070 1071 /* 1072 * Local queue is empty, but we still have PDUs 1073 * in the main one; go around. 1074 */ 1075 if (STAILQ_EMPTY(&queue) && 1076 !STAILQ_EMPTY(&ic->ic_to_send)) 1077 continue; 1078 1079 /* 1080 * There might be some stuff in the local queue, 1081 * which didn't get sent due to not having enough send 1082 * space. Wait for socket upcall. 1083 */ 1084 break; 1085 } 1086 1087 if (ic->ic_disconnecting) { 1088 //ICL_DEBUG("terminating"); 1089 break; 1090 } 1091 1092 cv_wait(&ic->ic_send_cv, ic->ic_lock); 1093 } 1094 1095 /* 1096 * We're exiting; move PDUs back to the main queue, so they can 1097 * get freed properly. At this point ordering doesn't matter. 
1098 */ 1099 STAILQ_CONCAT(&ic->ic_to_send, &queue); 1100 1101 ic->ic_send_running = false; 1102 cv_signal(&ic->ic_send_cv); 1103 ICL_CONN_UNLOCK(ic); 1104 kthread_exit(); 1105 } 1106 1107 static int 1108 icl_soupcall_send(struct socket *so, void *arg, int waitflag) 1109 { 1110 struct icl_conn *ic; 1111 1112 if (!sowriteable(so)) 1113 return (SU_OK); 1114 1115 ic = arg; 1116 1117 ICL_CONN_LOCK(ic); 1118 ic->ic_check_send_space = true; 1119 ICL_CONN_UNLOCK(ic); 1120 1121 cv_signal(&ic->ic_send_cv); 1122 1123 return (SU_OK); 1124 } 1125 1126 static int 1127 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, 1128 const void *addr, size_t len, int flags) 1129 { 1130 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; 1131 struct mbuf *mb, *newmb; 1132 size_t copylen, off = 0; 1133 1134 KASSERT(len > 0, ("len == 0")); 1135 1136 if (flags & ICL_NOCOPY) { 1137 newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA); 1138 if (newmb == NULL) { 1139 ICL_WARN("failed to allocate mbuf"); 1140 return (ENOMEM); 1141 } 1142 1143 newmb->m_flags |= M_RDONLY; 1144 m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt, 1145 icl_soft_mbuf_done, isp, NULL); 1146 newmb->m_len = len; 1147 } else { 1148 newmb = m_getm2(NULL, len, flags, MT_DATA, 0); 1149 if (newmb == NULL) { 1150 ICL_WARN("failed to allocate mbuf for %zd bytes", len); 1151 return (ENOMEM); 1152 } 1153 1154 for (mb = newmb; mb != NULL; mb = mb->m_next) { 1155 copylen = min(M_TRAILINGSPACE(mb), len - off); 1156 memcpy(mtod(mb, char *), (const char *)addr + off, copylen); 1157 mb->m_len = copylen; 1158 off += copylen; 1159 } 1160 KASSERT(off == len, ("%s: off != len", __func__)); 1161 } 1162 1163 if (request->ip_data_mbuf == NULL) { 1164 request->ip_data_mbuf = newmb; 1165 request->ip_data_len = len; 1166 } else { 1167 m_cat(request->ip_data_mbuf, newmb); 1168 request->ip_data_len += len; 1169 } 1170 1171 return (0); 1172 } 1173 1174 void 1175 icl_soft_conn_pdu_get_data(struct icl_conn *ic, 
struct icl_pdu *ip, 1176 size_t off, void *addr, size_t len) 1177 { 1178 1179 m_copydata(ip->ip_data_mbuf, off, len, addr); 1180 } 1181 1182 static void 1183 icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) 1184 { 1185 1186 icl_soft_conn_pdu_queue_cb(ic, ip, NULL); 1187 } 1188 1189 static void 1190 icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip, 1191 icl_pdu_cb cb) 1192 { 1193 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 1194 1195 ICL_CONN_LOCK_ASSERT(ic); 1196 isp->ref_cnt++; 1197 isp->cb = cb; 1198 1199 if (ic->ic_disconnecting || ic->ic_socket == NULL) { 1200 ICL_DEBUG("icl_pdu_queue on closed connection"); 1201 icl_soft_pdu_done(ip, ENOTCONN); 1202 return; 1203 } 1204 1205 if (!STAILQ_EMPTY(&ic->ic_to_send)) { 1206 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1207 /* 1208 * If the queue is not empty, someone else had already 1209 * signaled the send thread; no need to do that again, 1210 * just return. 1211 */ 1212 return; 1213 } 1214 1215 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1216 cv_signal(&ic->ic_send_cv); 1217 } 1218 1219 static struct icl_conn * 1220 icl_soft_new_conn(const char *name, struct mtx *lock) 1221 { 1222 struct icl_conn *ic; 1223 1224 refcount_acquire(&icl_ncons); 1225 1226 ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO); 1227 1228 STAILQ_INIT(&ic->ic_to_send); 1229 ic->ic_lock = lock; 1230 cv_init(&ic->ic_send_cv, "icl_tx"); 1231 cv_init(&ic->ic_receive_cv, "icl_rx"); 1232 #ifdef DIAGNOSTIC 1233 refcount_init(&ic->ic_outstanding_pdus, 0); 1234 #endif 1235 ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; 1236 ic->ic_name = name; 1237 ic->ic_offload = "None"; 1238 ic->ic_unmapped = false; 1239 1240 return (ic); 1241 } 1242 1243 void 1244 icl_soft_conn_free(struct icl_conn *ic) 1245 { 1246 1247 #ifdef DIAGNOSTIC 1248 KASSERT(ic->ic_outstanding_pdus == 0, 1249 ("destroying session with %d outstanding PDUs", 1250 ic->ic_outstanding_pdus)); 
1251 #endif 1252 cv_destroy(&ic->ic_send_cv); 1253 cv_destroy(&ic->ic_receive_cv); 1254 kobj_delete((struct kobj *)ic, M_ICL_SOFT); 1255 refcount_release(&icl_ncons); 1256 } 1257 1258 static int 1259 icl_conn_start(struct icl_conn *ic) 1260 { 1261 size_t minspace; 1262 struct sockopt opt; 1263 int error, one = 1; 1264 1265 ICL_CONN_LOCK(ic); 1266 1267 /* 1268 * XXX: Ugly hack. 1269 */ 1270 if (ic->ic_socket == NULL) { 1271 ICL_CONN_UNLOCK(ic); 1272 return (EINVAL); 1273 } 1274 1275 ic->ic_receive_state = ICL_CONN_STATE_BHS; 1276 ic->ic_receive_len = sizeof(struct iscsi_bhs); 1277 ic->ic_disconnecting = false; 1278 1279 ICL_CONN_UNLOCK(ic); 1280 1281 /* 1282 * For sendspace, this is required because the current code cannot 1283 * send a PDU in pieces; thus, the minimum buffer size is equal 1284 * to the maximum PDU size. "+4" is to account for possible padding. 1285 * 1286 * What we should actually do here is to use autoscaling, but set 1287 * some minimal buffer size to "minspace". I don't know a way to do 1288 * that, though. 1289 */ 1290 minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + 1291 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1292 if (sendspace < minspace) { 1293 ICL_WARN("kern.icl.sendspace too low; must be at least %zd", 1294 minspace); 1295 sendspace = minspace; 1296 } 1297 if (recvspace < minspace) { 1298 ICL_WARN("kern.icl.recvspace too low; must be at least %zd", 1299 minspace); 1300 recvspace = minspace; 1301 } 1302 1303 error = soreserve(ic->ic_socket, sendspace, recvspace); 1304 if (error != 0) { 1305 ICL_WARN("soreserve failed with error %d", error); 1306 icl_soft_conn_close(ic); 1307 return (error); 1308 } 1309 ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; 1310 ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; 1311 1312 /* 1313 * Disable Nagle. 
1314 */ 1315 bzero(&opt, sizeof(opt)); 1316 opt.sopt_dir = SOPT_SET; 1317 opt.sopt_level = IPPROTO_TCP; 1318 opt.sopt_name = TCP_NODELAY; 1319 opt.sopt_val = &one; 1320 opt.sopt_valsize = sizeof(one); 1321 error = sosetopt(ic->ic_socket, &opt); 1322 if (error != 0) { 1323 ICL_WARN("disabling TCP_NODELAY failed with error %d", error); 1324 icl_soft_conn_close(ic); 1325 return (error); 1326 } 1327 1328 /* 1329 * Register socket upcall, to get notified about incoming PDUs 1330 * and free space to send outgoing ones. 1331 */ 1332 SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1333 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); 1334 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1335 SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1336 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); 1337 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1338 1339 /* 1340 * Start threads. 1341 */ 1342 ICL_CONN_LOCK(ic); 1343 ic->ic_send_running = ic->ic_receive_running = true; 1344 ICL_CONN_UNLOCK(ic); 1345 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", 1346 ic->ic_name); 1347 if (error != 0) { 1348 ICL_WARN("kthread_add(9) failed with error %d", error); 1349 ICL_CONN_LOCK(ic); 1350 ic->ic_send_running = ic->ic_receive_running = false; 1351 cv_signal(&ic->ic_send_cv); 1352 ICL_CONN_UNLOCK(ic); 1353 icl_soft_conn_close(ic); 1354 return (error); 1355 } 1356 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", 1357 ic->ic_name); 1358 if (error != 0) { 1359 ICL_WARN("kthread_add(9) failed with error %d", error); 1360 ICL_CONN_LOCK(ic); 1361 ic->ic_receive_running = false; 1362 cv_signal(&ic->ic_send_cv); 1363 ICL_CONN_UNLOCK(ic); 1364 icl_soft_conn_close(ic); 1365 return (error); 1366 } 1367 1368 return (0); 1369 } 1370 1371 int 1372 icl_soft_conn_handoff(struct icl_conn *ic, int fd) 1373 { 1374 struct file *fp; 1375 struct socket *so; 1376 cap_rights_t rights; 1377 int error; 1378 1379 ICL_CONN_LOCK_ASSERT_NOT(ic); 1380 1381 #ifdef ICL_KERNEL_PROXY 1382 /* 1383 
* We're transitioning to Full Feature phase, and we don't 1384 * really care. 1385 */ 1386 if (fd == 0) { 1387 ICL_CONN_LOCK(ic); 1388 if (ic->ic_socket == NULL) { 1389 ICL_CONN_UNLOCK(ic); 1390 ICL_WARN("proxy handoff without connect"); 1391 return (EINVAL); 1392 } 1393 ICL_CONN_UNLOCK(ic); 1394 return (0); 1395 } 1396 #endif 1397 1398 /* 1399 * Steal the socket from userland. 1400 */ 1401 error = fget(curthread, fd, 1402 cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); 1403 if (error != 0) 1404 return (error); 1405 if (fp->f_type != DTYPE_SOCKET) { 1406 fdrop(fp, curthread); 1407 return (EINVAL); 1408 } 1409 so = fp->f_data; 1410 if (so->so_type != SOCK_STREAM) { 1411 fdrop(fp, curthread); 1412 return (EINVAL); 1413 } 1414 1415 ICL_CONN_LOCK(ic); 1416 1417 if (ic->ic_socket != NULL) { 1418 ICL_CONN_UNLOCK(ic); 1419 fdrop(fp, curthread); 1420 return (EBUSY); 1421 } 1422 1423 ic->ic_socket = fp->f_data; 1424 fp->f_ops = &badfileops; 1425 fp->f_data = NULL; 1426 fdrop(fp, curthread); 1427 ICL_CONN_UNLOCK(ic); 1428 1429 error = icl_conn_start(ic); 1430 1431 return (error); 1432 } 1433 1434 void 1435 icl_soft_conn_close(struct icl_conn *ic) 1436 { 1437 struct icl_pdu *pdu; 1438 struct socket *so; 1439 1440 ICL_CONN_LOCK(ic); 1441 1442 /* 1443 * Wake up the threads, so they can properly terminate. 1444 */ 1445 ic->ic_disconnecting = true; 1446 while (ic->ic_receive_running || ic->ic_send_running) { 1447 cv_signal(&ic->ic_receive_cv); 1448 cv_signal(&ic->ic_send_cv); 1449 cv_wait(&ic->ic_send_cv, ic->ic_lock); 1450 } 1451 1452 /* Some other thread could close the connection same time. */ 1453 so = ic->ic_socket; 1454 if (so == NULL) { 1455 ICL_CONN_UNLOCK(ic); 1456 return; 1457 } 1458 ic->ic_socket = NULL; 1459 1460 /* 1461 * Deregister socket upcalls. 
1462 */ 1463 ICL_CONN_UNLOCK(ic); 1464 SOCKBUF_LOCK(&so->so_snd); 1465 if (so->so_snd.sb_upcall != NULL) 1466 soupcall_clear(so, SO_SND); 1467 SOCKBUF_UNLOCK(&so->so_snd); 1468 SOCKBUF_LOCK(&so->so_rcv); 1469 if (so->so_rcv.sb_upcall != NULL) 1470 soupcall_clear(so, SO_RCV); 1471 SOCKBUF_UNLOCK(&so->so_rcv); 1472 soclose(so); 1473 ICL_CONN_LOCK(ic); 1474 1475 if (ic->ic_receive_pdu != NULL) { 1476 //ICL_DEBUG("freeing partially received PDU"); 1477 icl_soft_conn_pdu_free(ic, ic->ic_receive_pdu); 1478 ic->ic_receive_pdu = NULL; 1479 } 1480 1481 /* 1482 * Remove any outstanding PDUs from the send queue. 1483 */ 1484 while (!STAILQ_EMPTY(&ic->ic_to_send)) { 1485 pdu = STAILQ_FIRST(&ic->ic_to_send); 1486 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); 1487 icl_soft_pdu_done(pdu, ENOTCONN); 1488 } 1489 1490 KASSERT(STAILQ_EMPTY(&ic->ic_to_send), 1491 ("destroying session with non-empty send queue")); 1492 ICL_CONN_UNLOCK(ic); 1493 } 1494 1495 int 1496 icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, 1497 struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) 1498 { 1499 1500 return (0); 1501 } 1502 1503 void 1504 icl_soft_conn_task_done(struct icl_conn *ic, void *prv) 1505 { 1506 } 1507 1508 int 1509 icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, 1510 uint32_t *transfer_tag, void **prvp) 1511 { 1512 1513 return (0); 1514 } 1515 1516 void 1517 icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv) 1518 { 1519 } 1520 1521 static int 1522 icl_soft_limits(struct icl_drv_limits *idl) 1523 { 1524 1525 idl->idl_max_recv_data_segment_length = 128 * 1024; 1526 idl->idl_max_send_data_segment_length = 128 * 1024; 1527 idl->idl_max_burst_length = 262144; 1528 idl->idl_first_burst_length = 65536; 1529 1530 return (0); 1531 } 1532 1533 #ifdef ICL_KERNEL_PROXY 1534 int 1535 icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype, 1536 int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) 1537 { 1538 1539 return 
(icl_soft_proxy_connect(ic, domain, socktype, protocol, 1540 from_sa, to_sa)); 1541 } 1542 1543 int 1544 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so) 1545 { 1546 int error; 1547 1548 ICL_CONN_LOCK_ASSERT_NOT(ic); 1549 1550 if (so->so_type != SOCK_STREAM) 1551 return (EINVAL); 1552 1553 ICL_CONN_LOCK(ic); 1554 if (ic->ic_socket != NULL) { 1555 ICL_CONN_UNLOCK(ic); 1556 return (EBUSY); 1557 } 1558 ic->ic_socket = so; 1559 ICL_CONN_UNLOCK(ic); 1560 1561 error = icl_conn_start(ic); 1562 1563 return (error); 1564 } 1565 #endif /* ICL_KERNEL_PROXY */ 1566 1567 static int 1568 icl_soft_load(void) 1569 { 1570 int error; 1571 1572 icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu", 1573 sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL, 1574 UMA_ALIGN_PTR, 0); 1575 refcount_init(&icl_ncons, 0); 1576 1577 /* 1578 * The reason we call this "none" is that to the user, 1579 * it's known as "offload driver"; "offload driver: soft" 1580 * doesn't make much sense. 1581 */ 1582 error = icl_register("none", false, 0, 1583 icl_soft_limits, icl_soft_new_conn); 1584 KASSERT(error == 0, ("failed to register")); 1585 1586 #if defined(ICL_KERNEL_PROXY) && 0 1587 /* 1588 * Debugging aid for kernel proxy functionality. 
1589 */ 1590 error = icl_register("proxytest", true, 0, 1591 icl_soft_limits, icl_soft_new_conn); 1592 KASSERT(error == 0, ("failed to register")); 1593 #endif 1594 1595 return (error); 1596 } 1597 1598 static int 1599 icl_soft_unload(void) 1600 { 1601 1602 if (icl_ncons != 0) 1603 return (EBUSY); 1604 1605 icl_unregister("none", false); 1606 #if defined(ICL_KERNEL_PROXY) && 0 1607 icl_unregister("proxytest", true); 1608 #endif 1609 1610 uma_zdestroy(icl_soft_pdu_zone); 1611 1612 return (0); 1613 } 1614 1615 static int 1616 icl_soft_modevent(module_t mod, int what, void *arg) 1617 { 1618 1619 switch (what) { 1620 case MOD_LOAD: 1621 return (icl_soft_load()); 1622 case MOD_UNLOAD: 1623 return (icl_soft_unload()); 1624 default: 1625 return (EINVAL); 1626 } 1627 } 1628 1629 moduledata_t icl_soft_data = { 1630 "icl_soft", 1631 icl_soft_modevent, 1632 0 1633 }; 1634 1635 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); 1636 MODULE_DEPEND(icl_soft, icl, 1, 1, 1); 1637 MODULE_VERSION(icl_soft, 1); 1638