1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 /* 33 * Software implementation of iSCSI Common Layer kobj(9) interface. 
34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/param.h> 40 #include <sys/capsicum.h> 41 #include <sys/condvar.h> 42 #include <sys/conf.h> 43 #include <sys/gsb_crc32.h> 44 #include <sys/file.h> 45 #include <sys/kernel.h> 46 #include <sys/kthread.h> 47 #include <sys/lock.h> 48 #include <sys/mbuf.h> 49 #include <sys/mutex.h> 50 #include <sys/module.h> 51 #include <sys/protosw.h> 52 #include <sys/socket.h> 53 #include <sys/socketvar.h> 54 #include <sys/sysctl.h> 55 #include <sys/systm.h> 56 #include <sys/sx.h> 57 #include <sys/uio.h> 58 #include <vm/uma.h> 59 #include <netinet/in.h> 60 #include <netinet/tcp.h> 61 62 #include <dev/iscsi/icl.h> 63 #include <dev/iscsi/iscsi_proto.h> 64 #include <icl_conn_if.h> 65 66 struct icl_soft_pdu { 67 struct icl_pdu ip; 68 69 /* soft specific stuff goes here. */ 70 u_int ref_cnt; 71 icl_pdu_cb cb; 72 int error; 73 }; 74 75 SYSCTL_NODE(_kern_icl, OID_AUTO, soft, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 76 "Software iSCSI"); 77 static int coalesce = 1; 78 SYSCTL_INT(_kern_icl_soft, OID_AUTO, coalesce, CTLFLAG_RWTUN, 79 &coalesce, 0, "Try to coalesce PDUs before sending"); 80 static int partial_receive_len = 256 * 1024; 81 SYSCTL_INT(_kern_icl_soft, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, 82 &partial_receive_len, 0, "Minimum read size for partially received " 83 "data segment"); 84 static int max_data_segment_length = 256 * 1024; 85 SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_data_segment_length, CTLFLAG_RWTUN, 86 &max_data_segment_length, 0, "Maximum data segment length"); 87 static int first_burst_length = 1024 * 1024; 88 SYSCTL_INT(_kern_icl_soft, OID_AUTO, first_burst_length, CTLFLAG_RWTUN, 89 &first_burst_length, 0, "First burst length"); 90 static int max_burst_length = 1024 * 1024; 91 SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_burst_length, CTLFLAG_RWTUN, 92 &max_burst_length, 0, "Maximum burst length"); 93 static int sendspace = 1536 * 1024; 94 SYSCTL_INT(_kern_icl_soft, OID_AUTO, sendspace, 
CTLFLAG_RWTUN, 95 &sendspace, 0, "Default send socket buffer size"); 96 static int recvspace = 1536 * 1024; 97 SYSCTL_INT(_kern_icl_soft, OID_AUTO, recvspace, CTLFLAG_RWTUN, 98 &recvspace, 0, "Default receive socket buffer size"); 99 100 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend"); 101 static uma_zone_t icl_soft_pdu_zone; 102 103 static volatile u_int icl_ncons; 104 105 #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) 106 #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) 107 #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) 108 #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) 109 110 STAILQ_HEAD(icl_pdu_stailq, icl_pdu); 111 112 static icl_conn_new_pdu_t icl_soft_conn_new_pdu; 113 static icl_conn_pdu_free_t icl_soft_conn_pdu_free; 114 static icl_conn_pdu_data_segment_length_t 115 icl_soft_conn_pdu_data_segment_length; 116 static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data; 117 static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data; 118 static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue; 119 static icl_conn_pdu_queue_cb_t icl_soft_conn_pdu_queue_cb; 120 static icl_conn_handoff_t icl_soft_conn_handoff; 121 static icl_conn_free_t icl_soft_conn_free; 122 static icl_conn_close_t icl_soft_conn_close; 123 static icl_conn_task_setup_t icl_soft_conn_task_setup; 124 static icl_conn_task_done_t icl_soft_conn_task_done; 125 static icl_conn_transfer_setup_t icl_soft_conn_transfer_setup; 126 static icl_conn_transfer_done_t icl_soft_conn_transfer_done; 127 #ifdef ICL_KERNEL_PROXY 128 static icl_conn_connect_t icl_soft_conn_connect; 129 #endif 130 131 static kobj_method_t icl_soft_methods[] = { 132 KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu), 133 KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free), 134 KOBJMETHOD(icl_conn_pdu_data_segment_length, 135 icl_soft_conn_pdu_data_segment_length), 136 KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data), 137 
KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data), 138 KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue), 139 KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb), 140 KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff), 141 KOBJMETHOD(icl_conn_free, icl_soft_conn_free), 142 KOBJMETHOD(icl_conn_close, icl_soft_conn_close), 143 KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup), 144 KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done), 145 KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup), 146 KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done), 147 #ifdef ICL_KERNEL_PROXY 148 KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect), 149 #endif 150 { 0, 0 } 151 }; 152 153 DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn)); 154 155 static void 156 icl_conn_fail(struct icl_conn *ic) 157 { 158 if (ic->ic_socket == NULL) 159 return; 160 161 /* 162 * XXX 163 */ 164 ic->ic_socket->so_error = EDOOFUS; 165 (ic->ic_error)(ic); 166 } 167 168 static struct mbuf * 169 icl_conn_receive(struct icl_conn *ic, size_t len) 170 { 171 struct uio uio; 172 struct socket *so; 173 struct mbuf *m; 174 int error, flags; 175 176 so = ic->ic_socket; 177 178 memset(&uio, 0, sizeof(uio)); 179 uio.uio_resid = len; 180 181 flags = MSG_DONTWAIT; 182 error = soreceive(so, NULL, &uio, &m, NULL, &flags); 183 if (error != 0) { 184 ICL_DEBUG("soreceive error %d", error); 185 return (NULL); 186 } 187 if (uio.uio_resid != 0) { 188 m_freem(m); 189 ICL_DEBUG("short read"); 190 return (NULL); 191 } 192 193 return (m); 194 } 195 196 static int 197 icl_conn_receive_buf(struct icl_conn *ic, void *buf, size_t len) 198 { 199 struct iovec iov[1]; 200 struct uio uio; 201 struct socket *so; 202 int error, flags; 203 204 so = ic->ic_socket; 205 206 memset(&uio, 0, sizeof(uio)); 207 iov[0].iov_base = buf; 208 iov[0].iov_len = len; 209 uio.uio_iov = iov; 210 uio.uio_iovcnt = 1; 211 uio.uio_offset = 0; 212 uio.uio_resid = len; 213 uio.uio_segflg 
= UIO_SYSSPACE; 214 uio.uio_rw = UIO_READ; 215 216 flags = MSG_DONTWAIT; 217 error = soreceive(so, NULL, &uio, NULL, NULL, &flags); 218 if (error != 0) { 219 ICL_DEBUG("soreceive error %d", error); 220 return (-1); 221 } 222 if (uio.uio_resid != 0) { 223 ICL_DEBUG("short read"); 224 return (-1); 225 } 226 227 return (0); 228 } 229 230 static void 231 icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) 232 { 233 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 234 235 KASSERT(isp->ref_cnt == 0, ("freeing active PDU")); 236 m_freem(ip->ip_bhs_mbuf); 237 m_freem(ip->ip_ahs_mbuf); 238 m_freem(ip->ip_data_mbuf); 239 uma_zfree(icl_soft_pdu_zone, isp); 240 #ifdef DIAGNOSTIC 241 refcount_release(&ic->ic_outstanding_pdus); 242 #endif 243 } 244 245 static void 246 icl_soft_pdu_call_cb(struct icl_pdu *ip) 247 { 248 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 249 250 if (isp->cb != NULL) 251 isp->cb(ip, isp->error); 252 #ifdef DIAGNOSTIC 253 refcount_release(&ip->ip_conn->ic_outstanding_pdus); 254 #endif 255 uma_zfree(icl_soft_pdu_zone, isp); 256 } 257 258 static void 259 icl_soft_pdu_done(struct icl_pdu *ip, int error) 260 { 261 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 262 263 if (error != 0) 264 isp->error = error; 265 266 m_freem(ip->ip_bhs_mbuf); 267 ip->ip_bhs_mbuf = NULL; 268 m_freem(ip->ip_ahs_mbuf); 269 ip->ip_ahs_mbuf = NULL; 270 m_freem(ip->ip_data_mbuf); 271 ip->ip_data_mbuf = NULL; 272 273 if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) 274 icl_soft_pdu_call_cb(ip); 275 } 276 277 static void 278 icl_soft_mbuf_done(struct mbuf *mb) 279 { 280 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1; 281 282 icl_soft_pdu_call_cb(&isp->ip); 283 } 284 285 /* 286 * Allocate icl_pdu with empty BHS to fill up by the caller. 
287 */ 288 struct icl_pdu * 289 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) 290 { 291 struct icl_soft_pdu *isp; 292 struct icl_pdu *ip; 293 294 #ifdef DIAGNOSTIC 295 refcount_acquire(&ic->ic_outstanding_pdus); 296 #endif 297 isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO); 298 if (isp == NULL) { 299 ICL_WARN("failed to allocate soft PDU"); 300 #ifdef DIAGNOSTIC 301 refcount_release(&ic->ic_outstanding_pdus); 302 #endif 303 return (NULL); 304 } 305 ip = &isp->ip; 306 ip->ip_conn = ic; 307 308 CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN); 309 ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA); 310 if (ip->ip_bhs_mbuf == NULL) { 311 ICL_WARN("failed to allocate BHS mbuf"); 312 icl_soft_conn_pdu_free(ic, ip); 313 return (NULL); 314 } 315 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); 316 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); 317 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); 318 319 return (ip); 320 } 321 322 static int 323 icl_pdu_ahs_length(const struct icl_pdu *request) 324 { 325 326 return (request->ip_bhs->bhs_total_ahs_len * 4); 327 } 328 329 static size_t 330 icl_pdu_data_segment_length(const struct icl_pdu *request) 331 { 332 uint32_t len = 0; 333 334 len += request->ip_bhs->bhs_data_segment_len[0]; 335 len <<= 8; 336 len += request->ip_bhs->bhs_data_segment_len[1]; 337 len <<= 8; 338 len += request->ip_bhs->bhs_data_segment_len[2]; 339 340 return (len); 341 } 342 343 size_t 344 icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, 345 const struct icl_pdu *request) 346 { 347 348 return (icl_pdu_data_segment_length(request)); 349 } 350 351 static void 352 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) 353 { 354 355 response->ip_bhs->bhs_data_segment_len[2] = len; 356 response->ip_bhs->bhs_data_segment_len[1] = len >> 8; 357 response->ip_bhs->bhs_data_segment_len[0] = len >> 16; 358 } 359 360 static size_t 361 icl_pdu_padding(const struct icl_pdu *ip) 362 { 363 364 if ((ip->ip_data_len % 4) != 0) 365 
return (4 - (ip->ip_data_len % 4)); 366 367 return (0); 368 } 369 370 static size_t 371 icl_pdu_size(const struct icl_pdu *response) 372 { 373 size_t len; 374 375 KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); 376 377 len = sizeof(struct iscsi_bhs) + response->ip_data_len + 378 icl_pdu_padding(response); 379 if (response->ip_conn->ic_header_crc32c) 380 len += ISCSI_HEADER_DIGEST_SIZE; 381 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) 382 len += ISCSI_DATA_DIGEST_SIZE; 383 384 return (len); 385 } 386 387 static int 388 icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep) 389 { 390 391 if (icl_conn_receive_buf(request->ip_conn, 392 request->ip_bhs, sizeof(struct iscsi_bhs))) { 393 ICL_DEBUG("failed to receive BHS"); 394 return (-1); 395 } 396 397 *availablep -= sizeof(struct iscsi_bhs); 398 return (0); 399 } 400 401 static int 402 icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep) 403 { 404 405 request->ip_ahs_len = icl_pdu_ahs_length(request); 406 if (request->ip_ahs_len == 0) 407 return (0); 408 409 request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn, 410 request->ip_ahs_len); 411 if (request->ip_ahs_mbuf == NULL) { 412 ICL_DEBUG("failed to receive AHS"); 413 return (-1); 414 } 415 416 *availablep -= request->ip_ahs_len; 417 return (0); 418 } 419 420 static uint32_t 421 icl_mbuf_to_crc32c(const struct mbuf *m0) 422 { 423 uint32_t digest = 0xffffffff; 424 const struct mbuf *m; 425 426 for (m = m0; m != NULL; m = m->m_next) 427 digest = calculate_crc32c(digest, 428 mtod(m, const void *), m->m_len); 429 430 digest = digest ^ 0xffffffff; 431 432 return (digest); 433 } 434 435 static int 436 icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep) 437 { 438 uint32_t received_digest, valid_digest; 439 440 if (request->ip_conn->ic_header_crc32c == false) 441 return (0); 442 443 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); 444 if (icl_conn_receive_buf(request->ip_conn, 445 
&received_digest, ISCSI_HEADER_DIGEST_SIZE)) { 446 ICL_DEBUG("failed to receive header digest"); 447 return (-1); 448 } 449 *availablep -= ISCSI_HEADER_DIGEST_SIZE; 450 451 /* Temporary attach AHS to BHS to calculate header digest. */ 452 request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf; 453 valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 454 request->ip_bhs_mbuf->m_next = NULL; 455 if (received_digest != valid_digest) { 456 ICL_WARN("header digest check failed; got 0x%x, " 457 "should be 0x%x", received_digest, valid_digest); 458 return (-1); 459 } 460 461 return (0); 462 } 463 464 /* 465 * Return the number of bytes that should be waiting in the receive socket 466 * before icl_pdu_receive_data_segment() gets called. 467 */ 468 static size_t 469 icl_pdu_data_segment_receive_len(const struct icl_pdu *request) 470 { 471 size_t len; 472 473 len = icl_pdu_data_segment_length(request); 474 if (len == 0) 475 return (0); 476 477 /* 478 * Account for the parts of data segment already read from 479 * the socket buffer. 480 */ 481 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 482 len -= request->ip_data_len; 483 484 /* 485 * Don't always wait for the full data segment to be delivered 486 * to the socket; this might badly affect performance due to 487 * TCP window scaling. 488 */ 489 if (len > partial_receive_len) { 490 #if 0 491 ICL_DEBUG("need %zd bytes of data, limiting to %zd", 492 len, partial_receive_len)); 493 #endif 494 len = partial_receive_len; 495 496 return (len); 497 } 498 499 /* 500 * Account for padding. Note that due to the way code is written, 501 * the icl_pdu_receive_data_segment() must always receive padding 502 * along with the last part of data segment, because it would be 503 * impossible to tell whether we've already received the full data 504 * segment including padding, or without it. 
505 */ 506 if ((len % 4) != 0) 507 len += 4 - (len % 4); 508 509 #if 0 510 ICL_DEBUG("need %zd bytes of data", len)); 511 #endif 512 513 return (len); 514 } 515 516 static int 517 icl_pdu_receive_data_segment(struct icl_pdu *request, 518 size_t *availablep, bool *more_neededp) 519 { 520 struct icl_conn *ic; 521 size_t len, padding = 0; 522 struct mbuf *m; 523 524 ic = request->ip_conn; 525 526 *more_neededp = false; 527 ic->ic_receive_len = 0; 528 529 len = icl_pdu_data_segment_length(request); 530 if (len == 0) 531 return (0); 532 533 if ((len % 4) != 0) 534 padding = 4 - (len % 4); 535 536 /* 537 * Account for already received parts of data segment. 538 */ 539 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 540 len -= request->ip_data_len; 541 542 if (len + padding > *availablep) { 543 /* 544 * Not enough data in the socket buffer. Receive as much 545 * as we can. Don't receive padding, since, obviously, it's 546 * not the end of data segment yet. 547 */ 548 #if 0 549 ICL_DEBUG("limited from %zd to %zd", 550 len + padding, *availablep - padding)); 551 #endif 552 len = *availablep - padding; 553 *more_neededp = true; 554 padding = 0; 555 } 556 557 /* 558 * Must not try to receive padding without at least one byte 559 * of actual data segment. 
560 */ 561 if (len > 0) { 562 m = icl_conn_receive(request->ip_conn, len + padding); 563 if (m == NULL) { 564 ICL_DEBUG("failed to receive data segment"); 565 return (-1); 566 } 567 568 if (request->ip_data_mbuf == NULL) 569 request->ip_data_mbuf = m; 570 else 571 m_cat(request->ip_data_mbuf, m); 572 573 request->ip_data_len += len; 574 *availablep -= len + padding; 575 } else 576 ICL_DEBUG("len 0"); 577 578 if (*more_neededp) 579 ic->ic_receive_len = 580 icl_pdu_data_segment_receive_len(request); 581 582 return (0); 583 } 584 585 static int 586 icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep) 587 { 588 uint32_t received_digest, valid_digest; 589 590 if (request->ip_conn->ic_data_crc32c == false) 591 return (0); 592 593 if (request->ip_data_len == 0) 594 return (0); 595 596 CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); 597 if (icl_conn_receive_buf(request->ip_conn, 598 &received_digest, ISCSI_DATA_DIGEST_SIZE)) { 599 ICL_DEBUG("failed to receive data digest"); 600 return (-1); 601 } 602 *availablep -= ISCSI_DATA_DIGEST_SIZE; 603 604 /* 605 * Note that ip_data_mbuf also contains padding; since digest 606 * calculation is supposed to include that, we iterate over 607 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. 608 */ 609 valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 610 if (received_digest != valid_digest) { 611 ICL_WARN("data digest check failed; got 0x%x, " 612 "should be 0x%x", received_digest, valid_digest); 613 return (-1); 614 } 615 616 return (0); 617 } 618 619 /* 620 * Somewhat contrary to the name, this attempts to receive only one 621 * "part" of PDU at a time; call it repeatedly until it returns non-NULL. 
622 */ 623 static struct icl_pdu * 624 icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep) 625 { 626 struct icl_pdu *request; 627 struct socket *so; 628 size_t len; 629 int error; 630 bool more_needed; 631 632 so = ic->ic_socket; 633 634 if (ic->ic_receive_state == ICL_CONN_STATE_BHS) { 635 KASSERT(ic->ic_receive_pdu == NULL, 636 ("ic->ic_receive_pdu != NULL")); 637 request = icl_soft_conn_new_pdu(ic, M_NOWAIT); 638 if (request == NULL) { 639 ICL_DEBUG("failed to allocate PDU; " 640 "dropping connection"); 641 icl_conn_fail(ic); 642 return (NULL); 643 } 644 ic->ic_receive_pdu = request; 645 } else { 646 KASSERT(ic->ic_receive_pdu != NULL, 647 ("ic->ic_receive_pdu == NULL")); 648 request = ic->ic_receive_pdu; 649 } 650 651 if (*availablep < ic->ic_receive_len) { 652 #if 0 653 ICL_DEBUG("not enough data; need %zd, " 654 "have %zd", ic->ic_receive_len, *availablep); 655 #endif 656 return (NULL); 657 } 658 659 switch (ic->ic_receive_state) { 660 case ICL_CONN_STATE_BHS: 661 //ICL_DEBUG("receiving BHS"); 662 error = icl_pdu_receive_bhs(request, availablep); 663 if (error != 0) { 664 ICL_DEBUG("failed to receive BHS; " 665 "dropping connection"); 666 break; 667 } 668 669 /* 670 * We don't enforce any limit for AHS length; 671 * its length is stored in 8 bit field. 
672 */ 673 674 len = icl_pdu_data_segment_length(request); 675 if (len > ic->ic_max_data_segment_length) { 676 ICL_WARN("received data segment " 677 "length %zd is larger than negotiated; " 678 "dropping connection", len); 679 error = EINVAL; 680 break; 681 } 682 683 ic->ic_receive_state = ICL_CONN_STATE_AHS; 684 ic->ic_receive_len = icl_pdu_ahs_length(request); 685 break; 686 687 case ICL_CONN_STATE_AHS: 688 //ICL_DEBUG("receiving AHS"); 689 error = icl_pdu_receive_ahs(request, availablep); 690 if (error != 0) { 691 ICL_DEBUG("failed to receive AHS; " 692 "dropping connection"); 693 break; 694 } 695 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST; 696 if (ic->ic_header_crc32c == false) 697 ic->ic_receive_len = 0; 698 else 699 ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE; 700 break; 701 702 case ICL_CONN_STATE_HEADER_DIGEST: 703 //ICL_DEBUG("receiving header digest"); 704 error = icl_pdu_check_header_digest(request, availablep); 705 if (error != 0) { 706 ICL_DEBUG("header digest failed; " 707 "dropping connection"); 708 break; 709 } 710 711 ic->ic_receive_state = ICL_CONN_STATE_DATA; 712 ic->ic_receive_len = 713 icl_pdu_data_segment_receive_len(request); 714 break; 715 716 case ICL_CONN_STATE_DATA: 717 //ICL_DEBUG("receiving data segment"); 718 error = icl_pdu_receive_data_segment(request, availablep, 719 &more_needed); 720 if (error != 0) { 721 ICL_DEBUG("failed to receive data segment;" 722 "dropping connection"); 723 break; 724 } 725 726 if (more_needed) 727 break; 728 729 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST; 730 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) 731 ic->ic_receive_len = 0; 732 else 733 ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE; 734 break; 735 736 case ICL_CONN_STATE_DATA_DIGEST: 737 //ICL_DEBUG("receiving data digest"); 738 error = icl_pdu_check_data_digest(request, availablep); 739 if (error != 0) { 740 ICL_DEBUG("data digest failed; " 741 "dropping connection"); 742 break; 743 } 744 745 /* 746 * We've received 
complete PDU; reset the receive state machine 747 * and return the PDU. 748 */ 749 ic->ic_receive_state = ICL_CONN_STATE_BHS; 750 ic->ic_receive_len = sizeof(struct iscsi_bhs); 751 ic->ic_receive_pdu = NULL; 752 return (request); 753 754 default: 755 panic("invalid ic_receive_state %d\n", ic->ic_receive_state); 756 } 757 758 if (error != 0) { 759 /* 760 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu 761 * and will get freed in icl_soft_conn_close(). 762 */ 763 icl_conn_fail(ic); 764 } 765 766 return (NULL); 767 } 768 769 static void 770 icl_conn_receive_pdus(struct icl_conn *ic, size_t available) 771 { 772 struct icl_pdu *response; 773 struct socket *so; 774 775 so = ic->ic_socket; 776 777 /* 778 * This can never happen; we're careful to only mess with ic->ic_socket 779 * pointer when the send/receive threads are not running. 780 */ 781 KASSERT(so != NULL, ("NULL socket")); 782 783 for (;;) { 784 if (ic->ic_disconnecting) 785 return; 786 787 if (so->so_error != 0) { 788 ICL_DEBUG("connection error %d; " 789 "dropping connection", so->so_error); 790 icl_conn_fail(ic); 791 return; 792 } 793 794 /* 795 * Loop until we have a complete PDU or there is not enough 796 * data in the socket buffer. 
797 */ 798 if (available < ic->ic_receive_len) { 799 #if 0 800 ICL_DEBUG("not enough data; have %zd, " 801 "need %zd", available, 802 ic->ic_receive_len); 803 #endif 804 return; 805 } 806 807 response = icl_conn_receive_pdu(ic, &available); 808 if (response == NULL) 809 continue; 810 811 if (response->ip_ahs_len > 0) { 812 ICL_WARN("received PDU with unsupported " 813 "AHS; opcode 0x%x; dropping connection", 814 response->ip_bhs->bhs_opcode); 815 icl_soft_conn_pdu_free(ic, response); 816 icl_conn_fail(ic); 817 return; 818 } 819 820 (ic->ic_receive)(response); 821 } 822 } 823 824 static void 825 icl_receive_thread(void *arg) 826 { 827 struct icl_conn *ic; 828 size_t available; 829 struct socket *so; 830 831 ic = arg; 832 so = ic->ic_socket; 833 834 for (;;) { 835 if (ic->ic_disconnecting) { 836 //ICL_DEBUG("terminating"); 837 break; 838 } 839 840 /* 841 * Set the low watermark, to be checked by 842 * soreadable() in icl_soupcall_receive() 843 * to avoid unnecessary wakeups until there 844 * is enough data received to read the PDU. 
845 */ 846 SOCKBUF_LOCK(&so->so_rcv); 847 available = sbavail(&so->so_rcv); 848 if (available < ic->ic_receive_len) { 849 so->so_rcv.sb_lowat = ic->ic_receive_len; 850 cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); 851 } else 852 so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; 853 SOCKBUF_UNLOCK(&so->so_rcv); 854 855 icl_conn_receive_pdus(ic, available); 856 } 857 858 ICL_CONN_LOCK(ic); 859 ic->ic_receive_running = false; 860 cv_signal(&ic->ic_send_cv); 861 ICL_CONN_UNLOCK(ic); 862 kthread_exit(); 863 } 864 865 static int 866 icl_soupcall_receive(struct socket *so, void *arg, int waitflag) 867 { 868 struct icl_conn *ic; 869 870 if (!soreadable(so)) 871 return (SU_OK); 872 873 ic = arg; 874 cv_signal(&ic->ic_receive_cv); 875 return (SU_OK); 876 } 877 878 static int 879 icl_pdu_finalize(struct icl_pdu *request) 880 { 881 size_t padding, pdu_len; 882 uint32_t digest, zero = 0; 883 int ok; 884 struct icl_conn *ic; 885 886 ic = request->ip_conn; 887 888 icl_pdu_set_data_segment_length(request, request->ip_data_len); 889 890 pdu_len = icl_pdu_size(request); 891 892 if (ic->ic_header_crc32c) { 893 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 894 ok = m_append(request->ip_bhs_mbuf, sizeof(digest), 895 (void *)&digest); 896 if (ok != 1) { 897 ICL_WARN("failed to append header digest"); 898 return (1); 899 } 900 } 901 902 if (request->ip_data_len != 0) { 903 padding = icl_pdu_padding(request); 904 if (padding > 0) { 905 ok = m_append(request->ip_data_mbuf, padding, 906 (void *)&zero); 907 if (ok != 1) { 908 ICL_WARN("failed to append padding"); 909 return (1); 910 } 911 } 912 913 if (ic->ic_data_crc32c) { 914 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 915 916 ok = m_append(request->ip_data_mbuf, sizeof(digest), 917 (void *)&digest); 918 if (ok != 1) { 919 ICL_WARN("failed to append data digest"); 920 return (1); 921 } 922 } 923 924 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); 925 request->ip_data_mbuf = NULL; 926 } 927 928 
request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; 929 930 return (0); 931 } 932 933 static void 934 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue) 935 { 936 struct icl_pdu *request, *request2; 937 struct socket *so; 938 long available, size, size2; 939 int coalesced, error; 940 941 ICL_CONN_LOCK_ASSERT_NOT(ic); 942 943 so = ic->ic_socket; 944 945 SOCKBUF_LOCK(&so->so_snd); 946 /* 947 * Check how much space do we have for transmit. We can't just 948 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, 949 * as it always frees the mbuf chain passed to it, even in case 950 * of error. 951 */ 952 available = sbspace(&so->so_snd); 953 954 /* 955 * Notify the socket upcall that we don't need wakeups 956 * for the time being. 957 */ 958 so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; 959 SOCKBUF_UNLOCK(&so->so_snd); 960 961 while (!STAILQ_EMPTY(queue)) { 962 request = STAILQ_FIRST(queue); 963 size = icl_pdu_size(request); 964 if (available < size) { 965 /* 966 * Set the low watermark, to be checked by 967 * sowriteable() in icl_soupcall_send() 968 * to avoid unnecessary wakeups until there 969 * is enough space for the PDU to fit. 
970 */ 971 SOCKBUF_LOCK(&so->so_snd); 972 available = sbspace(&so->so_snd); 973 if (available < size) { 974 #if 1 975 ICL_DEBUG("no space to send; " 976 "have %ld, need %ld", 977 available, size); 978 #endif 979 so->so_snd.sb_lowat = max(size, 980 so->so_snd.sb_hiwat / 8); 981 SOCKBUF_UNLOCK(&so->so_snd); 982 return; 983 } 984 SOCKBUF_UNLOCK(&so->so_snd); 985 } 986 STAILQ_REMOVE_HEAD(queue, ip_next); 987 error = icl_pdu_finalize(request); 988 if (error != 0) { 989 ICL_DEBUG("failed to finalize PDU; " 990 "dropping connection"); 991 icl_soft_pdu_done(request, EIO); 992 icl_conn_fail(ic); 993 return; 994 } 995 if (coalesce) { 996 coalesced = 1; 997 for (;;) { 998 request2 = STAILQ_FIRST(queue); 999 if (request2 == NULL) 1000 break; 1001 size2 = icl_pdu_size(request2); 1002 if (available < size + size2) 1003 break; 1004 STAILQ_REMOVE_HEAD(queue, ip_next); 1005 error = icl_pdu_finalize(request2); 1006 if (error != 0) { 1007 ICL_DEBUG("failed to finalize PDU; " 1008 "dropping connection"); 1009 icl_soft_pdu_done(request, EIO); 1010 icl_soft_pdu_done(request2, EIO); 1011 icl_conn_fail(ic); 1012 return; 1013 } 1014 m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf); 1015 request2->ip_bhs_mbuf = NULL; 1016 request->ip_bhs_mbuf->m_pkthdr.len += size2; 1017 size += size2; 1018 STAILQ_REMOVE_AFTER(queue, request, ip_next); 1019 icl_soft_pdu_done(request2, 0); 1020 coalesced++; 1021 } 1022 #if 0 1023 if (coalesced > 1) { 1024 ICL_DEBUG("coalesced %d PDUs into %ld bytes", 1025 coalesced, size); 1026 } 1027 #endif 1028 } 1029 available -= size; 1030 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, 1031 NULL, MSG_DONTWAIT, curthread); 1032 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. 
 */
		if (error != 0) {
			/*
			 * A send failure is fatal for the connection: the
			 * PDU is completed with the error and the whole
			 * connection is torn down.
			 */
			ICL_DEBUG("failed to send PDU, error %d; "
			    "dropping connection", error);
			icl_soft_pdu_done(request, error);
			icl_conn_fail(ic);
			return;
		}
		icl_soft_pdu_done(request, 0);
	}
}

/*
 * Per-connection transmit thread.  Drains PDUs queued on ic->ic_to_send
 * (via icl_soft_conn_pdu_queue_cb()) and pushes them down the socket with
 * icl_conn_send_pdus().  Sleeps on ic_send_cv until either more PDUs are
 * queued, the send socket upcall reports free buffer space, or the
 * connection starts disconnecting.
 */
static void
icl_send_thread(void *arg)
{
	struct icl_conn *ic;
	struct icl_pdu_stailq queue;

	ic = arg;

	STAILQ_INIT(&queue);

	ICL_CONN_LOCK(ic);
	for (;;) {
		for (;;) {
			/*
			 * If the local queue is empty, populate it from
			 * the main one.  This way the icl_conn_send_pdus()
			 * can go through all the queued PDUs without holding
			 * any locks.
			 */
			if (STAILQ_EMPTY(&queue))
				STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);

			ic->ic_check_send_space = false;
			ICL_CONN_UNLOCK(ic);
			icl_conn_send_pdus(ic, &queue);
			ICL_CONN_LOCK(ic);

			/*
			 * The icl_soupcall_send() was called since the last
			 * call to sbspace(); go around;
			 */
			if (ic->ic_check_send_space)
				continue;

			/*
			 * Local queue is empty, but we still have PDUs
			 * in the main one; go around.
			 */
			if (STAILQ_EMPTY(&queue) &&
			    !STAILQ_EMPTY(&ic->ic_to_send))
				continue;

			/*
			 * There might be some stuff in the local queue,
			 * which didn't get sent due to not having enough send
			 * space.  Wait for socket upcall.
			 */
			break;
		}

		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/*
	 * We're exiting; move PDUs back to the main queue, so they can
	 * get freed properly.  At this point ordering doesn't matter.
	 */
	STAILQ_CONCAT(&ic->ic_to_send, &queue);

	/*
	 * Clearing ic_send_running and signalling ic_send_cv lets
	 * icl_soft_conn_close() know this thread is gone.
	 */
	ic->ic_send_running = false;
	cv_signal(&ic->ic_send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}

/*
 * Send-buffer socket upcall: runs when the socket may have free send
 * space.  Marks the connection for a send-space re-check and wakes the
 * transmit thread.  Always returns SU_OK (upcall stays installed).
 */
static int
icl_soupcall_send(struct socket *so, void *arg, int waitflag)
{
	struct icl_conn *ic;

	if (!sowriteable(so))
		return (SU_OK);

	ic = arg;

	ICL_CONN_LOCK(ic);
	ic->ic_check_send_space = true;
	ICL_CONN_UNLOCK(ic);

	cv_signal(&ic->ic_send_cv);

	return (SU_OK);
}

/*
 * Append a data segment to a PDU being built.  With ICL_NOCOPY the caller's
 * buffer is attached as read-only external mbuf storage (freed back to the
 * caller via icl_soft_mbuf_done() when the PDU reference count drops);
 * otherwise the data is copied into a freshly allocated mbuf chain.
 * Returns 0 on success or ENOMEM on mbuf allocation failure.
 */
static int
icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request;
	struct mbuf *mb, *newmb;
	size_t copylen, off = 0;

	KASSERT(len > 0, ("len == 0"));

	if (flags & ICL_NOCOPY) {
		/* Zero-copy: wrap the caller's buffer, don't duplicate it. */
		newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA);
		if (newmb == NULL) {
			ICL_WARN("failed to allocate mbuf");
			return (ENOMEM);
		}

		newmb->m_flags |= M_RDONLY;
		m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt,
		    icl_soft_mbuf_done, isp, NULL);
		newmb->m_len = len;
	} else {
		newmb = m_getm2(NULL, len, flags, MT_DATA, 0);
		if (newmb == NULL) {
			ICL_WARN("failed to allocate mbuf for %zd bytes", len);
			return (ENOMEM);
		}

		/* Copy the payload across the allocated chain. */
		for (mb = newmb; mb != NULL; mb = mb->m_next) {
			copylen = min(M_TRAILINGSPACE(mb), len - off);
			memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
			mb->m_len = copylen;
			off += copylen;
		}
		KASSERT(off == len, ("%s: off != len", __func__));
	}

	/* Either start the PDU's data chain or extend it. */
	if (request->ip_data_mbuf == NULL) {
		request->ip_data_mbuf = newmb;
		request->ip_data_len = len;
	} else {
		m_cat(request->ip_data_mbuf, newmb);
		request->ip_data_len += len;
	}

	return (0);
}

/*
 * Copy "len" bytes starting at offset "off" out of a received PDU's data
 * segment into the caller's buffer.
 */
void
icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{

	m_copydata(ip->ip_data_mbuf, off, len, addr);
}

/*
 * Queue a PDU for transmission without a completion callback.
 */
static void
icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{

	icl_soft_conn_pdu_queue_cb(ic, ip, NULL);
}

/*
 * Queue a PDU for transmission; "cb", if non-NULL, is invoked when the PDU
 * is completed (see icl_soft_pdu_done()).  Must be called with the
 * connection lock held.  On a closed/disconnecting connection the PDU is
 * completed immediately with ENOTCONN.
 */
static void
icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip,
    icl_pdu_cb cb)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;

	ICL_CONN_LOCK_ASSERT(ic);
	/* Hold a reference for the send path; dropped in icl_soft_pdu_done(). */
	isp->ref_cnt++;
	isp->cb = cb;

	if (ic->ic_disconnecting || ic->ic_socket == NULL) {
		ICL_DEBUG("icl_pdu_queue on closed connection");
		icl_soft_pdu_done(ip, ENOTCONN);
		return;
	}

	if (!STAILQ_EMPTY(&ic->ic_to_send)) {
		STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
		/*
		 * If the queue is not empty, someone else had already
		 * signaled the send thread; no need to do that again,
		 * just return.
		 */
		return;
	}

	STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
	cv_signal(&ic->ic_send_cv);
}

/*
 * Allocate and initialize a new software iSCSI connection object.
 * "lock" is supplied by the caller and becomes ic_lock (the mutex behind
 * ICL_CONN_LOCK); "name" is used for thread naming and diagnostics.
 * Counted in icl_ncons to prevent module unload while connections exist.
 */
static struct icl_conn *
icl_soft_new_conn(const char *name, struct mtx *lock)
{
	struct icl_conn *ic;

	refcount_acquire(&icl_ncons);

	ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO);

	STAILQ_INIT(&ic->ic_to_send);
	ic->ic_lock = lock;
	cv_init(&ic->ic_send_cv, "icl_tx");
	cv_init(&ic->ic_receive_cv, "icl_rx");
#ifdef DIAGNOSTIC
	refcount_init(&ic->ic_outstanding_pdus, 0);
#endif
	ic->ic_max_data_segment_length = max_data_segment_length;
	ic->ic_name = name;
	ic->ic_offload = "None";
	ic->ic_unmapped = false;

	return (ic);
}

/*
 * Counterpart of icl_soft_new_conn(): destroy the condition variables and
 * the kobj, and drop the module-wide connection count.  The connection must
 * already be closed (no outstanding PDUs).
 */
void
icl_soft_conn_free(struct icl_conn *ic)
{

#ifdef DIAGNOSTIC
	KASSERT(ic->ic_outstanding_pdus == 0,
	    ("destroying session with %d outstanding PDUs",
	     ic->ic_outstanding_pdus));
#endif
	cv_destroy(&ic->ic_send_cv);
	cv_destroy(&ic->ic_receive_cv);
	kobj_delete((struct kobj *)ic, M_ICL_SOFT);
	refcount_release(&icl_ncons);
}

/*
 * Bring a connection with an attached socket into operation: size the
 * socket buffers, disable Nagle, install the send/receive upcalls, and
 * start the transmit and receive kthreads.  On any failure the connection
 * is closed via icl_soft_conn_close() and the error is returned.
 */
static int
icl_conn_start(struct icl_conn *ic)
{
	size_t minspace;
	struct sockopt opt;
	int error, one = 1;

	ICL_CONN_LOCK(ic);

	/*
	 * XXX: Ugly hack.
	 */
	if (ic->ic_socket == NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EINVAL);
	}

	/* Receive state machine starts by expecting a Basic Header Segment. */
	ic->ic_receive_state = ICL_CONN_STATE_BHS;
	ic->ic_receive_len = sizeof(struct iscsi_bhs);
	ic->ic_disconnecting = false;

	ICL_CONN_UNLOCK(ic);

	/*
	 * For sendspace, this is required because the current code cannot
	 * send a PDU in pieces; thus, the minimum buffer size is equal
	 * to the maximum PDU size.  "+4" is to account for possible padding.
	 */
	minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
	if (sendspace < minspace) {
		ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
		    minspace);
		sendspace = minspace;
	}
	if (recvspace < minspace) {
		ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
		    minspace);
		recvspace = minspace;
	}

	error = soreserve(ic->ic_socket, sendspace, recvspace);
	if (error != 0) {
		ICL_WARN("soreserve failed with error %d", error);
		icl_soft_conn_close(ic);
		return (error);
	}
	ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
	ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;

	/*
	 * Disable Nagle.
	 */
	bzero(&opt, sizeof(opt));
	opt.sopt_dir = SOPT_SET;
	opt.sopt_level = IPPROTO_TCP;
	opt.sopt_name = TCP_NODELAY;
	opt.sopt_val = &one;
	opt.sopt_valsize = sizeof(one);
	error = sosetopt(ic->ic_socket, &opt);
	if (error != 0) {
		ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
		icl_soft_conn_close(ic);
		return (error);
	}

	/*
	 * Register socket upcall, to get notified about incoming PDUs
	 * and free space to send outgoing ones.
	 */
	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
	soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
	soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);

	/*
	 * Start threads.
	 */
	ICL_CONN_LOCK(ic);
	ic->ic_send_running = ic->ic_receive_running = true;
	ICL_CONN_UNLOCK(ic);
	error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		/*
		 * Neither thread got started; clear both running flags so
		 * icl_soft_conn_close() doesn't wait for them forever.
		 */
		ICL_CONN_LOCK(ic);
		ic->ic_send_running = ic->ic_receive_running = false;
		cv_signal(&ic->ic_send_cv);
		ICL_CONN_UNLOCK(ic);
		icl_soft_conn_close(ic);
		return (error);
	}
	error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		/*
		 * The send thread is running; only the receive thread failed
		 * to start.  icl_soft_conn_close() will shut the sender down.
		 */
		ICL_CONN_LOCK(ic);
		ic->ic_receive_running = false;
		cv_signal(&ic->ic_send_cv);
		ICL_CONN_UNLOCK(ic);
		icl_soft_conn_close(ic);
		return (error);
	}

	return (0);
}

/*
 * Take over a TCP socket from userland (identified by file descriptor "fd")
 * and start the connection on it.  The file descriptor's ops are replaced
 * with badfileops so userland can no longer touch the socket.  Returns
 * EINVAL for non-socket/non-stream descriptors, EBUSY if the connection
 * already has a socket.
 */
int
icl_soft_conn_handoff(struct icl_conn *ic, int fd)
{
	struct file *fp;
	struct socket *so;
	cap_rights_t rights;
	int error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

#ifdef ICL_KERNEL_PROXY
	/*
	 * We're transitioning to Full Feature phase, and we don't
	 * really care.
	 */
	if (fd == 0) {
		ICL_CONN_LOCK(ic);
		if (ic->ic_socket == NULL) {
			ICL_CONN_UNLOCK(ic);
			ICL_WARN("proxy handoff without connect");
			return (EINVAL);
		}
		ICL_CONN_UNLOCK(ic);
		return (0);
	}
#endif

	/*
	 * Steal the socket from userland.
	 */
	error = fget(curthread, fd,
	    cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		fdrop(fp, curthread);
		return (EINVAL);
	}
	so = fp->f_data;
	if (so->so_type != SOCK_STREAM) {
		fdrop(fp, curthread);
		return (EINVAL);
	}

	ICL_CONN_LOCK(ic);

	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		fdrop(fp, curthread);
		return (EBUSY);
	}

	ic->ic_socket = fp->f_data;
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fdrop(fp, curthread);
	ICL_CONN_UNLOCK(ic);

	error = icl_conn_start(ic);

	return (error);
}

/*
 * Shut a connection down: stop both worker threads, remove the socket
 * upcalls, close the socket, and complete all not-yet-sent PDUs with
 * ENOTCONN.  Safe to call concurrently; only the caller that observes a
 * non-NULL ic_socket performs the actual teardown.
 */
void
icl_soft_conn_close(struct icl_conn *ic)
{
	struct icl_pdu *pdu;
	struct socket *so;

	ICL_CONN_LOCK(ic);

	/*
	 * Wake up the threads, so they can properly terminate.
	 * They clear ic_receive_running/ic_send_running and signal
	 * ic_send_cv on exit.
	 */
	ic->ic_disconnecting = true;
	while (ic->ic_receive_running || ic->ic_send_running) {
		cv_signal(&ic->ic_receive_cv);
		cv_signal(&ic->ic_send_cv);
		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/* Some other thread could close the connection same time. */
	so = ic->ic_socket;
	if (so == NULL) {
		ICL_CONN_UNLOCK(ic);
		return;
	}
	ic->ic_socket = NULL;

	/*
	 * Deregister socket upcalls.
	 */
	ICL_CONN_UNLOCK(ic);
	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_upcall != NULL)
		soupcall_clear(so, SO_SND);
	SOCKBUF_UNLOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (so->so_rcv.sb_upcall != NULL)
		soupcall_clear(so, SO_RCV);
	SOCKBUF_UNLOCK(&so->so_rcv);
	soclose(so);
	ICL_CONN_LOCK(ic);

	if (ic->ic_receive_pdu != NULL) {
		//ICL_DEBUG("freeing partially received PDU");
		icl_soft_conn_pdu_free(ic, ic->ic_receive_pdu);
		ic->ic_receive_pdu = NULL;
	}

	/*
	 * Remove any outstanding PDUs from the send queue.
	 */
	while (!STAILQ_EMPTY(&ic->ic_to_send)) {
		pdu = STAILQ_FIRST(&ic->ic_to_send);
		STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
		icl_soft_pdu_done(pdu, ENOTCONN);
	}

	KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
	    ("destroying session with non-empty send queue"));
	ICL_CONN_UNLOCK(ic);
}

/*
 * Task setup/teardown hooks of the icl_conn kobj interface.  The software
 * backend needs no per-task state, so these are no-ops.
 */
int
icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
{

	return (0);
}

void
icl_soft_conn_task_done(struct icl_conn *ic, void *prv)
{
}

/*
 * Transfer setup/teardown hooks; likewise no-ops for the software backend.
 */
int
icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
    uint32_t *transfer_tag, void **prvp)
{

	return (0);
}

void
icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv)
{
}

/*
 * Report this backend's negotiable iSCSI limits, taken from the
 * kern.icl.soft.* tunables.
 */
static int
icl_soft_limits(struct icl_drv_limits *idl)
{

	idl->idl_max_recv_data_segment_length = max_data_segment_length;
	idl->idl_max_send_data_segment_length = max_data_segment_length;
	idl->idl_max_burst_length = max_burst_length;
	idl->idl_first_burst_length = first_burst_length;

	return (0);
}

#ifdef ICL_KERNEL_PROXY
/*
 * Kernel-proxy variant: establish the TCP connection from within the
 * kernel instead of taking a socket handed off from userland.
 */
int
icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{

	return (icl_soft_proxy_connect(ic, domain, socktype, protocol,
	    from_sa, to_sa));
}

/*
 * Kernel-proxy variant of icl_soft_conn_handoff(): attach an already-open
 * in-kernel socket to the connection and start it.
 */
int
icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so)
{
	int error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	if (so->so_type != SOCK_STREAM)
		return (EINVAL);

	ICL_CONN_LOCK(ic);
	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EBUSY);
	}
	ic->ic_socket = so;
	ICL_CONN_UNLOCK(ic);

	error = icl_conn_start(ic);

	return (error);
}
#endif /* ICL_KERNEL_PROXY */

/*
 * Module load: create the PDU UMA zone and register this backend with the
 * ICL midlayer.
 */
static int
icl_soft_load(void)
{
	int error;

	icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu",
	    sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	refcount_init(&icl_ncons, 0);

	/*
	 * The reason we call this "none" is that to the user,
	 * it's known as "offload driver"; "offload driver: soft"
	 * doesn't make much sense.
	 */
	error = icl_register("none", false, 0,
	    icl_soft_limits, icl_soft_new_conn);
	KASSERT(error == 0, ("failed to register"));

#if defined(ICL_KERNEL_PROXY) && 0
	/*
	 * Debugging aid for kernel proxy functionality.
	 */
	error = icl_register("proxytest", true, 0,
	    icl_soft_limits, icl_soft_new_conn);
	KASSERT(error == 0, ("failed to register"));
#endif

	return (error);
}

/*
 * Module unload: refuse while connections exist (icl_ncons != 0),
 * otherwise deregister from the midlayer and destroy the PDU zone.
 */
static int
icl_soft_unload(void)
{

	if (icl_ncons != 0)
		return (EBUSY);

	icl_unregister("none", false);
#if defined(ICL_KERNEL_PROXY) && 0
	icl_unregister("proxytest", true);
#endif

	uma_zdestroy(icl_soft_pdu_zone);

	return (0);
}

/*
 * module(9) event handler dispatching load/unload.
 */
static int
icl_soft_modevent(module_t mod, int what, void *arg)
{

	switch (what) {
	case MOD_LOAD:
		return (icl_soft_load());
	case MOD_UNLOAD:
		return (icl_soft_unload());
	default:
		return (EINVAL);
	}
}

moduledata_t icl_soft_data = {
	"icl_soft",
	icl_soft_modevent,
	0
};

DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_soft, icl, 1, 1, 1);
MODULE_VERSION(icl_soft, 1);