1 /*- 2 * Copyright (c) 2012 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Edward Tomasz Napierala under sponsorship 6 * from the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 31 /* 32 * Software implementation of iSCSI Common Layer kobj(9) interface. 
33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include <sys/param.h> 39 #include <sys/capsicum.h> 40 #include <sys/condvar.h> 41 #include <sys/conf.h> 42 #include <sys/file.h> 43 #include <sys/kernel.h> 44 #include <sys/kthread.h> 45 #include <sys/lock.h> 46 #include <sys/mbuf.h> 47 #include <sys/mutex.h> 48 #include <sys/module.h> 49 #include <sys/protosw.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/sysctl.h> 53 #include <sys/systm.h> 54 #include <sys/sx.h> 55 #include <sys/uio.h> 56 #include <vm/uma.h> 57 #include <netinet/in.h> 58 #include <netinet/tcp.h> 59 60 #include <dev/iscsi/icl.h> 61 #include <dev/iscsi/iscsi_proto.h> 62 #include <icl_conn_if.h> 63 64 static int coalesce = 1; 65 SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN, 66 &coalesce, 0, "Try to coalesce PDUs before sending"); 67 static int partial_receive_len = 128 * 1024; 68 SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, 69 &partial_receive_len, 0, "Minimum read size for partially received " 70 "data segment"); 71 static int sendspace = 1048576; 72 SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN, 73 &sendspace, 0, "Default send socket buffer size"); 74 static int recvspace = 1048576; 75 SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN, 76 &recvspace, 0, "Default receive socket buffer size"); 77 78 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend"); 79 static uma_zone_t icl_pdu_zone; 80 81 static volatile u_int icl_ncons; 82 83 #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) 84 #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) 85 #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) 86 #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) 87 88 STAILQ_HEAD(icl_pdu_stailq, icl_pdu); 89 90 static icl_conn_new_pdu_t icl_soft_conn_new_pdu; 91 static icl_conn_pdu_free_t icl_soft_conn_pdu_free; 92 static icl_conn_pdu_data_segment_length_t 93 
    icl_soft_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t	icl_soft_conn_pdu_append_data;
static icl_conn_pdu_get_data_t	icl_soft_conn_pdu_get_data;
static icl_conn_pdu_queue_t	icl_soft_conn_pdu_queue;
static icl_conn_handoff_t	icl_soft_conn_handoff;
static icl_conn_free_t	icl_soft_conn_free;
static icl_conn_close_t	icl_soft_conn_close;
static icl_conn_task_setup_t	icl_soft_conn_task_setup;
static icl_conn_task_done_t	icl_soft_conn_task_done;
static icl_conn_transfer_setup_t	icl_soft_conn_transfer_setup;
static icl_conn_transfer_done_t	icl_soft_conn_transfer_done;
#ifdef ICL_KERNEL_PROXY
static icl_conn_connect_t	icl_soft_conn_connect;
#endif

/*
 * kobj(9) method table binding the generic ICL connection interface
 * to this software (socket-based) implementation.
 */
static kobj_method_t icl_soft_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length,
	    icl_soft_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
	KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
	KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
	KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
	KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
	KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done),
	KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup),
	KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done),
#ifdef ICL_KERNEL_PROXY
	KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect),
#endif
	{ 0, 0 }
};

DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn));

/*
 * Fail the connection: set an error on the socket and notify the owner
 * (iSCSI initiator/target code) via its registered ic_error callback.
 * No-op if the socket is already gone.
 */
static void
icl_conn_fail(struct icl_conn *ic)
{
	if (ic->ic_socket == NULL)
		return;

	/*
	 * XXX
	 */
	ic->ic_socket->so_error = EDOOFUS;
	(ic->ic_error)(ic);
}

/*
 * Read exactly 'len' bytes from the connection's socket into a freshly
 * allocated mbuf chain.  Non-blocking; returns NULL on soreceive() error
 * or short read (caller is expected to have checked data availability).
 */
static struct mbuf *
icl_conn_receive(struct icl_conn *ic, size_t len)
{
	struct uio uio;
	struct socket *so;
	struct mbuf *m;
	int error, flags;

	so = ic->ic_socket;

	memset(&uio, 0, sizeof(uio));
	uio.uio_resid = len;

	flags = MSG_DONTWAIT;
	error = soreceive(so, NULL, &uio, &m, NULL, &flags);
	if (error != 0) {
		ICL_DEBUG("soreceive error %d", error);
		return (NULL);
	}
	if (uio.uio_resid != 0) {
		m_freem(m);
		ICL_DEBUG("short read");
		return (NULL);
	}

	return (m);
}

/*
 * Like icl_conn_receive(), but copy the data into a caller-supplied
 * buffer instead of returning an mbuf chain.  Returns 0 on success,
 * -1 on error or short read.
 */
static int
icl_conn_receive_buf(struct icl_conn *ic, void *buf, size_t len)
{
	struct iovec iov[1];
	struct uio uio;
	struct socket *so;
	int error, flags;

	so = ic->ic_socket;

	memset(&uio, 0, sizeof(uio));
	iov[0].iov_base = buf;
	iov[0].iov_len = len;
	uio.uio_iov = iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = len;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;

	flags = MSG_DONTWAIT;
	error = soreceive(so, NULL, &uio, NULL, NULL, &flags);
	if (error != 0) {
		ICL_DEBUG("soreceive error %d", error);
		return (-1);
	}
	if (uio.uio_resid != 0) {
		ICL_DEBUG("short read");
		return (-1);
	}

	return (0);
}

/*
 * Allocate a zeroed icl_pdu bound to the connection; no mbufs are
 * attached yet.  'flags' are uma_zalloc(9) flags (M_WAITOK/M_NOWAIT).
 */
static struct icl_pdu *
icl_pdu_new_empty(struct icl_conn *ic, int flags)
{
	struct icl_pdu *ip;

#ifdef DIAGNOSTIC
	refcount_acquire(&ic->ic_outstanding_pdus);
#endif
	ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
	if (ip == NULL) {
		ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
#ifdef DIAGNOSTIC
		refcount_release(&ic->ic_outstanding_pdus);
#endif
		return (NULL);
	}

	ip->ip_conn = ic;

	return (ip);
}

/*
 * Release a PDU together with any BHS/AHS/data mbuf chains it owns.
 */
static void
icl_pdu_free(struct icl_pdu *ip)
{
	struct icl_conn *ic;

	ic = ip->ip_conn;

	m_freem(ip->ip_bhs_mbuf);
	m_freem(ip->ip_ahs_mbuf);
	m_freem(ip->ip_data_mbuf);
	uma_zfree(icl_pdu_zone, ip);
#ifdef DIAGNOSTIC
	refcount_release(&ic->ic_outstanding_pdus);
#endif
}

void
icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{

	icl_pdu_free(ip);
}

/*
 * Allocate icl_pdu with empty BHS to fill up by the caller.
 */
struct icl_pdu *
icl_soft_conn_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_pdu *ip;

	ip = icl_pdu_new_empty(ic, flags);
	if (ip == NULL)
		return (NULL);

	/* The BHS must fit in a single mbuf header. */
	CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN);
	ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA);
	if (ip->ip_bhs_mbuf == NULL) {
		ICL_WARN("failed to allocate BHS mbuf");
		icl_pdu_free(ip);
		return (NULL);
	}
	ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
	memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
	ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);

	return (ip);
}

/*
 * AHS length from the BHS; the on-wire field counts 4-byte words.
 */
static int
icl_pdu_ahs_length(const struct icl_pdu *request)
{

	return (request->ip_bhs->bhs_total_ahs_len * 4);
}

/*
 * Decode the 24-bit big-endian DataSegmentLength field from the BHS.
 */
static size_t
icl_pdu_data_segment_length(const struct icl_pdu *request)
{
	uint32_t len = 0;

	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
}

size_t
icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{

	return (icl_pdu_data_segment_length(request));
}

/*
 * Encode 'len' into the 24-bit big-endian DataSegmentLength field.
 */
static void
icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
{

	response->ip_bhs->bhs_data_segment_len[2] = len;
	response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
	response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
}

/*
 * Number of pad bytes needed to round the data segment up to 4 bytes.
 */
static size_t
icl_pdu_padding(const struct icl_pdu *ip)
{

	if ((ip->ip_data_len % 4) != 0)
		return (4 - (ip->ip_data_len % 4));

	return (0);
}

/*
 * Total on-wire size of the PDU: BHS + padded data segment, plus
 * header/data digests when enabled on the connection.
 */
static size_t
icl_pdu_size(const struct icl_pdu *response)
{
	size_t len;

	KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));

	len = sizeof(struct iscsi_bhs) + response->ip_data_len +
	    icl_pdu_padding(response);
	if (response->ip_conn->ic_header_crc32c)
		len += ISCSI_HEADER_DIGEST_SIZE;
	if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
		len += ISCSI_DATA_DIGEST_SIZE;

	return (len);
}

/*
 * Read the Basic Header Segment into the preallocated BHS buffer,
 * decrementing *availablep by the amount consumed.
 */
static int
icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
{

	if (icl_conn_receive_buf(request->ip_conn,
	    request->ip_bhs, sizeof(struct iscsi_bhs))) {
		ICL_DEBUG("failed to receive BHS");
		return (-1);
	}

	*availablep -= sizeof(struct iscsi_bhs);
	return (0);
}

/*
 * Read the Additional Header Segment, if the BHS declares one, into
 * a new mbuf chain; decrements *availablep by the amount consumed.
 */
static int
icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
{

	request->ip_ahs_len = icl_pdu_ahs_length(request);
	if (request->ip_ahs_len == 0)
		return (0);

	request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
	    request->ip_ahs_len);
	if (request->ip_ahs_mbuf == NULL) {
		ICL_DEBUG("failed to receive AHS");
		return (-1);
	}

	*availablep -= request->ip_ahs_len;
	return (0);
}

/*
 * CRC32C over an entire mbuf chain, with the initial/final XOR
 * required by the iSCSI digest format (RFC 7143 uses CRC32C).
 */
static uint32_t
icl_mbuf_to_crc32c(const struct mbuf *m0)
{
	uint32_t digest = 0xffffffff;
	const struct mbuf *m;

	for (m = m0; m != NULL; m = m->m_next)
		digest = calculate_crc32c(digest,
		    mtod(m, const void *), m->m_len);

	digest = digest ^ 0xffffffff;

	return (digest);
}

/*
 * Receive and verify the header digest, when enabled.  Returns 0 on
 * success or when digests are disabled; -1 on receive error or mismatch.
 */
static int
icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
{
	uint32_t received_digest, valid_digest;

	if (request->ip_conn->ic_header_crc32c == false)
		return (0);

	CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
	if (icl_conn_receive_buf(request->ip_conn,
	    &received_digest, ISCSI_HEADER_DIGEST_SIZE)) {
		ICL_DEBUG("failed to receive header digest");
		return (-1);
	}
	*availablep -= ISCSI_HEADER_DIGEST_SIZE;

	/* Temporary attach AHS to BHS to calculate header digest. */
	request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf;
	valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
	request->ip_bhs_mbuf->m_next = NULL;
	if (received_digest != valid_digest) {
		ICL_WARN("header digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}

/*
 * Return the number of bytes that should be waiting in the receive socket
 * before icl_pdu_receive_data_segment() gets called.
 */
static size_t
icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
{
	size_t len;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	/*
	 * Account for the parts of data segment already read from
	 * the socket buffer.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	/*
	 * Don't always wait for the full data segment to be delivered
	 * to the socket; this might badly affect performance due to
	 * TCP window scaling.
	 *
	 * NOTE(review): 'len' is size_t while partial_receive_len is a
	 * signed int sysctl; a negative tunable would misbehave here —
	 * TODO confirm whether the sysctl should be clamped.
	 */
	if (len > partial_receive_len) {
#if 0
		ICL_DEBUG("need %zd bytes of data, limiting to %zd",
		    len, partial_receive_len));
#endif
		len = partial_receive_len;

		return (len);
	}

	/*
	 * Account for padding.  Note that due to the way code is written,
	 * the icl_pdu_receive_data_segment() must always receive padding
	 * along with the last part of data segment, because it would be
	 * impossible to tell whether we've already received the full data
	 * segment including padding, or without it.
	 */
	if ((len % 4) != 0)
		len += 4 - (len % 4);

#if 0
	ICL_DEBUG("need %zd bytes of data", len));
#endif

	return (len);
}

/*
 * Receive as much of the data segment (and, on its last chunk, the pad
 * bytes) as the socket buffer holds, appending to ip_data_mbuf.  Sets
 * *more_neededp and ic_receive_len when the segment is still incomplete.
 */
static int
icl_pdu_receive_data_segment(struct icl_pdu *request,
    size_t *availablep, bool *more_neededp)
{
	struct icl_conn *ic;
	size_t len, padding = 0;
	struct mbuf *m;

	ic = request->ip_conn;

	*more_neededp = false;
	ic->ic_receive_len = 0;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	if ((len % 4) != 0)
		padding = 4 - (len % 4);

	/*
	 * Account for already received parts of data segment.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	if (len + padding > *availablep) {
		/*
		 * Not enough data in the socket buffer.  Receive as much
		 * as we can.  Don't receive padding, since, obviously, it's
		 * not the end of data segment yet.
		 */
#if 0
		ICL_DEBUG("limited from %zd to %zd",
		    len + padding, *availablep - padding));
#endif
		len = *availablep - padding;
		*more_neededp = true;
		padding = 0;
	}

	/*
	 * Must not try to receive padding without at least one byte
	 * of actual data segment.
	 */
	if (len > 0) {
		m = icl_conn_receive(request->ip_conn, len + padding);
		if (m == NULL) {
			ICL_DEBUG("failed to receive data segment");
			return (-1);
		}

		if (request->ip_data_mbuf == NULL)
			request->ip_data_mbuf = m;
		else
			m_cat(request->ip_data_mbuf, m);

		/* ip_data_len counts payload only, not the pad bytes. */
		request->ip_data_len += len;
		*availablep -= len + padding;
	} else
		ICL_DEBUG("len 0");

	if (*more_neededp)
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);

	return (0);
}

/*
 * Receive and verify the data digest, when enabled and a data segment
 * is present.  Returns 0 on success/skip, -1 on error or mismatch.
 */
static int
icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
{
	uint32_t received_digest, valid_digest;

	if (request->ip_conn->ic_data_crc32c == false)
		return (0);

	if (request->ip_data_len == 0)
		return (0);

	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
	if (icl_conn_receive_buf(request->ip_conn,
	    &received_digest, ISCSI_DATA_DIGEST_SIZE)) {
		ICL_DEBUG("failed to receive data digest");
		return (-1);
	}
	*availablep -= ISCSI_DATA_DIGEST_SIZE;

	/*
	 * Note that ip_data_mbuf also contains padding; since digest
	 * calculation is supposed to include that, we iterate over
	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
	 */
	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
	if (received_digest != valid_digest) {
		ICL_WARN("data digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}

/*
 * Somewhat contrary to the name, this attempts to receive only one
 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
 */
static struct icl_pdu *
icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
{
	struct icl_pdu *request;
	struct socket *so;
	size_t len;
	int error;
	bool more_needed;

	so = ic->ic_socket;

	/*
	 * In the BHS state a fresh PDU is allocated; in every other state
	 * we continue filling in the partially received one stashed in
	 * ic->ic_receive_pdu.
	 */
	if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
		KASSERT(ic->ic_receive_pdu == NULL,
		    ("ic->ic_receive_pdu != NULL"));
		request = icl_soft_conn_new_pdu(ic, M_NOWAIT);
		if (request == NULL) {
			ICL_DEBUG("failed to allocate PDU; "
			    "dropping connection");
			icl_conn_fail(ic);
			return (NULL);
		}
		ic->ic_receive_pdu = request;
	} else {
		KASSERT(ic->ic_receive_pdu != NULL,
		    ("ic->ic_receive_pdu == NULL"));
		request = ic->ic_receive_pdu;
	}

	/* Wait until the socket buffer holds the whole next piece. */
	if (*availablep < ic->ic_receive_len) {
#if 0
		ICL_DEBUG("not enough data; need %zd, "
		    "have %zd", ic->ic_receive_len, *availablep);
#endif
		return (NULL);
	}

	/*
	 * Receive state machine: BHS -> AHS -> header digest -> data ->
	 * data digest.  Each state consumes its piece, then sets
	 * ic_receive_len to how much the next state needs.
	 */
	switch (ic->ic_receive_state) {
	case ICL_CONN_STATE_BHS:
		//ICL_DEBUG("receiving BHS");
		error = icl_pdu_receive_bhs(request, availablep);
		if (error != 0) {
			ICL_DEBUG("failed to receive BHS; "
			    "dropping connection");
			break;
		}

		/*
		 * We don't enforce any limit for AHS length;
		 * its length is stored in 8 bit field.
		 */

		len = icl_pdu_data_segment_length(request);
		if (len > ic->ic_max_data_segment_length) {
			ICL_WARN("received data segment "
			    "length %zd is larger than negotiated "
			    "MaxDataSegmentLength %zd; "
			    "dropping connection",
			    len, ic->ic_max_data_segment_length);
			error = EINVAL;
			break;
		}

		ic->ic_receive_state = ICL_CONN_STATE_AHS;
		ic->ic_receive_len = icl_pdu_ahs_length(request);
		break;

	case ICL_CONN_STATE_AHS:
		//ICL_DEBUG("receiving AHS");
		error = icl_pdu_receive_ahs(request, availablep);
		if (error != 0) {
			ICL_DEBUG("failed to receive AHS; "
			    "dropping connection");
			break;
		}
		ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
		if (ic->ic_header_crc32c == false)
			ic->ic_receive_len = 0;
		else
			ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_HEADER_DIGEST:
		//ICL_DEBUG("receiving header digest");
		error = icl_pdu_check_header_digest(request, availablep);
		if (error != 0) {
			ICL_DEBUG("header digest failed; "
			    "dropping connection");
			break;
		}

		ic->ic_receive_state = ICL_CONN_STATE_DATA;
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);
		break;

	case ICL_CONN_STATE_DATA:
		//ICL_DEBUG("receiving data segment");
		error = icl_pdu_receive_data_segment(request, availablep,
		    &more_needed);
		if (error != 0) {
			ICL_DEBUG("failed to receive data segment;"
			    "dropping connection");
			break;
		}

		/* Data segment only partially received; stay in this state. */
		if (more_needed)
			break;

		ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
		if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
			ic->ic_receive_len = 0;
		else
			ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_DATA_DIGEST:
		//ICL_DEBUG("receiving data digest");
		error = icl_pdu_check_data_digest(request, availablep);
		if (error != 0) {
			ICL_DEBUG("data digest failed; "
			    "dropping connection");
			break;
		}

		/*
		 * We've received complete PDU; reset the receive state machine
		 * and return the PDU.
		 */
		ic->ic_receive_state = ICL_CONN_STATE_BHS;
		ic->ic_receive_len = sizeof(struct iscsi_bhs);
		ic->ic_receive_pdu = NULL;
		return (request);

	default:
		panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
	}

	if (error != 0) {
		/*
		 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
		 * and will get freed in icl_soft_conn_close().
		 */
		icl_conn_fail(ic);
	}

	return (NULL);
}

/*
 * Drain complete PDUs out of 'available' bytes of socket-buffer data,
 * handing each one to the owner's ic_receive callback.
 */
static void
icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
{
	struct icl_pdu *response;
	struct socket *so;

	so = ic->ic_socket;

	/*
	 * This can never happen; we're careful to only mess with ic->ic_socket
	 * pointer when the send/receive threads are not running.
	 */
	KASSERT(so != NULL, ("NULL socket"));

	for (;;) {
		if (ic->ic_disconnecting)
			return;

		if (so->so_error != 0) {
			ICL_DEBUG("connection error %d; "
			    "dropping connection", so->so_error);
			icl_conn_fail(ic);
			return;
		}

		/*
		 * Loop until we have a complete PDU or there is not enough
		 * data in the socket buffer.
		 */
		if (available < ic->ic_receive_len) {
#if 0
			ICL_DEBUG("not enough data; have %zd, "
			    "need %zd", available,
			    ic->ic_receive_len);
#endif
			return;
		}

		response = icl_conn_receive_pdu(ic, &available);
		if (response == NULL)
			continue;

		/* AHS is received but not parsed; refuse PDUs carrying one. */
		if (response->ip_ahs_len > 0) {
			ICL_WARN("received PDU with unsupported "
			    "AHS; opcode 0x%x; dropping connection",
			    response->ip_bhs->bhs_opcode);
			icl_pdu_free(response);
			icl_conn_fail(ic);
			return;
		}

		(ic->ic_receive)(response);
	}
}

/*
 * Per-connection receive thread: sleeps on ic_receive_cv until the
 * socket has enough data for the next PDU piece, then processes it.
 */
static void
icl_receive_thread(void *arg)
{
	struct icl_conn *ic;
	size_t available;
	struct socket *so;

	ic = arg;
	so = ic->ic_socket;

	for (;;) {
		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		/*
		 * Set the low watermark, to be checked by
		 * soreadable() in icl_soupcall_receive()
		 * to avoid unnecessary wakeups until there
		 * is enough data received to read the PDU.
802 */ 803 SOCKBUF_LOCK(&so->so_rcv); 804 available = sbavail(&so->so_rcv); 805 if (available < ic->ic_receive_len) { 806 so->so_rcv.sb_lowat = ic->ic_receive_len; 807 cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); 808 } else 809 so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; 810 SOCKBUF_UNLOCK(&so->so_rcv); 811 812 icl_conn_receive_pdus(ic, available); 813 } 814 815 ICL_CONN_LOCK(ic); 816 ic->ic_receive_running = false; 817 cv_signal(&ic->ic_send_cv); 818 ICL_CONN_UNLOCK(ic); 819 kthread_exit(); 820 } 821 822 static int 823 icl_soupcall_receive(struct socket *so, void *arg, int waitflag) 824 { 825 struct icl_conn *ic; 826 827 if (!soreadable(so)) 828 return (SU_OK); 829 830 ic = arg; 831 cv_signal(&ic->ic_receive_cv); 832 return (SU_OK); 833 } 834 835 static int 836 icl_pdu_finalize(struct icl_pdu *request) 837 { 838 size_t padding, pdu_len; 839 uint32_t digest, zero = 0; 840 int ok; 841 struct icl_conn *ic; 842 843 ic = request->ip_conn; 844 845 icl_pdu_set_data_segment_length(request, request->ip_data_len); 846 847 pdu_len = icl_pdu_size(request); 848 849 if (ic->ic_header_crc32c) { 850 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 851 ok = m_append(request->ip_bhs_mbuf, sizeof(digest), 852 (void *)&digest); 853 if (ok != 1) { 854 ICL_WARN("failed to append header digest"); 855 return (1); 856 } 857 } 858 859 if (request->ip_data_len != 0) { 860 padding = icl_pdu_padding(request); 861 if (padding > 0) { 862 ok = m_append(request->ip_data_mbuf, padding, 863 (void *)&zero); 864 if (ok != 1) { 865 ICL_WARN("failed to append padding"); 866 return (1); 867 } 868 } 869 870 if (ic->ic_data_crc32c) { 871 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 872 873 ok = m_append(request->ip_data_mbuf, sizeof(digest), 874 (void *)&digest); 875 if (ok != 1) { 876 ICL_WARN("failed to append data digest"); 877 return (1); 878 } 879 } 880 881 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); 882 request->ip_data_mbuf = NULL; 883 } 884 885 
request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; 886 887 return (0); 888 } 889 890 static void 891 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue) 892 { 893 struct icl_pdu *request, *request2; 894 struct socket *so; 895 long available, size, size2; 896 int coalesced, error; 897 898 ICL_CONN_LOCK_ASSERT_NOT(ic); 899 900 so = ic->ic_socket; 901 902 SOCKBUF_LOCK(&so->so_snd); 903 /* 904 * Check how much space do we have for transmit. We can't just 905 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, 906 * as it always frees the mbuf chain passed to it, even in case 907 * of error. 908 */ 909 available = sbspace(&so->so_snd); 910 911 /* 912 * Notify the socket upcall that we don't need wakeups 913 * for the time being. 914 */ 915 so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; 916 SOCKBUF_UNLOCK(&so->so_snd); 917 918 while (!STAILQ_EMPTY(queue)) { 919 request = STAILQ_FIRST(queue); 920 size = icl_pdu_size(request); 921 if (available < size) { 922 923 /* 924 * Set the low watermark, to be checked by 925 * sowriteable() in icl_soupcall_send() 926 * to avoid unnecessary wakeups until there 927 * is enough space for the PDU to fit. 
928 */ 929 SOCKBUF_LOCK(&so->so_snd); 930 available = sbspace(&so->so_snd); 931 if (available < size) { 932 #if 1 933 ICL_DEBUG("no space to send; " 934 "have %ld, need %ld", 935 available, size); 936 #endif 937 so->so_snd.sb_lowat = size; 938 SOCKBUF_UNLOCK(&so->so_snd); 939 return; 940 } 941 SOCKBUF_UNLOCK(&so->so_snd); 942 } 943 STAILQ_REMOVE_HEAD(queue, ip_next); 944 error = icl_pdu_finalize(request); 945 if (error != 0) { 946 ICL_DEBUG("failed to finalize PDU; " 947 "dropping connection"); 948 icl_conn_fail(ic); 949 icl_pdu_free(request); 950 return; 951 } 952 if (coalesce) { 953 coalesced = 1; 954 for (;;) { 955 request2 = STAILQ_FIRST(queue); 956 if (request2 == NULL) 957 break; 958 size2 = icl_pdu_size(request2); 959 if (available < size + size2) 960 break; 961 STAILQ_REMOVE_HEAD(queue, ip_next); 962 error = icl_pdu_finalize(request2); 963 if (error != 0) { 964 ICL_DEBUG("failed to finalize PDU; " 965 "dropping connection"); 966 icl_conn_fail(ic); 967 icl_pdu_free(request); 968 icl_pdu_free(request2); 969 return; 970 } 971 m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf); 972 request2->ip_bhs_mbuf = NULL; 973 request->ip_bhs_mbuf->m_pkthdr.len += size2; 974 size += size2; 975 STAILQ_REMOVE_AFTER(queue, request, ip_next); 976 icl_pdu_free(request2); 977 coalesced++; 978 } 979 #if 0 980 if (coalesced > 1) { 981 ICL_DEBUG("coalesced %d PDUs into %ld bytes", 982 coalesced, size); 983 } 984 #endif 985 } 986 available -= size; 987 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, 988 NULL, MSG_DONTWAIT, curthread); 989 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. 
 */
		if (error != 0) {
			ICL_DEBUG("failed to send PDU, error %d; "
			    "dropping connection", error);
			icl_conn_fail(ic);
			icl_pdu_free(request);
			return;
		}
		icl_pdu_free(request);
	}
}

/*
 * Per-connection send thread: repeatedly drains ic_to_send into a
 * local queue (so icl_conn_send_pdus() runs unlocked) and sleeps on
 * ic_send_cv until the upcall reports send space or new PDUs arrive.
 */
static void
icl_send_thread(void *arg)
{
	struct icl_conn *ic;
	struct icl_pdu_stailq queue;

	ic = arg;

	STAILQ_INIT(&queue);

	ICL_CONN_LOCK(ic);
	for (;;) {
		for (;;) {
			/*
			 * If the local queue is empty, populate it from
			 * the main one.  This way the icl_conn_send_pdus()
			 * can go through all the queued PDUs without holding
			 * any locks.
			 */
			if (STAILQ_EMPTY(&queue))
				STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);

			ic->ic_check_send_space = false;
			ICL_CONN_UNLOCK(ic);
			icl_conn_send_pdus(ic, &queue);
			ICL_CONN_LOCK(ic);

			/*
			 * The icl_soupcall_send() was called since the last
			 * call to sbspace(); go around;
			 */
			if (ic->ic_check_send_space)
				continue;

			/*
			 * Local queue is empty, but we still have PDUs
			 * in the main one; go around.
			 */
			if (STAILQ_EMPTY(&queue) &&
			    !STAILQ_EMPTY(&ic->ic_to_send))
				continue;

			/*
			 * There might be some stuff in the local queue,
			 * which didn't get sent due to not having enough send
			 * space.  Wait for socket upcall.
			 */
			break;
		}

		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/*
	 * We're exiting; move PDUs back to the main queue, so they can
	 * get freed properly.  At this point ordering doesn't matter.
	 */
	STAILQ_CONCAT(&ic->ic_to_send, &queue);

	ic->ic_send_running = false;
	/* Wake up icl_soft_conn_close(), which waits for both threads. */
	cv_signal(&ic->ic_send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}

/*
 * Socket send upcall: note that send space became available and wake
 * the send thread so it re-checks sbspace().
 */
static int
icl_soupcall_send(struct socket *so, void *arg, int waitflag)
{
	struct icl_conn *ic;

	if (!sowriteable(so))
		return (SU_OK);

	ic = arg;

	ICL_CONN_LOCK(ic);
	ic->ic_check_send_space = true;
	ICL_CONN_UNLOCK(ic);

	cv_signal(&ic->ic_send_cv);

	return (SU_OK);
}

/*
 * Copy 'len' bytes from 'addr' into a new mbuf chain and append it to
 * the PDU's data segment, updating ip_data_len.  'flags' are mbuf
 * allocation flags.  Returns 0 or ENOMEM.
 */
static int
icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len,
    int flags)
{
	struct mbuf *mb, *newmb;
	size_t copylen, off = 0;

	KASSERT(len > 0, ("len == 0"));

	newmb = m_getm2(NULL, len, flags, MT_DATA, 0);
	if (newmb == NULL) {
		ICL_WARN("failed to allocate mbuf for %zd bytes", len);
		return (ENOMEM);
	}

	/* Fill each mbuf in the new chain up to its trailing space. */
	for (mb = newmb; mb != NULL; mb = mb->m_next) {
		copylen = min(M_TRAILINGSPACE(mb), len - off);
		memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
		mb->m_len = copylen;
		off += copylen;
	}
	KASSERT(off == len, ("%s: off != len", __func__));

	if (request->ip_data_mbuf == NULL) {
		request->ip_data_mbuf = newmb;
		request->ip_data_len = len;
	} else {
		m_cat(request->ip_data_mbuf, newmb);
		request->ip_data_len += len;
	}

	return (0);
}

int
icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{

	return (icl_pdu_append_data(request, addr, len, flags));
}

/*
 * Copy 'len' bytes at offset 'off' of the PDU's data segment into 'addr'.
 */
static void
icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len)
{

	m_copydata(ip->ip_data_mbuf, off, len, addr);
}

void
icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{

1144 return (icl_pdu_get_data(ip, off, addr, len)); 1145 } 1146 1147 static void 1148 icl_pdu_queue(struct icl_pdu *ip) 1149 { 1150 struct icl_conn *ic; 1151 1152 ic = ip->ip_conn; 1153 1154 ICL_CONN_LOCK_ASSERT(ic); 1155 1156 if (ic->ic_disconnecting || ic->ic_socket == NULL) { 1157 ICL_DEBUG("icl_pdu_queue on closed connection"); 1158 icl_pdu_free(ip); 1159 return; 1160 } 1161 1162 if (!STAILQ_EMPTY(&ic->ic_to_send)) { 1163 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1164 /* 1165 * If the queue is not empty, someone else had already 1166 * signaled the send thread; no need to do that again, 1167 * just return. 1168 */ 1169 return; 1170 } 1171 1172 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1173 cv_signal(&ic->ic_send_cv); 1174 } 1175 1176 void 1177 icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) 1178 { 1179 1180 icl_pdu_queue(ip); 1181 } 1182 1183 static struct icl_conn * 1184 icl_soft_new_conn(const char *name, struct mtx *lock) 1185 { 1186 struct icl_conn *ic; 1187 1188 refcount_acquire(&icl_ncons); 1189 1190 ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO); 1191 1192 STAILQ_INIT(&ic->ic_to_send); 1193 ic->ic_lock = lock; 1194 cv_init(&ic->ic_send_cv, "icl_tx"); 1195 cv_init(&ic->ic_receive_cv, "icl_rx"); 1196 #ifdef DIAGNOSTIC 1197 refcount_init(&ic->ic_outstanding_pdus, 0); 1198 #endif 1199 ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; 1200 ic->ic_name = name; 1201 ic->ic_offload = "None"; 1202 ic->ic_unmapped = false; 1203 1204 return (ic); 1205 } 1206 1207 void 1208 icl_soft_conn_free(struct icl_conn *ic) 1209 { 1210 1211 cv_destroy(&ic->ic_send_cv); 1212 cv_destroy(&ic->ic_receive_cv); 1213 kobj_delete((struct kobj *)ic, M_ICL_SOFT); 1214 refcount_release(&icl_ncons); 1215 } 1216 1217 static int 1218 icl_conn_start(struct icl_conn *ic) 1219 { 1220 size_t minspace; 1221 struct sockopt opt; 1222 int error, one = 1; 1223 1224 ICL_CONN_LOCK(ic); 1225 1226 /* 1227 * XXX: Ugly hack. 
1228 */ 1229 if (ic->ic_socket == NULL) { 1230 ICL_CONN_UNLOCK(ic); 1231 return (EINVAL); 1232 } 1233 1234 ic->ic_receive_state = ICL_CONN_STATE_BHS; 1235 ic->ic_receive_len = sizeof(struct iscsi_bhs); 1236 ic->ic_disconnecting = false; 1237 1238 ICL_CONN_UNLOCK(ic); 1239 1240 /* 1241 * For sendspace, this is required because the current code cannot 1242 * send a PDU in pieces; thus, the minimum buffer size is equal 1243 * to the maximum PDU size. "+4" is to account for possible padding. 1244 * 1245 * What we should actually do here is to use autoscaling, but set 1246 * some minimal buffer size to "minspace". I don't know a way to do 1247 * that, though. 1248 */ 1249 minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + 1250 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1251 if (sendspace < minspace) { 1252 ICL_WARN("kern.icl.sendspace too low; must be at least %zd", 1253 minspace); 1254 sendspace = minspace; 1255 } 1256 if (recvspace < minspace) { 1257 ICL_WARN("kern.icl.recvspace too low; must be at least %zd", 1258 minspace); 1259 recvspace = minspace; 1260 } 1261 1262 error = soreserve(ic->ic_socket, sendspace, recvspace); 1263 if (error != 0) { 1264 ICL_WARN("soreserve failed with error %d", error); 1265 icl_soft_conn_close(ic); 1266 return (error); 1267 } 1268 ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; 1269 ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; 1270 1271 /* 1272 * Disable Nagle. 1273 */ 1274 bzero(&opt, sizeof(opt)); 1275 opt.sopt_dir = SOPT_SET; 1276 opt.sopt_level = IPPROTO_TCP; 1277 opt.sopt_name = TCP_NODELAY; 1278 opt.sopt_val = &one; 1279 opt.sopt_valsize = sizeof(one); 1280 error = sosetopt(ic->ic_socket, &opt); 1281 if (error != 0) { 1282 ICL_WARN("disabling TCP_NODELAY failed with error %d", error); 1283 icl_soft_conn_close(ic); 1284 return (error); 1285 } 1286 1287 /* 1288 * Register socket upcall, to get notified about incoming PDUs 1289 * and free space to send outgoing ones. 
1290 */ 1291 SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1292 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); 1293 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1294 SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1295 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); 1296 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1297 1298 /* 1299 * Start threads. 1300 */ 1301 ICL_CONN_LOCK(ic); 1302 ic->ic_send_running = ic->ic_receive_running = true; 1303 ICL_CONN_UNLOCK(ic); 1304 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", 1305 ic->ic_name); 1306 if (error != 0) { 1307 ICL_WARN("kthread_add(9) failed with error %d", error); 1308 ICL_CONN_LOCK(ic); 1309 ic->ic_send_running = ic->ic_receive_running = false; 1310 cv_signal(&ic->ic_send_cv); 1311 ICL_CONN_UNLOCK(ic); 1312 icl_soft_conn_close(ic); 1313 return (error); 1314 } 1315 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", 1316 ic->ic_name); 1317 if (error != 0) { 1318 ICL_WARN("kthread_add(9) failed with error %d", error); 1319 ICL_CONN_LOCK(ic); 1320 ic->ic_receive_running = false; 1321 cv_signal(&ic->ic_send_cv); 1322 ICL_CONN_UNLOCK(ic); 1323 icl_soft_conn_close(ic); 1324 return (error); 1325 } 1326 1327 return (0); 1328 } 1329 1330 int 1331 icl_soft_conn_handoff(struct icl_conn *ic, int fd) 1332 { 1333 struct file *fp; 1334 struct socket *so; 1335 cap_rights_t rights; 1336 int error; 1337 1338 ICL_CONN_LOCK_ASSERT_NOT(ic); 1339 1340 #ifdef ICL_KERNEL_PROXY 1341 /* 1342 * We're transitioning to Full Feature phase, and we don't 1343 * really care. 1344 */ 1345 if (fd == 0) { 1346 ICL_CONN_LOCK(ic); 1347 if (ic->ic_socket == NULL) { 1348 ICL_CONN_UNLOCK(ic); 1349 ICL_WARN("proxy handoff without connect"); 1350 return (EINVAL); 1351 } 1352 ICL_CONN_UNLOCK(ic); 1353 return (0); 1354 } 1355 #endif 1356 1357 /* 1358 * Steal the socket from userland. 
1359 */ 1360 error = fget(curthread, fd, 1361 cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); 1362 if (error != 0) 1363 return (error); 1364 if (fp->f_type != DTYPE_SOCKET) { 1365 fdrop(fp, curthread); 1366 return (EINVAL); 1367 } 1368 so = fp->f_data; 1369 if (so->so_type != SOCK_STREAM) { 1370 fdrop(fp, curthread); 1371 return (EINVAL); 1372 } 1373 1374 ICL_CONN_LOCK(ic); 1375 1376 if (ic->ic_socket != NULL) { 1377 ICL_CONN_UNLOCK(ic); 1378 fdrop(fp, curthread); 1379 return (EBUSY); 1380 } 1381 1382 ic->ic_socket = fp->f_data; 1383 fp->f_ops = &badfileops; 1384 fp->f_data = NULL; 1385 fdrop(fp, curthread); 1386 ICL_CONN_UNLOCK(ic); 1387 1388 error = icl_conn_start(ic); 1389 1390 return (error); 1391 } 1392 1393 void 1394 icl_soft_conn_close(struct icl_conn *ic) 1395 { 1396 struct icl_pdu *pdu; 1397 struct socket *so; 1398 1399 ICL_CONN_LOCK(ic); 1400 1401 /* 1402 * Wake up the threads, so they can properly terminate. 1403 */ 1404 ic->ic_disconnecting = true; 1405 while (ic->ic_receive_running || ic->ic_send_running) { 1406 cv_signal(&ic->ic_receive_cv); 1407 cv_signal(&ic->ic_send_cv); 1408 cv_wait(&ic->ic_send_cv, ic->ic_lock); 1409 } 1410 1411 /* Some other thread could close the connection same time. */ 1412 so = ic->ic_socket; 1413 if (so == NULL) { 1414 ICL_CONN_UNLOCK(ic); 1415 return; 1416 } 1417 ic->ic_socket = NULL; 1418 1419 /* 1420 * Deregister socket upcalls. 1421 */ 1422 ICL_CONN_UNLOCK(ic); 1423 SOCKBUF_LOCK(&so->so_snd); 1424 if (so->so_snd.sb_upcall != NULL) 1425 soupcall_clear(so, SO_SND); 1426 SOCKBUF_UNLOCK(&so->so_snd); 1427 SOCKBUF_LOCK(&so->so_rcv); 1428 if (so->so_rcv.sb_upcall != NULL) 1429 soupcall_clear(so, SO_RCV); 1430 SOCKBUF_UNLOCK(&so->so_rcv); 1431 soclose(so); 1432 ICL_CONN_LOCK(ic); 1433 1434 if (ic->ic_receive_pdu != NULL) { 1435 //ICL_DEBUG("freeing partially received PDU"); 1436 icl_pdu_free(ic->ic_receive_pdu); 1437 ic->ic_receive_pdu = NULL; 1438 } 1439 1440 /* 1441 * Remove any outstanding PDUs from the send queue. 
1442 */ 1443 while (!STAILQ_EMPTY(&ic->ic_to_send)) { 1444 pdu = STAILQ_FIRST(&ic->ic_to_send); 1445 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); 1446 icl_pdu_free(pdu); 1447 } 1448 1449 KASSERT(STAILQ_EMPTY(&ic->ic_to_send), 1450 ("destroying session with non-empty send queue")); 1451 #ifdef DIAGNOSTIC 1452 KASSERT(ic->ic_outstanding_pdus == 0, 1453 ("destroying session with %d outstanding PDUs", 1454 ic->ic_outstanding_pdus)); 1455 #endif 1456 ICL_CONN_UNLOCK(ic); 1457 } 1458 1459 int 1460 icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, 1461 struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) 1462 { 1463 1464 return (0); 1465 } 1466 1467 void 1468 icl_soft_conn_task_done(struct icl_conn *ic, void *prv) 1469 { 1470 } 1471 1472 int 1473 icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, 1474 uint32_t *transfer_tag, void **prvp) 1475 { 1476 1477 return (0); 1478 } 1479 1480 void 1481 icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv) 1482 { 1483 } 1484 1485 static int 1486 icl_soft_limits(struct icl_drv_limits *idl) 1487 { 1488 1489 idl->idl_max_recv_data_segment_length = 128 * 1024; 1490 1491 return (0); 1492 } 1493 1494 #ifdef ICL_KERNEL_PROXY 1495 int 1496 icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype, 1497 int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) 1498 { 1499 1500 return (icl_soft_proxy_connect(ic, domain, socktype, protocol, 1501 from_sa, to_sa)); 1502 } 1503 1504 int 1505 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so) 1506 { 1507 int error; 1508 1509 ICL_CONN_LOCK_ASSERT_NOT(ic); 1510 1511 if (so->so_type != SOCK_STREAM) 1512 return (EINVAL); 1513 1514 ICL_CONN_LOCK(ic); 1515 if (ic->ic_socket != NULL) { 1516 ICL_CONN_UNLOCK(ic); 1517 return (EBUSY); 1518 } 1519 ic->ic_socket = so; 1520 ICL_CONN_UNLOCK(ic); 1521 1522 error = icl_conn_start(ic); 1523 1524 return (error); 1525 } 1526 #endif /* ICL_KERNEL_PROXY */ 1527 1528 static int 1529 
icl_soft_load(void) 1530 { 1531 int error; 1532 1533 icl_pdu_zone = uma_zcreate("icl_pdu", 1534 sizeof(struct icl_pdu), NULL, NULL, NULL, NULL, 1535 UMA_ALIGN_PTR, 0); 1536 refcount_init(&icl_ncons, 0); 1537 1538 /* 1539 * The reason we call this "none" is that to the user, 1540 * it's known as "offload driver"; "offload driver: soft" 1541 * doesn't make much sense. 1542 */ 1543 error = icl_register("none", false, 0, 1544 icl_soft_limits, icl_soft_new_conn); 1545 KASSERT(error == 0, ("failed to register")); 1546 1547 #if defined(ICL_KERNEL_PROXY) && 0 1548 /* 1549 * Debugging aid for kernel proxy functionality. 1550 */ 1551 error = icl_register("proxytest", true, 0, 1552 icl_soft_limits, icl_soft_new_conn); 1553 KASSERT(error == 0, ("failed to register")); 1554 #endif 1555 1556 return (error); 1557 } 1558 1559 static int 1560 icl_soft_unload(void) 1561 { 1562 1563 if (icl_ncons != 0) 1564 return (EBUSY); 1565 1566 icl_unregister("none", false); 1567 #if defined(ICL_KERNEL_PROXY) && 0 1568 icl_unregister("proxytest", true); 1569 #endif 1570 1571 uma_zdestroy(icl_pdu_zone); 1572 1573 return (0); 1574 } 1575 1576 static int 1577 icl_soft_modevent(module_t mod, int what, void *arg) 1578 { 1579 1580 switch (what) { 1581 case MOD_LOAD: 1582 return (icl_soft_load()); 1583 case MOD_UNLOAD: 1584 return (icl_soft_unload()); 1585 default: 1586 return (EINVAL); 1587 } 1588 } 1589 1590 moduledata_t icl_soft_data = { 1591 "icl_soft", 1592 icl_soft_modevent, 1593 0 1594 }; 1595 1596 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); 1597 MODULE_DEPEND(icl_soft, icl, 1, 1, 1); 1598 MODULE_VERSION(icl_soft, 1); 1599