1 /*- 2 * Copyright (c) 2012 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Edward Tomasz Napierala under sponsorship 6 * from the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 31 /* 32 * Software implementation of iSCSI Common Layer kobj(9) interface. 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include <sys/param.h> 39 #include <sys/capsicum.h> 40 #include <sys/condvar.h> 41 #include <sys/conf.h> 42 #include <sys/file.h> 43 #include <sys/kernel.h> 44 #include <sys/kthread.h> 45 #include <sys/lock.h> 46 #include <sys/mbuf.h> 47 #include <sys/mutex.h> 48 #include <sys/module.h> 49 #include <sys/protosw.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/sysctl.h> 53 #include <sys/systm.h> 54 #include <sys/sx.h> 55 #include <sys/uio.h> 56 #include <vm/uma.h> 57 #include <netinet/in.h> 58 #include <netinet/tcp.h> 59 60 #include <dev/iscsi/icl.h> 61 #include <dev/iscsi/iscsi_proto.h> 62 #include <icl_conn_if.h> 63 64 static int coalesce = 1; 65 SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN, 66 &coalesce, 0, "Try to coalesce PDUs before sending"); 67 static int partial_receive_len = 128 * 1024; 68 SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, 69 &partial_receive_len, 0, "Minimum read size for partially received " 70 "data segment"); 71 static int sendspace = 1048576; 72 SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN, 73 &sendspace, 0, "Default send socket buffer size"); 74 static int recvspace = 1048576; 75 SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN, 76 &recvspace, 0, "Default receive socket buffer size"); 77 78 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend"); 79 static uma_zone_t icl_pdu_zone; 80 81 static volatile u_int icl_ncons; 82 83 #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) 84 #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) 85 #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) 86 #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) 87 88 STAILQ_HEAD(icl_pdu_stailq, icl_pdu); 89 90 static icl_conn_new_pdu_t icl_soft_conn_new_pdu; 91 static icl_conn_pdu_free_t icl_soft_conn_pdu_free; 92 static icl_conn_pdu_data_segment_length_t 93 icl_soft_conn_pdu_data_segment_length; 94 static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data; 95 static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data; 96 static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue; 97 static icl_conn_handoff_t icl_soft_conn_handoff; 98 static icl_conn_free_t icl_soft_conn_free; 99 static icl_conn_close_t icl_soft_conn_close; 100 static icl_conn_task_setup_t icl_soft_conn_task_setup; 101 static icl_conn_task_done_t icl_soft_conn_task_done; 102 static icl_conn_transfer_setup_t icl_soft_conn_transfer_setup; 103 static icl_conn_transfer_done_t icl_soft_conn_transfer_done; 104 #ifdef ICL_KERNEL_PROXY 105 static icl_conn_connect_t icl_soft_conn_connect; 106 #endif 107 108 static kobj_method_t icl_soft_methods[] = { 109 KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu), 110 KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free), 111 KOBJMETHOD(icl_conn_pdu_data_segment_length, 112 icl_soft_conn_pdu_data_segment_length), 113 KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data), 114 KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data), 115 KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue), 116 KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff), 117 KOBJMETHOD(icl_conn_free, icl_soft_conn_free), 118 KOBJMETHOD(icl_conn_close, icl_soft_conn_close), 119 KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup), 120 KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done), 121 KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup), 122 KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done), 123 #ifdef ICL_KERNEL_PROXY 124 KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect), 125 #endif 126 { 0, 0 } 127 }; 128 129 DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn)); 130 131 static void 132 icl_conn_fail(struct icl_conn *ic) 133 { 134 if (ic->ic_socket == NULL) 135 return; 136 137 /* 138 * XXX 139 */ 140 ic->ic_socket->so_error = EDOOFUS; 141 (ic->ic_error)(ic); 142 } 143 144 static struct mbuf * 145 icl_conn_receive(struct icl_conn *ic, size_t len) 146 { 147 struct uio uio; 148 struct socket *so; 149 struct mbuf *m; 150 int error, flags; 151 152 so = ic->ic_socket; 153 154 memset(&uio, 0, sizeof(uio)); 155 uio.uio_resid = len; 156 157 flags = MSG_DONTWAIT; 158 error = soreceive(so, NULL, &uio, &m, NULL, &flags); 159 if (error != 0) { 160 ICL_DEBUG("soreceive error %d", error); 161 return (NULL); 162 } 163 if (uio.uio_resid != 0) { 164 m_freem(m); 165 ICL_DEBUG("short read"); 166 return (NULL); 167 } 168 169 return (m); 170 } 171 172 static struct icl_pdu * 173 icl_pdu_new_empty(struct icl_conn *ic, int flags) 174 { 175 struct icl_pdu *ip; 176 177 #ifdef DIAGNOSTIC 178 refcount_acquire(&ic->ic_outstanding_pdus); 179 #endif 180 ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO); 181 if (ip == NULL) { 182 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); 183 #ifdef DIAGNOSTIC 184 refcount_release(&ic->ic_outstanding_pdus); 185 #endif 186 return (NULL); 187 } 188 189 ip->ip_conn = ic; 190 191 return (ip); 192 } 193 194 static void 195 icl_pdu_free(struct icl_pdu *ip) 196 { 197 struct icl_conn *ic; 198 199 ic = ip->ip_conn; 200 201 m_freem(ip->ip_bhs_mbuf); 202 m_freem(ip->ip_ahs_mbuf); 203 m_freem(ip->ip_data_mbuf); 204 uma_zfree(icl_pdu_zone, ip); 205 #ifdef DIAGNOSTIC 206 refcount_release(&ic->ic_outstanding_pdus); 207 #endif 208 } 209 210 void 211 icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) 212 { 213 214 icl_pdu_free(ip); 215 } 216 217 /* 218 * Allocate icl_pdu with empty BHS to fill up by the caller. 219 */ 220 struct icl_pdu * 221 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) 222 { 223 struct icl_pdu *ip; 224 225 ip = icl_pdu_new_empty(ic, flags); 226 if (ip == NULL) 227 return (NULL); 228 229 ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs), 230 flags, MT_DATA, M_PKTHDR); 231 if (ip->ip_bhs_mbuf == NULL) { 232 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); 233 icl_pdu_free(ip); 234 return (NULL); 235 } 236 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); 237 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); 238 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); 239 240 return (ip); 241 } 242 243 static int 244 icl_pdu_ahs_length(const struct icl_pdu *request) 245 { 246 247 return (request->ip_bhs->bhs_total_ahs_len * 4); 248 } 249 250 static size_t 251 icl_pdu_data_segment_length(const struct icl_pdu *request) 252 { 253 uint32_t len = 0; 254 255 len += request->ip_bhs->bhs_data_segment_len[0]; 256 len <<= 8; 257 len += request->ip_bhs->bhs_data_segment_len[1]; 258 len <<= 8; 259 len += request->ip_bhs->bhs_data_segment_len[2]; 260 261 return (len); 262 } 263 264 size_t 265 icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, 266 const struct icl_pdu *request) 267 { 268 269 return (icl_pdu_data_segment_length(request)); 270 } 271 272 static void 273 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) 274 { 275 276 response->ip_bhs->bhs_data_segment_len[2] = len; 277 response->ip_bhs->bhs_data_segment_len[1] = len >> 8; 278 response->ip_bhs->bhs_data_segment_len[0] = len >> 16; 279 } 280 281 static size_t 282 icl_pdu_padding(const struct icl_pdu *ip) 283 { 284 285 if ((ip->ip_data_len % 4) != 0) 286 return (4 - (ip->ip_data_len % 4)); 287 288 return (0); 289 } 290 291 static size_t 292 icl_pdu_size(const struct icl_pdu *response) 293 { 294 size_t len; 295 296 KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); 297 298 len = sizeof(struct iscsi_bhs) + response->ip_data_len + 299 icl_pdu_padding(response); 300 if (response->ip_conn->ic_header_crc32c) 301 len += ISCSI_HEADER_DIGEST_SIZE; 302 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) 303 len += ISCSI_DATA_DIGEST_SIZE; 304 305 return (len); 306 } 307 308 static int 309 icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep) 310 { 311 struct mbuf *m; 312 313 m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs)); 314 if (m == NULL) { 315 ICL_DEBUG("failed to receive BHS"); 316 return (-1); 317 } 318 319 request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs)); 320 if (request->ip_bhs_mbuf == NULL) { 321 ICL_WARN("m_pullup failed"); 322 return (-1); 323 } 324 request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *); 325 326 /* 327 * XXX: For architectures with strict alignment requirements 328 * we may need to allocate ip_bhs and copy the data into it. 329 * For some reason, though, not doing this doesn't seem 330 * to cause problems; tested on sparc64. 331 */ 332 333 *availablep -= sizeof(struct iscsi_bhs); 334 return (0); 335 } 336 337 static int 338 icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep) 339 { 340 341 request->ip_ahs_len = icl_pdu_ahs_length(request); 342 if (request->ip_ahs_len == 0) 343 return (0); 344 345 request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn, 346 request->ip_ahs_len); 347 if (request->ip_ahs_mbuf == NULL) { 348 ICL_DEBUG("failed to receive AHS"); 349 return (-1); 350 } 351 352 *availablep -= request->ip_ahs_len; 353 return (0); 354 } 355 356 static uint32_t 357 icl_mbuf_to_crc32c(const struct mbuf *m0) 358 { 359 uint32_t digest = 0xffffffff; 360 const struct mbuf *m; 361 362 for (m = m0; m != NULL; m = m->m_next) 363 digest = calculate_crc32c(digest, 364 mtod(m, const void *), m->m_len); 365 366 digest = digest ^ 0xffffffff; 367 368 return (digest); 369 } 370 371 static int 372 icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep) 373 { 374 struct mbuf *m; 375 uint32_t received_digest, valid_digest; 376 377 if (request->ip_conn->ic_header_crc32c == false) 378 return (0); 379 380 m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE); 381 if (m == NULL) { 382 ICL_DEBUG("failed to receive header digest"); 383 return (-1); 384 } 385 386 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); 387 m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest); 388 m_freem(m); 389 390 *availablep -= ISCSI_HEADER_DIGEST_SIZE; 391 392 /* 393 * XXX: Handle AHS. 394 */ 395 valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 396 if (received_digest != valid_digest) { 397 ICL_WARN("header digest check failed; got 0x%x, " 398 "should be 0x%x", received_digest, valid_digest); 399 return (-1); 400 } 401 402 return (0); 403 } 404 405 /* 406 * Return the number of bytes that should be waiting in the receive socket 407 * before icl_pdu_receive_data_segment() gets called. 408 */ 409 static size_t 410 icl_pdu_data_segment_receive_len(const struct icl_pdu *request) 411 { 412 size_t len; 413 414 len = icl_pdu_data_segment_length(request); 415 if (len == 0) 416 return (0); 417 418 /* 419 * Account for the parts of data segment already read from 420 * the socket buffer. 421 */ 422 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 423 len -= request->ip_data_len; 424 425 /* 426 * Don't always wait for the full data segment to be delivered 427 * to the socket; this might badly affect performance due to 428 * TCP window scaling. 429 */ 430 if (len > partial_receive_len) { 431 #if 0 432 ICL_DEBUG("need %zd bytes of data, limiting to %zd", 433 len, partial_receive_len)); 434 #endif 435 len = partial_receive_len; 436 437 return (len); 438 } 439 440 /* 441 * Account for padding. Note that due to the way code is written, 442 * the icl_pdu_receive_data_segment() must always receive padding 443 * along with the last part of data segment, because it would be 444 * impossible to tell whether we've already received the full data 445 * segment including padding, or without it. 446 */ 447 if ((len % 4) != 0) 448 len += 4 - (len % 4); 449 450 #if 0 451 ICL_DEBUG("need %zd bytes of data", len)); 452 #endif 453 454 return (len); 455 } 456 457 static int 458 icl_pdu_receive_data_segment(struct icl_pdu *request, 459 size_t *availablep, bool *more_neededp) 460 { 461 struct icl_conn *ic; 462 size_t len, padding = 0; 463 struct mbuf *m; 464 465 ic = request->ip_conn; 466 467 *more_neededp = false; 468 ic->ic_receive_len = 0; 469 470 len = icl_pdu_data_segment_length(request); 471 if (len == 0) 472 return (0); 473 474 if ((len % 4) != 0) 475 padding = 4 - (len % 4); 476 477 /* 478 * Account for already received parts of data segment. 479 */ 480 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 481 len -= request->ip_data_len; 482 483 if (len + padding > *availablep) { 484 /* 485 * Not enough data in the socket buffer. Receive as much 486 * as we can. Don't receive padding, since, obviously, it's 487 * not the end of data segment yet. 488 */ 489 #if 0 490 ICL_DEBUG("limited from %zd to %zd", 491 len + padding, *availablep - padding)); 492 #endif 493 len = *availablep - padding; 494 *more_neededp = true; 495 padding = 0; 496 } 497 498 /* 499 * Must not try to receive padding without at least one byte 500 * of actual data segment. 501 */ 502 if (len > 0) { 503 m = icl_conn_receive(request->ip_conn, len + padding); 504 if (m == NULL) { 505 ICL_DEBUG("failed to receive data segment"); 506 return (-1); 507 } 508 509 if (request->ip_data_mbuf == NULL) 510 request->ip_data_mbuf = m; 511 else 512 m_cat(request->ip_data_mbuf, m); 513 514 request->ip_data_len += len; 515 *availablep -= len + padding; 516 } else 517 ICL_DEBUG("len 0"); 518 519 if (*more_neededp) 520 ic->ic_receive_len = 521 icl_pdu_data_segment_receive_len(request); 522 523 return (0); 524 } 525 526 static int 527 icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep) 528 { 529 struct mbuf *m; 530 uint32_t received_digest, valid_digest; 531 532 if (request->ip_conn->ic_data_crc32c == false) 533 return (0); 534 535 if (request->ip_data_len == 0) 536 return (0); 537 538 m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE); 539 if (m == NULL) { 540 ICL_DEBUG("failed to receive data digest"); 541 return (-1); 542 } 543 544 CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); 545 m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest); 546 m_freem(m); 547 548 *availablep -= ISCSI_DATA_DIGEST_SIZE; 549 550 /* 551 * Note that ip_data_mbuf also contains padding; since digest 552 * calculation is supposed to include that, we iterate over 553 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. 554 */ 555 valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 556 if (received_digest != valid_digest) { 557 ICL_WARN("data digest check failed; got 0x%x, " 558 "should be 0x%x", received_digest, valid_digest); 559 return (-1); 560 } 561 562 return (0); 563 } 564 565 /* 566 * Somewhat contrary to the name, this attempts to receive only one 567 * "part" of PDU at a time; call it repeatedly until it returns non-NULL. 568 */ 569 static struct icl_pdu * 570 icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep) 571 { 572 struct icl_pdu *request; 573 struct socket *so; 574 size_t len; 575 int error; 576 bool more_needed; 577 578 so = ic->ic_socket; 579 580 if (ic->ic_receive_state == ICL_CONN_STATE_BHS) { 581 KASSERT(ic->ic_receive_pdu == NULL, 582 ("ic->ic_receive_pdu != NULL")); 583 request = icl_pdu_new_empty(ic, M_NOWAIT); 584 if (request == NULL) { 585 ICL_DEBUG("failed to allocate PDU; " 586 "dropping connection"); 587 icl_conn_fail(ic); 588 return (NULL); 589 } 590 ic->ic_receive_pdu = request; 591 } else { 592 KASSERT(ic->ic_receive_pdu != NULL, 593 ("ic->ic_receive_pdu == NULL")); 594 request = ic->ic_receive_pdu; 595 } 596 597 if (*availablep < ic->ic_receive_len) { 598 #if 0 599 ICL_DEBUG("not enough data; need %zd, " 600 "have %zd", ic->ic_receive_len, *availablep); 601 #endif 602 return (NULL); 603 } 604 605 switch (ic->ic_receive_state) { 606 case ICL_CONN_STATE_BHS: 607 //ICL_DEBUG("receiving BHS"); 608 error = icl_pdu_receive_bhs(request, availablep); 609 if (error != 0) { 610 ICL_DEBUG("failed to receive BHS; " 611 "dropping connection"); 612 break; 613 } 614 615 /* 616 * We don't enforce any limit for AHS length; 617 * its length is stored in 8 bit field. 618 */ 619 620 len = icl_pdu_data_segment_length(request); 621 if (len > ic->ic_max_data_segment_length) { 622 ICL_WARN("received data segment " 623 "length %zd is larger than negotiated " 624 "MaxDataSegmentLength %zd; " 625 "dropping connection", 626 len, ic->ic_max_data_segment_length); 627 error = EINVAL; 628 break; 629 } 630 631 ic->ic_receive_state = ICL_CONN_STATE_AHS; 632 ic->ic_receive_len = icl_pdu_ahs_length(request); 633 break; 634 635 case ICL_CONN_STATE_AHS: 636 //ICL_DEBUG("receiving AHS"); 637 error = icl_pdu_receive_ahs(request, availablep); 638 if (error != 0) { 639 ICL_DEBUG("failed to receive AHS; " 640 "dropping connection"); 641 break; 642 } 643 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST; 644 if (ic->ic_header_crc32c == false) 645 ic->ic_receive_len = 0; 646 else 647 ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE; 648 break; 649 650 case ICL_CONN_STATE_HEADER_DIGEST: 651 //ICL_DEBUG("receiving header digest"); 652 error = icl_pdu_check_header_digest(request, availablep); 653 if (error != 0) { 654 ICL_DEBUG("header digest failed; " 655 "dropping connection"); 656 break; 657 } 658 659 ic->ic_receive_state = ICL_CONN_STATE_DATA; 660 ic->ic_receive_len = 661 icl_pdu_data_segment_receive_len(request); 662 break; 663 664 case ICL_CONN_STATE_DATA: 665 //ICL_DEBUG("receiving data segment"); 666 error = icl_pdu_receive_data_segment(request, availablep, 667 &more_needed); 668 if (error != 0) { 669 ICL_DEBUG("failed to receive data segment;" 670 "dropping connection"); 671 break; 672 } 673 674 if (more_needed) 675 break; 676 677 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST; 678 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) 679 ic->ic_receive_len = 0; 680 else 681 ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE; 682 break; 683 684 case ICL_CONN_STATE_DATA_DIGEST: 685 //ICL_DEBUG("receiving data digest"); 686 error = icl_pdu_check_data_digest(request, availablep); 687 if (error != 0) { 688 ICL_DEBUG("data digest failed; " 689 "dropping connection"); 690 break; 691 } 692 693 /* 694 * We've received complete PDU; reset the receive state machine 695 * and return the PDU. 696 */ 697 ic->ic_receive_state = ICL_CONN_STATE_BHS; 698 ic->ic_receive_len = sizeof(struct iscsi_bhs); 699 ic->ic_receive_pdu = NULL; 700 return (request); 701 702 default: 703 panic("invalid ic_receive_state %d\n", ic->ic_receive_state); 704 } 705 706 if (error != 0) { 707 /* 708 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu 709 * and will get freed in icl_soft_conn_close(). 710 */ 711 icl_conn_fail(ic); 712 } 713 714 return (NULL); 715 } 716 717 static void 718 icl_conn_receive_pdus(struct icl_conn *ic, size_t available) 719 { 720 struct icl_pdu *response; 721 struct socket *so; 722 723 so = ic->ic_socket; 724 725 /* 726 * This can never happen; we're careful to only mess with ic->ic_socket 727 * pointer when the send/receive threads are not running. 728 */ 729 KASSERT(so != NULL, ("NULL socket")); 730 731 for (;;) { 732 if (ic->ic_disconnecting) 733 return; 734 735 if (so->so_error != 0) { 736 ICL_DEBUG("connection error %d; " 737 "dropping connection", so->so_error); 738 icl_conn_fail(ic); 739 return; 740 } 741 742 /* 743 * Loop until we have a complete PDU or there is not enough 744 * data in the socket buffer. 745 */ 746 if (available < ic->ic_receive_len) { 747 #if 0 748 ICL_DEBUG("not enough data; have %zd, " 749 "need %zd", available, 750 ic->ic_receive_len); 751 #endif 752 return; 753 } 754 755 response = icl_conn_receive_pdu(ic, &available); 756 if (response == NULL) 757 continue; 758 759 if (response->ip_ahs_len > 0) { 760 ICL_WARN("received PDU with unsupported " 761 "AHS; opcode 0x%x; dropping connection", 762 response->ip_bhs->bhs_opcode); 763 icl_pdu_free(response); 764 icl_conn_fail(ic); 765 return; 766 } 767 768 (ic->ic_receive)(response); 769 } 770 } 771 772 static void 773 icl_receive_thread(void *arg) 774 { 775 struct icl_conn *ic; 776 size_t available; 777 struct socket *so; 778 779 ic = arg; 780 so = ic->ic_socket; 781 782 for (;;) { 783 if (ic->ic_disconnecting) { 784 //ICL_DEBUG("terminating"); 785 break; 786 } 787 788 /* 789 * Set the low watermark, to be checked by 790 * soreadable() in icl_soupcall_receive() 791 * to avoid unnecessary wakeups until there 792 * is enough data received to read the PDU. 793 */ 794 SOCKBUF_LOCK(&so->so_rcv); 795 available = sbavail(&so->so_rcv); 796 if (available < ic->ic_receive_len) { 797 so->so_rcv.sb_lowat = ic->ic_receive_len; 798 cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); 799 } else 800 so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; 801 SOCKBUF_UNLOCK(&so->so_rcv); 802 803 icl_conn_receive_pdus(ic, available); 804 } 805 806 ICL_CONN_LOCK(ic); 807 ic->ic_receive_running = false; 808 cv_signal(&ic->ic_send_cv); 809 ICL_CONN_UNLOCK(ic); 810 kthread_exit(); 811 } 812 813 static int 814 icl_soupcall_receive(struct socket *so, void *arg, int waitflag) 815 { 816 struct icl_conn *ic; 817 818 if (!soreadable(so)) 819 return (SU_OK); 820 821 ic = arg; 822 cv_signal(&ic->ic_receive_cv); 823 return (SU_OK); 824 } 825 826 static int 827 icl_pdu_finalize(struct icl_pdu *request) 828 { 829 size_t padding, pdu_len; 830 uint32_t digest, zero = 0; 831 int ok; 832 struct icl_conn *ic; 833 834 ic = request->ip_conn; 835 836 icl_pdu_set_data_segment_length(request, request->ip_data_len); 837 838 pdu_len = icl_pdu_size(request); 839 840 if (ic->ic_header_crc32c) { 841 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 842 ok = m_append(request->ip_bhs_mbuf, sizeof(digest), 843 (void *)&digest); 844 if (ok != 1) { 845 ICL_WARN("failed to append header digest"); 846 return (1); 847 } 848 } 849 850 if (request->ip_data_len != 0) { 851 padding = icl_pdu_padding(request); 852 if (padding > 0) { 853 ok = m_append(request->ip_data_mbuf, padding, 854 (void *)&zero); 855 if (ok != 1) { 856 ICL_WARN("failed to append padding"); 857 return (1); 858 } 859 } 860 861 if (ic->ic_data_crc32c) { 862 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 863 864 ok = m_append(request->ip_data_mbuf, sizeof(digest), 865 (void *)&digest); 866 if (ok != 1) { 867 ICL_WARN("failed to append data digest"); 868 return (1); 869 } 870 } 871 872 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); 873 request->ip_data_mbuf = NULL; 874 } 875 876 request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; 877 878 return (0); 879 } 880 881 static void 882 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue) 883 { 884 struct icl_pdu *request, *request2; 885 struct socket *so; 886 size_t available, size, size2; 887 int coalesced, error; 888 889 ICL_CONN_LOCK_ASSERT_NOT(ic); 890 891 so = ic->ic_socket; 892 893 SOCKBUF_LOCK(&so->so_snd); 894 /* 895 * Check how much space do we have for transmit. We can't just 896 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, 897 * as it always frees the mbuf chain passed to it, even in case 898 * of error. 899 */ 900 available = sbspace(&so->so_snd); 901 902 /* 903 * Notify the socket upcall that we don't need wakeups 904 * for the time being. 905 */ 906 so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; 907 SOCKBUF_UNLOCK(&so->so_snd); 908 909 while (!STAILQ_EMPTY(queue)) { 910 request = STAILQ_FIRST(queue); 911 size = icl_pdu_size(request); 912 if (available < size) { 913 914 /* 915 * Set the low watermark, to be checked by 916 * sowriteable() in icl_soupcall_send() 917 * to avoid unnecessary wakeups until there 918 * is enough space for the PDU to fit. 919 */ 920 SOCKBUF_LOCK(&so->so_snd); 921 available = sbspace(&so->so_snd); 922 if (available < size) { 923 #if 1 924 ICL_DEBUG("no space to send; " 925 "have %zd, need %zd", 926 available, size); 927 #endif 928 so->so_snd.sb_lowat = size; 929 SOCKBUF_UNLOCK(&so->so_snd); 930 return; 931 } 932 SOCKBUF_UNLOCK(&so->so_snd); 933 } 934 STAILQ_REMOVE_HEAD(queue, ip_next); 935 error = icl_pdu_finalize(request); 936 if (error != 0) { 937 ICL_DEBUG("failed to finalize PDU; " 938 "dropping connection"); 939 icl_conn_fail(ic); 940 icl_pdu_free(request); 941 return; 942 } 943 if (coalesce) { 944 coalesced = 1; 945 for (;;) { 946 request2 = STAILQ_FIRST(queue); 947 if (request2 == NULL) 948 break; 949 size2 = icl_pdu_size(request2); 950 if (available < size + size2) 951 break; 952 STAILQ_REMOVE_HEAD(queue, ip_next); 953 error = icl_pdu_finalize(request2); 954 if (error != 0) { 955 ICL_DEBUG("failed to finalize PDU; " 956 "dropping connection"); 957 icl_conn_fail(ic); 958 icl_pdu_free(request); 959 icl_pdu_free(request2); 960 return; 961 } 962 m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf); 963 request2->ip_bhs_mbuf = NULL; 964 request->ip_bhs_mbuf->m_pkthdr.len += size2; 965 size += size2; 966 STAILQ_REMOVE_AFTER(queue, request, ip_next); 967 icl_pdu_free(request2); 968 coalesced++; 969 } 970 #if 0 971 if (coalesced > 1) { 972 ICL_DEBUG("coalesced %d PDUs into %zd bytes", 973 coalesced, size); 974 } 975 #endif 976 } 977 available -= size; 978 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, 979 NULL, MSG_DONTWAIT, curthread); 980 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */ 981 if (error != 0) { 982 ICL_DEBUG("failed to send PDU, error %d; " 983 "dropping connection", error); 984 icl_conn_fail(ic); 985 icl_pdu_free(request); 986 return; 987 } 988 icl_pdu_free(request); 989 } 990 } 991 992 static void 993 icl_send_thread(void *arg) 994 { 995 struct icl_conn *ic; 996 struct icl_pdu_stailq queue; 997 998 ic = arg; 999 1000 STAILQ_INIT(&queue); 1001 1002 ICL_CONN_LOCK(ic); 1003 for (;;) { 1004 for (;;) { 1005 /* 1006 * If the local queue is empty, populate it from 1007 * the main one. This way the icl_conn_send_pdus() 1008 * can go through all the queued PDUs without holding 1009 * any locks. 1010 */ 1011 if (STAILQ_EMPTY(&queue)) 1012 STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu); 1013 1014 ic->ic_check_send_space = false; 1015 ICL_CONN_UNLOCK(ic); 1016 icl_conn_send_pdus(ic, &queue); 1017 ICL_CONN_LOCK(ic); 1018 1019 /* 1020 * The icl_soupcall_send() was called since the last 1021 * call to sbspace(); go around; 1022 */ 1023 if (ic->ic_check_send_space) 1024 continue; 1025 1026 /* 1027 * Local queue is empty, but we still have PDUs 1028 * in the main one; go around. 1029 */ 1030 if (STAILQ_EMPTY(&queue) && 1031 !STAILQ_EMPTY(&ic->ic_to_send)) 1032 continue; 1033 1034 /* 1035 * There might be some stuff in the local queue, 1036 * which didn't get sent due to not having enough send 1037 * space. Wait for socket upcall. 1038 */ 1039 break; 1040 } 1041 1042 if (ic->ic_disconnecting) { 1043 //ICL_DEBUG("terminating"); 1044 break; 1045 } 1046 1047 cv_wait(&ic->ic_send_cv, ic->ic_lock); 1048 } 1049 1050 /* 1051 * We're exiting; move PDUs back to the main queue, so they can 1052 * get freed properly. At this point ordering doesn't matter. 1053 */ 1054 STAILQ_CONCAT(&ic->ic_to_send, &queue); 1055 1056 ic->ic_send_running = false; 1057 cv_signal(&ic->ic_send_cv); 1058 ICL_CONN_UNLOCK(ic); 1059 kthread_exit(); 1060 } 1061 1062 static int 1063 icl_soupcall_send(struct socket *so, void *arg, int waitflag) 1064 { 1065 struct icl_conn *ic; 1066 1067 if (!sowriteable(so)) 1068 return (SU_OK); 1069 1070 ic = arg; 1071 1072 ICL_CONN_LOCK(ic); 1073 ic->ic_check_send_space = true; 1074 ICL_CONN_UNLOCK(ic); 1075 1076 cv_signal(&ic->ic_send_cv); 1077 1078 return (SU_OK); 1079 } 1080 1081 static int 1082 icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, 1083 int flags) 1084 { 1085 struct mbuf *mb, *newmb; 1086 size_t copylen, off = 0; 1087 1088 KASSERT(len > 0, ("len == 0")); 1089 1090 newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR); 1091 if (newmb == NULL) { 1092 ICL_WARN("failed to allocate mbuf for %zd bytes", len); 1093 return (ENOMEM); 1094 } 1095 1096 for (mb = newmb; mb != NULL; mb = mb->m_next) { 1097 copylen = min(M_TRAILINGSPACE(mb), len - off); 1098 memcpy(mtod(mb, char *), (const char *)addr + off, copylen); 1099 mb->m_len = copylen; 1100 off += copylen; 1101 } 1102 KASSERT(off == len, ("%s: off != len", __func__)); 1103 1104 if (request->ip_data_mbuf == NULL) { 1105 request->ip_data_mbuf = newmb; 1106 request->ip_data_len = len; 1107 } else { 1108 m_cat(request->ip_data_mbuf, newmb); 1109 request->ip_data_len += len; 1110 } 1111 1112 return (0); 1113 } 1114 1115 int 1116 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, 1117 const void *addr, size_t len, int flags) 1118 { 1119 1120 return (icl_pdu_append_data(request, addr, len, flags)); 1121 } 1122 1123 static void 1124 icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) 1125 { 1126 1127 m_copydata(ip->ip_data_mbuf, off, len, addr); 1128 } 1129 1130 void 1131 icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, 1132 size_t off, void *addr, size_t len) 1133 { 1134 1135 return (icl_pdu_get_data(ip, off, addr, len)); 1136 } 1137 1138 static void 1139 icl_pdu_queue(struct icl_pdu *ip) 1140 { 1141 struct icl_conn *ic; 1142 1143 ic = ip->ip_conn; 1144 1145 ICL_CONN_LOCK_ASSERT(ic); 1146 1147 if (ic->ic_disconnecting || ic->ic_socket == NULL) { 1148 ICL_DEBUG("icl_pdu_queue on closed connection"); 1149 icl_pdu_free(ip); 1150 return; 1151 } 1152 1153 if (!STAILQ_EMPTY(&ic->ic_to_send)) { 1154 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1155 /* 1156 * If the queue is not empty, someone else had already 1157 * signaled the send thread; no need to do that again, 1158 * just return. 1159 */ 1160 return; 1161 } 1162 1163 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1164 cv_signal(&ic->ic_send_cv); 1165 } 1166 1167 void 1168 icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) 1169 { 1170 1171 icl_pdu_queue(ip); 1172 } 1173 1174 static struct icl_conn * 1175 icl_soft_new_conn(const char *name, struct mtx *lock) 1176 { 1177 struct icl_conn *ic; 1178 1179 refcount_acquire(&icl_ncons); 1180 1181 ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO); 1182 1183 STAILQ_INIT(&ic->ic_to_send); 1184 ic->ic_lock = lock; 1185 cv_init(&ic->ic_send_cv, "icl_tx"); 1186 cv_init(&ic->ic_receive_cv, "icl_rx"); 1187 #ifdef DIAGNOSTIC 1188 refcount_init(&ic->ic_outstanding_pdus, 0); 1189 #endif 1190 ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; 1191 ic->ic_name = name; 1192 ic->ic_offload = "None"; 1193 ic->ic_unmapped = false; 1194 1195 return (ic); 1196 } 1197 1198 void 1199 icl_soft_conn_free(struct icl_conn *ic) 1200 { 1201 1202 cv_destroy(&ic->ic_send_cv); 1203 cv_destroy(&ic->ic_receive_cv); 1204 kobj_delete((struct kobj *)ic, M_ICL_SOFT); 1205 refcount_release(&icl_ncons); 1206 } 1207 1208 static int 1209 icl_conn_start(struct icl_conn *ic) 1210 { 1211 size_t minspace; 1212 struct sockopt opt; 1213 int error, one = 1; 1214 1215 ICL_CONN_LOCK(ic); 1216 1217 /* 1218 * XXX: Ugly hack. 1219 */ 1220 if (ic->ic_socket == NULL) { 1221 ICL_CONN_UNLOCK(ic); 1222 return (EINVAL); 1223 } 1224 1225 ic->ic_receive_state = ICL_CONN_STATE_BHS; 1226 ic->ic_receive_len = sizeof(struct iscsi_bhs); 1227 ic->ic_disconnecting = false; 1228 1229 ICL_CONN_UNLOCK(ic); 1230 1231 /* 1232 * For sendspace, this is required because the current code cannot 1233 * send a PDU in pieces; thus, the minimum buffer size is equal 1234 * to the maximum PDU size. "+4" is to account for possible padding. 1235 * 1236 * What we should actually do here is to use autoscaling, but set 1237 * some minimal buffer size to "minspace". I don't know a way to do 1238 * that, though. 1239 */ 1240 minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + 1241 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1242 if (sendspace < minspace) { 1243 ICL_WARN("kern.icl.sendspace too low; must be at least %zd", 1244 minspace); 1245 sendspace = minspace; 1246 } 1247 if (recvspace < minspace) { 1248 ICL_WARN("kern.icl.recvspace too low; must be at least %zd", 1249 minspace); 1250 recvspace = minspace; 1251 } 1252 1253 error = soreserve(ic->ic_socket, sendspace, recvspace); 1254 if (error != 0) { 1255 ICL_WARN("soreserve failed with error %d", error); 1256 icl_soft_conn_close(ic); 1257 return (error); 1258 } 1259 ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; 1260 ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; 1261 1262 /* 1263 * Disable Nagle. 1264 */ 1265 bzero(&opt, sizeof(opt)); 1266 opt.sopt_dir = SOPT_SET; 1267 opt.sopt_level = IPPROTO_TCP; 1268 opt.sopt_name = TCP_NODELAY; 1269 opt.sopt_val = &one; 1270 opt.sopt_valsize = sizeof(one); 1271 error = sosetopt(ic->ic_socket, &opt); 1272 if (error != 0) { 1273 ICL_WARN("disabling TCP_NODELAY failed with error %d", error); 1274 icl_soft_conn_close(ic); 1275 return (error); 1276 } 1277 1278 /* 1279 * Register socket upcall, to get notified about incoming PDUs 1280 * and free space to send outgoing ones. 1281 */ 1282 SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1283 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); 1284 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1285 SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1286 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); 1287 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1288 1289 /* 1290 * Start threads. 1291 */ 1292 ICL_CONN_LOCK(ic); 1293 ic->ic_send_running = ic->ic_receive_running = true; 1294 ICL_CONN_UNLOCK(ic); 1295 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", 1296 ic->ic_name); 1297 if (error != 0) { 1298 ICL_WARN("kthread_add(9) failed with error %d", error); 1299 ICL_CONN_LOCK(ic); 1300 ic->ic_send_running = ic->ic_receive_running = false; 1301 cv_signal(&ic->ic_send_cv); 1302 ICL_CONN_UNLOCK(ic); 1303 icl_soft_conn_close(ic); 1304 return (error); 1305 } 1306 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", 1307 ic->ic_name); 1308 if (error != 0) { 1309 ICL_WARN("kthread_add(9) failed with error %d", error); 1310 ICL_CONN_LOCK(ic); 1311 ic->ic_receive_running = false; 1312 cv_signal(&ic->ic_send_cv); 1313 ICL_CONN_UNLOCK(ic); 1314 icl_soft_conn_close(ic); 1315 return (error); 1316 } 1317 1318 return (0); 1319 } 1320 1321 int 1322 icl_soft_conn_handoff(struct icl_conn *ic, int fd) 1323 { 1324 struct file *fp; 1325 struct socket *so; 1326 cap_rights_t rights; 1327 int error; 1328 1329 ICL_CONN_LOCK_ASSERT_NOT(ic); 1330 1331 #ifdef ICL_KERNEL_PROXY 1332 /* 1333 * We're transitioning to Full Feature phase, and we don't 1334 * really care. 1335 */ 1336 if (fd == 0) { 1337 ICL_CONN_LOCK(ic); 1338 if (ic->ic_socket == NULL) { 1339 ICL_CONN_UNLOCK(ic); 1340 ICL_WARN("proxy handoff without connect"); 1341 return (EINVAL); 1342 } 1343 ICL_CONN_UNLOCK(ic); 1344 return (0); 1345 } 1346 #endif 1347 1348 /* 1349 * Steal the socket from userland. 1350 */ 1351 error = fget(curthread, fd, 1352 cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); 1353 if (error != 0) 1354 return (error); 1355 if (fp->f_type != DTYPE_SOCKET) { 1356 fdrop(fp, curthread); 1357 return (EINVAL); 1358 } 1359 so = fp->f_data; 1360 if (so->so_type != SOCK_STREAM) { 1361 fdrop(fp, curthread); 1362 return (EINVAL); 1363 } 1364 1365 ICL_CONN_LOCK(ic); 1366 1367 if (ic->ic_socket != NULL) { 1368 ICL_CONN_UNLOCK(ic); 1369 fdrop(fp, curthread); 1370 return (EBUSY); 1371 } 1372 1373 ic->ic_socket = fp->f_data; 1374 fp->f_ops = &badfileops; 1375 fp->f_data = NULL; 1376 fdrop(fp, curthread); 1377 ICL_CONN_UNLOCK(ic); 1378 1379 error = icl_conn_start(ic); 1380 1381 return (error); 1382 } 1383 1384 void 1385 icl_soft_conn_close(struct icl_conn *ic) 1386 { 1387 struct icl_pdu *pdu; 1388 struct socket *so; 1389 1390 ICL_CONN_LOCK(ic); 1391 1392 /* 1393 * Wake up the threads, so they can properly terminate. 1394 */ 1395 ic->ic_disconnecting = true; 1396 while (ic->ic_receive_running || ic->ic_send_running) { 1397 cv_signal(&ic->ic_receive_cv); 1398 cv_signal(&ic->ic_send_cv); 1399 cv_wait(&ic->ic_send_cv, ic->ic_lock); 1400 } 1401 1402 /* Some other thread could close the connection same time. */ 1403 so = ic->ic_socket; 1404 if (so == NULL) { 1405 ICL_CONN_UNLOCK(ic); 1406 return; 1407 } 1408 ic->ic_socket = NULL; 1409 1410 /* 1411 * Deregister socket upcalls. 1412 */ 1413 ICL_CONN_UNLOCK(ic); 1414 SOCKBUF_LOCK(&so->so_snd); 1415 if (so->so_snd.sb_upcall != NULL) 1416 soupcall_clear(so, SO_SND); 1417 SOCKBUF_UNLOCK(&so->so_snd); 1418 SOCKBUF_LOCK(&so->so_rcv); 1419 if (so->so_rcv.sb_upcall != NULL) 1420 soupcall_clear(so, SO_RCV); 1421 SOCKBUF_UNLOCK(&so->so_rcv); 1422 soclose(so); 1423 ICL_CONN_LOCK(ic); 1424 1425 if (ic->ic_receive_pdu != NULL) { 1426 //ICL_DEBUG("freeing partially received PDU"); 1427 icl_pdu_free(ic->ic_receive_pdu); 1428 ic->ic_receive_pdu = NULL; 1429 } 1430 1431 /* 1432 * Remove any outstanding PDUs from the send queue. 1433 */ 1434 while (!STAILQ_EMPTY(&ic->ic_to_send)) { 1435 pdu = STAILQ_FIRST(&ic->ic_to_send); 1436 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); 1437 icl_pdu_free(pdu); 1438 } 1439 1440 KASSERT(STAILQ_EMPTY(&ic->ic_to_send), 1441 ("destroying session with non-empty send queue")); 1442 #ifdef DIAGNOSTIC 1443 KASSERT(ic->ic_outstanding_pdus == 0, 1444 ("destroying session with %d outstanding PDUs", 1445 ic->ic_outstanding_pdus)); 1446 #endif 1447 ICL_CONN_UNLOCK(ic); 1448 } 1449 1450 int 1451 icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, 1452 struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) 1453 { 1454 1455 return (0); 1456 } 1457 1458 void 1459 icl_soft_conn_task_done(struct icl_conn *ic, void *prv) 1460 { 1461 } 1462 1463 int 1464 icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, 1465 uint32_t *transfer_tag, void **prvp) 1466 { 1467 1468 return (0); 1469 } 1470 1471 void 1472 icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv) 1473 { 1474 } 1475 1476 static int 1477 icl_soft_limits(struct icl_drv_limits *idl) 1478 { 1479 1480 idl->idl_max_recv_data_segment_length = 128 * 1024; 1481 1482 return (0); 1483 } 1484 1485 #ifdef ICL_KERNEL_PROXY 1486 int 1487 icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype, 1488 int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) 1489 { 1490 1491 return (icl_soft_proxy_connect(ic, domain, socktype, protocol, 1492 from_sa, to_sa)); 1493 } 1494 1495 int 1496 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so) 1497 { 1498 int error; 1499 1500 ICL_CONN_LOCK_ASSERT_NOT(ic); 1501 1502 if (so->so_type != SOCK_STREAM) 1503 return (EINVAL); 1504 1505 ICL_CONN_LOCK(ic); 1506 if (ic->ic_socket != NULL) { 1507 ICL_CONN_UNLOCK(ic); 1508 return (EBUSY); 1509 } 1510 ic->ic_socket = so; 1511 ICL_CONN_UNLOCK(ic); 1512 1513 error = icl_conn_start(ic); 1514 1515 return (error); 1516 } 1517 #endif /* ICL_KERNEL_PROXY */ 1518 1519 static int 1520 icl_soft_load(void) 1521 { 1522 int error; 1523 1524 icl_pdu_zone = uma_zcreate("icl_pdu", 1525 sizeof(struct icl_pdu), NULL, NULL, NULL, NULL, 1526 UMA_ALIGN_PTR, 0); 1527 refcount_init(&icl_ncons, 0); 1528 1529 /* 1530 * The reason we call this "none" is that to the user, 1531 * it's known as "offload driver"; "offload driver: soft" 1532 * doesn't make much sense. 1533 */ 1534 error = icl_register("none", false, 0, 1535 icl_soft_limits, icl_soft_new_conn); 1536 KASSERT(error == 0, ("failed to register")); 1537 1538 #if defined(ICL_KERNEL_PROXY) && 0 1539 /* 1540 * Debugging aid for kernel proxy functionality. 1541 */ 1542 error = icl_register("proxytest", true, 0, 1543 icl_soft_limits, icl_soft_new_conn); 1544 KASSERT(error == 0, ("failed to register")); 1545 #endif 1546 1547 return (error); 1548 } 1549 1550 static int 1551 icl_soft_unload(void) 1552 { 1553 1554 if (icl_ncons != 0) 1555 return (EBUSY); 1556 1557 icl_unregister("none", false); 1558 #if defined(ICL_KERNEL_PROXY) && 0 1559 icl_unregister("proxytest", true); 1560 #endif 1561 1562 uma_zdestroy(icl_pdu_zone); 1563 1564 return (0); 1565 } 1566 1567 static int 1568 icl_soft_modevent(module_t mod, int what, void *arg) 1569 { 1570 1571 switch (what) { 1572 case MOD_LOAD: 1573 return (icl_soft_load()); 1574 case MOD_UNLOAD: 1575 return (icl_soft_unload()); 1576 default: 1577 return (EINVAL); 1578 } 1579 } 1580 1581 moduledata_t icl_soft_data = { 1582 "icl_soft", 1583 icl_soft_modevent, 1584 0 1585 }; 1586 1587 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); 1588 MODULE_DEPEND(icl_soft, icl, 1, 1, 1); 1589 MODULE_VERSION(icl_soft, 1); 1590