1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 /* 33 * Software implementation of iSCSI Common Layer kobj(9) interface. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/param.h> 40 #include <sys/capsicum.h> 41 #include <sys/condvar.h> 42 #include <sys/conf.h> 43 #include <sys/gsb_crc32.h> 44 #include <sys/file.h> 45 #include <sys/kernel.h> 46 #include <sys/kthread.h> 47 #include <sys/lock.h> 48 #include <sys/mbuf.h> 49 #include <sys/mutex.h> 50 #include <sys/module.h> 51 #include <sys/protosw.h> 52 #include <sys/socket.h> 53 #include <sys/socketvar.h> 54 #include <sys/sysctl.h> 55 #include <sys/systm.h> 56 #include <sys/sx.h> 57 #include <sys/uio.h> 58 #include <vm/uma.h> 59 #include <netinet/in.h> 60 #include <netinet/tcp.h> 61 62 #include <dev/iscsi/icl.h> 63 #include <dev/iscsi/iscsi_proto.h> 64 #include <icl_conn_if.h> 65 66 #define ICL_CONN_STATE_BHS 1 67 #define ICL_CONN_STATE_AHS 2 68 #define ICL_CONN_STATE_HEADER_DIGEST 3 69 #define ICL_CONN_STATE_DATA 4 70 #define ICL_CONN_STATE_DATA_DIGEST 5 71 72 struct icl_soft_conn { 73 struct icl_conn ic; 74 75 /* soft specific stuff goes here. */ 76 STAILQ_HEAD(, icl_pdu) to_send; 77 struct cv send_cv; 78 struct cv receive_cv; 79 struct icl_pdu *receive_pdu; 80 size_t receive_len; 81 int receive_state; 82 bool receive_running; 83 bool check_send_space; 84 bool send_running; 85 }; 86 87 struct icl_soft_pdu { 88 struct icl_pdu ip; 89 90 /* soft specific stuff goes here. */ 91 u_int ref_cnt; 92 icl_pdu_cb cb; 93 int error; 94 }; 95 96 SYSCTL_NODE(_kern_icl, OID_AUTO, soft, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 97 "Software iSCSI"); 98 static int coalesce = 1; 99 SYSCTL_INT(_kern_icl_soft, OID_AUTO, coalesce, CTLFLAG_RWTUN, 100 &coalesce, 0, "Try to coalesce PDUs before sending"); 101 static int partial_receive_len = 256 * 1024; 102 SYSCTL_INT(_kern_icl_soft, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, 103 &partial_receive_len, 0, "Minimum read size for partially received " 104 "data segment"); 105 static int max_data_segment_length = 256 * 1024; 106 SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_data_segment_length, CTLFLAG_RWTUN, 107 &max_data_segment_length, 0, "Maximum data segment length"); 108 static int first_burst_length = 1024 * 1024; 109 SYSCTL_INT(_kern_icl_soft, OID_AUTO, first_burst_length, CTLFLAG_RWTUN, 110 &first_burst_length, 0, "First burst length"); 111 static int max_burst_length = 1024 * 1024; 112 SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_burst_length, CTLFLAG_RWTUN, 113 &max_burst_length, 0, "Maximum burst length"); 114 static int sendspace = 1536 * 1024; 115 SYSCTL_INT(_kern_icl_soft, OID_AUTO, sendspace, CTLFLAG_RWTUN, 116 &sendspace, 0, "Default send socket buffer size"); 117 static int recvspace = 1536 * 1024; 118 SYSCTL_INT(_kern_icl_soft, OID_AUTO, recvspace, CTLFLAG_RWTUN, 119 &recvspace, 0, "Default receive socket buffer size"); 120 121 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend"); 122 static uma_zone_t icl_soft_pdu_zone; 123 124 static volatile u_int icl_ncons; 125 126 STAILQ_HEAD(icl_pdu_stailq, icl_pdu); 127 128 static icl_conn_new_pdu_t icl_soft_conn_new_pdu; 129 static icl_conn_pdu_free_t icl_soft_conn_pdu_free; 130 static icl_conn_pdu_data_segment_length_t 131 icl_soft_conn_pdu_data_segment_length; 132 static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data; 133 static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data; 134 static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue; 135 static icl_conn_pdu_queue_cb_t icl_soft_conn_pdu_queue_cb; 136 static icl_conn_handoff_t icl_soft_conn_handoff; 137 static icl_conn_free_t icl_soft_conn_free; 138 static icl_conn_close_t icl_soft_conn_close; 139 static icl_conn_task_setup_t icl_soft_conn_task_setup; 140 static icl_conn_task_done_t icl_soft_conn_task_done; 141 static icl_conn_transfer_setup_t icl_soft_conn_transfer_setup; 142 static icl_conn_transfer_done_t icl_soft_conn_transfer_done; 143 #ifdef ICL_KERNEL_PROXY 144 static icl_conn_connect_t icl_soft_conn_connect; 145 #endif 146 147 static kobj_method_t icl_soft_methods[] = { 148 KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu), 149 KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free), 150 KOBJMETHOD(icl_conn_pdu_data_segment_length, 151 icl_soft_conn_pdu_data_segment_length), 152 KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data), 153 KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data), 154 KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue), 155 KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb), 156 KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff), 157 KOBJMETHOD(icl_conn_free, icl_soft_conn_free), 158 KOBJMETHOD(icl_conn_close, icl_soft_conn_close), 159 KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup), 160 KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done), 161 KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup), 162 KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done), 163 #ifdef ICL_KERNEL_PROXY 164 KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect), 165 #endif 166 { 0, 0 } 167 }; 168 169 DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_soft_conn)); 170 171 static void 172 icl_conn_fail(struct icl_conn *ic) 173 { 174 if (ic->ic_socket == NULL) 175 return; 176 177 /* 178 * XXX 179 */ 180 ic->ic_socket->so_error = EDOOFUS; 181 (ic->ic_error)(ic); 182 } 183 184 static void 185 icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) 186 { 187 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 188 189 KASSERT(isp->ref_cnt == 0, ("freeing active PDU")); 190 m_freem(ip->ip_bhs_mbuf); 191 m_freem(ip->ip_ahs_mbuf); 192 m_freem(ip->ip_data_mbuf); 193 uma_zfree(icl_soft_pdu_zone, isp); 194 #ifdef DIAGNOSTIC 195 refcount_release(&ic->ic_outstanding_pdus); 196 #endif 197 } 198 199 static void 200 icl_soft_pdu_call_cb(struct icl_pdu *ip) 201 { 202 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 203 204 if (isp->cb != NULL) 205 isp->cb(ip, isp->error); 206 #ifdef DIAGNOSTIC 207 refcount_release(&ip->ip_conn->ic_outstanding_pdus); 208 #endif 209 uma_zfree(icl_soft_pdu_zone, isp); 210 } 211 212 static void 213 icl_soft_pdu_done(struct icl_pdu *ip, int error) 214 { 215 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 216 217 if (error != 0) 218 isp->error = error; 219 220 m_freem(ip->ip_bhs_mbuf); 221 ip->ip_bhs_mbuf = NULL; 222 m_freem(ip->ip_ahs_mbuf); 223 ip->ip_ahs_mbuf = NULL; 224 m_freem(ip->ip_data_mbuf); 225 ip->ip_data_mbuf = NULL; 226 227 if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) 228 icl_soft_pdu_call_cb(ip); 229 } 230 231 static void 232 icl_soft_mbuf_done(struct mbuf *mb) 233 { 234 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1; 235 236 icl_soft_pdu_call_cb(&isp->ip); 237 } 238 239 /* 240 * Allocate icl_pdu with empty BHS to fill up by the caller. 241 */ 242 struct icl_pdu * 243 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) 244 { 245 struct icl_soft_pdu *isp; 246 struct icl_pdu *ip; 247 248 #ifdef DIAGNOSTIC 249 refcount_acquire(&ic->ic_outstanding_pdus); 250 #endif 251 isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO); 252 if (isp == NULL) { 253 ICL_WARN("failed to allocate soft PDU"); 254 #ifdef DIAGNOSTIC 255 refcount_release(&ic->ic_outstanding_pdus); 256 #endif 257 return (NULL); 258 } 259 ip = &isp->ip; 260 ip->ip_conn = ic; 261 262 CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN); 263 ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA); 264 if (ip->ip_bhs_mbuf == NULL) { 265 ICL_WARN("failed to allocate BHS mbuf"); 266 icl_soft_conn_pdu_free(ic, ip); 267 return (NULL); 268 } 269 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); 270 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); 271 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); 272 273 return (ip); 274 } 275 276 static int 277 icl_pdu_ahs_length(const struct icl_pdu *request) 278 { 279 280 return (request->ip_bhs->bhs_total_ahs_len * 4); 281 } 282 283 static size_t 284 icl_pdu_data_segment_length(const struct icl_pdu *request) 285 { 286 uint32_t len = 0; 287 288 len += request->ip_bhs->bhs_data_segment_len[0]; 289 len <<= 8; 290 len += request->ip_bhs->bhs_data_segment_len[1]; 291 len <<= 8; 292 len += request->ip_bhs->bhs_data_segment_len[2]; 293 294 return (len); 295 } 296 297 size_t 298 icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, 299 const struct icl_pdu *request) 300 { 301 302 return (icl_pdu_data_segment_length(request)); 303 } 304 305 static void 306 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) 307 { 308 309 response->ip_bhs->bhs_data_segment_len[2] = len; 310 response->ip_bhs->bhs_data_segment_len[1] = len >> 8; 311 response->ip_bhs->bhs_data_segment_len[0] = len >> 16; 312 } 313 314 static size_t 315 icl_pdu_padding(const struct icl_pdu *ip) 316 { 317 318 if ((ip->ip_data_len % 4) != 0) 319 return (4 - (ip->ip_data_len % 4)); 320 321 return (0); 322 } 323 324 static size_t 325 icl_pdu_size(const struct icl_pdu *response) 326 { 327 size_t len; 328 329 KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); 330 331 len = sizeof(struct iscsi_bhs) + response->ip_data_len + 332 icl_pdu_padding(response); 333 if (response->ip_conn->ic_header_crc32c) 334 len += ISCSI_HEADER_DIGEST_SIZE; 335 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) 336 len += ISCSI_DATA_DIGEST_SIZE; 337 338 return (len); 339 } 340 341 static void 342 icl_soft_receive_buf(struct mbuf **r, size_t *rs, void *buf, size_t s) 343 { 344 345 m_copydata(*r, 0, s, buf); 346 m_adj(*r, s); 347 while ((*r) != NULL && (*r)->m_len == 0) 348 *r = m_free(*r); 349 *rs -= s; 350 } 351 352 static void 353 icl_pdu_receive_ahs(struct icl_pdu *request, struct mbuf **r, size_t *rs) 354 { 355 356 request->ip_ahs_len = icl_pdu_ahs_length(request); 357 if (request->ip_ahs_len == 0) 358 return; 359 360 request->ip_ahs_mbuf = *r; 361 *r = m_split(request->ip_ahs_mbuf, request->ip_ahs_len, M_WAITOK); 362 *rs -= request->ip_ahs_len; 363 } 364 365 static uint32_t 366 icl_mbuf_to_crc32c(const struct mbuf *m0) 367 { 368 uint32_t digest = 0xffffffff; 369 const struct mbuf *m; 370 371 for (m = m0; m != NULL; m = m->m_next) 372 digest = calculate_crc32c(digest, 373 mtod(m, const void *), m->m_len); 374 375 digest = digest ^ 0xffffffff; 376 377 return (digest); 378 } 379 380 static int 381 icl_pdu_check_header_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs) 382 { 383 uint32_t received_digest, valid_digest; 384 385 if (request->ip_conn->ic_header_crc32c == false) 386 return (0); 387 388 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); 389 icl_soft_receive_buf(r, rs, &received_digest, ISCSI_HEADER_DIGEST_SIZE); 390 391 /* Temporary attach AHS to BHS to calculate header digest. */ 392 request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf; 393 valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 394 request->ip_bhs_mbuf->m_next = NULL; 395 if (received_digest != valid_digest) { 396 ICL_WARN("header digest check failed; got 0x%x, " 397 "should be 0x%x", received_digest, valid_digest); 398 return (-1); 399 } 400 401 return (0); 402 } 403 404 /* 405 * Return the number of bytes that should be waiting in the receive socket 406 * before icl_pdu_receive_data_segment() gets called. 407 */ 408 static size_t 409 icl_pdu_data_segment_receive_len(const struct icl_pdu *request) 410 { 411 size_t len; 412 413 len = icl_pdu_data_segment_length(request); 414 if (len == 0) 415 return (0); 416 417 /* 418 * Account for the parts of data segment already read from 419 * the socket buffer. 420 */ 421 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 422 len -= request->ip_data_len; 423 424 /* 425 * Don't always wait for the full data segment to be delivered 426 * to the socket; this might badly affect performance due to 427 * TCP window scaling. 428 */ 429 if (len > partial_receive_len) { 430 #if 0 431 ICL_DEBUG("need %zd bytes of data, limiting to %zd", 432 len, partial_receive_len)); 433 #endif 434 len = partial_receive_len; 435 436 return (len); 437 } 438 439 /* 440 * Account for padding. Note that due to the way code is written, 441 * the icl_pdu_receive_data_segment() must always receive padding 442 * along with the last part of data segment, because it would be 443 * impossible to tell whether we've already received the full data 444 * segment including padding, or without it. 445 */ 446 if ((len % 4) != 0) 447 len += 4 - (len % 4); 448 449 #if 0 450 ICL_DEBUG("need %zd bytes of data", len)); 451 #endif 452 453 return (len); 454 } 455 456 static int 457 icl_pdu_receive_data_segment(struct icl_pdu *request, struct mbuf **r, 458 size_t *rs, bool *more_neededp) 459 { 460 struct icl_soft_conn *isc; 461 size_t len, padding = 0; 462 struct mbuf *m; 463 464 isc = (struct icl_soft_conn *)request->ip_conn; 465 466 *more_neededp = false; 467 isc->receive_len = 0; 468 469 len = icl_pdu_data_segment_length(request); 470 if (len == 0) 471 return (0); 472 473 if ((len % 4) != 0) 474 padding = 4 - (len % 4); 475 476 /* 477 * Account for already received parts of data segment. 478 */ 479 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 480 len -= request->ip_data_len; 481 482 if (len + padding > *rs) { 483 /* 484 * Not enough data in the socket buffer. Receive as much 485 * as we can. Don't receive padding, since, obviously, it's 486 * not the end of data segment yet. 487 */ 488 #if 0 489 ICL_DEBUG("limited from %zd to %zd", 490 len + padding, *rs - padding)); 491 #endif 492 len = *rs - padding; 493 *more_neededp = true; 494 padding = 0; 495 } 496 497 /* 498 * Must not try to receive padding without at least one byte 499 * of actual data segment. 500 */ 501 if (len > 0) { 502 m = *r; 503 *r = m_split(m, len + padding, M_WAITOK); 504 *rs -= len + padding; 505 506 if (request->ip_data_mbuf == NULL) 507 request->ip_data_mbuf = m; 508 else 509 m_cat(request->ip_data_mbuf, m); 510 511 request->ip_data_len += len; 512 } else 513 ICL_DEBUG("len 0"); 514 515 if (*more_neededp) 516 isc->receive_len = icl_pdu_data_segment_receive_len(request); 517 518 return (0); 519 } 520 521 static int 522 icl_pdu_check_data_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs) 523 { 524 uint32_t received_digest, valid_digest; 525 526 if (request->ip_conn->ic_data_crc32c == false) 527 return (0); 528 529 if (request->ip_data_len == 0) 530 return (0); 531 532 CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); 533 icl_soft_receive_buf(r, rs, &received_digest, ISCSI_DATA_DIGEST_SIZE); 534 535 /* 536 * Note that ip_data_mbuf also contains padding; since digest 537 * calculation is supposed to include that, we iterate over 538 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. 539 */ 540 valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 541 if (received_digest != valid_digest) { 542 ICL_WARN("data digest check failed; got 0x%x, " 543 "should be 0x%x", received_digest, valid_digest); 544 return (-1); 545 } 546 547 return (0); 548 } 549 550 /* 551 * Somewhat contrary to the name, this attempts to receive only one 552 * "part" of PDU at a time; call it repeatedly until it returns non-NULL. 553 */ 554 static struct icl_pdu * 555 icl_conn_receive_pdu(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs) 556 { 557 struct icl_conn *ic = &isc->ic; 558 struct icl_pdu *request; 559 size_t len; 560 int error = 0; 561 bool more_needed; 562 563 if (isc->receive_state == ICL_CONN_STATE_BHS) { 564 KASSERT(isc->receive_pdu == NULL, 565 ("isc->receive_pdu != NULL")); 566 request = icl_soft_conn_new_pdu(ic, M_NOWAIT); 567 if (request == NULL) { 568 ICL_DEBUG("failed to allocate PDU; " 569 "dropping connection"); 570 icl_conn_fail(ic); 571 return (NULL); 572 } 573 isc->receive_pdu = request; 574 } else { 575 KASSERT(isc->receive_pdu != NULL, 576 ("isc->receive_pdu == NULL")); 577 request = isc->receive_pdu; 578 } 579 580 switch (isc->receive_state) { 581 case ICL_CONN_STATE_BHS: 582 //ICL_DEBUG("receiving BHS"); 583 icl_soft_receive_buf(r, rs, request->ip_bhs, 584 sizeof(struct iscsi_bhs)); 585 586 /* 587 * We don't enforce any limit for AHS length; 588 * its length is stored in 8 bit field. 589 */ 590 591 len = icl_pdu_data_segment_length(request); 592 if (len > ic->ic_max_recv_data_segment_length) { 593 ICL_WARN("received data segment " 594 "length %zd is larger than negotiated; " 595 "dropping connection", len); 596 error = EINVAL; 597 break; 598 } 599 600 isc->receive_state = ICL_CONN_STATE_AHS; 601 isc->receive_len = icl_pdu_ahs_length(request); 602 break; 603 604 case ICL_CONN_STATE_AHS: 605 //ICL_DEBUG("receiving AHS"); 606 icl_pdu_receive_ahs(request, r, rs); 607 isc->receive_state = ICL_CONN_STATE_HEADER_DIGEST; 608 if (ic->ic_header_crc32c == false) 609 isc->receive_len = 0; 610 else 611 isc->receive_len = ISCSI_HEADER_DIGEST_SIZE; 612 break; 613 614 case ICL_CONN_STATE_HEADER_DIGEST: 615 //ICL_DEBUG("receiving header digest"); 616 error = icl_pdu_check_header_digest(request, r, rs); 617 if (error != 0) { 618 ICL_DEBUG("header digest failed; " 619 "dropping connection"); 620 break; 621 } 622 623 isc->receive_state = ICL_CONN_STATE_DATA; 624 isc->receive_len = icl_pdu_data_segment_receive_len(request); 625 break; 626 627 case ICL_CONN_STATE_DATA: 628 //ICL_DEBUG("receiving data segment"); 629 error = icl_pdu_receive_data_segment(request, r, rs, 630 &more_needed); 631 if (error != 0) { 632 ICL_DEBUG("failed to receive data segment;" 633 "dropping connection"); 634 break; 635 } 636 637 if (more_needed) 638 break; 639 640 isc->receive_state = ICL_CONN_STATE_DATA_DIGEST; 641 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) 642 isc->receive_len = 0; 643 else 644 isc->receive_len = ISCSI_DATA_DIGEST_SIZE; 645 break; 646 647 case ICL_CONN_STATE_DATA_DIGEST: 648 //ICL_DEBUG("receiving data digest"); 649 error = icl_pdu_check_data_digest(request, r, rs); 650 if (error != 0) { 651 ICL_DEBUG("data digest failed; " 652 "dropping connection"); 653 break; 654 } 655 656 /* 657 * We've received complete PDU; reset the receive state machine 658 * and return the PDU. 659 */ 660 isc->receive_state = ICL_CONN_STATE_BHS; 661 isc->receive_len = sizeof(struct iscsi_bhs); 662 isc->receive_pdu = NULL; 663 return (request); 664 665 default: 666 panic("invalid receive_state %d\n", isc->receive_state); 667 } 668 669 if (error != 0) { 670 /* 671 * Don't free the PDU; it's pointed to by isc->receive_pdu 672 * and will get freed in icl_soft_conn_close(). 673 */ 674 icl_conn_fail(ic); 675 } 676 677 return (NULL); 678 } 679 680 static void 681 icl_conn_receive_pdus(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs) 682 { 683 struct icl_conn *ic = &isc->ic; 684 struct icl_pdu *response; 685 686 for (;;) { 687 if (ic->ic_disconnecting) 688 return; 689 690 /* 691 * Loop until we have a complete PDU or there is not enough 692 * data in the socket buffer. 693 */ 694 if (*rs < isc->receive_len) { 695 #if 0 696 ICL_DEBUG("not enough data; have %zd, need %zd", 697 *rs, isc->receive_len); 698 #endif 699 return; 700 } 701 702 response = icl_conn_receive_pdu(isc, r, rs); 703 if (response == NULL) 704 continue; 705 706 if (response->ip_ahs_len > 0) { 707 ICL_WARN("received PDU with unsupported " 708 "AHS; opcode 0x%x; dropping connection", 709 response->ip_bhs->bhs_opcode); 710 icl_soft_conn_pdu_free(ic, response); 711 icl_conn_fail(ic); 712 return; 713 } 714 715 (ic->ic_receive)(response); 716 } 717 } 718 719 static void 720 icl_receive_thread(void *arg) 721 { 722 struct icl_soft_conn *isc = arg; 723 struct icl_conn *ic = &isc->ic; 724 size_t available, read = 0; 725 struct socket *so; 726 struct mbuf *m, *r = NULL; 727 struct uio uio; 728 int error, flags; 729 730 so = ic->ic_socket; 731 732 for (;;) { 733 SOCKBUF_LOCK(&so->so_rcv); 734 if (ic->ic_disconnecting) { 735 SOCKBUF_UNLOCK(&so->so_rcv); 736 break; 737 } 738 739 /* 740 * Set the low watermark, to be checked by 741 * soreadable() in icl_soupcall_receive() 742 * to avoid unnecessary wakeups until there 743 * is enough data received to read the PDU. 744 */ 745 available = sbavail(&so->so_rcv); 746 if (read + available < isc->receive_len) { 747 so->so_rcv.sb_lowat = isc->receive_len - read; 748 cv_wait(&isc->receive_cv, SOCKBUF_MTX(&so->so_rcv)); 749 so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; 750 available = sbavail(&so->so_rcv); 751 } 752 SOCKBUF_UNLOCK(&so->so_rcv); 753 754 if (available == 0) { 755 if (so->so_error != 0) { 756 ICL_DEBUG("connection error %d; " 757 "dropping connection", so->so_error); 758 icl_conn_fail(ic); 759 break; 760 } 761 continue; 762 } 763 764 memset(&uio, 0, sizeof(uio)); 765 uio.uio_resid = available; 766 flags = MSG_DONTWAIT; 767 error = soreceive(so, NULL, &uio, &m, NULL, &flags); 768 if (error != 0) { 769 ICL_DEBUG("soreceive error %d", error); 770 break; 771 } 772 if (uio.uio_resid != 0) { 773 m_freem(m); 774 ICL_DEBUG("short read"); 775 break; 776 } 777 if (r) 778 m_cat(r, m); 779 else 780 r = m; 781 read += available; 782 783 icl_conn_receive_pdus(isc, &r, &read); 784 } 785 786 if (r) 787 m_freem(r); 788 789 ICL_CONN_LOCK(ic); 790 isc->receive_running = false; 791 cv_signal(&isc->send_cv); 792 ICL_CONN_UNLOCK(ic); 793 kthread_exit(); 794 } 795 796 static int 797 icl_soupcall_receive(struct socket *so, void *arg, int waitflag) 798 { 799 struct icl_soft_conn *isc; 800 801 if (!soreadable(so)) 802 return (SU_OK); 803 804 isc = arg; 805 cv_signal(&isc->receive_cv); 806 return (SU_OK); 807 } 808 809 static int 810 icl_pdu_finalize(struct icl_pdu *request) 811 { 812 size_t padding, pdu_len; 813 uint32_t digest, zero = 0; 814 int ok; 815 struct icl_conn *ic; 816 817 ic = request->ip_conn; 818 819 icl_pdu_set_data_segment_length(request, request->ip_data_len); 820 821 pdu_len = icl_pdu_size(request); 822 823 if (ic->ic_header_crc32c) { 824 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 825 ok = m_append(request->ip_bhs_mbuf, sizeof(digest), 826 (void *)&digest); 827 if (ok != 1) { 828 ICL_WARN("failed to append header digest"); 829 return (1); 830 } 831 } 832 833 if (request->ip_data_len != 0) { 834 padding = icl_pdu_padding(request); 835 if (padding > 0) { 836 ok = m_append(request->ip_data_mbuf, padding, 837 (void *)&zero); 838 if (ok != 1) { 839 ICL_WARN("failed to append padding"); 840 return (1); 841 } 842 } 843 844 if (ic->ic_data_crc32c) { 845 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 846 847 ok = m_append(request->ip_data_mbuf, sizeof(digest), 848 (void *)&digest); 849 if (ok != 1) { 850 ICL_WARN("failed to append data digest"); 851 return (1); 852 } 853 } 854 855 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); 856 request->ip_data_mbuf = NULL; 857 } 858 859 request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; 860 861 return (0); 862 } 863 864 static void 865 icl_conn_send_pdus(struct icl_soft_conn *isc, struct icl_pdu_stailq *queue) 866 { 867 struct icl_conn *ic = &isc->ic; 868 struct icl_pdu *request, *request2; 869 struct mbuf *m; 870 struct socket *so; 871 long available, size, size2; 872 int coalesced, error; 873 874 ICL_CONN_LOCK_ASSERT_NOT(ic); 875 876 so = ic->ic_socket; 877 878 SOCKBUF_LOCK(&so->so_snd); 879 /* 880 * Check how much space do we have for transmit. We can't just 881 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, 882 * as it always frees the mbuf chain passed to it, even in case 883 * of error. 884 */ 885 available = sbspace(&so->so_snd); 886 isc->check_send_space = false; 887 888 /* 889 * Notify the socket upcall that we don't need wakeups 890 * for the time being. 891 */ 892 so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; 893 SOCKBUF_UNLOCK(&so->so_snd); 894 895 while (!STAILQ_EMPTY(queue)) { 896 request = STAILQ_FIRST(queue); 897 size = icl_pdu_size(request); 898 if (available < size) { 899 /* 900 * Set the low watermark, to be checked by 901 * sowriteable() in icl_soupcall_send() 902 * to avoid unnecessary wakeups until there 903 * is enough space for the PDU to fit. 904 */ 905 SOCKBUF_LOCK(&so->so_snd); 906 available = sbspace(&so->so_snd); 907 if (available < size) { 908 #if 1 909 ICL_DEBUG("no space to send; " 910 "have %ld, need %ld", 911 available, size); 912 #endif 913 so->so_snd.sb_lowat = max(size, 914 so->so_snd.sb_hiwat / 8); 915 SOCKBUF_UNLOCK(&so->so_snd); 916 return; 917 } 918 SOCKBUF_UNLOCK(&so->so_snd); 919 } 920 STAILQ_REMOVE_HEAD(queue, ip_next); 921 error = icl_pdu_finalize(request); 922 if (error != 0) { 923 ICL_DEBUG("failed to finalize PDU; " 924 "dropping connection"); 925 icl_soft_pdu_done(request, EIO); 926 icl_conn_fail(ic); 927 return; 928 } 929 if (coalesce) { 930 m = request->ip_bhs_mbuf; 931 for (coalesced = 1; ; coalesced++) { 932 request2 = STAILQ_FIRST(queue); 933 if (request2 == NULL) 934 break; 935 size2 = icl_pdu_size(request2); 936 if (available < size + size2) 937 break; 938 STAILQ_REMOVE_HEAD(queue, ip_next); 939 error = icl_pdu_finalize(request2); 940 if (error != 0) { 941 ICL_DEBUG("failed to finalize PDU; " 942 "dropping connection"); 943 icl_soft_pdu_done(request, EIO); 944 icl_soft_pdu_done(request2, EIO); 945 icl_conn_fail(ic); 946 return; 947 } 948 while (m->m_next) 949 m = m->m_next; 950 m_cat(m, request2->ip_bhs_mbuf); 951 request2->ip_bhs_mbuf = NULL; 952 request->ip_bhs_mbuf->m_pkthdr.len += size2; 953 size += size2; 954 icl_soft_pdu_done(request2, 0); 955 } 956 #if 0 957 if (coalesced > 1) { 958 ICL_DEBUG("coalesced %d PDUs into %ld bytes", 959 coalesced, size); 960 } 961 #endif 962 } 963 available -= size; 964 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, 965 NULL, MSG_DONTWAIT, curthread); 966 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */ 967 if (error != 0) { 968 ICL_DEBUG("failed to send PDU, error %d; " 969 "dropping connection", error); 970 icl_soft_pdu_done(request, error); 971 icl_conn_fail(ic); 972 return; 973 } 974 icl_soft_pdu_done(request, 0); 975 } 976 } 977 978 static void 979 icl_send_thread(void *arg) 980 { 981 struct icl_soft_conn *isc; 982 struct icl_conn *ic; 983 struct icl_pdu_stailq queue; 984 985 isc = arg; 986 ic = &isc->ic; 987 988 STAILQ_INIT(&queue); 989 990 ICL_CONN_LOCK(ic); 991 for (;;) { 992 for (;;) { 993 /* 994 * Populate the local queue from the main one. 995 * This way the icl_conn_send_pdus() can go through 996 * all the queued PDUs without holding any locks. 997 */ 998 if (STAILQ_EMPTY(&queue) || isc->check_send_space) 999 STAILQ_CONCAT(&queue, &isc->to_send); 1000 1001 ICL_CONN_UNLOCK(ic); 1002 icl_conn_send_pdus(isc, &queue); 1003 ICL_CONN_LOCK(ic); 1004 1005 /* 1006 * The icl_soupcall_send() was called since the last 1007 * call to sbspace(); go around; 1008 */ 1009 if (isc->check_send_space) 1010 continue; 1011 1012 /* 1013 * Local queue is empty, but we still have PDUs 1014 * in the main one; go around. 1015 */ 1016 if (STAILQ_EMPTY(&queue) && 1017 !STAILQ_EMPTY(&isc->to_send)) 1018 continue; 1019 1020 /* 1021 * There might be some stuff in the local queue, 1022 * which didn't get sent due to not having enough send 1023 * space. Wait for socket upcall. 1024 */ 1025 break; 1026 } 1027 1028 if (ic->ic_disconnecting) { 1029 //ICL_DEBUG("terminating"); 1030 break; 1031 } 1032 1033 cv_wait(&isc->send_cv, ic->ic_lock); 1034 } 1035 1036 /* 1037 * We're exiting; move PDUs back to the main queue, so they can 1038 * get freed properly. At this point ordering doesn't matter. 1039 */ 1040 STAILQ_CONCAT(&isc->to_send, &queue); 1041 1042 isc->send_running = false; 1043 cv_signal(&isc->send_cv); 1044 ICL_CONN_UNLOCK(ic); 1045 kthread_exit(); 1046 } 1047 1048 static int 1049 icl_soupcall_send(struct socket *so, void *arg, int waitflag) 1050 { 1051 struct icl_soft_conn *isc; 1052 struct icl_conn *ic; 1053 1054 if (!sowriteable(so)) 1055 return (SU_OK); 1056 1057 isc = arg; 1058 ic = &isc->ic; 1059 1060 ICL_CONN_LOCK(ic); 1061 isc->check_send_space = true; 1062 ICL_CONN_UNLOCK(ic); 1063 1064 cv_signal(&isc->send_cv); 1065 1066 return (SU_OK); 1067 } 1068 1069 static int 1070 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, 1071 const void *addr, size_t len, int flags) 1072 { 1073 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; 1074 struct mbuf *mb, *newmb; 1075 size_t copylen, off = 0; 1076 1077 KASSERT(len > 0, ("len == 0")); 1078 1079 if (flags & ICL_NOCOPY) { 1080 newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA); 1081 if (newmb == NULL) { 1082 ICL_WARN("failed to allocate mbuf"); 1083 return (ENOMEM); 1084 } 1085 1086 newmb->m_flags |= M_RDONLY; 1087 m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt, 1088 icl_soft_mbuf_done, isp, NULL); 1089 newmb->m_len = len; 1090 } else { 1091 newmb = m_getm2(NULL, len, flags, MT_DATA, 0); 1092 if (newmb == NULL) { 1093 ICL_WARN("failed to allocate mbuf for %zd bytes", len); 1094 return (ENOMEM); 1095 } 1096 1097 for (mb = newmb; mb != NULL; mb = mb->m_next) { 1098 copylen = min(M_TRAILINGSPACE(mb), len - off); 1099 memcpy(mtod(mb, char *), (const char *)addr + off, copylen); 1100 mb->m_len = copylen; 1101 off += copylen; 1102 } 1103 KASSERT(off == len, ("%s: off != len", __func__)); 1104 } 1105 1106 if (request->ip_data_mbuf == NULL) { 1107 request->ip_data_mbuf = newmb; 1108 request->ip_data_len = len; 1109 } else { 1110 m_cat(request->ip_data_mbuf, newmb); 1111 request->ip_data_len += len; 1112 } 1113 1114 return (0); 1115 } 1116 1117 void 1118 icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, 1119 size_t off, void *addr, size_t len) 1120 { 1121 1122 m_copydata(ip->ip_data_mbuf, off, len, addr); 1123 } 1124 1125 static void 1126 icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) 1127 { 1128 1129 icl_soft_conn_pdu_queue_cb(ic, ip, NULL); 1130 } 1131 1132 static void 1133 icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip, 1134 icl_pdu_cb cb) 1135 { 1136 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1137 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 1138 1139 ICL_CONN_LOCK_ASSERT(ic); 1140 isp->ref_cnt++; 1141 isp->cb = cb; 1142 1143 if (ic->ic_disconnecting || ic->ic_socket == NULL) { 1144 ICL_DEBUG("icl_pdu_queue on closed connection"); 1145 icl_soft_pdu_done(ip, ENOTCONN); 1146 return; 1147 } 1148 1149 if (!STAILQ_EMPTY(&isc->to_send)) { 1150 STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next); 1151 /* 1152 * If the queue is not empty, someone else had already 1153 * signaled the send thread; no need to do that again, 1154 * just return. 1155 */ 1156 return; 1157 } 1158 1159 STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next); 1160 cv_signal(&isc->send_cv); 1161 } 1162 1163 static struct icl_conn * 1164 icl_soft_new_conn(const char *name, struct mtx *lock) 1165 { 1166 struct icl_soft_conn *isc; 1167 struct icl_conn *ic; 1168 1169 refcount_acquire(&icl_ncons); 1170 1171 isc = (struct icl_soft_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, 1172 M_WAITOK | M_ZERO); 1173 1174 STAILQ_INIT(&isc->to_send); 1175 cv_init(&isc->send_cv, "icl_tx"); 1176 cv_init(&isc->receive_cv, "icl_rx"); 1177 1178 ic = &isc->ic; 1179 ic->ic_lock = lock; 1180 #ifdef DIAGNOSTIC 1181 refcount_init(&ic->ic_outstanding_pdus, 0); 1182 #endif 1183 ic->ic_name = name; 1184 ic->ic_offload = "None"; 1185 ic->ic_unmapped = false; 1186 1187 return (ic); 1188 } 1189 1190 void 1191 icl_soft_conn_free(struct icl_conn *ic) 1192 { 1193 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1194 1195 #ifdef DIAGNOSTIC 1196 KASSERT(ic->ic_outstanding_pdus == 0, 1197 ("destroying session with %d outstanding PDUs", 1198 ic->ic_outstanding_pdus)); 1199 #endif 1200 cv_destroy(&isc->send_cv); 1201 cv_destroy(&isc->receive_cv); 1202 kobj_delete((struct kobj *)isc, M_ICL_SOFT); 1203 refcount_release(&icl_ncons); 1204 } 1205 1206 static int 1207 icl_conn_start(struct icl_conn *ic) 1208 { 1209 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1210 size_t minspace; 1211 struct sockopt opt; 1212 int error, one = 1; 1213 1214 ICL_CONN_LOCK(ic); 1215 1216 /* 1217 * XXX: Ugly hack. 1218 */ 1219 if (ic->ic_socket == NULL) { 1220 ICL_CONN_UNLOCK(ic); 1221 return (EINVAL); 1222 } 1223 1224 isc->receive_state = ICL_CONN_STATE_BHS; 1225 isc->receive_len = sizeof(struct iscsi_bhs); 1226 ic->ic_disconnecting = false; 1227 1228 ICL_CONN_UNLOCK(ic); 1229 1230 /* 1231 * For sendspace, this is required because the current code cannot 1232 * send a PDU in pieces; thus, the minimum buffer size is equal 1233 * to the maximum PDU size. "+4" is to account for possible padding. 1234 */ 1235 minspace = sizeof(struct iscsi_bhs) + 1236 ic->ic_max_send_data_segment_length + 1237 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1238 if (sendspace < minspace) { 1239 ICL_WARN("kern.icl.sendspace too low; must be at least %zd", 1240 minspace); 1241 sendspace = minspace; 1242 } 1243 minspace = sizeof(struct iscsi_bhs) + 1244 ic->ic_max_recv_data_segment_length + 1245 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1246 if (recvspace < minspace) { 1247 ICL_WARN("kern.icl.recvspace too low; must be at least %zd", 1248 minspace); 1249 recvspace = minspace; 1250 } 1251 1252 error = soreserve(ic->ic_socket, sendspace, recvspace); 1253 if (error != 0) { 1254 ICL_WARN("soreserve failed with error %d", error); 1255 icl_soft_conn_close(ic); 1256 return (error); 1257 } 1258 ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; 1259 ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; 1260 1261 /* 1262 * Disable Nagle. 1263 */ 1264 bzero(&opt, sizeof(opt)); 1265 opt.sopt_dir = SOPT_SET; 1266 opt.sopt_level = IPPROTO_TCP; 1267 opt.sopt_name = TCP_NODELAY; 1268 opt.sopt_val = &one; 1269 opt.sopt_valsize = sizeof(one); 1270 error = sosetopt(ic->ic_socket, &opt); 1271 if (error != 0) { 1272 ICL_WARN("disabling TCP_NODELAY failed with error %d", error); 1273 icl_soft_conn_close(ic); 1274 return (error); 1275 } 1276 1277 /* 1278 * Register socket upcall, to get notified about incoming PDUs 1279 * and free space to send outgoing ones. 1280 */ 1281 SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1282 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, isc); 1283 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1284 SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1285 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, isc); 1286 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1287 1288 /* 1289 * Start threads. 1290 */ 1291 ICL_CONN_LOCK(ic); 1292 isc->send_running = isc->receive_running = true; 1293 ICL_CONN_UNLOCK(ic); 1294 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", 1295 ic->ic_name); 1296 if (error != 0) { 1297 ICL_WARN("kthread_add(9) failed with error %d", error); 1298 ICL_CONN_LOCK(ic); 1299 isc->send_running = isc->receive_running = false; 1300 cv_signal(&isc->send_cv); 1301 ICL_CONN_UNLOCK(ic); 1302 icl_soft_conn_close(ic); 1303 return (error); 1304 } 1305 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", 1306 ic->ic_name); 1307 if (error != 0) { 1308 ICL_WARN("kthread_add(9) failed with error %d", error); 1309 ICL_CONN_LOCK(ic); 1310 isc->receive_running = false; 1311 cv_signal(&isc->send_cv); 1312 ICL_CONN_UNLOCK(ic); 1313 icl_soft_conn_close(ic); 1314 return (error); 1315 } 1316 1317 return (0); 1318 } 1319 1320 int 1321 icl_soft_conn_handoff(struct icl_conn *ic, int fd) 1322 { 1323 struct file *fp; 1324 struct socket *so; 1325 cap_rights_t rights; 1326 int error; 1327 1328 ICL_CONN_LOCK_ASSERT_NOT(ic); 1329 1330 #ifdef ICL_KERNEL_PROXY 1331 /* 1332 * We're transitioning to Full Feature phase, and we don't 1333 * really care. 1334 */ 1335 if (fd == 0) { 1336 ICL_CONN_LOCK(ic); 1337 if (ic->ic_socket == NULL) { 1338 ICL_CONN_UNLOCK(ic); 1339 ICL_WARN("proxy handoff without connect"); 1340 return (EINVAL); 1341 } 1342 ICL_CONN_UNLOCK(ic); 1343 return (0); 1344 } 1345 #endif 1346 1347 /* 1348 * Steal the socket from userland. 1349 */ 1350 error = fget(curthread, fd, 1351 cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp); 1352 if (error != 0) 1353 return (error); 1354 if (fp->f_type != DTYPE_SOCKET) { 1355 fdrop(fp, curthread); 1356 return (EINVAL); 1357 } 1358 so = fp->f_data; 1359 if (so->so_type != SOCK_STREAM) { 1360 fdrop(fp, curthread); 1361 return (EINVAL); 1362 } 1363 1364 ICL_CONN_LOCK(ic); 1365 1366 if (ic->ic_socket != NULL) { 1367 ICL_CONN_UNLOCK(ic); 1368 fdrop(fp, curthread); 1369 return (EBUSY); 1370 } 1371 1372 ic->ic_socket = fp->f_data; 1373 fp->f_ops = &badfileops; 1374 fp->f_data = NULL; 1375 fdrop(fp, curthread); 1376 ICL_CONN_UNLOCK(ic); 1377 1378 error = icl_conn_start(ic); 1379 1380 return (error); 1381 } 1382 1383 void 1384 icl_soft_conn_close(struct icl_conn *ic) 1385 { 1386 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1387 struct icl_pdu *pdu; 1388 struct socket *so; 1389 1390 /* 1391 * Wake up the threads, so they can properly terminate. 1392 * Receive thread sleeps on so->so_rcv lock, send on ic->ic_lock. 1393 */ 1394 ICL_CONN_LOCK(ic); 1395 if (!ic->ic_disconnecting) { 1396 so = ic->ic_socket; 1397 if (so) 1398 SOCKBUF_LOCK(&so->so_rcv); 1399 ic->ic_disconnecting = true; 1400 if (so) 1401 SOCKBUF_UNLOCK(&so->so_rcv); 1402 } 1403 while (isc->receive_running || isc->send_running) { 1404 cv_signal(&isc->receive_cv); 1405 cv_signal(&isc->send_cv); 1406 cv_wait(&isc->send_cv, ic->ic_lock); 1407 } 1408 1409 /* Some other thread could close the connection same time. */ 1410 so = ic->ic_socket; 1411 if (so == NULL) { 1412 ICL_CONN_UNLOCK(ic); 1413 return; 1414 } 1415 ic->ic_socket = NULL; 1416 1417 /* 1418 * Deregister socket upcalls. 1419 */ 1420 ICL_CONN_UNLOCK(ic); 1421 SOCKBUF_LOCK(&so->so_snd); 1422 if (so->so_snd.sb_upcall != NULL) 1423 soupcall_clear(so, SO_SND); 1424 SOCKBUF_UNLOCK(&so->so_snd); 1425 SOCKBUF_LOCK(&so->so_rcv); 1426 if (so->so_rcv.sb_upcall != NULL) 1427 soupcall_clear(so, SO_RCV); 1428 SOCKBUF_UNLOCK(&so->so_rcv); 1429 soclose(so); 1430 ICL_CONN_LOCK(ic); 1431 1432 if (isc->receive_pdu != NULL) { 1433 //ICL_DEBUG("freeing partially received PDU"); 1434 icl_soft_conn_pdu_free(ic, isc->receive_pdu); 1435 isc->receive_pdu = NULL; 1436 } 1437 1438 /* 1439 * Remove any outstanding PDUs from the send queue. 1440 */ 1441 while (!STAILQ_EMPTY(&isc->to_send)) { 1442 pdu = STAILQ_FIRST(&isc->to_send); 1443 STAILQ_REMOVE_HEAD(&isc->to_send, ip_next); 1444 icl_soft_pdu_done(pdu, ENOTCONN); 1445 } 1446 1447 KASSERT(STAILQ_EMPTY(&isc->to_send), 1448 ("destroying session with non-empty send queue")); 1449 ICL_CONN_UNLOCK(ic); 1450 } 1451 1452 int 1453 icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, 1454 struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) 1455 { 1456 1457 return (0); 1458 } 1459 1460 void 1461 icl_soft_conn_task_done(struct icl_conn *ic, void *prv) 1462 { 1463 } 1464 1465 int 1466 icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, 1467 uint32_t *transfer_tag, void **prvp) 1468 { 1469 1470 return (0); 1471 } 1472 1473 void 1474 icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv) 1475 { 1476 } 1477 1478 static int 1479 icl_soft_limits(struct icl_drv_limits *idl) 1480 { 1481 1482 idl->idl_max_recv_data_segment_length = max_data_segment_length; 1483 idl->idl_max_send_data_segment_length = max_data_segment_length; 1484 idl->idl_max_burst_length = max_burst_length; 1485 idl->idl_first_burst_length = first_burst_length; 1486 1487 return (0); 1488 } 1489 1490 #ifdef ICL_KERNEL_PROXY 1491 int 1492 icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype, 1493 int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) 1494 { 1495 1496 return (icl_soft_proxy_connect(ic, domain, socktype, protocol, 1497 from_sa, to_sa)); 1498 } 1499 1500 int 1501 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so) 1502 { 1503 int error; 1504 1505 ICL_CONN_LOCK_ASSERT_NOT(ic); 1506 1507 if (so->so_type != SOCK_STREAM) 1508 return (EINVAL); 1509 1510 ICL_CONN_LOCK(ic); 1511 if (ic->ic_socket != NULL) { 1512 ICL_CONN_UNLOCK(ic); 1513 return (EBUSY); 1514 } 1515 ic->ic_socket = so; 1516 ICL_CONN_UNLOCK(ic); 1517 1518 error = icl_conn_start(ic); 1519 1520 return (error); 1521 } 1522 #endif /* ICL_KERNEL_PROXY */ 1523 1524 static int 1525 icl_soft_load(void) 1526 { 1527 int error; 1528 1529 icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu", 1530 sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL, 1531 UMA_ALIGN_PTR, 0); 1532 refcount_init(&icl_ncons, 0); 1533 1534 /* 1535 * The reason we call this "none" is that to the user, 1536 * it's known as "offload driver"; "offload driver: soft" 1537 * doesn't make much sense. 1538 */ 1539 error = icl_register("none", false, 0, 1540 icl_soft_limits, icl_soft_new_conn); 1541 KASSERT(error == 0, ("failed to register")); 1542 1543 #if defined(ICL_KERNEL_PROXY) && 0 1544 /* 1545 * Debugging aid for kernel proxy functionality. 1546 */ 1547 error = icl_register("proxytest", true, 0, 1548 icl_soft_limits, icl_soft_new_conn); 1549 KASSERT(error == 0, ("failed to register")); 1550 #endif 1551 1552 return (error); 1553 } 1554 1555 static int 1556 icl_soft_unload(void) 1557 { 1558 1559 if (icl_ncons != 0) 1560 return (EBUSY); 1561 1562 icl_unregister("none", false); 1563 #if defined(ICL_KERNEL_PROXY) && 0 1564 icl_unregister("proxytest", true); 1565 #endif 1566 1567 uma_zdestroy(icl_soft_pdu_zone); 1568 1569 return (0); 1570 } 1571 1572 static int 1573 icl_soft_modevent(module_t mod, int what, void *arg) 1574 { 1575 1576 switch (what) { 1577 case MOD_LOAD: 1578 return (icl_soft_load()); 1579 case MOD_UNLOAD: 1580 return (icl_soft_unload()); 1581 default: 1582 return (EINVAL); 1583 } 1584 } 1585 1586 moduledata_t icl_soft_data = { 1587 "icl_soft", 1588 icl_soft_modevent, 1589 0 1590 }; 1591 1592 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); 1593 MODULE_DEPEND(icl_soft, icl, 1, 1, 1); 1594 MODULE_VERSION(icl_soft, 1); 1595