1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 /* 33 * Software implementation of iSCSI Common Layer kobj(9) interface. 
34 */ 35 36 #include <sys/cdefs.h> 37 #include <sys/param.h> 38 #include <sys/bio.h> 39 #include <sys/capsicum.h> 40 #include <sys/condvar.h> 41 #include <sys/conf.h> 42 #include <sys/gsb_crc32.h> 43 #include <sys/file.h> 44 #include <sys/kernel.h> 45 #include <sys/kthread.h> 46 #include <sys/lock.h> 47 #include <sys/mbuf.h> 48 #include <sys/mutex.h> 49 #include <sys/module.h> 50 #include <sys/protosw.h> 51 #include <sys/socket.h> 52 #include <sys/socketvar.h> 53 #include <sys/sysctl.h> 54 #include <sys/systm.h> 55 #include <sys/sx.h> 56 #include <sys/uio.h> 57 #include <vm/uma.h> 58 #include <vm/vm_page.h> 59 #include <netinet/in.h> 60 #include <netinet/tcp.h> 61 62 #include <dev/iscsi/icl.h> 63 #include <dev/iscsi/iscsi_proto.h> 64 #include <icl_conn_if.h> 65 66 #define ICL_CONN_STATE_BHS 1 67 #define ICL_CONN_STATE_AHS 2 68 #define ICL_CONN_STATE_HEADER_DIGEST 3 69 #define ICL_CONN_STATE_DATA 4 70 #define ICL_CONN_STATE_DATA_DIGEST 5 71 72 struct icl_soft_conn { 73 struct icl_conn ic; 74 75 /* soft specific stuff goes here. */ 76 STAILQ_HEAD(, icl_pdu) to_send; 77 struct cv send_cv; 78 struct cv receive_cv; 79 struct icl_pdu *receive_pdu; 80 size_t receive_len; 81 int receive_state; 82 bool receive_running; 83 bool check_send_space; 84 bool send_running; 85 }; 86 87 struct icl_soft_pdu { 88 struct icl_pdu ip; 89 90 /* soft specific stuff goes here. 
*/ 91 u_int ref_cnt; 92 icl_pdu_cb cb; 93 int error; 94 }; 95 96 SYSCTL_NODE(_kern_icl, OID_AUTO, soft, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 97 "Software iSCSI"); 98 static int coalesce = 1; 99 SYSCTL_INT(_kern_icl_soft, OID_AUTO, coalesce, CTLFLAG_RWTUN, 100 &coalesce, 0, "Try to coalesce PDUs before sending"); 101 static int partial_receive_len = 256 * 1024; 102 SYSCTL_INT(_kern_icl_soft, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, 103 &partial_receive_len, 0, "Minimum read size for partially received " 104 "data segment"); 105 static int max_data_segment_length = 256 * 1024; 106 SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_data_segment_length, CTLFLAG_RWTUN, 107 &max_data_segment_length, 0, "Maximum data segment length"); 108 static int first_burst_length = 1024 * 1024; 109 SYSCTL_INT(_kern_icl_soft, OID_AUTO, first_burst_length, CTLFLAG_RWTUN, 110 &first_burst_length, 0, "First burst length"); 111 static int max_burst_length = 1024 * 1024; 112 SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_burst_length, CTLFLAG_RWTUN, 113 &max_burst_length, 0, "Maximum burst length"); 114 static int sendspace = 1536 * 1024; 115 SYSCTL_INT(_kern_icl_soft, OID_AUTO, sendspace, CTLFLAG_RWTUN, 116 &sendspace, 0, "Default send socket buffer size"); 117 static int recvspace = 1536 * 1024; 118 SYSCTL_INT(_kern_icl_soft, OID_AUTO, recvspace, CTLFLAG_RWTUN, 119 &recvspace, 0, "Default receive socket buffer size"); 120 121 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend"); 122 static uma_zone_t icl_soft_pdu_zone; 123 124 static volatile u_int icl_ncons; 125 126 STAILQ_HEAD(icl_pdu_stailq, icl_pdu); 127 128 static icl_conn_new_pdu_t icl_soft_conn_new_pdu; 129 static icl_conn_pdu_free_t icl_soft_conn_pdu_free; 130 static icl_conn_pdu_data_segment_length_t 131 icl_soft_conn_pdu_data_segment_length; 132 static icl_conn_pdu_append_bio_t icl_soft_conn_pdu_append_bio; 133 static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data; 134 static icl_conn_pdu_get_bio_t 
icl_soft_conn_pdu_get_bio; 135 static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data; 136 static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue; 137 static icl_conn_pdu_queue_cb_t icl_soft_conn_pdu_queue_cb; 138 static icl_conn_handoff_t icl_soft_conn_handoff; 139 static icl_conn_free_t icl_soft_conn_free; 140 static icl_conn_close_t icl_soft_conn_close; 141 static icl_conn_task_setup_t icl_soft_conn_task_setup; 142 static icl_conn_task_done_t icl_soft_conn_task_done; 143 static icl_conn_transfer_setup_t icl_soft_conn_transfer_setup; 144 static icl_conn_transfer_done_t icl_soft_conn_transfer_done; 145 #ifdef ICL_KERNEL_PROXY 146 static icl_conn_connect_t icl_soft_conn_connect; 147 #endif 148 149 static kobj_method_t icl_soft_methods[] = { 150 KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu), 151 KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free), 152 KOBJMETHOD(icl_conn_pdu_data_segment_length, 153 icl_soft_conn_pdu_data_segment_length), 154 KOBJMETHOD(icl_conn_pdu_append_bio, icl_soft_conn_pdu_append_bio), 155 KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data), 156 KOBJMETHOD(icl_conn_pdu_get_bio, icl_soft_conn_pdu_get_bio), 157 KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data), 158 KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue), 159 KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb), 160 KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff), 161 KOBJMETHOD(icl_conn_free, icl_soft_conn_free), 162 KOBJMETHOD(icl_conn_close, icl_soft_conn_close), 163 KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup), 164 KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done), 165 KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup), 166 KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done), 167 #ifdef ICL_KERNEL_PROXY 168 KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect), 169 #endif 170 { 0, 0 } 171 }; 172 173 DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_soft_conn)); 174 175 static 
void 176 icl_conn_fail(struct icl_conn *ic) 177 { 178 if (ic->ic_socket == NULL) 179 return; 180 181 /* 182 * XXX 183 */ 184 ic->ic_socket->so_error = EDOOFUS; 185 (ic->ic_error)(ic); 186 } 187 188 static void 189 icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) 190 { 191 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 192 193 KASSERT(isp->ref_cnt == 0, ("freeing active PDU")); 194 m_freem(ip->ip_bhs_mbuf); 195 m_freem(ip->ip_ahs_mbuf); 196 m_freem(ip->ip_data_mbuf); 197 uma_zfree(icl_soft_pdu_zone, isp); 198 #ifdef DIAGNOSTIC 199 refcount_release(&ic->ic_outstanding_pdus); 200 #endif 201 } 202 203 static void 204 icl_soft_pdu_call_cb(struct icl_pdu *ip) 205 { 206 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 207 208 if (isp->cb != NULL) 209 isp->cb(ip, isp->error); 210 #ifdef DIAGNOSTIC 211 refcount_release(&ip->ip_conn->ic_outstanding_pdus); 212 #endif 213 uma_zfree(icl_soft_pdu_zone, isp); 214 } 215 216 static void 217 icl_soft_pdu_done(struct icl_pdu *ip, int error) 218 { 219 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 220 221 if (error != 0) 222 isp->error = error; 223 224 m_freem(ip->ip_bhs_mbuf); 225 ip->ip_bhs_mbuf = NULL; 226 m_freem(ip->ip_ahs_mbuf); 227 ip->ip_ahs_mbuf = NULL; 228 m_freem(ip->ip_data_mbuf); 229 ip->ip_data_mbuf = NULL; 230 231 if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) 232 icl_soft_pdu_call_cb(ip); 233 } 234 235 static void 236 icl_soft_mbuf_done(struct mbuf *mb) 237 { 238 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1; 239 240 icl_soft_pdu_call_cb(&isp->ip); 241 } 242 243 /* 244 * Allocate icl_pdu with empty BHS to fill up by the caller. 
245 */ 246 struct icl_pdu * 247 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) 248 { 249 struct icl_soft_pdu *isp; 250 struct icl_pdu *ip; 251 252 #ifdef DIAGNOSTIC 253 refcount_acquire(&ic->ic_outstanding_pdus); 254 #endif 255 isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO); 256 if (isp == NULL) { 257 ICL_WARN("failed to allocate soft PDU"); 258 #ifdef DIAGNOSTIC 259 refcount_release(&ic->ic_outstanding_pdus); 260 #endif 261 return (NULL); 262 } 263 ip = &isp->ip; 264 ip->ip_conn = ic; 265 266 CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN); 267 ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA); 268 if (ip->ip_bhs_mbuf == NULL) { 269 ICL_WARN("failed to allocate BHS mbuf"); 270 icl_soft_conn_pdu_free(ic, ip); 271 return (NULL); 272 } 273 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); 274 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); 275 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); 276 277 return (ip); 278 } 279 280 static int 281 icl_pdu_ahs_length(const struct icl_pdu *request) 282 { 283 284 return (request->ip_bhs->bhs_total_ahs_len * 4); 285 } 286 287 static size_t 288 icl_pdu_data_segment_length(const struct icl_pdu *request) 289 { 290 uint32_t len = 0; 291 292 len += request->ip_bhs->bhs_data_segment_len[0]; 293 len <<= 8; 294 len += request->ip_bhs->bhs_data_segment_len[1]; 295 len <<= 8; 296 len += request->ip_bhs->bhs_data_segment_len[2]; 297 298 return (len); 299 } 300 301 size_t 302 icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, 303 const struct icl_pdu *request) 304 { 305 306 return (icl_pdu_data_segment_length(request)); 307 } 308 309 static void 310 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) 311 { 312 313 response->ip_bhs->bhs_data_segment_len[2] = len; 314 response->ip_bhs->bhs_data_segment_len[1] = len >> 8; 315 response->ip_bhs->bhs_data_segment_len[0] = len >> 16; 316 } 317 318 static size_t 319 icl_pdu_padding(const struct icl_pdu *ip) 320 { 321 322 if ((ip->ip_data_len % 4) != 0) 323 
return (4 - (ip->ip_data_len % 4)); 324 325 return (0); 326 } 327 328 static size_t 329 icl_pdu_size(const struct icl_pdu *response) 330 { 331 size_t len; 332 333 KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); 334 335 len = sizeof(struct iscsi_bhs) + response->ip_data_len + 336 icl_pdu_padding(response); 337 if (response->ip_conn->ic_header_crc32c) 338 len += ISCSI_HEADER_DIGEST_SIZE; 339 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) 340 len += ISCSI_DATA_DIGEST_SIZE; 341 342 return (len); 343 } 344 345 static void 346 icl_soft_receive_buf(struct mbuf **r, size_t *rs, void *buf, size_t s) 347 { 348 349 m_copydata(*r, 0, s, buf); 350 m_adj(*r, s); 351 while ((*r) != NULL && (*r)->m_len == 0) 352 *r = m_free(*r); 353 *rs -= s; 354 } 355 356 static void 357 icl_pdu_receive_ahs(struct icl_pdu *request, struct mbuf **r, size_t *rs) 358 { 359 360 request->ip_ahs_len = icl_pdu_ahs_length(request); 361 if (request->ip_ahs_len == 0) 362 return; 363 364 request->ip_ahs_mbuf = *r; 365 *r = m_split(request->ip_ahs_mbuf, request->ip_ahs_len, M_WAITOK); 366 *rs -= request->ip_ahs_len; 367 } 368 369 static int 370 mbuf_crc32c_helper(void *arg, void *data, u_int len) 371 { 372 uint32_t *digestp = arg; 373 374 *digestp = calculate_crc32c(*digestp, data, len); 375 return (0); 376 } 377 378 static uint32_t 379 icl_mbuf_to_crc32c(struct mbuf *m0, size_t len) 380 { 381 uint32_t digest = 0xffffffff; 382 383 m_apply(m0, 0, len, mbuf_crc32c_helper, &digest); 384 digest = digest ^ 0xffffffff; 385 386 return (digest); 387 } 388 389 static int 390 icl_pdu_check_header_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs) 391 { 392 uint32_t received_digest, valid_digest; 393 394 if (request->ip_conn->ic_header_crc32c == false) 395 return (0); 396 397 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); 398 icl_soft_receive_buf(r, rs, &received_digest, ISCSI_HEADER_DIGEST_SIZE); 399 400 /* Temporary attach AHS to BHS to calculate header 
digest. */ 401 request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf; 402 valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf, ISCSI_BHS_SIZE); 403 request->ip_bhs_mbuf->m_next = NULL; 404 if (received_digest != valid_digest) { 405 ICL_WARN("header digest check failed; got 0x%x, " 406 "should be 0x%x", received_digest, valid_digest); 407 return (-1); 408 } 409 410 return (0); 411 } 412 413 /* 414 * Return the number of bytes that should be waiting in the receive socket 415 * before icl_pdu_receive_data_segment() gets called. 416 */ 417 static size_t 418 icl_pdu_data_segment_receive_len(const struct icl_pdu *request) 419 { 420 size_t len; 421 422 len = icl_pdu_data_segment_length(request); 423 if (len == 0) 424 return (0); 425 426 /* 427 * Account for the parts of data segment already read from 428 * the socket buffer. 429 */ 430 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 431 len -= request->ip_data_len; 432 433 /* 434 * Don't always wait for the full data segment to be delivered 435 * to the socket; this might badly affect performance due to 436 * TCP window scaling. 437 */ 438 if (len > partial_receive_len) { 439 #if 0 440 ICL_DEBUG("need %zd bytes of data, limiting to %zd", 441 len, partial_receive_len)); 442 #endif 443 len = partial_receive_len; 444 445 return (len); 446 } 447 448 /* 449 * Account for padding. Note that due to the way code is written, 450 * the icl_pdu_receive_data_segment() must always receive padding 451 * along with the last part of data segment, because it would be 452 * impossible to tell whether we've already received the full data 453 * segment including padding, or without it. 
454 */ 455 if ((len % 4) != 0) 456 len += 4 - (len % 4); 457 458 #if 0 459 ICL_DEBUG("need %zd bytes of data", len)); 460 #endif 461 462 return (len); 463 } 464 465 static int 466 icl_pdu_receive_data_segment(struct icl_pdu *request, struct mbuf **r, 467 size_t *rs, bool *more_neededp) 468 { 469 struct icl_soft_conn *isc; 470 size_t len, padding = 0; 471 struct mbuf *m; 472 473 isc = (struct icl_soft_conn *)request->ip_conn; 474 475 *more_neededp = false; 476 isc->receive_len = 0; 477 478 len = icl_pdu_data_segment_length(request); 479 if (len == 0) 480 return (0); 481 482 if ((len % 4) != 0) 483 padding = 4 - (len % 4); 484 485 /* 486 * Account for already received parts of data segment. 487 */ 488 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 489 len -= request->ip_data_len; 490 491 if (len + padding > *rs) { 492 /* 493 * Not enough data in the socket buffer. Receive as much 494 * as we can. Don't receive padding, since, obviously, it's 495 * not the end of data segment yet. 496 */ 497 #if 0 498 ICL_DEBUG("limited from %zd to %zd", 499 len + padding, *rs - padding)); 500 #endif 501 len = *rs - padding; 502 *more_neededp = true; 503 padding = 0; 504 } 505 506 /* 507 * Must not try to receive padding without at least one byte 508 * of actual data segment. 
509 */ 510 if (len > 0) { 511 m = *r; 512 *r = m_split(m, len + padding, M_WAITOK); 513 *rs -= len + padding; 514 515 if (request->ip_data_mbuf == NULL) 516 request->ip_data_mbuf = m; 517 else 518 m_cat(request->ip_data_mbuf, m); 519 520 request->ip_data_len += len; 521 } else 522 ICL_DEBUG("len 0"); 523 524 if (*more_neededp) 525 isc->receive_len = icl_pdu_data_segment_receive_len(request); 526 527 return (0); 528 } 529 530 static int 531 icl_pdu_check_data_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs) 532 { 533 uint32_t received_digest, valid_digest; 534 535 if (request->ip_conn->ic_data_crc32c == false) 536 return (0); 537 538 if (request->ip_data_len == 0) 539 return (0); 540 541 CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); 542 icl_soft_receive_buf(r, rs, &received_digest, ISCSI_DATA_DIGEST_SIZE); 543 544 /* 545 * Note that ip_data_mbuf also contains padding; since digest 546 * calculation is supposed to include that, we iterate over 547 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. 548 */ 549 valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf, 550 roundup2(request->ip_data_len, 4)); 551 if (received_digest != valid_digest) { 552 ICL_WARN("data digest check failed; got 0x%x, " 553 "should be 0x%x", received_digest, valid_digest); 554 return (-1); 555 } 556 557 return (0); 558 } 559 560 /* 561 * Somewhat contrary to the name, this attempts to receive only one 562 * "part" of PDU at a time; call it repeatedly until it returns non-NULL. 
563 */ 564 static struct icl_pdu * 565 icl_conn_receive_pdu(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs) 566 { 567 struct icl_conn *ic = &isc->ic; 568 struct icl_pdu *request; 569 size_t len; 570 int error = 0; 571 bool more_needed; 572 573 if (isc->receive_state == ICL_CONN_STATE_BHS) { 574 KASSERT(isc->receive_pdu == NULL, 575 ("isc->receive_pdu != NULL")); 576 request = icl_soft_conn_new_pdu(ic, M_NOWAIT); 577 if (request == NULL) { 578 ICL_DEBUG("failed to allocate PDU; " 579 "dropping connection"); 580 icl_conn_fail(ic); 581 return (NULL); 582 } 583 isc->receive_pdu = request; 584 } else { 585 KASSERT(isc->receive_pdu != NULL, 586 ("isc->receive_pdu == NULL")); 587 request = isc->receive_pdu; 588 } 589 590 switch (isc->receive_state) { 591 case ICL_CONN_STATE_BHS: 592 //ICL_DEBUG("receiving BHS"); 593 icl_soft_receive_buf(r, rs, request->ip_bhs, 594 sizeof(struct iscsi_bhs)); 595 596 /* 597 * We don't enforce any limit for AHS length; 598 * its length is stored in 8 bit field. 
599 */ 600 601 len = icl_pdu_data_segment_length(request); 602 if (len > ic->ic_max_recv_data_segment_length) { 603 ICL_WARN("received data segment " 604 "length %zd is larger than negotiated; " 605 "dropping connection", len); 606 error = EINVAL; 607 break; 608 } 609 610 isc->receive_state = ICL_CONN_STATE_AHS; 611 isc->receive_len = icl_pdu_ahs_length(request); 612 break; 613 614 case ICL_CONN_STATE_AHS: 615 //ICL_DEBUG("receiving AHS"); 616 icl_pdu_receive_ahs(request, r, rs); 617 isc->receive_state = ICL_CONN_STATE_HEADER_DIGEST; 618 if (ic->ic_header_crc32c == false) 619 isc->receive_len = 0; 620 else 621 isc->receive_len = ISCSI_HEADER_DIGEST_SIZE; 622 break; 623 624 case ICL_CONN_STATE_HEADER_DIGEST: 625 //ICL_DEBUG("receiving header digest"); 626 error = icl_pdu_check_header_digest(request, r, rs); 627 if (error != 0) { 628 ICL_DEBUG("header digest failed; " 629 "dropping connection"); 630 break; 631 } 632 633 isc->receive_state = ICL_CONN_STATE_DATA; 634 isc->receive_len = icl_pdu_data_segment_receive_len(request); 635 break; 636 637 case ICL_CONN_STATE_DATA: 638 //ICL_DEBUG("receiving data segment"); 639 error = icl_pdu_receive_data_segment(request, r, rs, 640 &more_needed); 641 if (error != 0) { 642 ICL_DEBUG("failed to receive data segment;" 643 "dropping connection"); 644 break; 645 } 646 647 if (more_needed) 648 break; 649 650 isc->receive_state = ICL_CONN_STATE_DATA_DIGEST; 651 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) 652 isc->receive_len = 0; 653 else 654 isc->receive_len = ISCSI_DATA_DIGEST_SIZE; 655 break; 656 657 case ICL_CONN_STATE_DATA_DIGEST: 658 //ICL_DEBUG("receiving data digest"); 659 error = icl_pdu_check_data_digest(request, r, rs); 660 if (error != 0) { 661 ICL_DEBUG("data digest failed; " 662 "dropping connection"); 663 break; 664 } 665 666 /* 667 * We've received complete PDU; reset the receive state machine 668 * and return the PDU. 
669 */ 670 isc->receive_state = ICL_CONN_STATE_BHS; 671 isc->receive_len = sizeof(struct iscsi_bhs); 672 isc->receive_pdu = NULL; 673 return (request); 674 675 default: 676 panic("invalid receive_state %d\n", isc->receive_state); 677 } 678 679 if (error != 0) { 680 /* 681 * Don't free the PDU; it's pointed to by isc->receive_pdu 682 * and will get freed in icl_soft_conn_close(). 683 */ 684 icl_conn_fail(ic); 685 } 686 687 return (NULL); 688 } 689 690 static void 691 icl_conn_receive_pdus(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs) 692 { 693 struct icl_conn *ic = &isc->ic; 694 struct icl_pdu *response; 695 696 for (;;) { 697 if (ic->ic_disconnecting) 698 return; 699 700 /* 701 * Loop until we have a complete PDU or there is not enough 702 * data in the socket buffer. 703 */ 704 if (*rs < isc->receive_len) { 705 #if 0 706 ICL_DEBUG("not enough data; have %zd, need %zd", 707 *rs, isc->receive_len); 708 #endif 709 return; 710 } 711 712 response = icl_conn_receive_pdu(isc, r, rs); 713 if (response == NULL) 714 continue; 715 716 if (response->ip_ahs_len > 0) { 717 ICL_WARN("received PDU with unsupported " 718 "AHS; opcode 0x%x; dropping connection", 719 response->ip_bhs->bhs_opcode); 720 icl_soft_conn_pdu_free(ic, response); 721 icl_conn_fail(ic); 722 return; 723 } 724 725 (ic->ic_receive)(response); 726 } 727 } 728 729 static void 730 icl_receive_thread(void *arg) 731 { 732 struct icl_soft_conn *isc = arg; 733 struct icl_conn *ic = &isc->ic; 734 size_t available, read = 0; 735 struct socket *so; 736 struct mbuf *m, *r = NULL; 737 struct uio uio; 738 int error, flags; 739 740 so = ic->ic_socket; 741 742 for (;;) { 743 SOCKBUF_LOCK(&so->so_rcv); 744 if (ic->ic_disconnecting) { 745 SOCKBUF_UNLOCK(&so->so_rcv); 746 break; 747 } 748 749 /* 750 * Set the low watermark, to be checked by 751 * soreadable() in icl_soupcall_receive() 752 * to avoid unnecessary wakeups until there 753 * is enough data received to read the PDU. 
754 */ 755 available = sbavail(&so->so_rcv); 756 if (read + available < isc->receive_len) { 757 so->so_rcv.sb_lowat = isc->receive_len - read; 758 cv_wait(&isc->receive_cv, SOCKBUF_MTX(&so->so_rcv)); 759 so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; 760 available = sbavail(&so->so_rcv); 761 } 762 SOCKBUF_UNLOCK(&so->so_rcv); 763 764 if (available == 0) { 765 if (so->so_error != 0) { 766 ICL_DEBUG("connection error %d; " 767 "dropping connection", so->so_error); 768 icl_conn_fail(ic); 769 break; 770 } 771 continue; 772 } 773 774 memset(&uio, 0, sizeof(uio)); 775 uio.uio_resid = available; 776 flags = MSG_DONTWAIT; 777 error = soreceive(so, NULL, &uio, &m, NULL, &flags); 778 if (error != 0) { 779 ICL_DEBUG("soreceive error %d", error); 780 break; 781 } 782 if (uio.uio_resid != 0) { 783 m_freem(m); 784 ICL_DEBUG("short read"); 785 break; 786 } 787 if (r) 788 m_cat(r, m); 789 else 790 r = m; 791 read += available; 792 793 icl_conn_receive_pdus(isc, &r, &read); 794 } 795 796 if (r) 797 m_freem(r); 798 799 ICL_CONN_LOCK(ic); 800 isc->receive_running = false; 801 cv_signal(&isc->send_cv); 802 ICL_CONN_UNLOCK(ic); 803 kthread_exit(); 804 } 805 806 static int 807 icl_soupcall_receive(struct socket *so, void *arg, int waitflag) 808 { 809 struct icl_soft_conn *isc; 810 811 if (!soreadable(so)) 812 return (SU_OK); 813 814 isc = arg; 815 cv_signal(&isc->receive_cv); 816 return (SU_OK); 817 } 818 819 static int 820 icl_pdu_finalize(struct icl_pdu *request) 821 { 822 size_t padding, pdu_len; 823 uint32_t digest, zero = 0; 824 int ok; 825 struct icl_conn *ic; 826 827 ic = request->ip_conn; 828 829 icl_pdu_set_data_segment_length(request, request->ip_data_len); 830 831 pdu_len = icl_pdu_size(request); 832 833 if (ic->ic_header_crc32c) { 834 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf, 835 ISCSI_BHS_SIZE); 836 ok = m_append(request->ip_bhs_mbuf, sizeof(digest), 837 (void *)&digest); 838 if (ok != 1) { 839 ICL_WARN("failed to append header digest"); 840 return (1); 841 } 842 } 
843 844 if (request->ip_data_len != 0) { 845 padding = icl_pdu_padding(request); 846 if (padding > 0) { 847 ok = m_append(request->ip_data_mbuf, padding, 848 (void *)&zero); 849 if (ok != 1) { 850 ICL_WARN("failed to append padding"); 851 return (1); 852 } 853 } 854 855 if (ic->ic_data_crc32c) { 856 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf, 857 roundup2(request->ip_data_len, 4)); 858 859 ok = m_append(request->ip_data_mbuf, sizeof(digest), 860 (void *)&digest); 861 if (ok != 1) { 862 ICL_WARN("failed to append data digest"); 863 return (1); 864 } 865 } 866 867 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); 868 request->ip_data_mbuf = NULL; 869 } 870 871 request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; 872 873 return (0); 874 } 875 876 static void 877 icl_conn_send_pdus(struct icl_soft_conn *isc, struct icl_pdu_stailq *queue) 878 { 879 struct icl_conn *ic = &isc->ic; 880 struct icl_pdu *request, *request2; 881 struct mbuf *m; 882 struct socket *so; 883 long available, size, size2; 884 #ifdef DEBUG_COALESCED 885 int coalesced; 886 #endif 887 int error; 888 889 ICL_CONN_LOCK_ASSERT_NOT(ic); 890 891 so = ic->ic_socket; 892 893 SOCKBUF_LOCK(&so->so_snd); 894 /* 895 * Check how much space do we have for transmit. We can't just 896 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, 897 * as it always frees the mbuf chain passed to it, even in case 898 * of error. 899 */ 900 available = sbspace(&so->so_snd); 901 isc->check_send_space = false; 902 903 /* 904 * Notify the socket upcall that we don't need wakeups 905 * for the time being. 906 */ 907 so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; 908 SOCKBUF_UNLOCK(&so->so_snd); 909 910 while (!STAILQ_EMPTY(queue)) { 911 request = STAILQ_FIRST(queue); 912 size = icl_pdu_size(request); 913 if (available < size) { 914 /* 915 * Set the low watermark, to be checked by 916 * sowriteable() in icl_soupcall_send() 917 * to avoid unnecessary wakeups until there 918 * is enough space for the PDU to fit. 
919 */ 920 SOCKBUF_LOCK(&so->so_snd); 921 available = sbspace(&so->so_snd); 922 if (available < size) { 923 #if 1 924 ICL_DEBUG("no space to send; " 925 "have %ld, need %ld", 926 available, size); 927 #endif 928 so->so_snd.sb_lowat = max(size, 929 so->so_snd.sb_hiwat / 8); 930 SOCKBUF_UNLOCK(&so->so_snd); 931 return; 932 } 933 SOCKBUF_UNLOCK(&so->so_snd); 934 } 935 STAILQ_REMOVE_HEAD(queue, ip_next); 936 error = icl_pdu_finalize(request); 937 if (error != 0) { 938 ICL_DEBUG("failed to finalize PDU; " 939 "dropping connection"); 940 icl_soft_pdu_done(request, EIO); 941 icl_conn_fail(ic); 942 return; 943 } 944 if (coalesce) { 945 m = request->ip_bhs_mbuf; 946 for ( 947 #ifdef DEBUG_COALESCED 948 coalesced = 1 949 #endif 950 ; ; 951 #ifdef DEBUG_COALESCED 952 coalesced++ 953 #endif 954 ) { 955 request2 = STAILQ_FIRST(queue); 956 if (request2 == NULL) 957 break; 958 size2 = icl_pdu_size(request2); 959 if (available < size + size2) 960 break; 961 STAILQ_REMOVE_HEAD(queue, ip_next); 962 error = icl_pdu_finalize(request2); 963 if (error != 0) { 964 ICL_DEBUG("failed to finalize PDU; " 965 "dropping connection"); 966 icl_soft_pdu_done(request, EIO); 967 icl_soft_pdu_done(request2, EIO); 968 icl_conn_fail(ic); 969 return; 970 } 971 while (m->m_next) 972 m = m->m_next; 973 m_cat(m, request2->ip_bhs_mbuf); 974 request2->ip_bhs_mbuf = NULL; 975 request->ip_bhs_mbuf->m_pkthdr.len += size2; 976 size += size2; 977 icl_soft_pdu_done(request2, 0); 978 } 979 #ifdef DEBUG_COALESCED 980 if (coalesced > 1) { 981 ICL_DEBUG("coalesced %d PDUs into %ld bytes", 982 coalesced, size); 983 } 984 #endif 985 } 986 available -= size; 987 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, 988 NULL, MSG_DONTWAIT, curthread); 989 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. 
*/ 990 if (error != 0) { 991 ICL_DEBUG("failed to send PDU, error %d; " 992 "dropping connection", error); 993 icl_soft_pdu_done(request, error); 994 icl_conn_fail(ic); 995 return; 996 } 997 icl_soft_pdu_done(request, 0); 998 } 999 } 1000 1001 static void 1002 icl_send_thread(void *arg) 1003 { 1004 struct icl_soft_conn *isc; 1005 struct icl_conn *ic; 1006 struct icl_pdu_stailq queue; 1007 1008 isc = arg; 1009 ic = &isc->ic; 1010 1011 STAILQ_INIT(&queue); 1012 1013 ICL_CONN_LOCK(ic); 1014 for (;;) { 1015 for (;;) { 1016 /* 1017 * Populate the local queue from the main one. 1018 * This way the icl_conn_send_pdus() can go through 1019 * all the queued PDUs without holding any locks. 1020 */ 1021 if (STAILQ_EMPTY(&queue) || isc->check_send_space) 1022 STAILQ_CONCAT(&queue, &isc->to_send); 1023 1024 ICL_CONN_UNLOCK(ic); 1025 icl_conn_send_pdus(isc, &queue); 1026 ICL_CONN_LOCK(ic); 1027 1028 /* 1029 * The icl_soupcall_send() was called since the last 1030 * call to sbspace(); go around; 1031 */ 1032 if (isc->check_send_space) 1033 continue; 1034 1035 /* 1036 * Local queue is empty, but we still have PDUs 1037 * in the main one; go around. 1038 */ 1039 if (STAILQ_EMPTY(&queue) && 1040 !STAILQ_EMPTY(&isc->to_send)) 1041 continue; 1042 1043 /* 1044 * There might be some stuff in the local queue, 1045 * which didn't get sent due to not having enough send 1046 * space. Wait for socket upcall. 1047 */ 1048 break; 1049 } 1050 1051 if (ic->ic_disconnecting) { 1052 //ICL_DEBUG("terminating"); 1053 break; 1054 } 1055 1056 cv_wait(&isc->send_cv, ic->ic_lock); 1057 } 1058 1059 /* 1060 * We're exiting; move PDUs back to the main queue, so they can 1061 * get freed properly. At this point ordering doesn't matter. 
1062 */ 1063 STAILQ_CONCAT(&isc->to_send, &queue); 1064 1065 isc->send_running = false; 1066 cv_signal(&isc->send_cv); 1067 ICL_CONN_UNLOCK(ic); 1068 kthread_exit(); 1069 } 1070 1071 static int 1072 icl_soupcall_send(struct socket *so, void *arg, int waitflag) 1073 { 1074 struct icl_soft_conn *isc; 1075 struct icl_conn *ic; 1076 1077 if (!sowriteable(so)) 1078 return (SU_OK); 1079 1080 isc = arg; 1081 ic = &isc->ic; 1082 1083 ICL_CONN_LOCK(ic); 1084 isc->check_send_space = true; 1085 ICL_CONN_UNLOCK(ic); 1086 1087 cv_signal(&isc->send_cv); 1088 1089 return (SU_OK); 1090 } 1091 1092 static void 1093 icl_soft_free_mext_pg(struct mbuf *m) 1094 { 1095 struct icl_soft_pdu *isp; 1096 1097 M_ASSERTEXTPG(m); 1098 1099 /* 1100 * Nothing to do for the pages; they are owned by the PDU / 1101 * I/O request. 1102 */ 1103 1104 /* Drop reference on the PDU. */ 1105 isp = m->m_ext.ext_arg1; 1106 if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) 1107 icl_soft_pdu_call_cb(&isp->ip); 1108 } 1109 1110 static int 1111 icl_soft_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *request, 1112 struct bio *bp, size_t offset, size_t len, int flags) 1113 { 1114 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; 1115 struct mbuf *m, *m_tail; 1116 vm_offset_t vaddr; 1117 size_t mtodo, page_offset, todo; 1118 int i; 1119 1120 KASSERT(len > 0, ("len == 0")); 1121 1122 m_tail = request->ip_data_mbuf; 1123 if (m_tail != NULL) 1124 for (; m_tail->m_next != NULL; m_tail = m_tail->m_next) 1125 ; 1126 1127 MPASS(bp->bio_flags & BIO_UNMAPPED); 1128 if (offset < PAGE_SIZE - bp->bio_ma_offset) { 1129 page_offset = bp->bio_ma_offset + offset; 1130 i = 0; 1131 } else { 1132 offset -= PAGE_SIZE - bp->bio_ma_offset; 1133 for (i = 1; offset >= PAGE_SIZE; i++) 1134 offset -= PAGE_SIZE; 1135 page_offset = offset; 1136 } 1137 1138 if (flags & ICL_NOCOPY) { 1139 m = NULL; 1140 while (len > 0) { 1141 if (m == NULL) { 1142 m = mb_alloc_ext_pgs(flags & ~ICL_NOCOPY, 1143 icl_soft_free_mext_pg); 1144 if 
(__predict_false(m == NULL)) 1145 return (ENOMEM); 1146 atomic_add_int(&isp->ref_cnt, 1); 1147 m->m_ext.ext_arg1 = isp; 1148 m->m_epg_1st_off = page_offset; 1149 } 1150 1151 todo = MIN(len, PAGE_SIZE - page_offset); 1152 1153 m->m_epg_pa[m->m_epg_npgs] = 1154 VM_PAGE_TO_PHYS(bp->bio_ma[i]); 1155 m->m_epg_npgs++; 1156 m->m_epg_last_len = todo; 1157 m->m_len += todo; 1158 m->m_ext.ext_size += PAGE_SIZE; 1159 MBUF_EXT_PGS_ASSERT_SANITY(m); 1160 1161 if (m->m_epg_npgs == MBUF_PEXT_MAX_PGS) { 1162 if (m_tail != NULL) 1163 m_tail->m_next = m; 1164 else 1165 request->ip_data_mbuf = m; 1166 m_tail = m; 1167 request->ip_data_len += m->m_len; 1168 m = NULL; 1169 } 1170 1171 page_offset = 0; 1172 len -= todo; 1173 i++; 1174 } 1175 1176 if (m != NULL) { 1177 if (m_tail != NULL) 1178 m_tail->m_next = m; 1179 else 1180 request->ip_data_mbuf = m; 1181 request->ip_data_len += m->m_len; 1182 } 1183 return (0); 1184 } 1185 1186 m = m_getm2(NULL, len, flags, MT_DATA, 0); 1187 if (__predict_false(m == NULL)) 1188 return (ENOMEM); 1189 1190 if (request->ip_data_mbuf == NULL) { 1191 request->ip_data_mbuf = m; 1192 request->ip_data_len = len; 1193 } else { 1194 m_tail->m_next = m; 1195 request->ip_data_len += len; 1196 } 1197 1198 while (len > 0) { 1199 todo = MIN(len, PAGE_SIZE - page_offset); 1200 vaddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(bp->bio_ma[i])); 1201 1202 do { 1203 mtodo = min(todo, M_SIZE(m) - m->m_len); 1204 memcpy(mtod(m, char *) + m->m_len, (char *)vaddr + 1205 page_offset, mtodo); 1206 m->m_len += mtodo; 1207 if (m->m_len == M_SIZE(m)) 1208 m = m->m_next; 1209 page_offset += mtodo; 1210 todo -= mtodo; 1211 } while (todo > 0); 1212 1213 page_offset = 0; 1214 len -= todo; 1215 i++; 1216 } 1217 1218 return (0); 1219 } 1220 1221 static int 1222 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, 1223 const void *addr, size_t len, int flags) 1224 { 1225 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; 1226 struct mbuf *mb, *newmb; 1227 size_t 
copylen, off = 0;

	KASSERT(len > 0, ("len == 0"));

	if (flags & ICL_NOCOPY) {
		/*
		 * Zero-copy: attach the caller's buffer as external
		 * storage; icl_soft_mbuf_done() drops the PDU reference
		 * when the mbuf is freed.
		 */
		newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA);
		if (newmb == NULL) {
			ICL_WARN("failed to allocate mbuf");
			return (ENOMEM);
		}

		newmb->m_flags |= M_RDONLY;
		m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt,
		    icl_soft_mbuf_done, isp, NULL);
		newmb->m_len = len;
	} else {
		/* Copy: fill a freshly allocated chain from "addr". */
		newmb = m_getm2(NULL, len, flags, MT_DATA, 0);
		if (newmb == NULL) {
			ICL_WARN("failed to allocate mbuf for %zd bytes", len);
			return (ENOMEM);
		}

		for (mb = newmb; mb != NULL; mb = mb->m_next) {
			copylen = min(M_TRAILINGSPACE(mb), len - off);
			memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
			mb->m_len = copylen;
			off += copylen;
		}
		KASSERT(off == len, ("%s: off != len", __func__));
	}

	/* Link the new chain onto the PDU's data. */
	if (request->ip_data_mbuf == NULL) {
		request->ip_data_mbuf = newmb;
		request->ip_data_len = len;
	} else {
		m_cat(request->ip_data_mbuf, newmb);
		request->ip_data_len += len;
	}

	return (0);
}

/*
 * Copy "len" bytes of received PDU data (starting at pdu_off) into an
 * unmapped bio at bio_off, going through the direct map page by page.
 */
void
icl_soft_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip,
    size_t pdu_off, struct bio *bp, size_t bio_off, size_t len)
{
	vm_offset_t vaddr;
	size_t page_offset, todo;
	int i __unused;

	/*
	 * Translate bio_off into a starting page index and intra-page
	 * offset, honouring bio_ma_offset of the first page.
	 */
	MPASS(bp->bio_flags & BIO_UNMAPPED);
	if (bio_off < PAGE_SIZE - bp->bio_ma_offset) {
		page_offset = bp->bio_ma_offset + bio_off;
		i = 0;
	} else {
		bio_off -= PAGE_SIZE - bp->bio_ma_offset;
		for (i = 1; bio_off >= PAGE_SIZE; i++)
			bio_off -= PAGE_SIZE;
		page_offset = bio_off;
	}

	while (len > 0) {
		todo = MIN(len, PAGE_SIZE - page_offset);

		vaddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(bp->bio_ma[i]));
		m_copydata(ip->ip_data_mbuf, pdu_off, todo, (char *)vaddr +
		    page_offset);

		page_offset = 0;
		pdu_off += todo;
		len -= todo;
		i++;
	}
}

/*
 * Copy "len" bytes of received PDU data, starting at "off", into a
 * plain kernel buffer.
 */
void
icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{

	m_copydata(ip->ip_data_mbuf, off, len, addr);
}

/* Queue a PDU for transmission with no completion callback. */
static void
icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{

	icl_soft_conn_pdu_queue_cb(ic, ip, NULL);
}

/*
 * Queue a PDU for transmission; "cb", if non-NULL, is invoked when the
 * PDU is done.  Must be called with the connection lock held.
 */
static void
icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip,
    icl_pdu_cb cb)
{
	struct icl_soft_conn *isc = (struct icl_soft_conn *)ic;
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;

	ICL_CONN_LOCK_ASSERT(ic);
	/* Reference held until the send path completes the PDU. */
	isp->ref_cnt++;
	isp->cb = cb;

	/* Connection already going down: complete the PDU with ENOTCONN. */
	if (ic->ic_disconnecting || ic->ic_socket == NULL) {
		ICL_DEBUG("icl_pdu_queue on closed connection");
		icl_soft_pdu_done(ip, ENOTCONN);
		return;
	}

	if (!STAILQ_EMPTY(&isc->to_send)) {
		STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next);
		/*
		 * If the queue is not empty, someone else had already
		 * signaled the send thread; no need to do that again,
		 * just return.
		 */
		return;
	}

	/* Queue was empty: insert and wake the send thread. */
	STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next);
	cv_signal(&isc->send_cv);
}

/*
 * Allocate and initialize a new software ICL connection object.
 * "lock" is supplied by the caller and becomes ic_lock.
 */
static struct icl_conn *
icl_soft_new_conn(const char *name, struct mtx *lock)
{
	struct icl_soft_conn *isc;
	struct icl_conn *ic;

	/* Counted so that the module refuses to unload while in use. */
	refcount_acquire(&icl_ncons);

	isc = (struct icl_soft_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT,
	    M_WAITOK | M_ZERO);

	STAILQ_INIT(&isc->to_send);
	cv_init(&isc->send_cv, "icl_tx");
	cv_init(&isc->receive_cv, "icl_rx");

	ic = &isc->ic;
	ic->ic_lock = lock;
#ifdef DIAGNOSTIC
	refcount_init(&ic->ic_outstanding_pdus, 0);
#endif
	ic->ic_name = name;
	ic->ic_offload = "None";
	ic->ic_unmapped = PMAP_HAS_DMAP;

	return (ic);
}

/* Tear down a connection created by icl_soft_new_conn(). */
void
icl_soft_conn_free(struct icl_conn *ic)
{
	struct icl_soft_conn *isc = (struct icl_soft_conn *)ic;

#ifdef DIAGNOSTIC
	KASSERT(ic->ic_outstanding_pdus == 0,
	    ("destroying session with %d outstanding PDUs",
	    ic->ic_outstanding_pdus));
#endif
	cv_destroy(&isc->send_cv);
	cv_destroy(&isc->receive_cv);
	kobj_delete((struct kobj *)isc, M_ICL_SOFT);
	refcount_release(&icl_ncons);
}

/*
 * Prepare a freshly handed-off socket for Full Feature phase: size the
 * socket buffers, disable Nagle, register upcalls and start the send
 * and receive threads.
 */
static int
icl_conn_start(struct icl_conn *ic)
{
	struct icl_soft_conn *isc = (struct icl_soft_conn *)ic;
	size_t minspace;
	struct sockopt opt;
	int error, one = 1;

	ICL_CONN_LOCK(ic);

	/*
	 * XXX: Ugly hack.
	 */
	if (ic->ic_socket == NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EINVAL);
	}

	/* Receive state machine starts by expecting a Basic Header Segment. */
	isc->receive_state = ICL_CONN_STATE_BHS;
	isc->receive_len = sizeof(struct iscsi_bhs);
	ic->ic_disconnecting = false;

	ICL_CONN_UNLOCK(ic);

	/*
	 * For sendspace, this is required because the current code cannot
	 * send a PDU in pieces; thus, the minimum buffer size is equal
	 * to the maximum PDU size.
	 * "+4" is to account for possible padding.
	 */
	minspace = sizeof(struct iscsi_bhs) +
	    ic->ic_max_send_data_segment_length +
	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
	if (sendspace < minspace) {
		ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
		    minspace);
		sendspace = minspace;
	}
	/* Same calculation for the receive direction. */
	minspace = sizeof(struct iscsi_bhs) +
	    ic->ic_max_recv_data_segment_length +
	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
	if (recvspace < minspace) {
		ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
		    minspace);
		recvspace = minspace;
	}

	error = soreserve(ic->ic_socket, sendspace, recvspace);
	if (error != 0) {
		ICL_WARN("soreserve failed with error %d", error);
		icl_soft_conn_close(ic);
		return (error);
	}
	ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
	ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;

	/*
	 * Disable Nagle.
	 */
	bzero(&opt, sizeof(opt));
	opt.sopt_dir = SOPT_SET;
	opt.sopt_level = IPPROTO_TCP;
	opt.sopt_name = TCP_NODELAY;
	opt.sopt_val = &one;
	opt.sopt_valsize = sizeof(one);
	error = sosetopt(ic->ic_socket, &opt);
	if (error != 0) {
		ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
		icl_soft_conn_close(ic);
		return (error);
	}

	/*
	 * Register socket upcall, to get notified about incoming PDUs
	 * and free space to send outgoing ones.
	 */
	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
	soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, isc);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
	soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, isc);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);

	/*
	 * Start threads.
	 */
	ICL_CONN_LOCK(ic);
	/*
	 * Mark both threads running before they are created, so that
	 * icl_soft_conn_close() will wait for them even if it runs
	 * before the threads have actually started.
	 */
	isc->send_running = isc->receive_running = true;
	ICL_CONN_UNLOCK(ic);
	error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		ICL_CONN_LOCK(ic);
		/* Neither thread exists yet; clear both flags. */
		isc->send_running = isc->receive_running = false;
		cv_signal(&isc->send_cv);
		ICL_CONN_UNLOCK(ic);
		icl_soft_conn_close(ic);
		return (error);
	}
	error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		ICL_CONN_LOCK(ic);
		/* Send thread is already running; only clear receive. */
		isc->receive_running = false;
		cv_signal(&isc->send_cv);
		ICL_CONN_UNLOCK(ic);
		icl_soft_conn_close(ic);
		return (error);
	}

	return (0);
}

/*
 * Take over a connected socket from userland (by file descriptor) and
 * start the connection.  With ICL_KERNEL_PROXY, fd == 0 means the
 * socket was already established in the kernel via the proxy path.
 */
int
icl_soft_conn_handoff(struct icl_conn *ic, int fd)
{
	struct file *fp;
	struct socket *so;
	cap_rights_t rights;
	int error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

#ifdef ICL_KERNEL_PROXY
	/*
	 * We're transitioning to Full Feature phase, and we don't
	 * really care.
	 */
	if (fd == 0) {
		ICL_CONN_LOCK(ic);
		if (ic->ic_socket == NULL) {
			ICL_CONN_UNLOCK(ic);
			ICL_WARN("proxy handoff without connect");
			return (EINVAL);
		}
		ICL_CONN_UNLOCK(ic);
		return (0);
	}
#endif

	/*
	 * Steal the socket from userland.
	 */
	error = fget(curthread, fd,
	    cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		fdrop(fp, curthread);
		return (EINVAL);
	}
	so = fp->f_data;
	/* Only TCP-like stream sockets can carry iSCSI. */
	if (so->so_type != SOCK_STREAM) {
		fdrop(fp, curthread);
		return (EINVAL);
	}

	ICL_CONN_LOCK(ic);

	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		fdrop(fp, curthread);
		return (EBUSY);
	}

	/*
	 * Detach the socket from the file so that closing the fd in
	 * userland cannot close the connection out from under us.
	 */
	ic->ic_socket = fp->f_data;
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fdrop(fp, curthread);
	ICL_CONN_UNLOCK(ic);

	error = icl_conn_start(ic);

	return (error);
}

/*
 * Shut the connection down: stop both worker threads, deregister the
 * socket upcalls, close the socket and fail any queued PDUs.
 */
void
icl_soft_conn_close(struct icl_conn *ic)
{
	struct icl_soft_conn *isc = (struct icl_soft_conn *)ic;
	struct icl_pdu *pdu;
	struct socket *so;

	/*
	 * Wake up the threads, so they can properly terminate.
	 * Receive thread sleeps on so->so_rcv lock, send on ic->ic_lock.
	 */
	ICL_CONN_LOCK(ic);
	if (!ic->ic_disconnecting) {
		so = ic->ic_socket;
		if (so)
			SOCKBUF_LOCK(&so->so_rcv);
		ic->ic_disconnecting = true;
		if (so)
			SOCKBUF_UNLOCK(&so->so_rcv);
	}
	/* Wait for both threads to notice ic_disconnecting and exit. */
	while (isc->receive_running || isc->send_running) {
		cv_signal(&isc->receive_cv);
		cv_signal(&isc->send_cv);
		cv_wait(&isc->send_cv, ic->ic_lock);
	}

	/* Some other thread could close the connection same time. */
	so = ic->ic_socket;
	if (so == NULL) {
		ICL_CONN_UNLOCK(ic);
		return;
	}
	ic->ic_socket = NULL;

	/*
	 * Deregister socket upcalls.
	 */
	ICL_CONN_UNLOCK(ic);
	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_upcall != NULL)
		soupcall_clear(so, SO_SND);
	SOCKBUF_UNLOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (so->so_rcv.sb_upcall != NULL)
		soupcall_clear(so, SO_RCV);
	SOCKBUF_UNLOCK(&so->so_rcv);
	soclose(so);
	ICL_CONN_LOCK(ic);

	/* Discard any PDU the receive thread left half-assembled. */
	if (isc->receive_pdu != NULL) {
		//ICL_DEBUG("freeing partially received PDU");
		icl_soft_conn_pdu_free(ic, isc->receive_pdu);
		isc->receive_pdu = NULL;
	}

	/*
	 * Remove any outstanding PDUs from the send queue.
	 */
	while (!STAILQ_EMPTY(&isc->to_send)) {
		pdu = STAILQ_FIRST(&isc->to_send);
		STAILQ_REMOVE_HEAD(&isc->to_send, ip_next);
		icl_soft_pdu_done(pdu, ENOTCONN);
	}

	KASSERT(STAILQ_EMPTY(&isc->to_send),
	    ("destroying session with non-empty send queue"));
	ICL_CONN_UNLOCK(ic);
}

/*
 * Task setup hook; the software backend needs no per-task state, so
 * this is a no-op that reports success.
 */
int
icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
{

	return (0);
}

/* Task teardown hook; no-op for the software backend. */
void
icl_soft_conn_task_done(struct icl_conn *ic, void *prv)
{
}

/*
 * Transfer setup hook; the software backend needs no per-transfer
 * state, so this is a no-op that reports success.
 */
int
icl_soft_conn_transfer_setup(struct icl_conn *ic, struct icl_pdu *ip,
    union ctl_io *io, uint32_t *transfer_tag, void **prvp)
{

	return (0);
}

/* Transfer teardown hook; no-op for the software backend. */
void
icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv)
{
}

/*
 * Report this backend's negotiable limits, taken from the module's
 * tunables.
 */
static int
icl_soft_limits(struct icl_drv_limits *idl, int socket)
{

	idl->idl_max_recv_data_segment_length = max_data_segment_length;
	idl->idl_max_send_data_segment_length = max_data_segment_length;
	idl->idl_max_burst_length = max_burst_length;
	idl->idl_first_burst_length = first_burst_length;

	return (0);
}

#ifdef ICL_KERNEL_PROXY
int
icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct
sockaddr *from_sa, struct sockaddr *to_sa)
{

	/* Delegate to the generic kernel-proxy connect helper. */
	return (icl_soft_proxy_connect(ic, domain, socktype, protocol,
	    from_sa, to_sa));
}

/*
 * Hand off an already-connected kernel socket (proxy path) and start
 * the connection.
 */
int
icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so)
{
	int error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	if (so->so_type != SOCK_STREAM)
		return (EINVAL);

	ICL_CONN_LOCK(ic);
	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EBUSY);
	}
	ic->ic_socket = so;
	ICL_CONN_UNLOCK(ic);

	error = icl_conn_start(ic);

	return (error);
}
#endif /* ICL_KERNEL_PROXY */

/*
 * Module load handler: create the PDU zone and register this backend
 * with the ICL core.
 */
static int
icl_soft_load(void)
{
	int error;

	icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu",
	    sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	refcount_init(&icl_ncons, 0);

	/*
	 * The reason we call this "none" is that to the user,
	 * it's known as "offload driver"; "offload driver: soft"
	 * doesn't make much sense.
	 */
	error = icl_register("none", false, 0,
	    icl_soft_limits, icl_soft_new_conn);
	KASSERT(error == 0, ("failed to register"));

#if defined(ICL_KERNEL_PROXY) && 0
	/*
	 * Debugging aid for kernel proxy functionality.
	 */
	error = icl_register("proxytest", true, 0,
	    icl_soft_limits, icl_soft_new_conn);
	KASSERT(error == 0, ("failed to register"));
#endif

	return (error);
}

/*
 * Module unload handler: refuse while any connection exists, then
 * unregister and destroy the PDU zone.
 */
static int
icl_soft_unload(void)
{

	if (icl_ncons != 0)
		return (EBUSY);

	icl_unregister("none", false);
#if defined(ICL_KERNEL_PROXY) && 0
	icl_unregister("proxytest", true);
#endif

	uma_zdestroy(icl_soft_pdu_zone);

	return (0);
}

/* module(9) event dispatcher. */
static int
icl_soft_modevent(module_t mod, int what, void *arg)
{

	switch (what) {
	case MOD_LOAD:
		return (icl_soft_load());
	case MOD_UNLOAD:
		return (icl_soft_unload());
	default:
		return (EINVAL);
	}
}

moduledata_t icl_soft_data = {
	"icl_soft",
	icl_soft_modevent,
	0
};

DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_soft, icl, 1, 1, 1);
MODULE_VERSION(icl_soft, 1);