/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2012 The FreeBSD Foundation
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Software implementation of iSCSI Common Layer kobj(9) interface.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/gsb_crc32.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/uio.h>
#include <vm/uma.h>
#include <vm/vm_page.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <icl_conn_if.h>

/*
 * States of the receive state machine driven by icl_conn_receive_pdu();
 * each one names the part of the PDU that is consumed next.
 */
#define	ICL_CONN_STATE_BHS		1
#define	ICL_CONN_STATE_AHS		2
#define	ICL_CONN_STATE_HEADER_DIGEST	3
#define	ICL_CONN_STATE_DATA		4
#define	ICL_CONN_STATE_DATA_DIGEST	5

/*
 * Per-connection state of the software backend.  The generic icl_conn
 * comes first, so the code casts between the two structure pointers.
 */
struct icl_soft_conn {
	struct icl_conn	 ic;

	/* soft specific stuff goes here. */

	/* PDUs queued for transmission; drained by icl_send_thread(). */
	STAILQ_HEAD(, icl_pdu) to_send;
	/*
	 * Waited on by icl_send_thread(); signalled by icl_soupcall_send()
	 * and by both worker threads when they exit.
	 */
	struct cv	 send_cv;
	/* Waited on by icl_receive_thread(); see icl_soupcall_receive(). */
	struct cv	 receive_cv;
	/* PDU currently being assembled by the receive state machine. */
	struct icl_pdu	*receive_pdu;
	/* Bytes that must be buffered before the next state can run. */
	size_t		 receive_len;
	/* One of ICL_CONN_STATE_*. */
	int		 receive_state;
	/* Cleared by icl_receive_thread() just before it exits. */
	bool		 receive_running;
	/*
	 * Set by icl_soupcall_send(); cleared by icl_conn_send_pdus() when
	 * it samples the available send buffer space.
	 */
	bool		 check_send_space;
	/* Cleared by icl_send_thread() just before it exits. */
	bool		 send_running;
};

/*
 * Per-PDU state of the software backend.  The generic icl_pdu comes first,
 * so the code casts between the two structure pointers.
 */
struct icl_soft_pdu {
	struct icl_pdu	 ip;

	/* soft specific stuff goes here. */

	/*
	 * References held on the PDU; the completion callback runs and
	 * the PDU is released when the last one is dropped.
	 */
	u_int		 ref_cnt;
	/* Optional completion callback; NULL means none. */
	icl_pdu_cb	 cb;
	/* Error handed to the callback; set by icl_soft_pdu_done(). */
	int		 error;
};

/* Tunables; the node hangs off _kern_icl under the name "soft". */
SYSCTL_NODE(_kern_icl, OID_AUTO, soft, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Software iSCSI");
static int coalesce = 1;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &coalesce, 0, "Try to coalesce PDUs before sending");
static int partial_receive_len = 256 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
    &partial_receive_len, 0, "Minimum read size for partially received "
    "data segment");
static int max_data_segment_length = 256 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_data_segment_length, CTLFLAG_RWTUN,
    &max_data_segment_length, 0, "Maximum data segment length");
static int first_burst_length = 1024 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, first_burst_length, CTLFLAG_RWTUN,
    &first_burst_length, 0, "First burst length");
static int max_burst_length = 1024 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_burst_length, CTLFLAG_RWTUN,
    &max_burst_length, 0, "Maximum burst length");
static int sendspace = 1536 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1536 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");

static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend");
/* UMA zone that struct icl_soft_pdu objects are allocated from. */
static uma_zone_t icl_soft_pdu_zone;

/*
 * NOTE(review): presumably the number of live connections; it is not
 * referenced in the part of the file reviewed here -- confirm.
 */
static volatile u_int	icl_ncons;

/* Queue head type for the send thread's private PDU queue. */
STAILQ_HEAD(icl_pdu_stailq, icl_pdu);

/* Forward declarations of the icl_conn kobj method implementations. */
static icl_conn_new_pdu_t	icl_soft_conn_new_pdu;
static icl_conn_pdu_free_t	icl_soft_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t
				    icl_soft_conn_pdu_data_segment_length;
static icl_conn_pdu_append_bio_t	icl_soft_conn_pdu_append_bio;
static icl_conn_pdu_append_data_t	icl_soft_conn_pdu_append_data;
static icl_conn_pdu_get_bio_t	icl_soft_conn_pdu_get_bio;
static icl_conn_pdu_get_data_t	icl_soft_conn_pdu_get_data;
static icl_conn_pdu_queue_t	icl_soft_conn_pdu_queue;
static icl_conn_pdu_queue_cb_t	icl_soft_conn_pdu_queue_cb;
static icl_conn_handoff_t	icl_soft_conn_handoff;
static icl_conn_free_t		icl_soft_conn_free;
static icl_conn_close_t		icl_soft_conn_close;
static icl_conn_task_setup_t	icl_soft_conn_task_setup;
static icl_conn_task_done_t	icl_soft_conn_task_done;
static icl_conn_transfer_setup_t	icl_soft_conn_transfer_setup;
static icl_conn_transfer_done_t	icl_soft_conn_transfer_done;
#ifdef ICL_KERNEL_PROXY
static icl_conn_connect_t	icl_soft_conn_connect;
#endif

/* kobj method table binding the icl_conn interface to this backend. */
static kobj_method_t icl_soft_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length,
	    icl_soft_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_bio, icl_soft_conn_pdu_append_bio),
	KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_get_bio, icl_soft_conn_pdu_get_bio),
	KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
	KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
	KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb),
	KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
	KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
	KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
	KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done),
	KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup),
	KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done),
#ifdef ICL_KERNEL_PROXY
	KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect),
#endif
	{ 0, 0 }
};

DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_soft_conn));

static
void 178 icl_conn_fail(struct icl_conn *ic) 179 { 180 if (ic->ic_socket == NULL) 181 return; 182 183 /* 184 * XXX 185 */ 186 ic->ic_socket->so_error = EDOOFUS; 187 (ic->ic_error)(ic); 188 } 189 190 static void 191 icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) 192 { 193 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 194 195 KASSERT(isp->ref_cnt == 0, ("freeing active PDU")); 196 m_freem(ip->ip_bhs_mbuf); 197 m_freem(ip->ip_ahs_mbuf); 198 m_freem(ip->ip_data_mbuf); 199 uma_zfree(icl_soft_pdu_zone, isp); 200 #ifdef DIAGNOSTIC 201 refcount_release(&ic->ic_outstanding_pdus); 202 #endif 203 } 204 205 static void 206 icl_soft_pdu_call_cb(struct icl_pdu *ip) 207 { 208 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 209 210 if (isp->cb != NULL) 211 isp->cb(ip, isp->error); 212 #ifdef DIAGNOSTIC 213 refcount_release(&ip->ip_conn->ic_outstanding_pdus); 214 #endif 215 uma_zfree(icl_soft_pdu_zone, isp); 216 } 217 218 static void 219 icl_soft_pdu_done(struct icl_pdu *ip, int error) 220 { 221 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 222 223 if (error != 0) 224 isp->error = error; 225 226 m_freem(ip->ip_bhs_mbuf); 227 ip->ip_bhs_mbuf = NULL; 228 m_freem(ip->ip_ahs_mbuf); 229 ip->ip_ahs_mbuf = NULL; 230 m_freem(ip->ip_data_mbuf); 231 ip->ip_data_mbuf = NULL; 232 233 if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) 234 icl_soft_pdu_call_cb(ip); 235 } 236 237 static void 238 icl_soft_mbuf_done(struct mbuf *mb) 239 { 240 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1; 241 242 icl_soft_pdu_call_cb(&isp->ip); 243 } 244 245 /* 246 * Allocate icl_pdu with empty BHS to fill up by the caller. 
247 */ 248 struct icl_pdu * 249 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) 250 { 251 struct icl_soft_pdu *isp; 252 struct icl_pdu *ip; 253 254 #ifdef DIAGNOSTIC 255 refcount_acquire(&ic->ic_outstanding_pdus); 256 #endif 257 isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO); 258 if (isp == NULL) { 259 ICL_WARN("failed to allocate soft PDU"); 260 #ifdef DIAGNOSTIC 261 refcount_release(&ic->ic_outstanding_pdus); 262 #endif 263 return (NULL); 264 } 265 ip = &isp->ip; 266 ip->ip_conn = ic; 267 268 CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN); 269 ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA); 270 if (ip->ip_bhs_mbuf == NULL) { 271 ICL_WARN("failed to allocate BHS mbuf"); 272 icl_soft_conn_pdu_free(ic, ip); 273 return (NULL); 274 } 275 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); 276 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); 277 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); 278 279 return (ip); 280 } 281 282 static int 283 icl_pdu_ahs_length(const struct icl_pdu *request) 284 { 285 286 return (request->ip_bhs->bhs_total_ahs_len * 4); 287 } 288 289 static size_t 290 icl_pdu_data_segment_length(const struct icl_pdu *request) 291 { 292 uint32_t len = 0; 293 294 len += request->ip_bhs->bhs_data_segment_len[0]; 295 len <<= 8; 296 len += request->ip_bhs->bhs_data_segment_len[1]; 297 len <<= 8; 298 len += request->ip_bhs->bhs_data_segment_len[2]; 299 300 return (len); 301 } 302 303 size_t 304 icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, 305 const struct icl_pdu *request) 306 { 307 308 return (icl_pdu_data_segment_length(request)); 309 } 310 311 static void 312 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) 313 { 314 315 response->ip_bhs->bhs_data_segment_len[2] = len; 316 response->ip_bhs->bhs_data_segment_len[1] = len >> 8; 317 response->ip_bhs->bhs_data_segment_len[0] = len >> 16; 318 } 319 320 static size_t 321 icl_pdu_padding(const struct icl_pdu *ip) 322 { 323 324 if ((ip->ip_data_len % 4) != 0) 325 
return (4 - (ip->ip_data_len % 4)); 326 327 return (0); 328 } 329 330 static size_t 331 icl_pdu_size(const struct icl_pdu *response) 332 { 333 size_t len; 334 335 KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); 336 337 len = sizeof(struct iscsi_bhs) + response->ip_data_len + 338 icl_pdu_padding(response); 339 if (response->ip_conn->ic_header_crc32c) 340 len += ISCSI_HEADER_DIGEST_SIZE; 341 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) 342 len += ISCSI_DATA_DIGEST_SIZE; 343 344 return (len); 345 } 346 347 static void 348 icl_soft_receive_buf(struct mbuf **r, size_t *rs, void *buf, size_t s) 349 { 350 351 m_copydata(*r, 0, s, buf); 352 m_adj(*r, s); 353 while ((*r) != NULL && (*r)->m_len == 0) 354 *r = m_free(*r); 355 *rs -= s; 356 } 357 358 static void 359 icl_pdu_receive_ahs(struct icl_pdu *request, struct mbuf **r, size_t *rs) 360 { 361 362 request->ip_ahs_len = icl_pdu_ahs_length(request); 363 if (request->ip_ahs_len == 0) 364 return; 365 366 request->ip_ahs_mbuf = *r; 367 *r = m_split(request->ip_ahs_mbuf, request->ip_ahs_len, M_WAITOK); 368 *rs -= request->ip_ahs_len; 369 } 370 371 static int 372 mbuf_crc32c_helper(void *arg, void *data, u_int len) 373 { 374 uint32_t *digestp = arg; 375 376 *digestp = calculate_crc32c(*digestp, data, len); 377 return (0); 378 } 379 380 static uint32_t 381 icl_mbuf_to_crc32c(struct mbuf *m0, size_t len) 382 { 383 uint32_t digest = 0xffffffff; 384 385 m_apply(m0, 0, len, mbuf_crc32c_helper, &digest); 386 digest = digest ^ 0xffffffff; 387 388 return (digest); 389 } 390 391 static int 392 icl_pdu_check_header_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs) 393 { 394 uint32_t received_digest, valid_digest; 395 396 if (request->ip_conn->ic_header_crc32c == false) 397 return (0); 398 399 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); 400 icl_soft_receive_buf(r, rs, &received_digest, ISCSI_HEADER_DIGEST_SIZE); 401 402 /* Temporary attach AHS to BHS to calculate header 
digest. */ 403 request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf; 404 valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf, ISCSI_BHS_SIZE); 405 request->ip_bhs_mbuf->m_next = NULL; 406 if (received_digest != valid_digest) { 407 ICL_WARN("header digest check failed; got 0x%x, " 408 "should be 0x%x", received_digest, valid_digest); 409 return (-1); 410 } 411 412 return (0); 413 } 414 415 /* 416 * Return the number of bytes that should be waiting in the receive socket 417 * before icl_pdu_receive_data_segment() gets called. 418 */ 419 static size_t 420 icl_pdu_data_segment_receive_len(const struct icl_pdu *request) 421 { 422 size_t len; 423 424 len = icl_pdu_data_segment_length(request); 425 if (len == 0) 426 return (0); 427 428 /* 429 * Account for the parts of data segment already read from 430 * the socket buffer. 431 */ 432 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 433 len -= request->ip_data_len; 434 435 /* 436 * Don't always wait for the full data segment to be delivered 437 * to the socket; this might badly affect performance due to 438 * TCP window scaling. 439 */ 440 if (len > partial_receive_len) { 441 #if 0 442 ICL_DEBUG("need %zd bytes of data, limiting to %zd", 443 len, partial_receive_len)); 444 #endif 445 len = partial_receive_len; 446 447 return (len); 448 } 449 450 /* 451 * Account for padding. Note that due to the way code is written, 452 * the icl_pdu_receive_data_segment() must always receive padding 453 * along with the last part of data segment, because it would be 454 * impossible to tell whether we've already received the full data 455 * segment including padding, or without it. 
456 */ 457 if ((len % 4) != 0) 458 len += 4 - (len % 4); 459 460 #if 0 461 ICL_DEBUG("need %zd bytes of data", len)); 462 #endif 463 464 return (len); 465 } 466 467 static int 468 icl_pdu_receive_data_segment(struct icl_pdu *request, struct mbuf **r, 469 size_t *rs, bool *more_neededp) 470 { 471 struct icl_soft_conn *isc; 472 size_t len, padding = 0; 473 struct mbuf *m; 474 475 isc = (struct icl_soft_conn *)request->ip_conn; 476 477 *more_neededp = false; 478 isc->receive_len = 0; 479 480 len = icl_pdu_data_segment_length(request); 481 if (len == 0) 482 return (0); 483 484 if ((len % 4) != 0) 485 padding = 4 - (len % 4); 486 487 /* 488 * Account for already received parts of data segment. 489 */ 490 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 491 len -= request->ip_data_len; 492 493 if (len + padding > *rs) { 494 /* 495 * Not enough data in the socket buffer. Receive as much 496 * as we can. Don't receive padding, since, obviously, it's 497 * not the end of data segment yet. 498 */ 499 #if 0 500 ICL_DEBUG("limited from %zd to %zd", 501 len + padding, *rs - padding)); 502 #endif 503 len = *rs - padding; 504 *more_neededp = true; 505 padding = 0; 506 } 507 508 /* 509 * Must not try to receive padding without at least one byte 510 * of actual data segment. 
511 */ 512 if (len > 0) { 513 m = *r; 514 *r = m_split(m, len + padding, M_WAITOK); 515 *rs -= len + padding; 516 517 if (request->ip_data_mbuf == NULL) 518 request->ip_data_mbuf = m; 519 else 520 m_cat(request->ip_data_mbuf, m); 521 522 request->ip_data_len += len; 523 } else 524 ICL_DEBUG("len 0"); 525 526 if (*more_neededp) 527 isc->receive_len = icl_pdu_data_segment_receive_len(request); 528 529 return (0); 530 } 531 532 static int 533 icl_pdu_check_data_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs) 534 { 535 uint32_t received_digest, valid_digest; 536 537 if (request->ip_conn->ic_data_crc32c == false) 538 return (0); 539 540 if (request->ip_data_len == 0) 541 return (0); 542 543 CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); 544 icl_soft_receive_buf(r, rs, &received_digest, ISCSI_DATA_DIGEST_SIZE); 545 546 /* 547 * Note that ip_data_mbuf also contains padding; since digest 548 * calculation is supposed to include that, we iterate over 549 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. 550 */ 551 valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf, 552 roundup2(request->ip_data_len, 4)); 553 if (received_digest != valid_digest) { 554 ICL_WARN("data digest check failed; got 0x%x, " 555 "should be 0x%x", received_digest, valid_digest); 556 return (-1); 557 } 558 559 return (0); 560 } 561 562 /* 563 * Somewhat contrary to the name, this attempts to receive only one 564 * "part" of PDU at a time; call it repeatedly until it returns non-NULL. 
565 */ 566 static struct icl_pdu * 567 icl_conn_receive_pdu(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs) 568 { 569 struct icl_conn *ic = &isc->ic; 570 struct icl_pdu *request; 571 size_t len; 572 int error = 0; 573 bool more_needed; 574 575 if (isc->receive_state == ICL_CONN_STATE_BHS) { 576 KASSERT(isc->receive_pdu == NULL, 577 ("isc->receive_pdu != NULL")); 578 request = icl_soft_conn_new_pdu(ic, M_NOWAIT); 579 if (request == NULL) { 580 ICL_DEBUG("failed to allocate PDU; " 581 "dropping connection"); 582 icl_conn_fail(ic); 583 return (NULL); 584 } 585 isc->receive_pdu = request; 586 } else { 587 KASSERT(isc->receive_pdu != NULL, 588 ("isc->receive_pdu == NULL")); 589 request = isc->receive_pdu; 590 } 591 592 switch (isc->receive_state) { 593 case ICL_CONN_STATE_BHS: 594 //ICL_DEBUG("receiving BHS"); 595 icl_soft_receive_buf(r, rs, request->ip_bhs, 596 sizeof(struct iscsi_bhs)); 597 598 /* 599 * We don't enforce any limit for AHS length; 600 * its length is stored in 8 bit field. 
601 */ 602 603 len = icl_pdu_data_segment_length(request); 604 if (len > ic->ic_max_recv_data_segment_length) { 605 ICL_WARN("received data segment " 606 "length %zd is larger than negotiated; " 607 "dropping connection", len); 608 error = EINVAL; 609 break; 610 } 611 612 isc->receive_state = ICL_CONN_STATE_AHS; 613 isc->receive_len = icl_pdu_ahs_length(request); 614 break; 615 616 case ICL_CONN_STATE_AHS: 617 //ICL_DEBUG("receiving AHS"); 618 icl_pdu_receive_ahs(request, r, rs); 619 isc->receive_state = ICL_CONN_STATE_HEADER_DIGEST; 620 if (ic->ic_header_crc32c == false) 621 isc->receive_len = 0; 622 else 623 isc->receive_len = ISCSI_HEADER_DIGEST_SIZE; 624 break; 625 626 case ICL_CONN_STATE_HEADER_DIGEST: 627 //ICL_DEBUG("receiving header digest"); 628 error = icl_pdu_check_header_digest(request, r, rs); 629 if (error != 0) { 630 ICL_DEBUG("header digest failed; " 631 "dropping connection"); 632 break; 633 } 634 635 isc->receive_state = ICL_CONN_STATE_DATA; 636 isc->receive_len = icl_pdu_data_segment_receive_len(request); 637 break; 638 639 case ICL_CONN_STATE_DATA: 640 //ICL_DEBUG("receiving data segment"); 641 error = icl_pdu_receive_data_segment(request, r, rs, 642 &more_needed); 643 if (error != 0) { 644 ICL_DEBUG("failed to receive data segment;" 645 "dropping connection"); 646 break; 647 } 648 649 if (more_needed) 650 break; 651 652 isc->receive_state = ICL_CONN_STATE_DATA_DIGEST; 653 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) 654 isc->receive_len = 0; 655 else 656 isc->receive_len = ISCSI_DATA_DIGEST_SIZE; 657 break; 658 659 case ICL_CONN_STATE_DATA_DIGEST: 660 //ICL_DEBUG("receiving data digest"); 661 error = icl_pdu_check_data_digest(request, r, rs); 662 if (error != 0) { 663 ICL_DEBUG("data digest failed; " 664 "dropping connection"); 665 break; 666 } 667 668 /* 669 * We've received complete PDU; reset the receive state machine 670 * and return the PDU. 
671 */ 672 isc->receive_state = ICL_CONN_STATE_BHS; 673 isc->receive_len = sizeof(struct iscsi_bhs); 674 isc->receive_pdu = NULL; 675 return (request); 676 677 default: 678 panic("invalid receive_state %d\n", isc->receive_state); 679 } 680 681 if (error != 0) { 682 /* 683 * Don't free the PDU; it's pointed to by isc->receive_pdu 684 * and will get freed in icl_soft_conn_close(). 685 */ 686 icl_conn_fail(ic); 687 } 688 689 return (NULL); 690 } 691 692 static void 693 icl_conn_receive_pdus(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs) 694 { 695 struct icl_conn *ic = &isc->ic; 696 struct icl_pdu *response; 697 698 for (;;) { 699 if (ic->ic_disconnecting) 700 return; 701 702 /* 703 * Loop until we have a complete PDU or there is not enough 704 * data in the socket buffer. 705 */ 706 if (*rs < isc->receive_len) { 707 #if 0 708 ICL_DEBUG("not enough data; have %zd, need %zd", 709 *rs, isc->receive_len); 710 #endif 711 return; 712 } 713 714 response = icl_conn_receive_pdu(isc, r, rs); 715 if (response == NULL) 716 continue; 717 718 if (response->ip_ahs_len > 0) { 719 ICL_WARN("received PDU with unsupported " 720 "AHS; opcode 0x%x; dropping connection", 721 response->ip_bhs->bhs_opcode); 722 icl_soft_conn_pdu_free(ic, response); 723 icl_conn_fail(ic); 724 return; 725 } 726 727 (ic->ic_receive)(response); 728 } 729 } 730 731 static void 732 icl_receive_thread(void *arg) 733 { 734 struct icl_soft_conn *isc = arg; 735 struct icl_conn *ic = &isc->ic; 736 size_t available, read = 0; 737 struct socket *so; 738 struct mbuf *m, *r = NULL; 739 struct uio uio; 740 int error, flags; 741 742 so = ic->ic_socket; 743 744 for (;;) { 745 SOCKBUF_LOCK(&so->so_rcv); 746 if (ic->ic_disconnecting) { 747 SOCKBUF_UNLOCK(&so->so_rcv); 748 break; 749 } 750 751 /* 752 * Set the low watermark, to be checked by 753 * soreadable() in icl_soupcall_receive() 754 * to avoid unnecessary wakeups until there 755 * is enough data received to read the PDU. 
756 */ 757 available = sbavail(&so->so_rcv); 758 if (read + available < isc->receive_len) { 759 so->so_rcv.sb_lowat = isc->receive_len - read; 760 cv_wait(&isc->receive_cv, SOCKBUF_MTX(&so->so_rcv)); 761 so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; 762 available = sbavail(&so->so_rcv); 763 } 764 SOCKBUF_UNLOCK(&so->so_rcv); 765 766 if (available == 0) { 767 if (so->so_error != 0) { 768 ICL_DEBUG("connection error %d; " 769 "dropping connection", so->so_error); 770 icl_conn_fail(ic); 771 break; 772 } 773 continue; 774 } 775 776 memset(&uio, 0, sizeof(uio)); 777 uio.uio_resid = available; 778 flags = MSG_DONTWAIT; 779 error = soreceive(so, NULL, &uio, &m, NULL, &flags); 780 if (error != 0) { 781 ICL_DEBUG("soreceive error %d", error); 782 break; 783 } 784 if (uio.uio_resid != 0) { 785 m_freem(m); 786 ICL_DEBUG("short read"); 787 break; 788 } 789 if (r) 790 m_cat(r, m); 791 else 792 r = m; 793 read += available; 794 795 icl_conn_receive_pdus(isc, &r, &read); 796 } 797 798 if (r) 799 m_freem(r); 800 801 ICL_CONN_LOCK(ic); 802 isc->receive_running = false; 803 cv_signal(&isc->send_cv); 804 ICL_CONN_UNLOCK(ic); 805 kthread_exit(); 806 } 807 808 static int 809 icl_soupcall_receive(struct socket *so, void *arg, int waitflag) 810 { 811 struct icl_soft_conn *isc; 812 813 if (!soreadable(so)) 814 return (SU_OK); 815 816 isc = arg; 817 cv_signal(&isc->receive_cv); 818 return (SU_OK); 819 } 820 821 static int 822 icl_pdu_finalize(struct icl_pdu *request) 823 { 824 size_t padding, pdu_len; 825 uint32_t digest, zero = 0; 826 int ok; 827 struct icl_conn *ic; 828 829 ic = request->ip_conn; 830 831 icl_pdu_set_data_segment_length(request, request->ip_data_len); 832 833 pdu_len = icl_pdu_size(request); 834 835 if (ic->ic_header_crc32c) { 836 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf, 837 ISCSI_BHS_SIZE); 838 ok = m_append(request->ip_bhs_mbuf, sizeof(digest), 839 (void *)&digest); 840 if (ok != 1) { 841 ICL_WARN("failed to append header digest"); 842 return (1); 843 } 844 } 
845 846 if (request->ip_data_len != 0) { 847 padding = icl_pdu_padding(request); 848 if (padding > 0) { 849 ok = m_append(request->ip_data_mbuf, padding, 850 (void *)&zero); 851 if (ok != 1) { 852 ICL_WARN("failed to append padding"); 853 return (1); 854 } 855 } 856 857 if (ic->ic_data_crc32c) { 858 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf, 859 roundup2(request->ip_data_len, 4)); 860 861 ok = m_append(request->ip_data_mbuf, sizeof(digest), 862 (void *)&digest); 863 if (ok != 1) { 864 ICL_WARN("failed to append data digest"); 865 return (1); 866 } 867 } 868 869 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); 870 request->ip_data_mbuf = NULL; 871 } 872 873 request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; 874 875 return (0); 876 } 877 878 static void 879 icl_conn_send_pdus(struct icl_soft_conn *isc, struct icl_pdu_stailq *queue) 880 { 881 struct icl_conn *ic = &isc->ic; 882 struct icl_pdu *request, *request2; 883 struct mbuf *m; 884 struct socket *so; 885 long available, size, size2; 886 #ifdef DEBUG_COALESCED 887 int coalesced; 888 #endif 889 int error; 890 891 ICL_CONN_LOCK_ASSERT_NOT(ic); 892 893 so = ic->ic_socket; 894 895 SOCKBUF_LOCK(&so->so_snd); 896 /* 897 * Check how much space do we have for transmit. We can't just 898 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, 899 * as it always frees the mbuf chain passed to it, even in case 900 * of error. 901 */ 902 available = sbspace(&so->so_snd); 903 isc->check_send_space = false; 904 905 /* 906 * Notify the socket upcall that we don't need wakeups 907 * for the time being. 908 */ 909 so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; 910 SOCKBUF_UNLOCK(&so->so_snd); 911 912 while (!STAILQ_EMPTY(queue)) { 913 request = STAILQ_FIRST(queue); 914 size = icl_pdu_size(request); 915 if (available < size) { 916 /* 917 * Set the low watermark, to be checked by 918 * sowriteable() in icl_soupcall_send() 919 * to avoid unnecessary wakeups until there 920 * is enough space for the PDU to fit. 
921 */ 922 SOCKBUF_LOCK(&so->so_snd); 923 available = sbspace(&so->so_snd); 924 if (available < size) { 925 #if 1 926 ICL_DEBUG("no space to send; " 927 "have %ld, need %ld", 928 available, size); 929 #endif 930 so->so_snd.sb_lowat = max(size, 931 so->so_snd.sb_hiwat / 8); 932 SOCKBUF_UNLOCK(&so->so_snd); 933 return; 934 } 935 SOCKBUF_UNLOCK(&so->so_snd); 936 } 937 STAILQ_REMOVE_HEAD(queue, ip_next); 938 error = icl_pdu_finalize(request); 939 if (error != 0) { 940 ICL_DEBUG("failed to finalize PDU; " 941 "dropping connection"); 942 icl_soft_pdu_done(request, EIO); 943 icl_conn_fail(ic); 944 return; 945 } 946 if (coalesce) { 947 m = request->ip_bhs_mbuf; 948 for ( 949 #ifdef DEBUG_COALESCED 950 coalesced = 1 951 #endif 952 ; ; 953 #ifdef DEBUG_COALESCED 954 coalesced++ 955 #endif 956 ) { 957 request2 = STAILQ_FIRST(queue); 958 if (request2 == NULL) 959 break; 960 size2 = icl_pdu_size(request2); 961 if (available < size + size2) 962 break; 963 STAILQ_REMOVE_HEAD(queue, ip_next); 964 error = icl_pdu_finalize(request2); 965 if (error != 0) { 966 ICL_DEBUG("failed to finalize PDU; " 967 "dropping connection"); 968 icl_soft_pdu_done(request, EIO); 969 icl_soft_pdu_done(request2, EIO); 970 icl_conn_fail(ic); 971 return; 972 } 973 while (m->m_next) 974 m = m->m_next; 975 m_cat(m, request2->ip_bhs_mbuf); 976 request2->ip_bhs_mbuf = NULL; 977 request->ip_bhs_mbuf->m_pkthdr.len += size2; 978 size += size2; 979 icl_soft_pdu_done(request2, 0); 980 } 981 #ifdef DEBUG_COALESCED 982 if (coalesced > 1) { 983 ICL_DEBUG("coalesced %d PDUs into %ld bytes", 984 coalesced, size); 985 } 986 #endif 987 } 988 available -= size; 989 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, 990 NULL, MSG_DONTWAIT, curthread); 991 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. 
*/ 992 if (error != 0) { 993 ICL_DEBUG("failed to send PDU, error %d; " 994 "dropping connection", error); 995 icl_soft_pdu_done(request, error); 996 icl_conn_fail(ic); 997 return; 998 } 999 icl_soft_pdu_done(request, 0); 1000 } 1001 } 1002 1003 static void 1004 icl_send_thread(void *arg) 1005 { 1006 struct icl_soft_conn *isc; 1007 struct icl_conn *ic; 1008 struct icl_pdu_stailq queue; 1009 1010 isc = arg; 1011 ic = &isc->ic; 1012 1013 STAILQ_INIT(&queue); 1014 1015 ICL_CONN_LOCK(ic); 1016 for (;;) { 1017 for (;;) { 1018 /* 1019 * Populate the local queue from the main one. 1020 * This way the icl_conn_send_pdus() can go through 1021 * all the queued PDUs without holding any locks. 1022 */ 1023 if (STAILQ_EMPTY(&queue) || isc->check_send_space) 1024 STAILQ_CONCAT(&queue, &isc->to_send); 1025 1026 ICL_CONN_UNLOCK(ic); 1027 icl_conn_send_pdus(isc, &queue); 1028 ICL_CONN_LOCK(ic); 1029 1030 /* 1031 * The icl_soupcall_send() was called since the last 1032 * call to sbspace(); go around; 1033 */ 1034 if (isc->check_send_space) 1035 continue; 1036 1037 /* 1038 * Local queue is empty, but we still have PDUs 1039 * in the main one; go around. 1040 */ 1041 if (STAILQ_EMPTY(&queue) && 1042 !STAILQ_EMPTY(&isc->to_send)) 1043 continue; 1044 1045 /* 1046 * There might be some stuff in the local queue, 1047 * which didn't get sent due to not having enough send 1048 * space. Wait for socket upcall. 1049 */ 1050 break; 1051 } 1052 1053 if (ic->ic_disconnecting) { 1054 //ICL_DEBUG("terminating"); 1055 break; 1056 } 1057 1058 cv_wait(&isc->send_cv, ic->ic_lock); 1059 } 1060 1061 /* 1062 * We're exiting; move PDUs back to the main queue, so they can 1063 * get freed properly. At this point ordering doesn't matter. 
1064 */ 1065 STAILQ_CONCAT(&isc->to_send, &queue); 1066 1067 isc->send_running = false; 1068 cv_signal(&isc->send_cv); 1069 ICL_CONN_UNLOCK(ic); 1070 kthread_exit(); 1071 } 1072 1073 static int 1074 icl_soupcall_send(struct socket *so, void *arg, int waitflag) 1075 { 1076 struct icl_soft_conn *isc; 1077 struct icl_conn *ic; 1078 1079 if (!sowriteable(so)) 1080 return (SU_OK); 1081 1082 isc = arg; 1083 ic = &isc->ic; 1084 1085 ICL_CONN_LOCK(ic); 1086 isc->check_send_space = true; 1087 ICL_CONN_UNLOCK(ic); 1088 1089 cv_signal(&isc->send_cv); 1090 1091 return (SU_OK); 1092 } 1093 1094 static void 1095 icl_soft_free_mext_pg(struct mbuf *m) 1096 { 1097 struct icl_soft_pdu *isp; 1098 1099 M_ASSERTEXTPG(m); 1100 1101 /* 1102 * Nothing to do for the pages; they are owned by the PDU / 1103 * I/O request. 1104 */ 1105 1106 /* Drop reference on the PDU. */ 1107 isp = m->m_ext.ext_arg1; 1108 if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) 1109 icl_soft_pdu_call_cb(&isp->ip); 1110 } 1111 1112 static int 1113 icl_soft_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *request, 1114 struct bio *bp, size_t offset, size_t len, int flags) 1115 { 1116 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; 1117 struct mbuf *m, *m_tail; 1118 vm_offset_t vaddr; 1119 size_t mtodo, page_offset, todo; 1120 int i; 1121 1122 KASSERT(len > 0, ("len == 0")); 1123 1124 m_tail = request->ip_data_mbuf; 1125 if (m_tail != NULL) 1126 for (; m_tail->m_next != NULL; m_tail = m_tail->m_next) 1127 ; 1128 1129 MPASS(bp->bio_flags & BIO_UNMAPPED); 1130 if (offset < PAGE_SIZE - bp->bio_ma_offset) { 1131 page_offset = bp->bio_ma_offset + offset; 1132 i = 0; 1133 } else { 1134 offset -= PAGE_SIZE - bp->bio_ma_offset; 1135 for (i = 1; offset >= PAGE_SIZE; i++) 1136 offset -= PAGE_SIZE; 1137 page_offset = offset; 1138 } 1139 1140 if (flags & ICL_NOCOPY) { 1141 m = NULL; 1142 while (len > 0) { 1143 if (m == NULL) { 1144 m = mb_alloc_ext_pgs(flags & ~ICL_NOCOPY, 1145 icl_soft_free_mext_pg); 1146 if 
(__predict_false(m == NULL)) 1147 return (ENOMEM); 1148 atomic_add_int(&isp->ref_cnt, 1); 1149 m->m_ext.ext_arg1 = isp; 1150 m->m_epg_1st_off = page_offset; 1151 } 1152 1153 todo = MIN(len, PAGE_SIZE - page_offset); 1154 1155 m->m_epg_pa[m->m_epg_npgs] = 1156 VM_PAGE_TO_PHYS(bp->bio_ma[i]); 1157 m->m_epg_npgs++; 1158 m->m_epg_last_len = todo; 1159 m->m_len += todo; 1160 m->m_ext.ext_size += PAGE_SIZE; 1161 MBUF_EXT_PGS_ASSERT_SANITY(m); 1162 1163 if (m->m_epg_npgs == MBUF_PEXT_MAX_PGS) { 1164 if (m_tail != NULL) 1165 m_tail->m_next = m; 1166 else 1167 request->ip_data_mbuf = m; 1168 m_tail = m; 1169 request->ip_data_len += m->m_len; 1170 m = NULL; 1171 } 1172 1173 page_offset = 0; 1174 len -= todo; 1175 i++; 1176 } 1177 1178 if (m != NULL) { 1179 if (m_tail != NULL) 1180 m_tail->m_next = m; 1181 else 1182 request->ip_data_mbuf = m; 1183 request->ip_data_len += m->m_len; 1184 } 1185 return (0); 1186 } 1187 1188 m = m_getm2(NULL, len, flags, MT_DATA, 0); 1189 if (__predict_false(m == NULL)) 1190 return (ENOMEM); 1191 1192 if (request->ip_data_mbuf == NULL) { 1193 request->ip_data_mbuf = m; 1194 request->ip_data_len = len; 1195 } else { 1196 m_tail->m_next = m; 1197 request->ip_data_len += len; 1198 } 1199 1200 while (len > 0) { 1201 todo = MIN(len, PAGE_SIZE - page_offset); 1202 vaddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(bp->bio_ma[i])); 1203 1204 do { 1205 mtodo = min(todo, M_SIZE(m) - m->m_len); 1206 memcpy(mtod(m, char *) + m->m_len, (char *)vaddr + 1207 page_offset, mtodo); 1208 m->m_len += mtodo; 1209 if (m->m_len == M_SIZE(m)) 1210 m = m->m_next; 1211 page_offset += mtodo; 1212 todo -= mtodo; 1213 } while (todo > 0); 1214 1215 page_offset = 0; 1216 len -= todo; 1217 i++; 1218 } 1219 1220 return (0); 1221 } 1222 1223 static int 1224 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, 1225 const void *addr, size_t len, int flags) 1226 { 1227 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; 1228 struct mbuf *mb, *newmb; 1229 size_t 
copylen, off = 0; 1230 1231 KASSERT(len > 0, ("len == 0")); 1232 1233 if (flags & ICL_NOCOPY) { 1234 newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA); 1235 if (newmb == NULL) { 1236 ICL_WARN("failed to allocate mbuf"); 1237 return (ENOMEM); 1238 } 1239 1240 newmb->m_flags |= M_RDONLY; 1241 m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt, 1242 icl_soft_mbuf_done, isp, NULL); 1243 newmb->m_len = len; 1244 } else { 1245 newmb = m_getm2(NULL, len, flags, MT_DATA, 0); 1246 if (newmb == NULL) { 1247 ICL_WARN("failed to allocate mbuf for %zd bytes", len); 1248 return (ENOMEM); 1249 } 1250 1251 for (mb = newmb; mb != NULL; mb = mb->m_next) { 1252 copylen = min(M_TRAILINGSPACE(mb), len - off); 1253 memcpy(mtod(mb, char *), (const char *)addr + off, copylen); 1254 mb->m_len = copylen; 1255 off += copylen; 1256 } 1257 KASSERT(off == len, ("%s: off != len", __func__)); 1258 } 1259 1260 if (request->ip_data_mbuf == NULL) { 1261 request->ip_data_mbuf = newmb; 1262 request->ip_data_len = len; 1263 } else { 1264 m_cat(request->ip_data_mbuf, newmb); 1265 request->ip_data_len += len; 1266 } 1267 1268 return (0); 1269 } 1270 1271 void 1272 icl_soft_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip, 1273 size_t pdu_off, struct bio *bp, size_t bio_off, size_t len) 1274 { 1275 vm_offset_t vaddr; 1276 size_t page_offset, todo; 1277 int i __unused; 1278 1279 MPASS(bp->bio_flags & BIO_UNMAPPED); 1280 if (bio_off < PAGE_SIZE - bp->bio_ma_offset) { 1281 page_offset = bp->bio_ma_offset + bio_off; 1282 i = 0; 1283 } else { 1284 bio_off -= PAGE_SIZE - bp->bio_ma_offset; 1285 for (i = 1; bio_off >= PAGE_SIZE; i++) 1286 bio_off -= PAGE_SIZE; 1287 page_offset = bio_off; 1288 } 1289 1290 while (len > 0) { 1291 todo = MIN(len, PAGE_SIZE - page_offset); 1292 1293 vaddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(bp->bio_ma[i])); 1294 m_copydata(ip->ip_data_mbuf, pdu_off, todo, (char *)vaddr + 1295 page_offset); 1296 1297 page_offset = 0; 1298 pdu_off += todo; 1299 len -= todo; 1300 i++; 1301 } 
1302 } 1303 1304 void 1305 icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, 1306 size_t off, void *addr, size_t len) 1307 { 1308 1309 m_copydata(ip->ip_data_mbuf, off, len, addr); 1310 } 1311 1312 static void 1313 icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) 1314 { 1315 1316 icl_soft_conn_pdu_queue_cb(ic, ip, NULL); 1317 } 1318 1319 static void 1320 icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip, 1321 icl_pdu_cb cb) 1322 { 1323 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1324 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 1325 1326 ICL_CONN_LOCK_ASSERT(ic); 1327 isp->ref_cnt++; 1328 isp->cb = cb; 1329 1330 if (ic->ic_disconnecting || ic->ic_socket == NULL) { 1331 ICL_DEBUG("icl_pdu_queue on closed connection"); 1332 icl_soft_pdu_done(ip, ENOTCONN); 1333 return; 1334 } 1335 1336 if (!STAILQ_EMPTY(&isc->to_send)) { 1337 STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next); 1338 /* 1339 * If the queue is not empty, someone else had already 1340 * signaled the send thread; no need to do that again, 1341 * just return. 
1342 */ 1343 return; 1344 } 1345 1346 STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next); 1347 cv_signal(&isc->send_cv); 1348 } 1349 1350 static struct icl_conn * 1351 icl_soft_new_conn(const char *name, struct mtx *lock) 1352 { 1353 struct icl_soft_conn *isc; 1354 struct icl_conn *ic; 1355 1356 refcount_acquire(&icl_ncons); 1357 1358 isc = (struct icl_soft_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, 1359 M_WAITOK | M_ZERO); 1360 1361 STAILQ_INIT(&isc->to_send); 1362 cv_init(&isc->send_cv, "icl_tx"); 1363 cv_init(&isc->receive_cv, "icl_rx"); 1364 1365 ic = &isc->ic; 1366 ic->ic_lock = lock; 1367 #ifdef DIAGNOSTIC 1368 refcount_init(&ic->ic_outstanding_pdus, 0); 1369 #endif 1370 ic->ic_name = name; 1371 ic->ic_offload = "None"; 1372 ic->ic_unmapped = PMAP_HAS_DMAP; 1373 1374 return (ic); 1375 } 1376 1377 void 1378 icl_soft_conn_free(struct icl_conn *ic) 1379 { 1380 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1381 1382 #ifdef DIAGNOSTIC 1383 KASSERT(ic->ic_outstanding_pdus == 0, 1384 ("destroying session with %d outstanding PDUs", 1385 ic->ic_outstanding_pdus)); 1386 #endif 1387 cv_destroy(&isc->send_cv); 1388 cv_destroy(&isc->receive_cv); 1389 kobj_delete((struct kobj *)isc, M_ICL_SOFT); 1390 refcount_release(&icl_ncons); 1391 } 1392 1393 static int 1394 icl_conn_start(struct icl_conn *ic) 1395 { 1396 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1397 size_t minspace; 1398 struct sockopt opt; 1399 int error, one = 1; 1400 1401 ICL_CONN_LOCK(ic); 1402 1403 /* 1404 * XXX: Ugly hack. 1405 */ 1406 if (ic->ic_socket == NULL) { 1407 ICL_CONN_UNLOCK(ic); 1408 return (EINVAL); 1409 } 1410 1411 isc->receive_state = ICL_CONN_STATE_BHS; 1412 isc->receive_len = sizeof(struct iscsi_bhs); 1413 ic->ic_disconnecting = false; 1414 1415 ICL_CONN_UNLOCK(ic); 1416 1417 /* 1418 * For sendspace, this is required because the current code cannot 1419 * send a PDU in pieces; thus, the minimum buffer size is equal 1420 * to the maximum PDU size. 
"+4" is to account for possible padding. 1421 */ 1422 minspace = sizeof(struct iscsi_bhs) + 1423 ic->ic_max_send_data_segment_length + 1424 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1425 if (sendspace < minspace) { 1426 ICL_WARN("kern.icl.sendspace too low; must be at least %zd", 1427 minspace); 1428 sendspace = minspace; 1429 } 1430 minspace = sizeof(struct iscsi_bhs) + 1431 ic->ic_max_recv_data_segment_length + 1432 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1433 if (recvspace < minspace) { 1434 ICL_WARN("kern.icl.recvspace too low; must be at least %zd", 1435 minspace); 1436 recvspace = minspace; 1437 } 1438 1439 error = soreserve(ic->ic_socket, sendspace, recvspace); 1440 if (error != 0) { 1441 ICL_WARN("soreserve failed with error %d", error); 1442 icl_soft_conn_close(ic); 1443 return (error); 1444 } 1445 ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; 1446 ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; 1447 1448 /* 1449 * Disable Nagle. 1450 */ 1451 bzero(&opt, sizeof(opt)); 1452 opt.sopt_dir = SOPT_SET; 1453 opt.sopt_level = IPPROTO_TCP; 1454 opt.sopt_name = TCP_NODELAY; 1455 opt.sopt_val = &one; 1456 opt.sopt_valsize = sizeof(one); 1457 error = sosetopt(ic->ic_socket, &opt); 1458 if (error != 0) { 1459 ICL_WARN("disabling TCP_NODELAY failed with error %d", error); 1460 icl_soft_conn_close(ic); 1461 return (error); 1462 } 1463 1464 /* 1465 * Register socket upcall, to get notified about incoming PDUs 1466 * and free space to send outgoing ones. 1467 */ 1468 SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1469 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, isc); 1470 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1471 SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1472 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, isc); 1473 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1474 1475 /* 1476 * Start threads. 
1477 */ 1478 ICL_CONN_LOCK(ic); 1479 isc->send_running = isc->receive_running = true; 1480 ICL_CONN_UNLOCK(ic); 1481 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", 1482 ic->ic_name); 1483 if (error != 0) { 1484 ICL_WARN("kthread_add(9) failed with error %d", error); 1485 ICL_CONN_LOCK(ic); 1486 isc->send_running = isc->receive_running = false; 1487 cv_signal(&isc->send_cv); 1488 ICL_CONN_UNLOCK(ic); 1489 icl_soft_conn_close(ic); 1490 return (error); 1491 } 1492 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", 1493 ic->ic_name); 1494 if (error != 0) { 1495 ICL_WARN("kthread_add(9) failed with error %d", error); 1496 ICL_CONN_LOCK(ic); 1497 isc->receive_running = false; 1498 cv_signal(&isc->send_cv); 1499 ICL_CONN_UNLOCK(ic); 1500 icl_soft_conn_close(ic); 1501 return (error); 1502 } 1503 1504 return (0); 1505 } 1506 1507 int 1508 icl_soft_conn_handoff(struct icl_conn *ic, int fd) 1509 { 1510 struct file *fp; 1511 struct socket *so; 1512 cap_rights_t rights; 1513 int error; 1514 1515 ICL_CONN_LOCK_ASSERT_NOT(ic); 1516 1517 #ifdef ICL_KERNEL_PROXY 1518 /* 1519 * We're transitioning to Full Feature phase, and we don't 1520 * really care. 1521 */ 1522 if (fd == 0) { 1523 ICL_CONN_LOCK(ic); 1524 if (ic->ic_socket == NULL) { 1525 ICL_CONN_UNLOCK(ic); 1526 ICL_WARN("proxy handoff without connect"); 1527 return (EINVAL); 1528 } 1529 ICL_CONN_UNLOCK(ic); 1530 return (0); 1531 } 1532 #endif 1533 1534 /* 1535 * Steal the socket from userland. 
1536 */ 1537 error = fget(curthread, fd, 1538 cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp); 1539 if (error != 0) 1540 return (error); 1541 if (fp->f_type != DTYPE_SOCKET) { 1542 fdrop(fp, curthread); 1543 return (EINVAL); 1544 } 1545 so = fp->f_data; 1546 if (so->so_type != SOCK_STREAM) { 1547 fdrop(fp, curthread); 1548 return (EINVAL); 1549 } 1550 1551 ICL_CONN_LOCK(ic); 1552 1553 if (ic->ic_socket != NULL) { 1554 ICL_CONN_UNLOCK(ic); 1555 fdrop(fp, curthread); 1556 return (EBUSY); 1557 } 1558 1559 ic->ic_socket = fp->f_data; 1560 fp->f_ops = &badfileops; 1561 fp->f_data = NULL; 1562 fdrop(fp, curthread); 1563 ICL_CONN_UNLOCK(ic); 1564 1565 error = icl_conn_start(ic); 1566 1567 return (error); 1568 } 1569 1570 void 1571 icl_soft_conn_close(struct icl_conn *ic) 1572 { 1573 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1574 struct icl_pdu *pdu; 1575 struct socket *so; 1576 1577 /* 1578 * Wake up the threads, so they can properly terminate. 1579 * Receive thread sleeps on so->so_rcv lock, send on ic->ic_lock. 1580 */ 1581 ICL_CONN_LOCK(ic); 1582 if (!ic->ic_disconnecting) { 1583 so = ic->ic_socket; 1584 if (so) 1585 SOCKBUF_LOCK(&so->so_rcv); 1586 ic->ic_disconnecting = true; 1587 if (so) 1588 SOCKBUF_UNLOCK(&so->so_rcv); 1589 } 1590 while (isc->receive_running || isc->send_running) { 1591 cv_signal(&isc->receive_cv); 1592 cv_signal(&isc->send_cv); 1593 cv_wait(&isc->send_cv, ic->ic_lock); 1594 } 1595 1596 /* Some other thread could close the connection same time. */ 1597 so = ic->ic_socket; 1598 if (so == NULL) { 1599 ICL_CONN_UNLOCK(ic); 1600 return; 1601 } 1602 ic->ic_socket = NULL; 1603 1604 /* 1605 * Deregister socket upcalls. 
1606 */ 1607 ICL_CONN_UNLOCK(ic); 1608 SOCKBUF_LOCK(&so->so_snd); 1609 if (so->so_snd.sb_upcall != NULL) 1610 soupcall_clear(so, SO_SND); 1611 SOCKBUF_UNLOCK(&so->so_snd); 1612 SOCKBUF_LOCK(&so->so_rcv); 1613 if (so->so_rcv.sb_upcall != NULL) 1614 soupcall_clear(so, SO_RCV); 1615 SOCKBUF_UNLOCK(&so->so_rcv); 1616 soclose(so); 1617 ICL_CONN_LOCK(ic); 1618 1619 if (isc->receive_pdu != NULL) { 1620 //ICL_DEBUG("freeing partially received PDU"); 1621 icl_soft_conn_pdu_free(ic, isc->receive_pdu); 1622 isc->receive_pdu = NULL; 1623 } 1624 1625 /* 1626 * Remove any outstanding PDUs from the send queue. 1627 */ 1628 while (!STAILQ_EMPTY(&isc->to_send)) { 1629 pdu = STAILQ_FIRST(&isc->to_send); 1630 STAILQ_REMOVE_HEAD(&isc->to_send, ip_next); 1631 icl_soft_pdu_done(pdu, ENOTCONN); 1632 } 1633 1634 KASSERT(STAILQ_EMPTY(&isc->to_send), 1635 ("destroying session with non-empty send queue")); 1636 ICL_CONN_UNLOCK(ic); 1637 } 1638 1639 int 1640 icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, 1641 struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) 1642 { 1643 1644 return (0); 1645 } 1646 1647 void 1648 icl_soft_conn_task_done(struct icl_conn *ic, void *prv) 1649 { 1650 } 1651 1652 int 1653 icl_soft_conn_transfer_setup(struct icl_conn *ic, struct icl_pdu *ip, 1654 union ctl_io *io, uint32_t *transfer_tag, void **prvp) 1655 { 1656 1657 return (0); 1658 } 1659 1660 void 1661 icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv) 1662 { 1663 } 1664 1665 static int 1666 icl_soft_limits(struct icl_drv_limits *idl, int socket) 1667 { 1668 1669 idl->idl_max_recv_data_segment_length = max_data_segment_length; 1670 idl->idl_max_send_data_segment_length = max_data_segment_length; 1671 idl->idl_max_burst_length = max_burst_length; 1672 idl->idl_first_burst_length = first_burst_length; 1673 1674 return (0); 1675 } 1676 1677 #ifdef ICL_KERNEL_PROXY 1678 int 1679 icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype, 1680 int protocol, struct 
sockaddr *from_sa, struct sockaddr *to_sa) 1681 { 1682 1683 return (icl_soft_proxy_connect(ic, domain, socktype, protocol, 1684 from_sa, to_sa)); 1685 } 1686 1687 int 1688 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so) 1689 { 1690 int error; 1691 1692 ICL_CONN_LOCK_ASSERT_NOT(ic); 1693 1694 if (so->so_type != SOCK_STREAM) 1695 return (EINVAL); 1696 1697 ICL_CONN_LOCK(ic); 1698 if (ic->ic_socket != NULL) { 1699 ICL_CONN_UNLOCK(ic); 1700 return (EBUSY); 1701 } 1702 ic->ic_socket = so; 1703 ICL_CONN_UNLOCK(ic); 1704 1705 error = icl_conn_start(ic); 1706 1707 return (error); 1708 } 1709 #endif /* ICL_KERNEL_PROXY */ 1710 1711 static int 1712 icl_soft_load(void) 1713 { 1714 int error; 1715 1716 icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu", 1717 sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL, 1718 UMA_ALIGN_PTR, 0); 1719 refcount_init(&icl_ncons, 0); 1720 1721 /* 1722 * The reason we call this "none" is that to the user, 1723 * it's known as "offload driver"; "offload driver: soft" 1724 * doesn't make much sense. 1725 */ 1726 error = icl_register("none", false, 0, 1727 icl_soft_limits, icl_soft_new_conn); 1728 KASSERT(error == 0, ("failed to register")); 1729 1730 #if defined(ICL_KERNEL_PROXY) && 0 1731 /* 1732 * Debugging aid for kernel proxy functionality. 
1733 */ 1734 error = icl_register("proxytest", true, 0, 1735 icl_soft_limits, icl_soft_new_conn); 1736 KASSERT(error == 0, ("failed to register")); 1737 #endif 1738 1739 return (error); 1740 } 1741 1742 static int 1743 icl_soft_unload(void) 1744 { 1745 1746 if (icl_ncons != 0) 1747 return (EBUSY); 1748 1749 icl_unregister("none", false); 1750 #if defined(ICL_KERNEL_PROXY) && 0 1751 icl_unregister("proxytest", true); 1752 #endif 1753 1754 uma_zdestroy(icl_soft_pdu_zone); 1755 1756 return (0); 1757 } 1758 1759 static int 1760 icl_soft_modevent(module_t mod, int what, void *arg) 1761 { 1762 1763 switch (what) { 1764 case MOD_LOAD: 1765 return (icl_soft_load()); 1766 case MOD_UNLOAD: 1767 return (icl_soft_unload()); 1768 default: 1769 return (EINVAL); 1770 } 1771 } 1772 1773 moduledata_t icl_soft_data = { 1774 "icl_soft", 1775 icl_soft_modevent, 1776 0 1777 }; 1778 1779 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); 1780 MODULE_DEPEND(icl_soft, icl, 1, 1, 1); 1781 MODULE_VERSION(icl_soft, 1); 1782