1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 /* 33 * Software implementation of iSCSI Common Layer kobj(9) interface. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/param.h> 40 #include <sys/bio.h> 41 #include <sys/capsicum.h> 42 #include <sys/condvar.h> 43 #include <sys/conf.h> 44 #include <sys/gsb_crc32.h> 45 #include <sys/file.h> 46 #include <sys/kernel.h> 47 #include <sys/kthread.h> 48 #include <sys/lock.h> 49 #include <sys/mbuf.h> 50 #include <sys/mutex.h> 51 #include <sys/module.h> 52 #include <sys/protosw.h> 53 #include <sys/socket.h> 54 #include <sys/socketvar.h> 55 #include <sys/sysctl.h> 56 #include <sys/systm.h> 57 #include <sys/sx.h> 58 #include <sys/uio.h> 59 #include <vm/uma.h> 60 #include <vm/vm_page.h> 61 #include <netinet/in.h> 62 #include <netinet/tcp.h> 63 64 #include <dev/iscsi/icl.h> 65 #include <dev/iscsi/iscsi_proto.h> 66 #include <icl_conn_if.h> 67 68 #define ICL_CONN_STATE_BHS 1 69 #define ICL_CONN_STATE_AHS 2 70 #define ICL_CONN_STATE_HEADER_DIGEST 3 71 #define ICL_CONN_STATE_DATA 4 72 #define ICL_CONN_STATE_DATA_DIGEST 5 73 74 struct icl_soft_conn { 75 struct icl_conn ic; 76 77 /* soft specific stuff goes here. */ 78 STAILQ_HEAD(, icl_pdu) to_send; 79 struct cv send_cv; 80 struct cv receive_cv; 81 struct icl_pdu *receive_pdu; 82 size_t receive_len; 83 int receive_state; 84 bool receive_running; 85 bool check_send_space; 86 bool send_running; 87 }; 88 89 struct icl_soft_pdu { 90 struct icl_pdu ip; 91 92 /* soft specific stuff goes here. */ 93 u_int ref_cnt; 94 icl_pdu_cb cb; 95 int error; 96 }; 97 98 SYSCTL_NODE(_kern_icl, OID_AUTO, soft, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 99 "Software iSCSI"); 100 static int coalesce = 1; 101 SYSCTL_INT(_kern_icl_soft, OID_AUTO, coalesce, CTLFLAG_RWTUN, 102 &coalesce, 0, "Try to coalesce PDUs before sending"); 103 static int partial_receive_len = 256 * 1024; 104 SYSCTL_INT(_kern_icl_soft, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, 105 &partial_receive_len, 0, "Minimum read size for partially received " 106 "data segment"); 107 static int max_data_segment_length = 256 * 1024; 108 SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_data_segment_length, CTLFLAG_RWTUN, 109 &max_data_segment_length, 0, "Maximum data segment length"); 110 static int first_burst_length = 1024 * 1024; 111 SYSCTL_INT(_kern_icl_soft, OID_AUTO, first_burst_length, CTLFLAG_RWTUN, 112 &first_burst_length, 0, "First burst length"); 113 static int max_burst_length = 1024 * 1024; 114 SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_burst_length, CTLFLAG_RWTUN, 115 &max_burst_length, 0, "Maximum burst length"); 116 static int sendspace = 1536 * 1024; 117 SYSCTL_INT(_kern_icl_soft, OID_AUTO, sendspace, CTLFLAG_RWTUN, 118 &sendspace, 0, "Default send socket buffer size"); 119 static int recvspace = 1536 * 1024; 120 SYSCTL_INT(_kern_icl_soft, OID_AUTO, recvspace, CTLFLAG_RWTUN, 121 &recvspace, 0, "Default receive socket buffer size"); 122 123 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend"); 124 static uma_zone_t icl_soft_pdu_zone; 125 126 static volatile u_int icl_ncons; 127 128 STAILQ_HEAD(icl_pdu_stailq, icl_pdu); 129 130 static icl_conn_new_pdu_t icl_soft_conn_new_pdu; 131 static icl_conn_pdu_free_t icl_soft_conn_pdu_free; 132 static icl_conn_pdu_data_segment_length_t 133 icl_soft_conn_pdu_data_segment_length; 134 static icl_conn_pdu_append_bio_t icl_soft_conn_pdu_append_bio; 135 static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data; 136 static icl_conn_pdu_get_bio_t icl_soft_conn_pdu_get_bio; 137 static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data; 138 static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue; 139 static icl_conn_pdu_queue_cb_t icl_soft_conn_pdu_queue_cb; 140 static icl_conn_handoff_t icl_soft_conn_handoff; 141 static icl_conn_free_t icl_soft_conn_free; 142 static icl_conn_close_t icl_soft_conn_close; 143 static icl_conn_task_setup_t icl_soft_conn_task_setup; 144 static icl_conn_task_done_t icl_soft_conn_task_done; 145 static icl_conn_transfer_setup_t icl_soft_conn_transfer_setup; 146 static icl_conn_transfer_done_t icl_soft_conn_transfer_done; 147 #ifdef ICL_KERNEL_PROXY 148 static icl_conn_connect_t icl_soft_conn_connect; 149 #endif 150 151 static kobj_method_t icl_soft_methods[] = { 152 KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu), 153 KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free), 154 KOBJMETHOD(icl_conn_pdu_data_segment_length, 155 icl_soft_conn_pdu_data_segment_length), 156 KOBJMETHOD(icl_conn_pdu_append_bio, icl_soft_conn_pdu_append_bio), 157 KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data), 158 KOBJMETHOD(icl_conn_pdu_get_bio, icl_soft_conn_pdu_get_bio), 159 KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data), 160 KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue), 161 KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb), 162 KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff), 163 KOBJMETHOD(icl_conn_free, icl_soft_conn_free), 164 KOBJMETHOD(icl_conn_close, icl_soft_conn_close), 165 KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup), 166 KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done), 167 KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup), 168 KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done), 169 #ifdef ICL_KERNEL_PROXY 170 KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect), 171 #endif 172 { 0, 0 } 173 }; 174 175 DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_soft_conn)); 176 177 static void 178 icl_conn_fail(struct icl_conn *ic) 179 { 180 if (ic->ic_socket == NULL) 181 return; 182 183 /* 184 * XXX 185 */ 186 ic->ic_socket->so_error = EDOOFUS; 187 (ic->ic_error)(ic); 188 } 189 190 static void 191 icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) 192 { 193 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 194 195 KASSERT(isp->ref_cnt == 0, ("freeing active PDU")); 196 m_freem(ip->ip_bhs_mbuf); 197 m_freem(ip->ip_ahs_mbuf); 198 m_freem(ip->ip_data_mbuf); 199 uma_zfree(icl_soft_pdu_zone, isp); 200 #ifdef DIAGNOSTIC 201 refcount_release(&ic->ic_outstanding_pdus); 202 #endif 203 } 204 205 static void 206 icl_soft_pdu_call_cb(struct icl_pdu *ip) 207 { 208 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 209 210 if (isp->cb != NULL) 211 isp->cb(ip, isp->error); 212 #ifdef DIAGNOSTIC 213 refcount_release(&ip->ip_conn->ic_outstanding_pdus); 214 #endif 215 uma_zfree(icl_soft_pdu_zone, isp); 216 } 217 218 static void 219 icl_soft_pdu_done(struct icl_pdu *ip, int error) 220 { 221 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 222 223 if (error != 0) 224 isp->error = error; 225 226 m_freem(ip->ip_bhs_mbuf); 227 ip->ip_bhs_mbuf = NULL; 228 m_freem(ip->ip_ahs_mbuf); 229 ip->ip_ahs_mbuf = NULL; 230 m_freem(ip->ip_data_mbuf); 231 ip->ip_data_mbuf = NULL; 232 233 if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) 234 icl_soft_pdu_call_cb(ip); 235 } 236 237 static void 238 icl_soft_mbuf_done(struct mbuf *mb) 239 { 240 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1; 241 242 icl_soft_pdu_call_cb(&isp->ip); 243 } 244 245 /* 246 * Allocate icl_pdu with empty BHS to fill up by the caller. 247 */ 248 struct icl_pdu * 249 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) 250 { 251 struct icl_soft_pdu *isp; 252 struct icl_pdu *ip; 253 254 #ifdef DIAGNOSTIC 255 refcount_acquire(&ic->ic_outstanding_pdus); 256 #endif 257 isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO); 258 if (isp == NULL) { 259 ICL_WARN("failed to allocate soft PDU"); 260 #ifdef DIAGNOSTIC 261 refcount_release(&ic->ic_outstanding_pdus); 262 #endif 263 return (NULL); 264 } 265 ip = &isp->ip; 266 ip->ip_conn = ic; 267 268 CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN); 269 ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA); 270 if (ip->ip_bhs_mbuf == NULL) { 271 ICL_WARN("failed to allocate BHS mbuf"); 272 icl_soft_conn_pdu_free(ic, ip); 273 return (NULL); 274 } 275 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); 276 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); 277 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); 278 279 return (ip); 280 } 281 282 static int 283 icl_pdu_ahs_length(const struct icl_pdu *request) 284 { 285 286 return (request->ip_bhs->bhs_total_ahs_len * 4); 287 } 288 289 static size_t 290 icl_pdu_data_segment_length(const struct icl_pdu *request) 291 { 292 uint32_t len = 0; 293 294 len += request->ip_bhs->bhs_data_segment_len[0]; 295 len <<= 8; 296 len += request->ip_bhs->bhs_data_segment_len[1]; 297 len <<= 8; 298 len += request->ip_bhs->bhs_data_segment_len[2]; 299 300 return (len); 301 } 302 303 size_t 304 icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, 305 const struct icl_pdu *request) 306 { 307 308 return (icl_pdu_data_segment_length(request)); 309 } 310 311 static void 312 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) 313 { 314 315 response->ip_bhs->bhs_data_segment_len[2] = len; 316 response->ip_bhs->bhs_data_segment_len[1] = len >> 8; 317 response->ip_bhs->bhs_data_segment_len[0] = len >> 16; 318 } 319 320 static size_t 321 icl_pdu_padding(const struct icl_pdu *ip) 322 { 323 324 if ((ip->ip_data_len % 4) != 0) 325 return (4 - (ip->ip_data_len % 4)); 326 327 return (0); 328 } 329 330 static size_t 331 icl_pdu_size(const struct icl_pdu *response) 332 { 333 size_t len; 334 335 KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); 336 337 len = sizeof(struct iscsi_bhs) + response->ip_data_len + 338 icl_pdu_padding(response); 339 if (response->ip_conn->ic_header_crc32c) 340 len += ISCSI_HEADER_DIGEST_SIZE; 341 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) 342 len += ISCSI_DATA_DIGEST_SIZE; 343 344 return (len); 345 } 346 347 static void 348 icl_soft_receive_buf(struct mbuf **r, size_t *rs, void *buf, size_t s) 349 { 350 351 m_copydata(*r, 0, s, buf); 352 m_adj(*r, s); 353 while ((*r) != NULL && (*r)->m_len == 0) 354 *r = m_free(*r); 355 *rs -= s; 356 } 357 358 static void 359 icl_pdu_receive_ahs(struct icl_pdu *request, struct mbuf **r, size_t *rs) 360 { 361 362 request->ip_ahs_len = icl_pdu_ahs_length(request); 363 if (request->ip_ahs_len == 0) 364 return; 365 366 request->ip_ahs_mbuf = *r; 367 *r = m_split(request->ip_ahs_mbuf, request->ip_ahs_len, M_WAITOK); 368 *rs -= request->ip_ahs_len; 369 } 370 371 static int 372 mbuf_crc32c_helper(void *arg, void *data, u_int len) 373 { 374 uint32_t *digestp = arg; 375 376 *digestp = calculate_crc32c(*digestp, data, len); 377 return (0); 378 } 379 380 static uint32_t 381 icl_mbuf_to_crc32c(struct mbuf *m0, size_t len) 382 { 383 uint32_t digest = 0xffffffff; 384 385 m_apply(m0, 0, len, mbuf_crc32c_helper, &digest); 386 digest = digest ^ 0xffffffff; 387 388 return (digest); 389 } 390 391 static int 392 icl_pdu_check_header_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs) 393 { 394 uint32_t received_digest, valid_digest; 395 396 if (request->ip_conn->ic_header_crc32c == false) 397 return (0); 398 399 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); 400 icl_soft_receive_buf(r, rs, &received_digest, ISCSI_HEADER_DIGEST_SIZE); 401 402 /* Temporary attach AHS to BHS to calculate header digest. */ 403 request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf; 404 valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf, ISCSI_BHS_SIZE); 405 request->ip_bhs_mbuf->m_next = NULL; 406 if (received_digest != valid_digest) { 407 ICL_WARN("header digest check failed; got 0x%x, " 408 "should be 0x%x", received_digest, valid_digest); 409 return (-1); 410 } 411 412 return (0); 413 } 414 415 /* 416 * Return the number of bytes that should be waiting in the receive socket 417 * before icl_pdu_receive_data_segment() gets called. 418 */ 419 static size_t 420 icl_pdu_data_segment_receive_len(const struct icl_pdu *request) 421 { 422 size_t len; 423 424 len = icl_pdu_data_segment_length(request); 425 if (len == 0) 426 return (0); 427 428 /* 429 * Account for the parts of data segment already read from 430 * the socket buffer. 431 */ 432 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 433 len -= request->ip_data_len; 434 435 /* 436 * Don't always wait for the full data segment to be delivered 437 * to the socket; this might badly affect performance due to 438 * TCP window scaling. 439 */ 440 if (len > partial_receive_len) { 441 #if 0 442 ICL_DEBUG("need %zd bytes of data, limiting to %zd", 443 len, partial_receive_len)); 444 #endif 445 len = partial_receive_len; 446 447 return (len); 448 } 449 450 /* 451 * Account for padding. Note that due to the way code is written, 452 * the icl_pdu_receive_data_segment() must always receive padding 453 * along with the last part of data segment, because it would be 454 * impossible to tell whether we've already received the full data 455 * segment including padding, or without it. 456 */ 457 if ((len % 4) != 0) 458 len += 4 - (len % 4); 459 460 #if 0 461 ICL_DEBUG("need %zd bytes of data", len)); 462 #endif 463 464 return (len); 465 } 466 467 static int 468 icl_pdu_receive_data_segment(struct icl_pdu *request, struct mbuf **r, 469 size_t *rs, bool *more_neededp) 470 { 471 struct icl_soft_conn *isc; 472 size_t len, padding = 0; 473 struct mbuf *m; 474 475 isc = (struct icl_soft_conn *)request->ip_conn; 476 477 *more_neededp = false; 478 isc->receive_len = 0; 479 480 len = icl_pdu_data_segment_length(request); 481 if (len == 0) 482 return (0); 483 484 if ((len % 4) != 0) 485 padding = 4 - (len % 4); 486 487 /* 488 * Account for already received parts of data segment. 489 */ 490 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 491 len -= request->ip_data_len; 492 493 if (len + padding > *rs) { 494 /* 495 * Not enough data in the socket buffer. Receive as much 496 * as we can. Don't receive padding, since, obviously, it's 497 * not the end of data segment yet. 498 */ 499 #if 0 500 ICL_DEBUG("limited from %zd to %zd", 501 len + padding, *rs - padding)); 502 #endif 503 len = *rs - padding; 504 *more_neededp = true; 505 padding = 0; 506 } 507 508 /* 509 * Must not try to receive padding without at least one byte 510 * of actual data segment. 511 */ 512 if (len > 0) { 513 m = *r; 514 *r = m_split(m, len + padding, M_WAITOK); 515 *rs -= len + padding; 516 517 if (request->ip_data_mbuf == NULL) 518 request->ip_data_mbuf = m; 519 else 520 m_cat(request->ip_data_mbuf, m); 521 522 request->ip_data_len += len; 523 } else 524 ICL_DEBUG("len 0"); 525 526 if (*more_neededp) 527 isc->receive_len = icl_pdu_data_segment_receive_len(request); 528 529 return (0); 530 } 531 532 static int 533 icl_pdu_check_data_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs) 534 { 535 uint32_t received_digest, valid_digest; 536 537 if (request->ip_conn->ic_data_crc32c == false) 538 return (0); 539 540 if (request->ip_data_len == 0) 541 return (0); 542 543 CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); 544 icl_soft_receive_buf(r, rs, &received_digest, ISCSI_DATA_DIGEST_SIZE); 545 546 /* 547 * Note that ip_data_mbuf also contains padding; since digest 548 * calculation is supposed to include that, we iterate over 549 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. 550 */ 551 valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf, 552 roundup2(request->ip_data_len, 4)); 553 if (received_digest != valid_digest) { 554 ICL_WARN("data digest check failed; got 0x%x, " 555 "should be 0x%x", received_digest, valid_digest); 556 return (-1); 557 } 558 559 return (0); 560 } 561 562 /* 563 * Somewhat contrary to the name, this attempts to receive only one 564 * "part" of PDU at a time; call it repeatedly until it returns non-NULL. 565 */ 566 static struct icl_pdu * 567 icl_conn_receive_pdu(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs) 568 { 569 struct icl_conn *ic = &isc->ic; 570 struct icl_pdu *request; 571 size_t len; 572 int error = 0; 573 bool more_needed; 574 575 if (isc->receive_state == ICL_CONN_STATE_BHS) { 576 KASSERT(isc->receive_pdu == NULL, 577 ("isc->receive_pdu != NULL")); 578 request = icl_soft_conn_new_pdu(ic, M_NOWAIT); 579 if (request == NULL) { 580 ICL_DEBUG("failed to allocate PDU; " 581 "dropping connection"); 582 icl_conn_fail(ic); 583 return (NULL); 584 } 585 isc->receive_pdu = request; 586 } else { 587 KASSERT(isc->receive_pdu != NULL, 588 ("isc->receive_pdu == NULL")); 589 request = isc->receive_pdu; 590 } 591 592 switch (isc->receive_state) { 593 case ICL_CONN_STATE_BHS: 594 //ICL_DEBUG("receiving BHS"); 595 icl_soft_receive_buf(r, rs, request->ip_bhs, 596 sizeof(struct iscsi_bhs)); 597 598 /* 599 * We don't enforce any limit for AHS length; 600 * its length is stored in 8 bit field. 601 */ 602 603 len = icl_pdu_data_segment_length(request); 604 if (len > ic->ic_max_recv_data_segment_length) { 605 ICL_WARN("received data segment " 606 "length %zd is larger than negotiated; " 607 "dropping connection", len); 608 error = EINVAL; 609 break; 610 } 611 612 isc->receive_state = ICL_CONN_STATE_AHS; 613 isc->receive_len = icl_pdu_ahs_length(request); 614 break; 615 616 case ICL_CONN_STATE_AHS: 617 //ICL_DEBUG("receiving AHS"); 618 icl_pdu_receive_ahs(request, r, rs); 619 isc->receive_state = ICL_CONN_STATE_HEADER_DIGEST; 620 if (ic->ic_header_crc32c == false) 621 isc->receive_len = 0; 622 else 623 isc->receive_len = ISCSI_HEADER_DIGEST_SIZE; 624 break; 625 626 case ICL_CONN_STATE_HEADER_DIGEST: 627 //ICL_DEBUG("receiving header digest"); 628 error = icl_pdu_check_header_digest(request, r, rs); 629 if (error != 0) { 630 ICL_DEBUG("header digest failed; " 631 "dropping connection"); 632 break; 633 } 634 635 isc->receive_state = ICL_CONN_STATE_DATA; 636 isc->receive_len = icl_pdu_data_segment_receive_len(request); 637 break; 638 639 case ICL_CONN_STATE_DATA: 640 //ICL_DEBUG("receiving data segment"); 641 error = icl_pdu_receive_data_segment(request, r, rs, 642 &more_needed); 643 if (error != 0) { 644 ICL_DEBUG("failed to receive data segment;" 645 "dropping connection"); 646 break; 647 } 648 649 if (more_needed) 650 break; 651 652 isc->receive_state = ICL_CONN_STATE_DATA_DIGEST; 653 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) 654 isc->receive_len = 0; 655 else 656 isc->receive_len = ISCSI_DATA_DIGEST_SIZE; 657 break; 658 659 case ICL_CONN_STATE_DATA_DIGEST: 660 //ICL_DEBUG("receiving data digest"); 661 error = icl_pdu_check_data_digest(request, r, rs); 662 if (error != 0) { 663 ICL_DEBUG("data digest failed; " 664 "dropping connection"); 665 break; 666 } 667 668 /* 669 * We've received complete PDU; reset the receive state machine 670 * and return the PDU. 671 */ 672 isc->receive_state = ICL_CONN_STATE_BHS; 673 isc->receive_len = sizeof(struct iscsi_bhs); 674 isc->receive_pdu = NULL; 675 return (request); 676 677 default: 678 panic("invalid receive_state %d\n", isc->receive_state); 679 } 680 681 if (error != 0) { 682 /* 683 * Don't free the PDU; it's pointed to by isc->receive_pdu 684 * and will get freed in icl_soft_conn_close(). 685 */ 686 icl_conn_fail(ic); 687 } 688 689 return (NULL); 690 } 691 692 static void 693 icl_conn_receive_pdus(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs) 694 { 695 struct icl_conn *ic = &isc->ic; 696 struct icl_pdu *response; 697 698 for (;;) { 699 if (ic->ic_disconnecting) 700 return; 701 702 /* 703 * Loop until we have a complete PDU or there is not enough 704 * data in the socket buffer. 705 */ 706 if (*rs < isc->receive_len) { 707 #if 0 708 ICL_DEBUG("not enough data; have %zd, need %zd", 709 *rs, isc->receive_len); 710 #endif 711 return; 712 } 713 714 response = icl_conn_receive_pdu(isc, r, rs); 715 if (response == NULL) 716 continue; 717 718 if (response->ip_ahs_len > 0) { 719 ICL_WARN("received PDU with unsupported " 720 "AHS; opcode 0x%x; dropping connection", 721 response->ip_bhs->bhs_opcode); 722 icl_soft_conn_pdu_free(ic, response); 723 icl_conn_fail(ic); 724 return; 725 } 726 727 (ic->ic_receive)(response); 728 } 729 } 730 731 static void 732 icl_receive_thread(void *arg) 733 { 734 struct icl_soft_conn *isc = arg; 735 struct icl_conn *ic = &isc->ic; 736 size_t available, read = 0; 737 struct socket *so; 738 struct mbuf *m, *r = NULL; 739 struct uio uio; 740 int error, flags; 741 742 so = ic->ic_socket; 743 744 for (;;) { 745 SOCKBUF_LOCK(&so->so_rcv); 746 if (ic->ic_disconnecting) { 747 SOCKBUF_UNLOCK(&so->so_rcv); 748 break; 749 } 750 751 /* 752 * Set the low watermark, to be checked by 753 * soreadable() in icl_soupcall_receive() 754 * to avoid unnecessary wakeups until there 755 * is enough data received to read the PDU. 756 */ 757 available = sbavail(&so->so_rcv); 758 if (read + available < isc->receive_len) { 759 so->so_rcv.sb_lowat = isc->receive_len - read; 760 cv_wait(&isc->receive_cv, SOCKBUF_MTX(&so->so_rcv)); 761 so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; 762 available = sbavail(&so->so_rcv); 763 } 764 SOCKBUF_UNLOCK(&so->so_rcv); 765 766 if (available == 0) { 767 if (so->so_error != 0) { 768 ICL_DEBUG("connection error %d; " 769 "dropping connection", so->so_error); 770 icl_conn_fail(ic); 771 break; 772 } 773 continue; 774 } 775 776 memset(&uio, 0, sizeof(uio)); 777 uio.uio_resid = available; 778 flags = MSG_DONTWAIT; 779 error = soreceive(so, NULL, &uio, &m, NULL, &flags); 780 if (error != 0) { 781 ICL_DEBUG("soreceive error %d", error); 782 break; 783 } 784 if (uio.uio_resid != 0) { 785 m_freem(m); 786 ICL_DEBUG("short read"); 787 break; 788 } 789 if (r) 790 m_cat(r, m); 791 else 792 r = m; 793 read += available; 794 795 icl_conn_receive_pdus(isc, &r, &read); 796 } 797 798 if (r) 799 m_freem(r); 800 801 ICL_CONN_LOCK(ic); 802 isc->receive_running = false; 803 cv_signal(&isc->send_cv); 804 ICL_CONN_UNLOCK(ic); 805 kthread_exit(); 806 } 807 808 static int 809 icl_soupcall_receive(struct socket *so, void *arg, int waitflag) 810 { 811 struct icl_soft_conn *isc; 812 813 if (!soreadable(so)) 814 return (SU_OK); 815 816 isc = arg; 817 cv_signal(&isc->receive_cv); 818 return (SU_OK); 819 } 820 821 static int 822 icl_pdu_finalize(struct icl_pdu *request) 823 { 824 size_t padding, pdu_len; 825 uint32_t digest, zero = 0; 826 int ok; 827 struct icl_conn *ic; 828 829 ic = request->ip_conn; 830 831 icl_pdu_set_data_segment_length(request, request->ip_data_len); 832 833 pdu_len = icl_pdu_size(request); 834 835 if (ic->ic_header_crc32c) { 836 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf, 837 ISCSI_BHS_SIZE); 838 ok = m_append(request->ip_bhs_mbuf, sizeof(digest), 839 (void *)&digest); 840 if (ok != 1) { 841 ICL_WARN("failed to append header digest"); 842 return (1); 843 } 844 } 845 846 if (request->ip_data_len != 0) { 847 padding = icl_pdu_padding(request); 848 if (padding > 0) { 849 ok = m_append(request->ip_data_mbuf, padding, 850 (void *)&zero); 851 if (ok != 1) { 852 ICL_WARN("failed to append padding"); 853 return (1); 854 } 855 } 856 857 if (ic->ic_data_crc32c) { 858 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf, 859 roundup2(request->ip_data_len, 4)); 860 861 ok = m_append(request->ip_data_mbuf, sizeof(digest), 862 (void *)&digest); 863 if (ok != 1) { 864 ICL_WARN("failed to append data digest"); 865 return (1); 866 } 867 } 868 869 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); 870 request->ip_data_mbuf = NULL; 871 } 872 873 request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; 874 875 return (0); 876 } 877 878 static void 879 icl_conn_send_pdus(struct icl_soft_conn *isc, struct icl_pdu_stailq *queue) 880 { 881 struct icl_conn *ic = &isc->ic; 882 struct icl_pdu *request, *request2; 883 struct mbuf *m; 884 struct socket *so; 885 long available, size, size2; 886 int coalesced, error; 887 888 ICL_CONN_LOCK_ASSERT_NOT(ic); 889 890 so = ic->ic_socket; 891 892 SOCKBUF_LOCK(&so->so_snd); 893 /* 894 * Check how much space do we have for transmit. We can't just 895 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, 896 * as it always frees the mbuf chain passed to it, even in case 897 * of error. 898 */ 899 available = sbspace(&so->so_snd); 900 isc->check_send_space = false; 901 902 /* 903 * Notify the socket upcall that we don't need wakeups 904 * for the time being. 905 */ 906 so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; 907 SOCKBUF_UNLOCK(&so->so_snd); 908 909 while (!STAILQ_EMPTY(queue)) { 910 request = STAILQ_FIRST(queue); 911 size = icl_pdu_size(request); 912 if (available < size) { 913 /* 914 * Set the low watermark, to be checked by 915 * sowriteable() in icl_soupcall_send() 916 * to avoid unnecessary wakeups until there 917 * is enough space for the PDU to fit. 918 */ 919 SOCKBUF_LOCK(&so->so_snd); 920 available = sbspace(&so->so_snd); 921 if (available < size) { 922 #if 1 923 ICL_DEBUG("no space to send; " 924 "have %ld, need %ld", 925 available, size); 926 #endif 927 so->so_snd.sb_lowat = max(size, 928 so->so_snd.sb_hiwat / 8); 929 SOCKBUF_UNLOCK(&so->so_snd); 930 return; 931 } 932 SOCKBUF_UNLOCK(&so->so_snd); 933 } 934 STAILQ_REMOVE_HEAD(queue, ip_next); 935 error = icl_pdu_finalize(request); 936 if (error != 0) { 937 ICL_DEBUG("failed to finalize PDU; " 938 "dropping connection"); 939 icl_soft_pdu_done(request, EIO); 940 icl_conn_fail(ic); 941 return; 942 } 943 if (coalesce) { 944 m = request->ip_bhs_mbuf; 945 for (coalesced = 1; ; coalesced++) { 946 request2 = STAILQ_FIRST(queue); 947 if (request2 == NULL) 948 break; 949 size2 = icl_pdu_size(request2); 950 if (available < size + size2) 951 break; 952 STAILQ_REMOVE_HEAD(queue, ip_next); 953 error = icl_pdu_finalize(request2); 954 if (error != 0) { 955 ICL_DEBUG("failed to finalize PDU; " 956 "dropping connection"); 957 icl_soft_pdu_done(request, EIO); 958 icl_soft_pdu_done(request2, EIO); 959 icl_conn_fail(ic); 960 return; 961 } 962 while (m->m_next) 963 m = m->m_next; 964 m_cat(m, request2->ip_bhs_mbuf); 965 request2->ip_bhs_mbuf = NULL; 966 request->ip_bhs_mbuf->m_pkthdr.len += size2; 967 size += size2; 968 icl_soft_pdu_done(request2, 0); 969 } 970 #if 0 971 if (coalesced > 1) { 972 ICL_DEBUG("coalesced %d PDUs into %ld bytes", 973 coalesced, size); 974 } 975 #endif 976 } 977 available -= size; 978 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, 979 NULL, MSG_DONTWAIT, curthread); 980 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */ 981 if (error != 0) { 982 ICL_DEBUG("failed to send PDU, error %d; " 983 "dropping connection", error); 984 icl_soft_pdu_done(request, error); 985 icl_conn_fail(ic); 986 return; 987 } 988 icl_soft_pdu_done(request, 0); 989 } 990 } 991 992 static void 993 icl_send_thread(void *arg) 994 { 995 struct icl_soft_conn *isc; 996 struct icl_conn *ic; 997 struct icl_pdu_stailq queue; 998 999 isc = arg; 1000 ic = &isc->ic; 1001 1002 STAILQ_INIT(&queue); 1003 1004 ICL_CONN_LOCK(ic); 1005 for (;;) { 1006 for (;;) { 1007 /* 1008 * Populate the local queue from the main one. 1009 * This way the icl_conn_send_pdus() can go through 1010 * all the queued PDUs without holding any locks. 1011 */ 1012 if (STAILQ_EMPTY(&queue) || isc->check_send_space) 1013 STAILQ_CONCAT(&queue, &isc->to_send); 1014 1015 ICL_CONN_UNLOCK(ic); 1016 icl_conn_send_pdus(isc, &queue); 1017 ICL_CONN_LOCK(ic); 1018 1019 /* 1020 * The icl_soupcall_send() was called since the last 1021 * call to sbspace(); go around; 1022 */ 1023 if (isc->check_send_space) 1024 continue; 1025 1026 /* 1027 * Local queue is empty, but we still have PDUs 1028 * in the main one; go around. 1029 */ 1030 if (STAILQ_EMPTY(&queue) && 1031 !STAILQ_EMPTY(&isc->to_send)) 1032 continue; 1033 1034 /* 1035 * There might be some stuff in the local queue, 1036 * which didn't get sent due to not having enough send 1037 * space. Wait for socket upcall. 1038 */ 1039 break; 1040 } 1041 1042 if (ic->ic_disconnecting) { 1043 //ICL_DEBUG("terminating"); 1044 break; 1045 } 1046 1047 cv_wait(&isc->send_cv, ic->ic_lock); 1048 } 1049 1050 /* 1051 * We're exiting; move PDUs back to the main queue, so they can 1052 * get freed properly. At this point ordering doesn't matter. 1053 */ 1054 STAILQ_CONCAT(&isc->to_send, &queue); 1055 1056 isc->send_running = false; 1057 cv_signal(&isc->send_cv); 1058 ICL_CONN_UNLOCK(ic); 1059 kthread_exit(); 1060 } 1061 1062 static int 1063 icl_soupcall_send(struct socket *so, void *arg, int waitflag) 1064 { 1065 struct icl_soft_conn *isc; 1066 struct icl_conn *ic; 1067 1068 if (!sowriteable(so)) 1069 return (SU_OK); 1070 1071 isc = arg; 1072 ic = &isc->ic; 1073 1074 ICL_CONN_LOCK(ic); 1075 isc->check_send_space = true; 1076 ICL_CONN_UNLOCK(ic); 1077 1078 cv_signal(&isc->send_cv); 1079 1080 return (SU_OK); 1081 } 1082 1083 static void 1084 icl_soft_free_mext_pg(struct mbuf *m) 1085 { 1086 struct icl_soft_pdu *isp; 1087 1088 M_ASSERTEXTPG(m); 1089 1090 /* 1091 * Nothing to do for the pages; they are owned by the PDU / 1092 * I/O request. 1093 */ 1094 1095 /* Drop reference on the PDU. */ 1096 isp = m->m_ext.ext_arg1; 1097 if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) 1098 icl_soft_pdu_call_cb(&isp->ip); 1099 } 1100 1101 static int 1102 icl_soft_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *request, 1103 struct bio *bp, size_t offset, size_t len, int flags) 1104 { 1105 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; 1106 struct mbuf *m, *m_tail; 1107 vm_offset_t vaddr; 1108 size_t mtodo, page_offset, todo; 1109 int i; 1110 1111 KASSERT(len > 0, ("len == 0")); 1112 1113 m_tail = request->ip_data_mbuf; 1114 if (m_tail != NULL) 1115 for (; m_tail->m_next != NULL; m_tail = m_tail->m_next) 1116 ; 1117 1118 MPASS(bp->bio_flags & BIO_UNMAPPED); 1119 if (offset < PAGE_SIZE - bp->bio_ma_offset) { 1120 page_offset = bp->bio_ma_offset + offset; 1121 i = 0; 1122 } else { 1123 offset -= PAGE_SIZE - bp->bio_ma_offset; 1124 for (i = 1; offset >= PAGE_SIZE; i++) 1125 offset -= PAGE_SIZE; 1126 page_offset = offset; 1127 } 1128 1129 if (flags & ICL_NOCOPY) { 1130 m = NULL; 1131 while (len > 0) { 1132 if (m == NULL) { 1133 m = mb_alloc_ext_pgs(flags & ~ICL_NOCOPY, 1134 icl_soft_free_mext_pg); 1135 if (__predict_false(m == NULL)) 1136 return (ENOMEM); 1137 atomic_add_int(&isp->ref_cnt, 1); 1138 m->m_ext.ext_arg1 = isp; 1139 m->m_epg_1st_off = page_offset; 1140 } 1141 1142 todo = MIN(len, PAGE_SIZE - page_offset); 1143 1144 m->m_epg_pa[m->m_epg_npgs] = 1145 VM_PAGE_TO_PHYS(bp->bio_ma[i]); 1146 m->m_epg_npgs++; 1147 m->m_epg_last_len = todo; 1148 m->m_len += todo; 1149 m->m_ext.ext_size += PAGE_SIZE; 1150 MBUF_EXT_PGS_ASSERT_SANITY(m); 1151 1152 if (m->m_epg_npgs == MBUF_PEXT_MAX_PGS) { 1153 if (m_tail != NULL) 1154 m_tail->m_next = m; 1155 else 1156 request->ip_data_mbuf = m; 1157 m_tail = m; 1158 request->ip_data_len += m->m_len; 1159 m = NULL; 1160 } 1161 1162 page_offset = 0; 1163 len -= todo; 1164 i++; 1165 } 1166 1167 if (m != NULL) { 1168 if (m_tail != NULL) 1169 m_tail->m_next = m; 1170 else 1171 request->ip_data_mbuf = m; 1172 request->ip_data_len += m->m_len; 1173 } 1174 return (0); 1175 } 1176 1177 m = m_getm2(NULL, len, flags, MT_DATA, 0); 1178 if (__predict_false(m == NULL)) 1179 return (ENOMEM); 1180 1181 if (request->ip_data_mbuf == NULL) { 1182 request->ip_data_mbuf = m; 1183 request->ip_data_len = len; 1184 } else { 1185 m_tail->m_next = m; 1186 request->ip_data_len += len; 1187 } 1188 1189 while (len > 0) { 1190 todo = MIN(len, PAGE_SIZE - page_offset); 1191 vaddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(bp->bio_ma[i])); 1192 1193 do { 1194 mtodo = min(todo, M_SIZE(m) - m->m_len); 1195 memcpy(mtod(m, char *) + m->m_len, (char *)vaddr + 1196 page_offset, mtodo); 1197 m->m_len += mtodo; 1198 if (m->m_len == M_SIZE(m)) 1199 m = m->m_next; 1200 page_offset += mtodo; 1201 todo -= mtodo; 1202 } while (todo > 0); 1203 1204 page_offset = 0; 1205 len -= todo; 1206 i++; 1207 } 1208 1209 return (0); 1210 } 1211 1212 static int 1213 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, 1214 const void *addr, size_t len, int flags) 1215 { 1216 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; 1217 struct mbuf *mb, *newmb; 1218 size_t copylen, off = 0; 1219 1220 KASSERT(len > 0, ("len == 0")); 1221 1222 if (flags & ICL_NOCOPY) { 1223 newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA); 1224 if (newmb == NULL) { 1225 ICL_WARN("failed to allocate mbuf"); 1226 return (ENOMEM); 1227 } 1228 1229 newmb->m_flags |= M_RDONLY; 1230 m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt, 1231 icl_soft_mbuf_done, isp, NULL); 1232 newmb->m_len = len; 1233 } else { 1234 newmb = m_getm2(NULL, len, flags, MT_DATA, 0); 1235 if (newmb == NULL) { 1236 ICL_WARN("failed to allocate mbuf for %zd bytes", len); 1237 return (ENOMEM); 1238 } 1239 1240 for (mb = newmb; mb != NULL; mb = mb->m_next) { 1241 copylen = min(M_TRAILINGSPACE(mb), len - off); 1242 memcpy(mtod(mb, char *), (const char *)addr + off, copylen); 1243 mb->m_len = copylen; 1244 off += copylen; 1245 } 1246 KASSERT(off == len, ("%s: off != len", __func__)); 1247 } 1248 1249 if (request->ip_data_mbuf == NULL) { 1250 request->ip_data_mbuf = newmb; 1251 request->ip_data_len = len; 1252 } else { 1253 m_cat(request->ip_data_mbuf, newmb); 1254 request->ip_data_len += len; 1255 } 1256 1257 return (0); 1258 } 1259 1260 void 1261 icl_soft_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip, 1262 size_t pdu_off, struct bio *bp, size_t bio_off, size_t len) 1263 { 1264 vm_offset_t vaddr; 1265 size_t page_offset, todo; 1266 int i; 1267 1268 MPASS(bp->bio_flags & BIO_UNMAPPED); 1269 if (bio_off < PAGE_SIZE - bp->bio_ma_offset) { 1270 page_offset = bp->bio_ma_offset + bio_off; 1271 i = 0; 1272 } else { 1273 bio_off -= PAGE_SIZE - bp->bio_ma_offset; 1274 for (i = 1; bio_off >= PAGE_SIZE; i++) 1275 bio_off -= PAGE_SIZE; 1276 page_offset = bio_off; 1277 } 1278 1279 while (len > 0) { 1280 todo = MIN(len, PAGE_SIZE - page_offset); 1281 1282 vaddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(bp->bio_ma[i])); 1283 m_copydata(ip->ip_data_mbuf, pdu_off, todo, (char *)vaddr + 1284 page_offset); 1285 1286 page_offset = 0; 1287 pdu_off += todo; 1288 len -= todo; 1289 i++; 1290 } 1291 } 1292 1293 void 1294 icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, 1295 size_t off, void *addr, size_t len) 1296 { 1297 1298 m_copydata(ip->ip_data_mbuf, off, len, addr); 1299 } 1300 1301 static void 1302 icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) 1303 { 1304 1305 icl_soft_conn_pdu_queue_cb(ic, ip, NULL); 1306 } 1307 1308 static void 1309 icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip, 1310 icl_pdu_cb cb) 1311 { 1312 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1313 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 1314 1315 ICL_CONN_LOCK_ASSERT(ic); 1316 isp->ref_cnt++; 1317 isp->cb = cb; 1318 1319 if (ic->ic_disconnecting || ic->ic_socket == NULL) { 1320 ICL_DEBUG("icl_pdu_queue on closed connection"); 1321 icl_soft_pdu_done(ip, ENOTCONN); 1322 return; 1323 } 1324 1325 if (!STAILQ_EMPTY(&isc->to_send)) { 1326 STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next); 1327 /* 1328 * If the queue is not empty, someone else had already 1329 * signaled the send thread; no need to do that again, 1330 * just return. 1331 */ 1332 return; 1333 } 1334 1335 STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next); 1336 cv_signal(&isc->send_cv); 1337 } 1338 1339 static struct icl_conn * 1340 icl_soft_new_conn(const char *name, struct mtx *lock) 1341 { 1342 struct icl_soft_conn *isc; 1343 struct icl_conn *ic; 1344 1345 refcount_acquire(&icl_ncons); 1346 1347 isc = (struct icl_soft_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, 1348 M_WAITOK | M_ZERO); 1349 1350 STAILQ_INIT(&isc->to_send); 1351 cv_init(&isc->send_cv, "icl_tx"); 1352 cv_init(&isc->receive_cv, "icl_rx"); 1353 1354 ic = &isc->ic; 1355 ic->ic_lock = lock; 1356 #ifdef DIAGNOSTIC 1357 refcount_init(&ic->ic_outstanding_pdus, 0); 1358 #endif 1359 ic->ic_name = name; 1360 ic->ic_offload = "None"; 1361 ic->ic_unmapped = PMAP_HAS_DMAP; 1362 1363 return (ic); 1364 } 1365 1366 void 1367 icl_soft_conn_free(struct icl_conn *ic) 1368 { 1369 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1370 1371 #ifdef DIAGNOSTIC 1372 KASSERT(ic->ic_outstanding_pdus == 0, 1373 ("destroying session with %d outstanding PDUs", 1374 ic->ic_outstanding_pdus)); 1375 #endif 1376 cv_destroy(&isc->send_cv); 1377 cv_destroy(&isc->receive_cv); 1378 kobj_delete((struct kobj *)isc, M_ICL_SOFT); 1379 refcount_release(&icl_ncons); 1380 } 1381 1382 static int 1383 icl_conn_start(struct icl_conn *ic) 1384 { 1385 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1386 size_t minspace; 1387 struct sockopt opt; 1388 int error, one = 1; 1389 1390 ICL_CONN_LOCK(ic); 1391 1392 /* 1393 * XXX: Ugly hack. 1394 */ 1395 if (ic->ic_socket == NULL) { 1396 ICL_CONN_UNLOCK(ic); 1397 return (EINVAL); 1398 } 1399 1400 isc->receive_state = ICL_CONN_STATE_BHS; 1401 isc->receive_len = sizeof(struct iscsi_bhs); 1402 ic->ic_disconnecting = false; 1403 1404 ICL_CONN_UNLOCK(ic); 1405 1406 /* 1407 * For sendspace, this is required because the current code cannot 1408 * send a PDU in pieces; thus, the minimum buffer size is equal 1409 * to the maximum PDU size. "+4" is to account for possible padding. 1410 */ 1411 minspace = sizeof(struct iscsi_bhs) + 1412 ic->ic_max_send_data_segment_length + 1413 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1414 if (sendspace < minspace) { 1415 ICL_WARN("kern.icl.sendspace too low; must be at least %zd", 1416 minspace); 1417 sendspace = minspace; 1418 } 1419 minspace = sizeof(struct iscsi_bhs) + 1420 ic->ic_max_recv_data_segment_length + 1421 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1422 if (recvspace < minspace) { 1423 ICL_WARN("kern.icl.recvspace too low; must be at least %zd", 1424 minspace); 1425 recvspace = minspace; 1426 } 1427 1428 error = soreserve(ic->ic_socket, sendspace, recvspace); 1429 if (error != 0) { 1430 ICL_WARN("soreserve failed with error %d", error); 1431 icl_soft_conn_close(ic); 1432 return (error); 1433 } 1434 ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; 1435 ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; 1436 1437 /* 1438 * Disable Nagle. 1439 */ 1440 bzero(&opt, sizeof(opt)); 1441 opt.sopt_dir = SOPT_SET; 1442 opt.sopt_level = IPPROTO_TCP; 1443 opt.sopt_name = TCP_NODELAY; 1444 opt.sopt_val = &one; 1445 opt.sopt_valsize = sizeof(one); 1446 error = sosetopt(ic->ic_socket, &opt); 1447 if (error != 0) { 1448 ICL_WARN("disabling TCP_NODELAY failed with error %d", error); 1449 icl_soft_conn_close(ic); 1450 return (error); 1451 } 1452 1453 /* 1454 * Register socket upcall, to get notified about incoming PDUs 1455 * and free space to send outgoing ones. 1456 */ 1457 SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1458 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, isc); 1459 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1460 SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1461 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, isc); 1462 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1463 1464 /* 1465 * Start threads. 1466 */ 1467 ICL_CONN_LOCK(ic); 1468 isc->send_running = isc->receive_running = true; 1469 ICL_CONN_UNLOCK(ic); 1470 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", 1471 ic->ic_name); 1472 if (error != 0) { 1473 ICL_WARN("kthread_add(9) failed with error %d", error); 1474 ICL_CONN_LOCK(ic); 1475 isc->send_running = isc->receive_running = false; 1476 cv_signal(&isc->send_cv); 1477 ICL_CONN_UNLOCK(ic); 1478 icl_soft_conn_close(ic); 1479 return (error); 1480 } 1481 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", 1482 ic->ic_name); 1483 if (error != 0) { 1484 ICL_WARN("kthread_add(9) failed with error %d", error); 1485 ICL_CONN_LOCK(ic); 1486 isc->receive_running = false; 1487 cv_signal(&isc->send_cv); 1488 ICL_CONN_UNLOCK(ic); 1489 icl_soft_conn_close(ic); 1490 return (error); 1491 } 1492 1493 return (0); 1494 } 1495 1496 int 1497 icl_soft_conn_handoff(struct icl_conn *ic, int fd) 1498 { 1499 struct file *fp; 1500 struct socket *so; 1501 cap_rights_t rights; 1502 int error; 1503 1504 ICL_CONN_LOCK_ASSERT_NOT(ic); 1505 1506 #ifdef ICL_KERNEL_PROXY 1507 /* 1508 * We're transitioning to Full Feature phase, and we don't 1509 * really care. 1510 */ 1511 if (fd == 0) { 1512 ICL_CONN_LOCK(ic); 1513 if (ic->ic_socket == NULL) { 1514 ICL_CONN_UNLOCK(ic); 1515 ICL_WARN("proxy handoff without connect"); 1516 return (EINVAL); 1517 } 1518 ICL_CONN_UNLOCK(ic); 1519 return (0); 1520 } 1521 #endif 1522 1523 /* 1524 * Steal the socket from userland. 1525 */ 1526 error = fget(curthread, fd, 1527 cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp); 1528 if (error != 0) 1529 return (error); 1530 if (fp->f_type != DTYPE_SOCKET) { 1531 fdrop(fp, curthread); 1532 return (EINVAL); 1533 } 1534 so = fp->f_data; 1535 if (so->so_type != SOCK_STREAM) { 1536 fdrop(fp, curthread); 1537 return (EINVAL); 1538 } 1539 1540 ICL_CONN_LOCK(ic); 1541 1542 if (ic->ic_socket != NULL) { 1543 ICL_CONN_UNLOCK(ic); 1544 fdrop(fp, curthread); 1545 return (EBUSY); 1546 } 1547 1548 ic->ic_socket = fp->f_data; 1549 fp->f_ops = &badfileops; 1550 fp->f_data = NULL; 1551 fdrop(fp, curthread); 1552 ICL_CONN_UNLOCK(ic); 1553 1554 error = icl_conn_start(ic); 1555 1556 return (error); 1557 } 1558 1559 void 1560 icl_soft_conn_close(struct icl_conn *ic) 1561 { 1562 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1563 struct icl_pdu *pdu; 1564 struct socket *so; 1565 1566 /* 1567 * Wake up the threads, so they can properly terminate. 1568 * Receive thread sleeps on so->so_rcv lock, send on ic->ic_lock. 1569 */ 1570 ICL_CONN_LOCK(ic); 1571 if (!ic->ic_disconnecting) { 1572 so = ic->ic_socket; 1573 if (so) 1574 SOCKBUF_LOCK(&so->so_rcv); 1575 ic->ic_disconnecting = true; 1576 if (so) 1577 SOCKBUF_UNLOCK(&so->so_rcv); 1578 } 1579 while (isc->receive_running || isc->send_running) { 1580 cv_signal(&isc->receive_cv); 1581 cv_signal(&isc->send_cv); 1582 cv_wait(&isc->send_cv, ic->ic_lock); 1583 } 1584 1585 /* Some other thread could close the connection same time. */ 1586 so = ic->ic_socket; 1587 if (so == NULL) { 1588 ICL_CONN_UNLOCK(ic); 1589 return; 1590 } 1591 ic->ic_socket = NULL; 1592 1593 /* 1594 * Deregister socket upcalls. 1595 */ 1596 ICL_CONN_UNLOCK(ic); 1597 SOCKBUF_LOCK(&so->so_snd); 1598 if (so->so_snd.sb_upcall != NULL) 1599 soupcall_clear(so, SO_SND); 1600 SOCKBUF_UNLOCK(&so->so_snd); 1601 SOCKBUF_LOCK(&so->so_rcv); 1602 if (so->so_rcv.sb_upcall != NULL) 1603 soupcall_clear(so, SO_RCV); 1604 SOCKBUF_UNLOCK(&so->so_rcv); 1605 soclose(so); 1606 ICL_CONN_LOCK(ic); 1607 1608 if (isc->receive_pdu != NULL) { 1609 //ICL_DEBUG("freeing partially received PDU"); 1610 icl_soft_conn_pdu_free(ic, isc->receive_pdu); 1611 isc->receive_pdu = NULL; 1612 } 1613 1614 /* 1615 * Remove any outstanding PDUs from the send queue. 1616 */ 1617 while (!STAILQ_EMPTY(&isc->to_send)) { 1618 pdu = STAILQ_FIRST(&isc->to_send); 1619 STAILQ_REMOVE_HEAD(&isc->to_send, ip_next); 1620 icl_soft_pdu_done(pdu, ENOTCONN); 1621 } 1622 1623 KASSERT(STAILQ_EMPTY(&isc->to_send), 1624 ("destroying session with non-empty send queue")); 1625 ICL_CONN_UNLOCK(ic); 1626 } 1627 1628 int 1629 icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, 1630 struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) 1631 { 1632 1633 return (0); 1634 } 1635 1636 void 1637 icl_soft_conn_task_done(struct icl_conn *ic, void *prv) 1638 { 1639 } 1640 1641 int 1642 icl_soft_conn_transfer_setup(struct icl_conn *ic, struct icl_pdu *ip, 1643 union ctl_io *io, uint32_t *transfer_tag, void **prvp) 1644 { 1645 1646 return (0); 1647 } 1648 1649 void 1650 icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv) 1651 { 1652 } 1653 1654 static int 1655 icl_soft_limits(struct icl_drv_limits *idl) 1656 { 1657 1658 idl->idl_max_recv_data_segment_length = max_data_segment_length; 1659 idl->idl_max_send_data_segment_length = max_data_segment_length; 1660 idl->idl_max_burst_length = max_burst_length; 1661 idl->idl_first_burst_length = first_burst_length; 1662 1663 return (0); 1664 } 1665 1666 #ifdef ICL_KERNEL_PROXY 1667 int 1668 icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype, 1669 int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) 1670 { 1671 1672 return (icl_soft_proxy_connect(ic, domain, socktype, protocol, 1673 from_sa, to_sa)); 1674 } 1675 1676 int 1677 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so) 1678 { 1679 int error; 1680 1681 ICL_CONN_LOCK_ASSERT_NOT(ic); 1682 1683 if (so->so_type != SOCK_STREAM) 1684 return (EINVAL); 1685 1686 ICL_CONN_LOCK(ic); 1687 if (ic->ic_socket != NULL) { 1688 ICL_CONN_UNLOCK(ic); 1689 return (EBUSY); 1690 } 1691 ic->ic_socket = so; 1692 ICL_CONN_UNLOCK(ic); 1693 1694 error = icl_conn_start(ic); 1695 1696 return (error); 1697 } 1698 #endif /* ICL_KERNEL_PROXY */ 1699 1700 static int 1701 icl_soft_load(void) 1702 { 1703 int error; 1704 1705 icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu", 1706 sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL, 1707 UMA_ALIGN_PTR, 0); 1708 refcount_init(&icl_ncons, 0); 1709 1710 /* 1711 * The reason we call this "none" is that to the user, 1712 * it's known as "offload driver"; "offload driver: soft" 1713 * doesn't make much sense. 1714 */ 1715 error = icl_register("none", false, 0, 1716 icl_soft_limits, icl_soft_new_conn); 1717 KASSERT(error == 0, ("failed to register")); 1718 1719 #if defined(ICL_KERNEL_PROXY) && 0 1720 /* 1721 * Debugging aid for kernel proxy functionality. 1722 */ 1723 error = icl_register("proxytest", true, 0, 1724 icl_soft_limits, icl_soft_new_conn); 1725 KASSERT(error == 0, ("failed to register")); 1726 #endif 1727 1728 return (error); 1729 } 1730 1731 static int 1732 icl_soft_unload(void) 1733 { 1734 1735 if (icl_ncons != 0) 1736 return (EBUSY); 1737 1738 icl_unregister("none", false); 1739 #if defined(ICL_KERNEL_PROXY) && 0 1740 icl_unregister("proxytest", true); 1741 #endif 1742 1743 uma_zdestroy(icl_soft_pdu_zone); 1744 1745 return (0); 1746 } 1747 1748 static int 1749 icl_soft_modevent(module_t mod, int what, void *arg) 1750 { 1751 1752 switch (what) { 1753 case MOD_LOAD: 1754 return (icl_soft_load()); 1755 case MOD_UNLOAD: 1756 return (icl_soft_unload()); 1757 default: 1758 return (EINVAL); 1759 } 1760 } 1761 1762 moduledata_t icl_soft_data = { 1763 "icl_soft", 1764 icl_soft_modevent, 1765 0 1766 }; 1767 1768 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); 1769 MODULE_DEPEND(icl_soft, icl, 1, 1, 1); 1770 MODULE_VERSION(icl_soft, 1); 1771