1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 /* 33 * Software implementation of iSCSI Common Layer kobj(9) interface. 
*/

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/gsb_crc32.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/uio.h>
#include <vm/uma.h>
#include <vm/vm_page.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <icl_conn_if.h>

/*
 * Receive state machine states, stored in icl_soft_conn.receive_state.
 * icl_conn_receive_pdu() walks them in this order for every PDU.
 */
#define	ICL_CONN_STATE_BHS		1
#define	ICL_CONN_STATE_AHS		2
#define	ICL_CONN_STATE_HEADER_DIGEST	3
#define	ICL_CONN_STATE_DATA		4
#define	ICL_CONN_STATE_DATA_DIGEST	5

/*
 * Per-connection state of the software backend.  The generic icl_conn is
 * the first member; the code casts between the two pointer types.
 */
struct icl_soft_conn {
	struct icl_conn	 ic;

	/* soft specific stuff goes here. */
	/* PDUs queued for transmission; drained by icl_send_thread(). */
	STAILQ_HEAD(, icl_pdu) to_send;
	/* Send thread sleeps here; also signalled when either thread exits. */
	struct cv	 send_cv;
	/* Receive thread sleeps here; signalled by icl_soupcall_receive(). */
	struct cv	 receive_cv;
	/* PDU currently being assembled by icl_conn_receive_pdu(), if any. */
	struct icl_pdu	*receive_pdu;
	/* Bytes required before the next receive state machine step. */
	size_t		 receive_len;
	/* One of the ICL_CONN_STATE_* values. */
	int		 receive_state;
	/* Cleared by icl_receive_thread() right before it exits. */
	bool		 receive_running;
	/*
	 * Set by icl_soupcall_send(); cleared by icl_conn_send_pdus() when
	 * it samples the free space in the send buffer.
	 */
	bool		 check_send_space;
	/* Cleared by icl_send_thread() right before it exits. */
	bool		 send_running;
};

/*
 * Software backend PDU.  The generic icl_pdu is the first member; the code
 * casts between the two pointer types.
 */
struct icl_soft_pdu {
	struct icl_pdu	 ip;

	/* soft specific stuff goes here. */
	/*
	 * Outstanding references; the completion callback runs and the PDU
	 * is returned to its zone once this drops to zero.
	 */
	u_int		 ref_cnt;
	/* Optional completion callback, may be NULL. */
	icl_pdu_cb	 cb;
	/* Error code handed to the completion callback. */
	int		 error;
};

SYSCTL_NODE(_kern_icl, OID_AUTO, soft, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Software iSCSI");
/* Consulted by icl_conn_send_pdus() to merge PDUs into one sosend() call. */
static int coalesce = 1;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &coalesce, 0, "Try to coalesce PDUs before sending");
/* Consulted by icl_pdu_data_segment_receive_len() to cap a single read. */
static int partial_receive_len = 256 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
    &partial_receive_len, 0, "Minimum read size for partially received "
    "data segment");
static int max_data_segment_length = 256 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_data_segment_length, CTLFLAG_RWTUN,
    &max_data_segment_length, 0, "Maximum data segment length");
static int first_burst_length = 1024 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, first_burst_length, CTLFLAG_RWTUN,
    &first_burst_length, 0, "First burst length");
static int max_burst_length = 1024 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_burst_length, CTLFLAG_RWTUN,
    &max_burst_length, 0, "Maximum burst length");
static int sendspace = 1536 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1536 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");

static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend");
/* UMA zone that struct icl_soft_pdu objects are allocated from. */
static uma_zone_t icl_soft_pdu_zone;

static volatile u_int	icl_ncons;

/* Named list head type used for the send thread's local PDU queue. */
STAILQ_HEAD(icl_pdu_stailq, icl_pdu);

/* Prototypes for the kobj method implementations listed in the table below. */
static icl_conn_new_pdu_t	icl_soft_conn_new_pdu;
static icl_conn_pdu_free_t	icl_soft_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t
				    icl_soft_conn_pdu_data_segment_length;
static icl_conn_pdu_append_bio_t	icl_soft_conn_pdu_append_bio;
static icl_conn_pdu_append_data_t	icl_soft_conn_pdu_append_data;
static icl_conn_pdu_get_bio_t	icl_soft_conn_pdu_get_bio;
static icl_conn_pdu_get_data_t	icl_soft_conn_pdu_get_data;
static icl_conn_pdu_queue_t	icl_soft_conn_pdu_queue;
static icl_conn_pdu_queue_cb_t	icl_soft_conn_pdu_queue_cb;
static icl_conn_handoff_t	icl_soft_conn_handoff;
static icl_conn_free_t		icl_soft_conn_free;
static icl_conn_close_t		icl_soft_conn_close;
static icl_conn_task_setup_t	icl_soft_conn_task_setup;
static icl_conn_task_done_t	icl_soft_conn_task_done;
static icl_conn_transfer_setup_t	icl_soft_conn_transfer_setup;
static icl_conn_transfer_done_t	icl_soft_conn_transfer_done;
#ifdef ICL_KERNEL_PROXY
static icl_conn_connect_t	icl_soft_conn_connect;
#endif

/* Maps each icl_conn interface method to its software implementation. */
static kobj_method_t icl_soft_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length,
	    icl_soft_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_bio, icl_soft_conn_pdu_append_bio),
	KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_get_bio, icl_soft_conn_pdu_get_bio),
	KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
	KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
	KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb),
	KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
	KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
	KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
	KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done),
	KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup),
	KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done),
#ifdef ICL_KERNEL_PROXY
	KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect),
#endif
	{ 0, 0 }
};

DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_soft_conn));

static
void 175 icl_conn_fail(struct icl_conn *ic) 176 { 177 if (ic->ic_socket == NULL) 178 return; 179 180 /* 181 * XXX 182 */ 183 ic->ic_socket->so_error = EDOOFUS; 184 (ic->ic_error)(ic); 185 } 186 187 static void 188 icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) 189 { 190 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 191 192 KASSERT(isp->ref_cnt == 0, ("freeing active PDU")); 193 m_freem(ip->ip_bhs_mbuf); 194 m_freem(ip->ip_ahs_mbuf); 195 m_freem(ip->ip_data_mbuf); 196 uma_zfree(icl_soft_pdu_zone, isp); 197 #ifdef DIAGNOSTIC 198 refcount_release(&ic->ic_outstanding_pdus); 199 #endif 200 } 201 202 static void 203 icl_soft_pdu_call_cb(struct icl_pdu *ip) 204 { 205 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 206 207 if (isp->cb != NULL) 208 isp->cb(ip, isp->error); 209 #ifdef DIAGNOSTIC 210 refcount_release(&ip->ip_conn->ic_outstanding_pdus); 211 #endif 212 uma_zfree(icl_soft_pdu_zone, isp); 213 } 214 215 static void 216 icl_soft_pdu_done(struct icl_pdu *ip, int error) 217 { 218 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 219 220 if (error != 0) 221 isp->error = error; 222 223 m_freem(ip->ip_bhs_mbuf); 224 ip->ip_bhs_mbuf = NULL; 225 m_freem(ip->ip_ahs_mbuf); 226 ip->ip_ahs_mbuf = NULL; 227 m_freem(ip->ip_data_mbuf); 228 ip->ip_data_mbuf = NULL; 229 230 if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) 231 icl_soft_pdu_call_cb(ip); 232 } 233 234 static void 235 icl_soft_mbuf_done(struct mbuf *mb) 236 { 237 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1; 238 239 icl_soft_pdu_call_cb(&isp->ip); 240 } 241 242 /* 243 * Allocate icl_pdu with empty BHS to fill up by the caller. 
244 */ 245 struct icl_pdu * 246 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) 247 { 248 struct icl_soft_pdu *isp; 249 struct icl_pdu *ip; 250 251 #ifdef DIAGNOSTIC 252 refcount_acquire(&ic->ic_outstanding_pdus); 253 #endif 254 isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO); 255 if (isp == NULL) { 256 ICL_WARN("failed to allocate soft PDU"); 257 #ifdef DIAGNOSTIC 258 refcount_release(&ic->ic_outstanding_pdus); 259 #endif 260 return (NULL); 261 } 262 ip = &isp->ip; 263 ip->ip_conn = ic; 264 265 CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN); 266 ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA); 267 if (ip->ip_bhs_mbuf == NULL) { 268 ICL_WARN("failed to allocate BHS mbuf"); 269 icl_soft_conn_pdu_free(ic, ip); 270 return (NULL); 271 } 272 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); 273 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); 274 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); 275 276 return (ip); 277 } 278 279 static int 280 icl_pdu_ahs_length(const struct icl_pdu *request) 281 { 282 283 return (request->ip_bhs->bhs_total_ahs_len * 4); 284 } 285 286 static size_t 287 icl_pdu_data_segment_length(const struct icl_pdu *request) 288 { 289 uint32_t len = 0; 290 291 len += request->ip_bhs->bhs_data_segment_len[0]; 292 len <<= 8; 293 len += request->ip_bhs->bhs_data_segment_len[1]; 294 len <<= 8; 295 len += request->ip_bhs->bhs_data_segment_len[2]; 296 297 return (len); 298 } 299 300 size_t 301 icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, 302 const struct icl_pdu *request) 303 { 304 305 return (icl_pdu_data_segment_length(request)); 306 } 307 308 static void 309 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) 310 { 311 312 response->ip_bhs->bhs_data_segment_len[2] = len; 313 response->ip_bhs->bhs_data_segment_len[1] = len >> 8; 314 response->ip_bhs->bhs_data_segment_len[0] = len >> 16; 315 } 316 317 static size_t 318 icl_pdu_padding(const struct icl_pdu *ip) 319 { 320 321 if ((ip->ip_data_len % 4) != 0) 322 
return (4 - (ip->ip_data_len % 4)); 323 324 return (0); 325 } 326 327 static size_t 328 icl_pdu_size(const struct icl_pdu *response) 329 { 330 size_t len; 331 332 KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); 333 334 len = sizeof(struct iscsi_bhs) + response->ip_data_len + 335 icl_pdu_padding(response); 336 if (response->ip_conn->ic_header_crc32c) 337 len += ISCSI_HEADER_DIGEST_SIZE; 338 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) 339 len += ISCSI_DATA_DIGEST_SIZE; 340 341 return (len); 342 } 343 344 static void 345 icl_soft_receive_buf(struct mbuf **r, size_t *rs, void *buf, size_t s) 346 { 347 348 m_copydata(*r, 0, s, buf); 349 m_adj(*r, s); 350 while ((*r) != NULL && (*r)->m_len == 0) 351 *r = m_free(*r); 352 *rs -= s; 353 } 354 355 static void 356 icl_pdu_receive_ahs(struct icl_pdu *request, struct mbuf **r, size_t *rs) 357 { 358 359 request->ip_ahs_len = icl_pdu_ahs_length(request); 360 if (request->ip_ahs_len == 0) 361 return; 362 363 request->ip_ahs_mbuf = *r; 364 *r = m_split(request->ip_ahs_mbuf, request->ip_ahs_len, M_WAITOK); 365 *rs -= request->ip_ahs_len; 366 } 367 368 static int 369 mbuf_crc32c_helper(void *arg, void *data, u_int len) 370 { 371 uint32_t *digestp = arg; 372 373 *digestp = calculate_crc32c(*digestp, data, len); 374 return (0); 375 } 376 377 static uint32_t 378 icl_mbuf_to_crc32c(struct mbuf *m0, size_t len) 379 { 380 uint32_t digest = 0xffffffff; 381 382 m_apply(m0, 0, len, mbuf_crc32c_helper, &digest); 383 digest = digest ^ 0xffffffff; 384 385 return (digest); 386 } 387 388 static int 389 icl_pdu_check_header_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs) 390 { 391 uint32_t received_digest, valid_digest; 392 393 if (request->ip_conn->ic_header_crc32c == false) 394 return (0); 395 396 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); 397 icl_soft_receive_buf(r, rs, &received_digest, ISCSI_HEADER_DIGEST_SIZE); 398 399 /* Temporary attach AHS to BHS to calculate header 
digest. */
	request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf;
	/*
	 * NOTE(review): only ISCSI_BHS_SIZE bytes are digested even though
	 * the AHS is linked in above -- confirm this is intended.
	 */
	valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf, ISCSI_BHS_SIZE);
	request->ip_bhs_mbuf->m_next = NULL;
	if (received_digest != valid_digest) {
		ICL_WARN("header digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}

/*
 * Return the number of bytes that should be waiting in the receive socket
 * before icl_pdu_receive_data_segment() gets called.
 *
 * The result is the not-yet-received remainder of the data segment, either
 * capped at partial_receive_len or rounded up to include the padding.
 */
static size_t
icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
{
	size_t len;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	/*
	 * Account for the parts of data segment already read from
	 * the socket buffer.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	/*
	 * Don't always wait for the full data segment to be delivered
	 * to the socket; this might badly affect performance due to
	 * TCP window scaling.
	 *
	 * NOTE(review): partial_receive_len is a signed int tunable that is
	 * converted to size_t for this comparison; a negative setting would
	 * compare as a huge value -- confirm whether it needs clamping.
	 */
	if (len > partial_receive_len) {
#if 0
		ICL_DEBUG("need %zd bytes of data, limiting to %zd",
		    len, partial_receive_len));
#endif
		len = partial_receive_len;

		return (len);
	}

	/*
	 * Account for padding.  Note that due to the way code is written,
	 * the icl_pdu_receive_data_segment() must always receive padding
	 * along with the last part of data segment, because it would be
	 * impossible to tell whether we've already received the full data
	 * segment including padding, or without it.
	 */
	if ((len % 4) != 0)
		len += 4 - (len % 4);

#if 0
	ICL_DEBUG("need %zd bytes of data", len));
#endif

	return (len);
}

/*
 * Move as much of the data segment as is available from the received chain
 * *r (holding *rs bytes) onto request->ip_data_mbuf.
 *
 * *more_neededp is set to true when the segment is still incomplete; in
 * that case isc->receive_len is set to the amount to wait for next,
 * otherwise it is left at 0.  The padding is taken off the chain together
 * with the last part of the segment but is not counted in ip_data_len.
 * As written, this always returns 0.
 */
static int
icl_pdu_receive_data_segment(struct icl_pdu *request, struct mbuf **r,
    size_t *rs, bool *more_neededp)
{
	struct icl_soft_conn *isc;
	size_t len, padding = 0;
	struct mbuf *m;

	isc = (struct icl_soft_conn *)request->ip_conn;

	*more_neededp = false;
	isc->receive_len = 0;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	/* Padding is derived from the full segment length. */
	if ((len % 4) != 0)
		padding = 4 - (len % 4);

	/*
	 * Account for already received parts of data segment.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	if (len + padding > *rs) {
		/*
		 * Not enough data in the socket buffer.  Receive as much
		 * as we can.  Don't receive padding, since, obviously, it's
		 * not the end of data segment yet.
		 */
#if 0
		ICL_DEBUG("limited from %zd to %zd",
		    len + padding, *rs - padding));
#endif
		/*
		 * NOTE(review): this subtraction assumes *rs >= padding;
		 * TODO confirm that callers guarantee it.
		 */
		len = *rs - padding;
		*more_neededp = true;
		padding = 0;
	}

	/*
	 * Must not try to receive padding without at least one byte
	 * of actual data segment.
	 */
	if (len > 0) {
		/* The head of the chain becomes ours; *r keeps the rest. */
		m = *r;
		*r = m_split(m, len + padding, M_WAITOK);
		*rs -= len + padding;

		if (request->ip_data_mbuf == NULL)
			request->ip_data_mbuf = m;
		else
			m_cat(request->ip_data_mbuf, m);

		request->ip_data_len += len;
	} else
		ICL_DEBUG("len 0");

	if (*more_neededp)
		isc->receive_len = icl_pdu_data_segment_receive_len(request);

	return (0);
}

/*
 * Pull the data digest off the received chain and compare it with the
 * CRC32C of the padded data segment.  Returns 0 when the digest matches,
 * or when data digests are disabled or the PDU carries no data; returns -1
 * on mismatch.
 */
static int
icl_pdu_check_data_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs)
{
	uint32_t received_digest, valid_digest;

	if (request->ip_conn->ic_data_crc32c == false)
		return (0);

	if (request->ip_data_len == 0)
		return (0);

	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
	icl_soft_receive_buf(r, rs, &received_digest, ISCSI_DATA_DIGEST_SIZE);

	/*
	 * Note that ip_data_mbuf also contains padding; since digest
	 * calculation is supposed to include that, we iterate over
	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
	 */
	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf,
	    roundup2(request->ip_data_len, 4));
	if (received_digest != valid_digest) {
		ICL_WARN("data digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}

/*
 * Somewhat contrary to the name, this attempts to receive only one
 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
562 */ 563 static struct icl_pdu * 564 icl_conn_receive_pdu(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs) 565 { 566 struct icl_conn *ic = &isc->ic; 567 struct icl_pdu *request; 568 size_t len; 569 int error = 0; 570 bool more_needed; 571 572 if (isc->receive_state == ICL_CONN_STATE_BHS) { 573 KASSERT(isc->receive_pdu == NULL, 574 ("isc->receive_pdu != NULL")); 575 request = icl_soft_conn_new_pdu(ic, M_NOWAIT); 576 if (request == NULL) { 577 ICL_DEBUG("failed to allocate PDU; " 578 "dropping connection"); 579 icl_conn_fail(ic); 580 return (NULL); 581 } 582 isc->receive_pdu = request; 583 } else { 584 KASSERT(isc->receive_pdu != NULL, 585 ("isc->receive_pdu == NULL")); 586 request = isc->receive_pdu; 587 } 588 589 switch (isc->receive_state) { 590 case ICL_CONN_STATE_BHS: 591 //ICL_DEBUG("receiving BHS"); 592 icl_soft_receive_buf(r, rs, request->ip_bhs, 593 sizeof(struct iscsi_bhs)); 594 595 /* 596 * We don't enforce any limit for AHS length; 597 * its length is stored in 8 bit field. 
598 */ 599 600 len = icl_pdu_data_segment_length(request); 601 if (len > ic->ic_max_recv_data_segment_length) { 602 ICL_WARN("received data segment " 603 "length %zd is larger than negotiated; " 604 "dropping connection", len); 605 error = EINVAL; 606 break; 607 } 608 609 isc->receive_state = ICL_CONN_STATE_AHS; 610 isc->receive_len = icl_pdu_ahs_length(request); 611 break; 612 613 case ICL_CONN_STATE_AHS: 614 //ICL_DEBUG("receiving AHS"); 615 icl_pdu_receive_ahs(request, r, rs); 616 isc->receive_state = ICL_CONN_STATE_HEADER_DIGEST; 617 if (ic->ic_header_crc32c == false) 618 isc->receive_len = 0; 619 else 620 isc->receive_len = ISCSI_HEADER_DIGEST_SIZE; 621 break; 622 623 case ICL_CONN_STATE_HEADER_DIGEST: 624 //ICL_DEBUG("receiving header digest"); 625 error = icl_pdu_check_header_digest(request, r, rs); 626 if (error != 0) { 627 ICL_DEBUG("header digest failed; " 628 "dropping connection"); 629 break; 630 } 631 632 isc->receive_state = ICL_CONN_STATE_DATA; 633 isc->receive_len = icl_pdu_data_segment_receive_len(request); 634 break; 635 636 case ICL_CONN_STATE_DATA: 637 //ICL_DEBUG("receiving data segment"); 638 error = icl_pdu_receive_data_segment(request, r, rs, 639 &more_needed); 640 if (error != 0) { 641 ICL_DEBUG("failed to receive data segment;" 642 "dropping connection"); 643 break; 644 } 645 646 if (more_needed) 647 break; 648 649 isc->receive_state = ICL_CONN_STATE_DATA_DIGEST; 650 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) 651 isc->receive_len = 0; 652 else 653 isc->receive_len = ISCSI_DATA_DIGEST_SIZE; 654 break; 655 656 case ICL_CONN_STATE_DATA_DIGEST: 657 //ICL_DEBUG("receiving data digest"); 658 error = icl_pdu_check_data_digest(request, r, rs); 659 if (error != 0) { 660 ICL_DEBUG("data digest failed; " 661 "dropping connection"); 662 break; 663 } 664 665 /* 666 * We've received complete PDU; reset the receive state machine 667 * and return the PDU. 
668 */ 669 isc->receive_state = ICL_CONN_STATE_BHS; 670 isc->receive_len = sizeof(struct iscsi_bhs); 671 isc->receive_pdu = NULL; 672 return (request); 673 674 default: 675 panic("invalid receive_state %d\n", isc->receive_state); 676 } 677 678 if (error != 0) { 679 /* 680 * Don't free the PDU; it's pointed to by isc->receive_pdu 681 * and will get freed in icl_soft_conn_close(). 682 */ 683 icl_conn_fail(ic); 684 } 685 686 return (NULL); 687 } 688 689 static void 690 icl_conn_receive_pdus(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs) 691 { 692 struct icl_conn *ic = &isc->ic; 693 struct icl_pdu *response; 694 695 for (;;) { 696 if (ic->ic_disconnecting) 697 return; 698 699 /* 700 * Loop until we have a complete PDU or there is not enough 701 * data in the socket buffer. 702 */ 703 if (*rs < isc->receive_len) { 704 #if 0 705 ICL_DEBUG("not enough data; have %zd, need %zd", 706 *rs, isc->receive_len); 707 #endif 708 return; 709 } 710 711 response = icl_conn_receive_pdu(isc, r, rs); 712 if (response == NULL) 713 continue; 714 715 if (response->ip_ahs_len > 0) { 716 ICL_WARN("received PDU with unsupported " 717 "AHS; opcode 0x%x; dropping connection", 718 response->ip_bhs->bhs_opcode); 719 icl_soft_conn_pdu_free(ic, response); 720 icl_conn_fail(ic); 721 return; 722 } 723 724 (ic->ic_receive)(response); 725 } 726 } 727 728 static void 729 icl_receive_thread(void *arg) 730 { 731 struct icl_soft_conn *isc = arg; 732 struct icl_conn *ic = &isc->ic; 733 size_t available, read = 0; 734 struct socket *so; 735 struct mbuf *m, *r = NULL; 736 struct uio uio; 737 int error, flags; 738 739 so = ic->ic_socket; 740 741 for (;;) { 742 SOCKBUF_LOCK(&so->so_rcv); 743 if (ic->ic_disconnecting) { 744 SOCKBUF_UNLOCK(&so->so_rcv); 745 break; 746 } 747 748 /* 749 * Set the low watermark, to be checked by 750 * soreadable() in icl_soupcall_receive() 751 * to avoid unnecessary wakeups until there 752 * is enough data received to read the PDU. 
*/
		available = sbavail(&so->so_rcv);
		if (read + available < isc->receive_len) {
			so->so_rcv.sb_lowat = isc->receive_len - read;
			cv_wait(&isc->receive_cv, SOCKBUF_MTX(&so->so_rcv));
			/* Back to "no wakeups wanted" while we process. */
			so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
			available = sbavail(&so->so_rcv);
		}
		SOCKBUF_UNLOCK(&so->so_rcv);

		if (available == 0) {
			if (so->so_error != 0) {
				ICL_DEBUG("connection error %d; "
				    "dropping connection", so->so_error);
				icl_conn_fail(ic);
				break;
			}
			continue;
		}

		/* Take everything that is buffered as an mbuf chain. */
		memset(&uio, 0, sizeof(uio));
		uio.uio_resid = available;
		flags = MSG_DONTWAIT;
		error = soreceive(so, NULL, &uio, &m, NULL, &flags);
		/*
		 * NOTE(review): unlike the so_error case above, the two exits
		 * below leave the loop without calling icl_conn_fail() --
		 * confirm that this is intended.
		 */
		if (error != 0) {
			ICL_DEBUG("soreceive error %d", error);
			break;
		}
		if (uio.uio_resid != 0) {
			m_freem(m);
			ICL_DEBUG("short read");
			break;
		}
		if (r)
			m_cat(r, m);
		else
			r = m;
		read += available;

		icl_conn_receive_pdus(isc, &r, &read);
	}

	/* Drop whatever was buffered but not consumed. */
	if (r)
		m_freem(r);

	ICL_CONN_LOCK(ic);
	isc->receive_running = false;
	cv_signal(&isc->send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}

/*
 * Receive socket upcall; arg is the struct icl_soft_conn.  Wakes up the
 * receive thread once soreadable() reports the socket readable.  Always
 * returns SU_OK.
 */
static int
icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
{
	struct icl_soft_conn *isc;

	if (!soreadable(so))
		return (SU_OK);

	isc = arg;
	cv_signal(&isc->receive_cv);
	return (SU_OK);
}

/*
 * Turn a PDU into a single mbuf chain ready for sosend(): store the data
 * segment length in the BHS, append the header digest, padding and data
 * digest where applicable, link the data chain behind the BHS and set the
 * packet header length.
 *
 * Returns 0 on success, 1 when one of the m_append() calls fails.
 */
static int
icl_pdu_finalize(struct icl_pdu *request)
{
	size_t padding, pdu_len;
	uint32_t digest, zero = 0;
	int ok;
	struct icl_conn *ic;

	ic = request->ip_conn;

	icl_pdu_set_data_segment_length(request, request->ip_data_len);

	pdu_len = icl_pdu_size(request);

	if (ic->ic_header_crc32c) {
		digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf,
		    ISCSI_BHS_SIZE);
		ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
		    (void *)&digest);
		if (ok != 1) {
			ICL_WARN("failed to append header digest");
			return (1);
		}
	}

	if (request->ip_data_len != 0) {
		padding = icl_pdu_padding(request);
		if (padding > 0) {
			/* Pad bytes are copied from the zeroed local. */
			ok = m_append(request->ip_data_mbuf, padding,
			    (void *)&zero);
			if (ok != 1) {
				ICL_WARN("failed to append padding");
				return (1);
			}
		}

		if (ic->ic_data_crc32c) {
			/* The digest covers the data and its padding. */
			digest = icl_mbuf_to_crc32c(request->ip_data_mbuf,
			    roundup2(request->ip_data_len, 4));

			ok = m_append(request->ip_data_mbuf, sizeof(digest),
			    (void *)&digest);
			if (ok != 1) {
				ICL_WARN("failed to append data digest");
				return (1);
			}
		}

		/* From here on the BHS chain carries the data as well. */
		m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
		request->ip_data_mbuf = NULL;
	}

	request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;

	return (0);
}

/*
 * Send as many PDUs from the caller's local queue as fit into the free
 * space of the socket send buffer.  Must be called without the connection
 * lock held.
 *
 * When a PDU does not fit it stays on the queue, the send buffer's low
 * watermark is set so that the socket upcall fires once there is room, and
 * the function returns.  With the coalesce sysctl enabled, following PDUs
 * that also fit are appended to the same mbuf chain and go out in a single
 * sosend() call.  On a finalize or send error the affected PDUs are
 * completed with an error and the connection is failed.
 */
static void
icl_conn_send_pdus(struct icl_soft_conn *isc, struct icl_pdu_stailq *queue)
{
	struct icl_conn *ic = &isc->ic;
	struct icl_pdu *request, *request2;
	struct mbuf *m;
	struct socket *so;
	long available, size, size2;
#ifdef DEBUG_COALESCED
	int coalesced;
#endif
	int error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	so = ic->ic_socket;

	SOCKBUF_LOCK(&so->so_snd);
	/*
	 * Check how much space do we have for transmit.  We can't just
	 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
	 * as it always frees the mbuf chain passed to it, even in case
	 * of error.
	 */
	available = sbspace(&so->so_snd);
	isc->check_send_space = false;

	/*
	 * Notify the socket upcall that we don't need wakeups
	 * for the time being.
	 */
	so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
	SOCKBUF_UNLOCK(&so->so_snd);

	while (!STAILQ_EMPTY(queue)) {
		request = STAILQ_FIRST(queue);
		size = icl_pdu_size(request);
		if (available < size) {
			/*
			 * Set the low watermark, to be checked by
			 * sowriteable() in icl_soupcall_send()
			 * to avoid unnecessary wakeups until there
			 * is enough space for the PDU to fit.
			 */
			SOCKBUF_LOCK(&so->so_snd);
			/* Re-sample; space may have been freed meanwhile. */
			available = sbspace(&so->so_snd);
			if (available < size) {
#if 1
				ICL_DEBUG("no space to send; "
				    "have %ld, need %ld",
				    available, size);
#endif
				so->so_snd.sb_lowat = max(size,
				    so->so_snd.sb_hiwat / 8);
				SOCKBUF_UNLOCK(&so->so_snd);
				return;
			}
			SOCKBUF_UNLOCK(&so->so_snd);
		}
		STAILQ_REMOVE_HEAD(queue, ip_next);
		error = icl_pdu_finalize(request);
		if (error != 0) {
			ICL_DEBUG("failed to finalize PDU; "
			    "dropping connection");
			icl_soft_pdu_done(request, EIO);
			icl_conn_fail(ic);
			return;
		}
		if (coalesce) {
			m = request->ip_bhs_mbuf;
			for (
#ifdef DEBUG_COALESCED
			    coalesced = 1
#endif
			    ; ;
#ifdef DEBUG_COALESCED
			    coalesced++
#endif
			) {
				request2 = STAILQ_FIRST(queue);
				if (request2 == NULL)
					break;
				size2 = icl_pdu_size(request2);
				if (available < size + size2)
					break;
				STAILQ_REMOVE_HEAD(queue, ip_next);
				error = icl_pdu_finalize(request2);
				if (error != 0) {
					ICL_DEBUG("failed to finalize PDU; "
					    "dropping connection");
					icl_soft_pdu_done(request, EIO);
					icl_soft_pdu_done(request2, EIO);
					icl_conn_fail(ic);
					return;
				}
				/* Advance to the tail, then append there. */
				while (m->m_next)
					m = m->m_next;
				m_cat(m, request2->ip_bhs_mbuf);
				request2->ip_bhs_mbuf = NULL;
				request->ip_bhs_mbuf->m_pkthdr.len += size2;
				size += size2;
				/*
				 * The mbufs of request2 now belong to the
				 * chain of request.
				 */
				icl_soft_pdu_done(request2, 0);
			}
#ifdef DEBUG_COALESCED
			if (coalesced > 1) {
				ICL_DEBUG("coalesced %d PDUs into %ld bytes",
				    coalesced, size);
			}
#endif
		}
		available -= size;
		error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
		    NULL, MSG_DONTWAIT, curthread);
		request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
		if (error != 0) {
			ICL_DEBUG("failed to send PDU, error %d; "
			    "dropping connection", error);
			icl_soft_pdu_done(request, error);
			icl_conn_fail(ic);
			return;
		}
		icl_soft_pdu_done(request, 0);
	}
}

/*
 * Send kthread body; arg is the struct icl_soft_conn.  Moves PDUs from
 * isc->to_send to a thread-local queue under the connection lock, sends
 * them with the lock dropped and sleeps on send_cv when there is nothing
 * more it can do.
 */
static void
icl_send_thread(void *arg)
{
	struct icl_soft_conn *isc;
	struct icl_conn *ic;
	struct icl_pdu_stailq queue;

	isc = arg;
	ic = &isc->ic;

	STAILQ_INIT(&queue);

	ICL_CONN_LOCK(ic);
	for (;;) {
		for (;;) {
			/*
			 * Populate the local queue from the main one.
			 * This way the icl_conn_send_pdus() can go through
			 * all the queued PDUs without holding any locks.
			 */
			if (STAILQ_EMPTY(&queue) || isc->check_send_space)
				STAILQ_CONCAT(&queue, &isc->to_send);

			ICL_CONN_UNLOCK(ic);
			icl_conn_send_pdus(isc, &queue);
			ICL_CONN_LOCK(ic);

			/*
			 * The icl_soupcall_send() was called since the last
			 * call to sbspace(); go around;
			 */
			if (isc->check_send_space)
				continue;

			/*
			 * Local queue is empty, but we still have PDUs
			 * in the main one; go around.
			 */
			if (STAILQ_EMPTY(&queue) &&
			    !STAILQ_EMPTY(&isc->to_send))
				continue;

			/*
			 * There might be some stuff in the local queue,
			 * which didn't get sent due to not having enough send
			 * space.  Wait for socket upcall.
			 */
			break;
		}

		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		cv_wait(&isc->send_cv, ic->ic_lock);
	}

	/*
	 * We're exiting; move PDUs back to the main queue, so they can
	 * get freed properly.  At this point ordering doesn't matter.
1061 */ 1062 STAILQ_CONCAT(&isc->to_send, &queue); 1063 1064 isc->send_running = false; 1065 cv_signal(&isc->send_cv); 1066 ICL_CONN_UNLOCK(ic); 1067 kthread_exit(); 1068 } 1069 1070 static int 1071 icl_soupcall_send(struct socket *so, void *arg, int waitflag) 1072 { 1073 struct icl_soft_conn *isc; 1074 struct icl_conn *ic; 1075 1076 if (!sowriteable(so)) 1077 return (SU_OK); 1078 1079 isc = arg; 1080 ic = &isc->ic; 1081 1082 ICL_CONN_LOCK(ic); 1083 isc->check_send_space = true; 1084 ICL_CONN_UNLOCK(ic); 1085 1086 cv_signal(&isc->send_cv); 1087 1088 return (SU_OK); 1089 } 1090 1091 static void 1092 icl_soft_free_mext_pg(struct mbuf *m) 1093 { 1094 struct icl_soft_pdu *isp; 1095 1096 M_ASSERTEXTPG(m); 1097 1098 /* 1099 * Nothing to do for the pages; they are owned by the PDU / 1100 * I/O request. 1101 */ 1102 1103 /* Drop reference on the PDU. */ 1104 isp = m->m_ext.ext_arg1; 1105 if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) 1106 icl_soft_pdu_call_cb(&isp->ip); 1107 } 1108 1109 static int 1110 icl_soft_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *request, 1111 struct bio *bp, size_t offset, size_t len, int flags) 1112 { 1113 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; 1114 struct mbuf *m, *m_tail; 1115 vm_offset_t vaddr; 1116 size_t mtodo, page_offset, todo; 1117 int i; 1118 1119 KASSERT(len > 0, ("len == 0")); 1120 1121 m_tail = request->ip_data_mbuf; 1122 if (m_tail != NULL) 1123 for (; m_tail->m_next != NULL; m_tail = m_tail->m_next) 1124 ; 1125 1126 MPASS(bp->bio_flags & BIO_UNMAPPED); 1127 if (offset < PAGE_SIZE - bp->bio_ma_offset) { 1128 page_offset = bp->bio_ma_offset + offset; 1129 i = 0; 1130 } else { 1131 offset -= PAGE_SIZE - bp->bio_ma_offset; 1132 for (i = 1; offset >= PAGE_SIZE; i++) 1133 offset -= PAGE_SIZE; 1134 page_offset = offset; 1135 } 1136 1137 if (flags & ICL_NOCOPY) { 1138 m = NULL; 1139 while (len > 0) { 1140 if (m == NULL) { 1141 m = mb_alloc_ext_pgs(flags & ~ICL_NOCOPY, 1142 icl_soft_free_mext_pg); 1143 if 
(__predict_false(m == NULL)) 1144 return (ENOMEM); 1145 atomic_add_int(&isp->ref_cnt, 1); 1146 m->m_ext.ext_arg1 = isp; 1147 m->m_epg_1st_off = page_offset; 1148 } 1149 1150 todo = MIN(len, PAGE_SIZE - page_offset); 1151 1152 m->m_epg_pa[m->m_epg_npgs] = 1153 VM_PAGE_TO_PHYS(bp->bio_ma[i]); 1154 m->m_epg_npgs++; 1155 m->m_epg_last_len = todo; 1156 m->m_len += todo; 1157 m->m_ext.ext_size += PAGE_SIZE; 1158 MBUF_EXT_PGS_ASSERT_SANITY(m); 1159 1160 if (m->m_epg_npgs == MBUF_PEXT_MAX_PGS) { 1161 if (m_tail != NULL) 1162 m_tail->m_next = m; 1163 else 1164 request->ip_data_mbuf = m; 1165 m_tail = m; 1166 request->ip_data_len += m->m_len; 1167 m = NULL; 1168 } 1169 1170 page_offset = 0; 1171 len -= todo; 1172 i++; 1173 } 1174 1175 if (m != NULL) { 1176 if (m_tail != NULL) 1177 m_tail->m_next = m; 1178 else 1179 request->ip_data_mbuf = m; 1180 request->ip_data_len += m->m_len; 1181 } 1182 return (0); 1183 } 1184 1185 m = m_getm2(NULL, len, flags, MT_DATA, 0); 1186 if (__predict_false(m == NULL)) 1187 return (ENOMEM); 1188 1189 if (request->ip_data_mbuf == NULL) { 1190 request->ip_data_mbuf = m; 1191 request->ip_data_len = len; 1192 } else { 1193 m_tail->m_next = m; 1194 request->ip_data_len += len; 1195 } 1196 1197 while (len > 0) { 1198 todo = MIN(len, PAGE_SIZE - page_offset); 1199 vaddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(bp->bio_ma[i])); 1200 1201 do { 1202 mtodo = min(todo, M_SIZE(m) - m->m_len); 1203 memcpy(mtod(m, char *) + m->m_len, (char *)vaddr + 1204 page_offset, mtodo); 1205 m->m_len += mtodo; 1206 if (m->m_len == M_SIZE(m)) 1207 m = m->m_next; 1208 page_offset += mtodo; 1209 todo -= mtodo; 1210 } while (todo > 0); 1211 1212 page_offset = 0; 1213 len -= todo; 1214 i++; 1215 } 1216 1217 return (0); 1218 } 1219 1220 static int 1221 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, 1222 const void *addr, size_t len, int flags) 1223 { 1224 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; 1225 struct mbuf *mb, *newmb; 1226 size_t 
copylen, off = 0; 1227 1228 KASSERT(len > 0, ("len == 0")); 1229 1230 if (flags & ICL_NOCOPY) { 1231 newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA); 1232 if (newmb == NULL) { 1233 ICL_WARN("failed to allocate mbuf"); 1234 return (ENOMEM); 1235 } 1236 1237 newmb->m_flags |= M_RDONLY; 1238 m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt, 1239 icl_soft_mbuf_done, isp, NULL); 1240 newmb->m_len = len; 1241 } else { 1242 newmb = m_getm2(NULL, len, flags, MT_DATA, 0); 1243 if (newmb == NULL) { 1244 ICL_WARN("failed to allocate mbuf for %zd bytes", len); 1245 return (ENOMEM); 1246 } 1247 1248 for (mb = newmb; mb != NULL; mb = mb->m_next) { 1249 copylen = min(M_TRAILINGSPACE(mb), len - off); 1250 memcpy(mtod(mb, char *), (const char *)addr + off, copylen); 1251 mb->m_len = copylen; 1252 off += copylen; 1253 } 1254 KASSERT(off == len, ("%s: off != len", __func__)); 1255 } 1256 1257 if (request->ip_data_mbuf == NULL) { 1258 request->ip_data_mbuf = newmb; 1259 request->ip_data_len = len; 1260 } else { 1261 m_cat(request->ip_data_mbuf, newmb); 1262 request->ip_data_len += len; 1263 } 1264 1265 return (0); 1266 } 1267 1268 void 1269 icl_soft_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip, 1270 size_t pdu_off, struct bio *bp, size_t bio_off, size_t len) 1271 { 1272 vm_offset_t vaddr; 1273 size_t page_offset, todo; 1274 int i __unused; 1275 1276 MPASS(bp->bio_flags & BIO_UNMAPPED); 1277 if (bio_off < PAGE_SIZE - bp->bio_ma_offset) { 1278 page_offset = bp->bio_ma_offset + bio_off; 1279 i = 0; 1280 } else { 1281 bio_off -= PAGE_SIZE - bp->bio_ma_offset; 1282 for (i = 1; bio_off >= PAGE_SIZE; i++) 1283 bio_off -= PAGE_SIZE; 1284 page_offset = bio_off; 1285 } 1286 1287 while (len > 0) { 1288 todo = MIN(len, PAGE_SIZE - page_offset); 1289 1290 vaddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(bp->bio_ma[i])); 1291 m_copydata(ip->ip_data_mbuf, pdu_off, todo, (char *)vaddr + 1292 page_offset); 1293 1294 page_offset = 0; 1295 pdu_off += todo; 1296 len -= todo; 1297 i++; 1298 } 
1299 } 1300 1301 void 1302 icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, 1303 size_t off, void *addr, size_t len) 1304 { 1305 1306 m_copydata(ip->ip_data_mbuf, off, len, addr); 1307 } 1308 1309 static void 1310 icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) 1311 { 1312 1313 icl_soft_conn_pdu_queue_cb(ic, ip, NULL); 1314 } 1315 1316 static void 1317 icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip, 1318 icl_pdu_cb cb) 1319 { 1320 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1321 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; 1322 1323 ICL_CONN_LOCK_ASSERT(ic); 1324 isp->ref_cnt++; 1325 isp->cb = cb; 1326 1327 if (ic->ic_disconnecting || ic->ic_socket == NULL) { 1328 ICL_DEBUG("icl_pdu_queue on closed connection"); 1329 icl_soft_pdu_done(ip, ENOTCONN); 1330 return; 1331 } 1332 1333 if (!STAILQ_EMPTY(&isc->to_send)) { 1334 STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next); 1335 /* 1336 * If the queue is not empty, someone else had already 1337 * signaled the send thread; no need to do that again, 1338 * just return. 
1339 */ 1340 return; 1341 } 1342 1343 STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next); 1344 cv_signal(&isc->send_cv); 1345 } 1346 1347 static struct icl_conn * 1348 icl_soft_new_conn(const char *name, struct mtx *lock) 1349 { 1350 struct icl_soft_conn *isc; 1351 struct icl_conn *ic; 1352 1353 refcount_acquire(&icl_ncons); 1354 1355 isc = (struct icl_soft_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, 1356 M_WAITOK | M_ZERO); 1357 1358 STAILQ_INIT(&isc->to_send); 1359 cv_init(&isc->send_cv, "icl_tx"); 1360 cv_init(&isc->receive_cv, "icl_rx"); 1361 1362 ic = &isc->ic; 1363 ic->ic_lock = lock; 1364 #ifdef DIAGNOSTIC 1365 refcount_init(&ic->ic_outstanding_pdus, 0); 1366 #endif 1367 ic->ic_name = name; 1368 ic->ic_offload = "None"; 1369 ic->ic_unmapped = PMAP_HAS_DMAP; 1370 1371 return (ic); 1372 } 1373 1374 void 1375 icl_soft_conn_free(struct icl_conn *ic) 1376 { 1377 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1378 1379 #ifdef DIAGNOSTIC 1380 KASSERT(ic->ic_outstanding_pdus == 0, 1381 ("destroying session with %d outstanding PDUs", 1382 ic->ic_outstanding_pdus)); 1383 #endif 1384 cv_destroy(&isc->send_cv); 1385 cv_destroy(&isc->receive_cv); 1386 kobj_delete((struct kobj *)isc, M_ICL_SOFT); 1387 refcount_release(&icl_ncons); 1388 } 1389 1390 static int 1391 icl_conn_start(struct icl_conn *ic) 1392 { 1393 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1394 size_t minspace; 1395 struct sockopt opt; 1396 int error, one = 1; 1397 1398 ICL_CONN_LOCK(ic); 1399 1400 /* 1401 * XXX: Ugly hack. 1402 */ 1403 if (ic->ic_socket == NULL) { 1404 ICL_CONN_UNLOCK(ic); 1405 return (EINVAL); 1406 } 1407 1408 isc->receive_state = ICL_CONN_STATE_BHS; 1409 isc->receive_len = sizeof(struct iscsi_bhs); 1410 ic->ic_disconnecting = false; 1411 1412 ICL_CONN_UNLOCK(ic); 1413 1414 /* 1415 * For sendspace, this is required because the current code cannot 1416 * send a PDU in pieces; thus, the minimum buffer size is equal 1417 * to the maximum PDU size. 
"+4" is to account for possible padding. 1418 */ 1419 minspace = sizeof(struct iscsi_bhs) + 1420 ic->ic_max_send_data_segment_length + 1421 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1422 if (sendspace < minspace) { 1423 ICL_WARN("kern.icl.sendspace too low; must be at least %zd", 1424 minspace); 1425 sendspace = minspace; 1426 } 1427 minspace = sizeof(struct iscsi_bhs) + 1428 ic->ic_max_recv_data_segment_length + 1429 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1430 if (recvspace < minspace) { 1431 ICL_WARN("kern.icl.recvspace too low; must be at least %zd", 1432 minspace); 1433 recvspace = minspace; 1434 } 1435 1436 error = soreserve(ic->ic_socket, sendspace, recvspace); 1437 if (error != 0) { 1438 ICL_WARN("soreserve failed with error %d", error); 1439 icl_soft_conn_close(ic); 1440 return (error); 1441 } 1442 ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; 1443 ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; 1444 1445 /* 1446 * Disable Nagle. 1447 */ 1448 bzero(&opt, sizeof(opt)); 1449 opt.sopt_dir = SOPT_SET; 1450 opt.sopt_level = IPPROTO_TCP; 1451 opt.sopt_name = TCP_NODELAY; 1452 opt.sopt_val = &one; 1453 opt.sopt_valsize = sizeof(one); 1454 error = sosetopt(ic->ic_socket, &opt); 1455 if (error != 0) { 1456 ICL_WARN("disabling TCP_NODELAY failed with error %d", error); 1457 icl_soft_conn_close(ic); 1458 return (error); 1459 } 1460 1461 /* 1462 * Register socket upcall, to get notified about incoming PDUs 1463 * and free space to send outgoing ones. 1464 */ 1465 SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1466 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, isc); 1467 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1468 SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1469 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, isc); 1470 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1471 1472 /* 1473 * Start threads. 
1474 */ 1475 ICL_CONN_LOCK(ic); 1476 isc->send_running = isc->receive_running = true; 1477 ICL_CONN_UNLOCK(ic); 1478 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", 1479 ic->ic_name); 1480 if (error != 0) { 1481 ICL_WARN("kthread_add(9) failed with error %d", error); 1482 ICL_CONN_LOCK(ic); 1483 isc->send_running = isc->receive_running = false; 1484 cv_signal(&isc->send_cv); 1485 ICL_CONN_UNLOCK(ic); 1486 icl_soft_conn_close(ic); 1487 return (error); 1488 } 1489 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", 1490 ic->ic_name); 1491 if (error != 0) { 1492 ICL_WARN("kthread_add(9) failed with error %d", error); 1493 ICL_CONN_LOCK(ic); 1494 isc->receive_running = false; 1495 cv_signal(&isc->send_cv); 1496 ICL_CONN_UNLOCK(ic); 1497 icl_soft_conn_close(ic); 1498 return (error); 1499 } 1500 1501 return (0); 1502 } 1503 1504 int 1505 icl_soft_conn_handoff(struct icl_conn *ic, int fd) 1506 { 1507 struct file *fp; 1508 struct socket *so; 1509 cap_rights_t rights; 1510 int error; 1511 1512 ICL_CONN_LOCK_ASSERT_NOT(ic); 1513 1514 #ifdef ICL_KERNEL_PROXY 1515 /* 1516 * We're transitioning to Full Feature phase, and we don't 1517 * really care. 1518 */ 1519 if (fd == 0) { 1520 ICL_CONN_LOCK(ic); 1521 if (ic->ic_socket == NULL) { 1522 ICL_CONN_UNLOCK(ic); 1523 ICL_WARN("proxy handoff without connect"); 1524 return (EINVAL); 1525 } 1526 ICL_CONN_UNLOCK(ic); 1527 return (0); 1528 } 1529 #endif 1530 1531 /* 1532 * Steal the socket from userland. 
1533 */ 1534 error = fget(curthread, fd, 1535 cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp); 1536 if (error != 0) 1537 return (error); 1538 if (fp->f_type != DTYPE_SOCKET) { 1539 fdrop(fp, curthread); 1540 return (EINVAL); 1541 } 1542 so = fp->f_data; 1543 if (so->so_type != SOCK_STREAM) { 1544 fdrop(fp, curthread); 1545 return (EINVAL); 1546 } 1547 1548 ICL_CONN_LOCK(ic); 1549 1550 if (ic->ic_socket != NULL) { 1551 ICL_CONN_UNLOCK(ic); 1552 fdrop(fp, curthread); 1553 return (EBUSY); 1554 } 1555 1556 ic->ic_socket = fp->f_data; 1557 fp->f_ops = &badfileops; 1558 fp->f_data = NULL; 1559 fdrop(fp, curthread); 1560 ICL_CONN_UNLOCK(ic); 1561 1562 error = icl_conn_start(ic); 1563 1564 return (error); 1565 } 1566 1567 void 1568 icl_soft_conn_close(struct icl_conn *ic) 1569 { 1570 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; 1571 struct icl_pdu *pdu; 1572 struct socket *so; 1573 1574 /* 1575 * Wake up the threads, so they can properly terminate. 1576 * Receive thread sleeps on so->so_rcv lock, send on ic->ic_lock. 1577 */ 1578 ICL_CONN_LOCK(ic); 1579 if (!ic->ic_disconnecting) { 1580 so = ic->ic_socket; 1581 if (so) 1582 SOCKBUF_LOCK(&so->so_rcv); 1583 ic->ic_disconnecting = true; 1584 if (so) 1585 SOCKBUF_UNLOCK(&so->so_rcv); 1586 } 1587 while (isc->receive_running || isc->send_running) { 1588 cv_signal(&isc->receive_cv); 1589 cv_signal(&isc->send_cv); 1590 cv_wait(&isc->send_cv, ic->ic_lock); 1591 } 1592 1593 /* Some other thread could close the connection same time. */ 1594 so = ic->ic_socket; 1595 if (so == NULL) { 1596 ICL_CONN_UNLOCK(ic); 1597 return; 1598 } 1599 ic->ic_socket = NULL; 1600 1601 /* 1602 * Deregister socket upcalls. 
1603 */ 1604 ICL_CONN_UNLOCK(ic); 1605 SOCKBUF_LOCK(&so->so_snd); 1606 if (so->so_snd.sb_upcall != NULL) 1607 soupcall_clear(so, SO_SND); 1608 SOCKBUF_UNLOCK(&so->so_snd); 1609 SOCKBUF_LOCK(&so->so_rcv); 1610 if (so->so_rcv.sb_upcall != NULL) 1611 soupcall_clear(so, SO_RCV); 1612 SOCKBUF_UNLOCK(&so->so_rcv); 1613 soclose(so); 1614 ICL_CONN_LOCK(ic); 1615 1616 if (isc->receive_pdu != NULL) { 1617 //ICL_DEBUG("freeing partially received PDU"); 1618 icl_soft_conn_pdu_free(ic, isc->receive_pdu); 1619 isc->receive_pdu = NULL; 1620 } 1621 1622 /* 1623 * Remove any outstanding PDUs from the send queue. 1624 */ 1625 while (!STAILQ_EMPTY(&isc->to_send)) { 1626 pdu = STAILQ_FIRST(&isc->to_send); 1627 STAILQ_REMOVE_HEAD(&isc->to_send, ip_next); 1628 icl_soft_pdu_done(pdu, ENOTCONN); 1629 } 1630 1631 KASSERT(STAILQ_EMPTY(&isc->to_send), 1632 ("destroying session with non-empty send queue")); 1633 ICL_CONN_UNLOCK(ic); 1634 } 1635 1636 int 1637 icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, 1638 struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) 1639 { 1640 1641 return (0); 1642 } 1643 1644 void 1645 icl_soft_conn_task_done(struct icl_conn *ic, void *prv) 1646 { 1647 } 1648 1649 int 1650 icl_soft_conn_transfer_setup(struct icl_conn *ic, struct icl_pdu *ip, 1651 union ctl_io *io, uint32_t *transfer_tag, void **prvp) 1652 { 1653 1654 return (0); 1655 } 1656 1657 void 1658 icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv) 1659 { 1660 } 1661 1662 static int 1663 icl_soft_limits(struct icl_drv_limits *idl, int socket) 1664 { 1665 1666 idl->idl_max_recv_data_segment_length = max_data_segment_length; 1667 idl->idl_max_send_data_segment_length = max_data_segment_length; 1668 idl->idl_max_burst_length = max_burst_length; 1669 idl->idl_first_burst_length = first_burst_length; 1670 1671 return (0); 1672 } 1673 1674 #ifdef ICL_KERNEL_PROXY 1675 int 1676 icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype, 1677 int protocol, struct 
sockaddr *from_sa, struct sockaddr *to_sa) 1678 { 1679 1680 return (icl_soft_proxy_connect(ic, domain, socktype, protocol, 1681 from_sa, to_sa)); 1682 } 1683 1684 int 1685 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so) 1686 { 1687 int error; 1688 1689 ICL_CONN_LOCK_ASSERT_NOT(ic); 1690 1691 if (so->so_type != SOCK_STREAM) 1692 return (EINVAL); 1693 1694 ICL_CONN_LOCK(ic); 1695 if (ic->ic_socket != NULL) { 1696 ICL_CONN_UNLOCK(ic); 1697 return (EBUSY); 1698 } 1699 ic->ic_socket = so; 1700 ICL_CONN_UNLOCK(ic); 1701 1702 error = icl_conn_start(ic); 1703 1704 return (error); 1705 } 1706 #endif /* ICL_KERNEL_PROXY */ 1707 1708 static int 1709 icl_soft_load(void) 1710 { 1711 int error; 1712 1713 icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu", 1714 sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL, 1715 UMA_ALIGN_PTR, 0); 1716 refcount_init(&icl_ncons, 0); 1717 1718 /* 1719 * The reason we call this "none" is that to the user, 1720 * it's known as "offload driver"; "offload driver: soft" 1721 * doesn't make much sense. 1722 */ 1723 error = icl_register("none", false, 0, 1724 icl_soft_limits, icl_soft_new_conn); 1725 KASSERT(error == 0, ("failed to register")); 1726 1727 #if defined(ICL_KERNEL_PROXY) && 0 1728 /* 1729 * Debugging aid for kernel proxy functionality. 
1730 */ 1731 error = icl_register("proxytest", true, 0, 1732 icl_soft_limits, icl_soft_new_conn); 1733 KASSERT(error == 0, ("failed to register")); 1734 #endif 1735 1736 return (error); 1737 } 1738 1739 static int 1740 icl_soft_unload(void) 1741 { 1742 1743 if (icl_ncons != 0) 1744 return (EBUSY); 1745 1746 icl_unregister("none", false); 1747 #if defined(ICL_KERNEL_PROXY) && 0 1748 icl_unregister("proxytest", true); 1749 #endif 1750 1751 uma_zdestroy(icl_soft_pdu_zone); 1752 1753 return (0); 1754 } 1755 1756 static int 1757 icl_soft_modevent(module_t mod, int what, void *arg) 1758 { 1759 1760 switch (what) { 1761 case MOD_LOAD: 1762 return (icl_soft_load()); 1763 case MOD_UNLOAD: 1764 return (icl_soft_unload()); 1765 default: 1766 return (EINVAL); 1767 } 1768 } 1769 1770 moduledata_t icl_soft_data = { 1771 "icl_soft", 1772 icl_soft_modevent, 1773 0 1774 }; 1775 1776 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); 1777 MODULE_DEPEND(icl_soft, icl, 1, 1, 1); 1778 MODULE_VERSION(icl_soft, 1); 1779