1 /*- 2 * Copyright (c) 2012 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Edward Tomasz Napierala under sponsorship 6 * from the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 31 /* 32 * iSCSI Common Layer. It's used by both the initiator and target to send 33 * and receive iSCSI PDUs. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/uio.h>
#include <vm/uma.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <icl_conn_if.h>

/*
 * Tunables controlling PDU transmission and reception; all are settable
 * as kern.icl.* sysctls and as loader tunables (CTLFLAG_RWTUN).
 */
static int coalesce = 1;
SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &coalesce, 0, "Try to coalesce PDUs before sending");
static int partial_receive_len = 128 * 1024;
SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
    &partial_receive_len, 0, "Minimum read size for partially received "
    "data segment");
static int sendspace = 1048576;
SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1048576;
SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");

static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend");
static uma_zone_t icl_pdu_zone;

/* Number of connections currently in existence. */
static volatile u_int	icl_ncons;

/*
 * The connection lock is supplied by the ICL consumer (ic_lock); these
 * wrappers exist so the locking discipline is asserted uniformly below.
 */
#define ICL_CONN_LOCK(X)		mtx_lock(X->ic_lock)
#define ICL_CONN_UNLOCK(X)		mtx_unlock(X->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)		mtx_assert(X->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert(X->ic_lock, MA_NOTOWNED)

STAILQ_HEAD(icl_pdu_stailq, icl_pdu);

/* kobj method implementations making up the "icl_soft" backend class. */
static icl_conn_new_pdu_t	icl_soft_conn_new_pdu;
static icl_conn_pdu_free_t	icl_soft_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t
				icl_soft_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t	icl_soft_conn_pdu_append_data;
static icl_conn_pdu_get_data_t	icl_soft_conn_pdu_get_data;
static icl_conn_pdu_queue_t	icl_soft_conn_pdu_queue;
static icl_conn_handoff_t	icl_soft_conn_handoff;
static icl_conn_free_t		icl_soft_conn_free;
static icl_conn_close_t		icl_soft_conn_close;
static icl_conn_connected_t	icl_soft_conn_connected;

static kobj_method_t icl_soft_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length,
	    icl_soft_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
	KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
	KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
	KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
	KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
	KOBJMETHOD(icl_conn_connected, icl_soft_conn_connected),
	{ 0, 0 }
};

DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn));

static void icl_conn_close(struct icl_conn *ic);

/*
 * Mark the connection's socket as errored out and notify the ICL
 * consumer via its ic_error callback.  Called when the send/receive
 * paths hit an unrecoverable problem.
 */
static void
icl_conn_fail(struct icl_conn *ic)
{
	if (ic->ic_socket == NULL)
		return;

	/*
	 * XXX
	 */
	ic->ic_socket->so_error = EDOOFUS;
	(ic->ic_error)(ic);
}

/*
 * Read exactly 'len' bytes from the connection's socket without
 * blocking (MSG_DONTWAIT); returns the mbuf chain, or NULL on
 * soreceive() error or short read.  Callers are expected to have
 * verified beforehand that enough data is buffered.
 */
static struct mbuf *
icl_conn_receive(struct icl_conn *ic, size_t len)
{
	struct uio uio;
	struct socket *so;
	struct mbuf *m;
	int error, flags;

	so = ic->ic_socket;

	memset(&uio, 0, sizeof(uio));
	uio.uio_resid = len;

	flags = MSG_DONTWAIT;
	error = soreceive(so, NULL, &uio, &m, NULL, &flags);
	if (error != 0) {
		ICL_DEBUG("soreceive error %d", error);
		return (NULL);
	}
	if (uio.uio_resid != 0) {
		m_freem(m);
		ICL_DEBUG("short read");
		return (NULL);
	}

	return (m);
}

/*
 * Allocate a zeroed icl_pdu from the UMA zone; no mbufs are attached
 * yet.  'flags' are the usual M_WAITOK/M_NOWAIT malloc flags.
 */
static struct icl_pdu *
icl_pdu_new_empty(struct icl_conn *ic, int flags)
{
	struct icl_pdu *ip;

#ifdef DIAGNOSTIC
	refcount_acquire(&ic->ic_outstanding_pdus);
#endif
	ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
	if (ip == NULL) {
		ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
#ifdef DIAGNOSTIC
		refcount_release(&ic->ic_outstanding_pdus);
#endif
		return (NULL);
	}

	ip->ip_conn = ic;

	return (ip);
}

/*
 * Release a PDU along with all of its mbuf chains (BHS, AHS, data).
 */
static void
icl_pdu_free(struct icl_pdu *ip)
{
	struct icl_conn *ic;

	ic = ip->ip_conn;

	m_freem(ip->ip_bhs_mbuf);
	m_freem(ip->ip_ahs_mbuf);
	m_freem(ip->ip_data_mbuf);
	uma_zfree(icl_pdu_zone, ip);
#ifdef DIAGNOSTIC
	refcount_release(&ic->ic_outstanding_pdus);
#endif
}

void
icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	icl_pdu_free(ip);
}

/*
 * Allocate icl_pdu with empty BHS to fill up by the caller.
209 */ 210 struct icl_pdu * 211 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) 212 { 213 struct icl_pdu *ip; 214 215 ip = icl_pdu_new_empty(ic, flags); 216 if (ip == NULL) 217 return (NULL); 218 219 ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs), 220 flags, MT_DATA, M_PKTHDR); 221 if (ip->ip_bhs_mbuf == NULL) { 222 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); 223 icl_pdu_free(ip); 224 return (NULL); 225 } 226 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); 227 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); 228 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); 229 230 return (ip); 231 } 232 233 static int 234 icl_pdu_ahs_length(const struct icl_pdu *request) 235 { 236 237 return (request->ip_bhs->bhs_total_ahs_len * 4); 238 } 239 240 static size_t 241 icl_pdu_data_segment_length(const struct icl_pdu *request) 242 { 243 uint32_t len = 0; 244 245 len += request->ip_bhs->bhs_data_segment_len[0]; 246 len <<= 8; 247 len += request->ip_bhs->bhs_data_segment_len[1]; 248 len <<= 8; 249 len += request->ip_bhs->bhs_data_segment_len[2]; 250 251 return (len); 252 } 253 254 size_t 255 icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, 256 const struct icl_pdu *request) 257 { 258 259 return (icl_pdu_data_segment_length(request)); 260 } 261 262 static void 263 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) 264 { 265 266 response->ip_bhs->bhs_data_segment_len[2] = len; 267 response->ip_bhs->bhs_data_segment_len[1] = len >> 8; 268 response->ip_bhs->bhs_data_segment_len[0] = len >> 16; 269 } 270 271 static size_t 272 icl_pdu_padding(const struct icl_pdu *ip) 273 { 274 275 if ((ip->ip_data_len % 4) != 0) 276 return (4 - (ip->ip_data_len % 4)); 277 278 return (0); 279 } 280 281 static size_t 282 icl_pdu_size(const struct icl_pdu *response) 283 { 284 size_t len; 285 286 KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); 287 288 len = sizeof(struct iscsi_bhs) + response->ip_data_len + 289 
icl_pdu_padding(response); 290 if (response->ip_conn->ic_header_crc32c) 291 len += ISCSI_HEADER_DIGEST_SIZE; 292 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) 293 len += ISCSI_DATA_DIGEST_SIZE; 294 295 return (len); 296 } 297 298 static int 299 icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep) 300 { 301 struct mbuf *m; 302 303 m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs)); 304 if (m == NULL) { 305 ICL_DEBUG("failed to receive BHS"); 306 return (-1); 307 } 308 309 request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs)); 310 if (request->ip_bhs_mbuf == NULL) { 311 ICL_WARN("m_pullup failed"); 312 return (-1); 313 } 314 request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *); 315 316 /* 317 * XXX: For architectures with strict alignment requirements 318 * we may need to allocate ip_bhs and copy the data into it. 319 * For some reason, though, not doing this doesn't seem 320 * to cause problems; tested on sparc64. 321 */ 322 323 *availablep -= sizeof(struct iscsi_bhs); 324 return (0); 325 } 326 327 static int 328 icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep) 329 { 330 331 request->ip_ahs_len = icl_pdu_ahs_length(request); 332 if (request->ip_ahs_len == 0) 333 return (0); 334 335 request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn, 336 request->ip_ahs_len); 337 if (request->ip_ahs_mbuf == NULL) { 338 ICL_DEBUG("failed to receive AHS"); 339 return (-1); 340 } 341 342 *availablep -= request->ip_ahs_len; 343 return (0); 344 } 345 346 static uint32_t 347 icl_mbuf_to_crc32c(const struct mbuf *m0) 348 { 349 uint32_t digest = 0xffffffff; 350 const struct mbuf *m; 351 352 for (m = m0; m != NULL; m = m->m_next) 353 digest = calculate_crc32c(digest, 354 mtod(m, const void *), m->m_len); 355 356 digest = digest ^ 0xffffffff; 357 358 return (digest); 359 } 360 361 static int 362 icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep) 363 { 364 struct mbuf *m; 365 uint32_t 
received_digest, valid_digest; 366 367 if (request->ip_conn->ic_header_crc32c == false) 368 return (0); 369 370 m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE); 371 if (m == NULL) { 372 ICL_DEBUG("failed to receive header digest"); 373 return (-1); 374 } 375 376 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); 377 m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest); 378 m_freem(m); 379 380 *availablep -= ISCSI_HEADER_DIGEST_SIZE; 381 382 /* 383 * XXX: Handle AHS. 384 */ 385 valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 386 if (received_digest != valid_digest) { 387 ICL_WARN("header digest check failed; got 0x%x, " 388 "should be 0x%x", received_digest, valid_digest); 389 return (-1); 390 } 391 392 return (0); 393 } 394 395 /* 396 * Return the number of bytes that should be waiting in the receive socket 397 * before icl_pdu_receive_data_segment() gets called. 398 */ 399 static size_t 400 icl_pdu_data_segment_receive_len(const struct icl_pdu *request) 401 { 402 size_t len; 403 404 len = icl_pdu_data_segment_length(request); 405 if (len == 0) 406 return (0); 407 408 /* 409 * Account for the parts of data segment already read from 410 * the socket buffer. 411 */ 412 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 413 len -= request->ip_data_len; 414 415 /* 416 * Don't always wait for the full data segment to be delivered 417 * to the socket; this might badly affect performance due to 418 * TCP window scaling. 419 */ 420 if (len > partial_receive_len) { 421 #if 0 422 ICL_DEBUG("need %zd bytes of data, limiting to %zd", 423 len, partial_receive_len)); 424 #endif 425 len = partial_receive_len; 426 427 return (len); 428 } 429 430 /* 431 * Account for padding. 
 * Note that due to the way code is written,
 * the icl_pdu_receive_data_segment() must always receive padding
 * along with the last part of data segment, because it would be
 * impossible to tell whether we've already received the full data
 * segment including padding, or without it.
 */
	if ((len % 4) != 0)
		len += 4 - (len % 4);

#if 0
	ICL_DEBUG("need %zd bytes of data", len));
#endif

	return (len);
}

/*
 * Receive the next portion of the data segment, possibly in several
 * installments when less than the full segment is buffered.  Appends
 * the received mbufs to ip_data_mbuf, sets *more_neededp when more
 * data is still outstanding, and updates ic_receive_len with the
 * amount to wait for next.  Returns 0 on success, -1 on failure.
 */
static int
icl_pdu_receive_data_segment(struct icl_pdu *request,
    size_t *availablep, bool *more_neededp)
{
	struct icl_conn *ic;
	size_t len, padding = 0;
	struct mbuf *m;

	ic = request->ip_conn;

	*more_neededp = false;
	ic->ic_receive_len = 0;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	if ((len % 4) != 0)
		padding = 4 - (len % 4);

	/*
	 * Account for already received parts of data segment.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	if (len + padding > *availablep) {
		/*
		 * Not enough data in the socket buffer.  Receive as much
		 * as we can.  Don't receive padding, since, obviously, it's
		 * not the end of data segment yet.
		 *
		 * NOTE(review): this assumes *availablep >= padding here;
		 * otherwise the size_t subtraction below would wrap.  The
		 * caller's ic_receive_len bookkeeping appears to guarantee
		 * it, but worth confirming.
		 */
#if 0
		ICL_DEBUG("limited from %zd to %zd",
		    len + padding, *availablep - padding));
#endif
		len = *availablep - padding;
		*more_neededp = true;
		padding = 0;
	}

	/*
	 * Must not try to receive padding without at least one byte
	 * of actual data segment.
 */
	if (len > 0) {
		m = icl_conn_receive(request->ip_conn, len + padding);
		if (m == NULL) {
			ICL_DEBUG("failed to receive data segment");
			return (-1);
		}

		/* Chain the new mbufs onto whatever was received so far. */
		if (request->ip_data_mbuf == NULL)
			request->ip_data_mbuf = m;
		else
			m_cat(request->ip_data_mbuf, m);

		/* ip_data_len counts payload only; padding is excluded. */
		request->ip_data_len += len;
		*availablep -= len + padding;
	} else
		ICL_DEBUG("len 0");

	if (*more_neededp)
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);

	return (0);
}

/*
 * Receive and verify the data digest, if negotiated and a data segment
 * is present.  Returns 0 on success or when not applicable, -1 on
 * receive error or digest mismatch.
 */
static int
icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
{
	struct mbuf *m;
	uint32_t received_digest, valid_digest;

	if (request->ip_conn->ic_data_crc32c == false)
		return (0);

	if (request->ip_data_len == 0)
		return (0);

	m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE);
	if (m == NULL) {
		ICL_DEBUG("failed to receive data digest");
		return (-1);
	}

	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
	m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest);
	m_freem(m);

	*availablep -= ISCSI_DATA_DIGEST_SIZE;

	/*
	 * Note that ip_data_mbuf also contains padding; since digest
	 * calculation is supposed to include that, we iterate over
	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
	 */
	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
	if (received_digest != valid_digest) {
		ICL_WARN("data digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}

/*
 * Somewhat contrary to the name, this attempts to receive only one
 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
 */
static struct icl_pdu *
icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
{
	struct icl_pdu *request;
	struct socket *so;
	size_t len;
	int error;
	bool more_needed;

	so = ic->ic_socket;

	/*
	 * A fresh PDU is allocated when the state machine is at its
	 * initial (BHS) state; otherwise continue with the partially
	 * received one stashed in ic_receive_pdu.
	 */
	if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
		KASSERT(ic->ic_receive_pdu == NULL,
		    ("ic->ic_receive_pdu != NULL"));
		request = icl_pdu_new_empty(ic, M_NOWAIT);
		if (request == NULL) {
			ICL_DEBUG("failed to allocate PDU; "
			    "dropping connection");
			icl_conn_fail(ic);
			return (NULL);
		}
		ic->ic_receive_pdu = request;
	} else {
		KASSERT(ic->ic_receive_pdu != NULL,
		    ("ic->ic_receive_pdu == NULL"));
		request = ic->ic_receive_pdu;
	}

	if (*availablep < ic->ic_receive_len) {
#if 0
		ICL_DEBUG("not enough data; need %zd, "
		    "have %zd", ic->ic_receive_len, *availablep);
#endif
		return (NULL);
	}

	/*
	 * Advance the receive state machine by one step:
	 * BHS -> AHS -> header digest -> data -> data digest.
	 * Each state sets ic_receive_len to the amount of data the
	 * next state needs before it can run.
	 */
	switch (ic->ic_receive_state) {
	case ICL_CONN_STATE_BHS:
		//ICL_DEBUG("receiving BHS");
		error = icl_pdu_receive_bhs(request, availablep);
		if (error != 0) {
			ICL_DEBUG("failed to receive BHS; "
			    "dropping connection");
			break;
		}

		/*
		 * We don't enforce any limit for AHS length;
		 * its length is stored in 8 bit field.
		 */

		len = icl_pdu_data_segment_length(request);
		if (len > ic->ic_max_data_segment_length) {
			ICL_WARN("received data segment "
			    "length %zd is larger than negotiated "
			    "MaxDataSegmentLength %zd; "
			    "dropping connection",
			    len, ic->ic_max_data_segment_length);
			error = EINVAL;
			break;
		}

		ic->ic_receive_state = ICL_CONN_STATE_AHS;
		ic->ic_receive_len = icl_pdu_ahs_length(request);
		break;

	case ICL_CONN_STATE_AHS:
		//ICL_DEBUG("receiving AHS");
		error = icl_pdu_receive_ahs(request, availablep);
		if (error != 0) {
			ICL_DEBUG("failed to receive AHS; "
			    "dropping connection");
			break;
		}
		ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
		if (ic->ic_header_crc32c == false)
			ic->ic_receive_len = 0;
		else
			ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_HEADER_DIGEST:
		//ICL_DEBUG("receiving header digest");
		error = icl_pdu_check_header_digest(request, availablep);
		if (error != 0) {
			ICL_DEBUG("header digest failed; "
			    "dropping connection");
			break;
		}

		ic->ic_receive_state = ICL_CONN_STATE_DATA;
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);
		break;

	case ICL_CONN_STATE_DATA:
		//ICL_DEBUG("receiving data segment");
		error = icl_pdu_receive_data_segment(request, availablep,
		    &more_needed);
		if (error != 0) {
			ICL_DEBUG("failed to receive data segment;"
			    "dropping connection");
			break;
		}

		/* Stay in DATA state until the whole segment arrives. */
		if (more_needed)
			break;

		ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
		if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
			ic->ic_receive_len = 0;
		else
			ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_DATA_DIGEST:
		//ICL_DEBUG("receiving data digest");
		error = icl_pdu_check_data_digest(request, availablep);
		if (error != 0) {
			ICL_DEBUG("data digest failed; "
			    "dropping connection");
			break;
		}

		/*
		 * We've received complete PDU; reset the receive state machine
		 * and return the PDU.
		 */
		ic->ic_receive_state = ICL_CONN_STATE_BHS;
		ic->ic_receive_len = sizeof(struct iscsi_bhs);
		ic->ic_receive_pdu = NULL;
		return (request);

	default:
		panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
	}

	if (error != 0) {
		/*
		 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
		 * and will get freed in icl_conn_close().
		 */
		icl_conn_fail(ic);
	}

	return (NULL);
}

/*
 * Drain the socket buffer, handing each fully received PDU to the
 * consumer's ic_receive callback.  'available' is the number of bytes
 * currently buffered in the receive socket buffer.
 */
static void
icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
{
	struct icl_pdu *response;
	struct socket *so;

	so = ic->ic_socket;

	/*
	 * This can never happen; we're careful to only mess with ic->ic_socket
	 * pointer when the send/receive threads are not running.
	 */
	KASSERT(so != NULL, ("NULL socket"));

	for (;;) {
		if (ic->ic_disconnecting)
			return;

		if (so->so_error != 0) {
			ICL_DEBUG("connection error %d; "
			    "dropping connection", so->so_error);
			icl_conn_fail(ic);
			return;
		}

		/*
		 * Loop until we have a complete PDU or there is not enough
		 * data in the socket buffer.
 */
		if (available < ic->ic_receive_len) {
#if 0
			ICL_DEBUG("not enough data; have %zd, "
			    "need %zd", available,
			    ic->ic_receive_len);
#endif
			return;
		}

		response = icl_conn_receive_pdu(ic, &available);
		if (response == NULL)
			continue;

		/* This backend does not implement AHS handling. */
		if (response->ip_ahs_len > 0) {
			ICL_WARN("received PDU with unsupported "
			    "AHS; opcode 0x%x; dropping connection",
			    response->ip_bhs->bhs_opcode);
			icl_pdu_free(response);
			icl_conn_fail(ic);
			return;
		}

		(ic->ic_receive)(response);
	}
}

/*
 * Per-connection receive kthread: sleeps on ic_receive_cv until the
 * socket upcall reports readable data, then feeds whatever is buffered
 * to icl_conn_receive_pdus().  Exits when ic_disconnecting is set and
 * signals ic_send_cv so the teardown path can observe the exit.
 */
static void
icl_receive_thread(void *arg)
{
	struct icl_conn *ic;
	size_t available;
	struct socket *so;

	ic = arg;
	so = ic->ic_socket;

	ICL_CONN_LOCK(ic);
	ic->ic_receive_running = true;
	ICL_CONN_UNLOCK(ic);

	for (;;) {
		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		/*
		 * Set the low watermark, to be checked by
		 * soreadable() in icl_soupcall_receive()
		 * to avoid unneccessary wakeups until there
		 * is enough data received to read the PDU.
 */
		SOCKBUF_LOCK(&so->so_rcv);
		available = sbavail(&so->so_rcv);
		if (available < ic->ic_receive_len) {
			so->so_rcv.sb_lowat = ic->ic_receive_len;
			/* Sleep on the sockbuf mutex until the upcall fires. */
			cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
		} else
			so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
		SOCKBUF_UNLOCK(&so->so_rcv);

		icl_conn_receive_pdus(ic, available);
	}

	ICL_CONN_LOCK(ic);
	ic->ic_receive_running = false;
	cv_signal(&ic->ic_send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}

/*
 * Receive-side socket upcall: wake the receive thread once enough data
 * (per sb_lowat, checked by soreadable()) has accumulated.
 */
static int
icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
{
	struct icl_conn *ic;

	if (!soreadable(so))
		return (SU_OK);

	ic = arg;
	cv_signal(&ic->ic_receive_cv);
	return (SU_OK);
}

/*
 * Prepare a PDU for transmission: fill in DataSegmentLength, append
 * the header digest, pad the data segment, append the data digest,
 * and concatenate everything into ip_bhs_mbuf.  Returns 0 on success,
 * 1 when an m_append() fails.
 */
static int
icl_pdu_finalize(struct icl_pdu *request)
{
	size_t padding, pdu_len;
	uint32_t digest, zero = 0;
	int ok;
	struct icl_conn *ic;

	ic = request->ip_conn;

	icl_pdu_set_data_segment_length(request, request->ip_data_len);

	pdu_len = icl_pdu_size(request);

	if (ic->ic_header_crc32c) {
		digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
		ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
		    (void *)&digest);
		if (ok != 1) {
			ICL_WARN("failed to append header digest");
			return (1);
		}
	}

	if (request->ip_data_len != 0) {
		/* Pad with zero bytes to a four-byte boundary. */
		padding = icl_pdu_padding(request);
		if (padding > 0) {
			ok = m_append(request->ip_data_mbuf, padding,
			    (void *)&zero);
			if (ok != 1) {
				ICL_WARN("failed to append padding");
				return (1);
			}
		}

		if (ic->ic_data_crc32c) {
			/* Digest covers the padded data segment. */
			digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);

			ok = m_append(request->ip_data_mbuf, sizeof(digest),
			    (void *)&digest);
			if (ok != 1) {
				ICL_WARN("failed to append data digest");
				return (1);
			}
		}

		m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
		request->ip_data_mbuf = NULL;
	}

	request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;

	return (0);
}

/*
 * Transmit as many queued PDUs as fit into the send socket buffer,
 * optionally coalescing several PDUs into a single sosend() call.
 * Called from the send thread with the connection lock NOT held;
 * 'queue' is the thread-local queue it drains.
 */
static void
icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
{
	struct icl_pdu *request, *request2;
	struct socket *so;
	size_t available, size, size2;
	int coalesced, error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	so = ic->ic_socket;

	SOCKBUF_LOCK(&so->so_snd);
	/*
	 * Check how much space do we have for transmit.  We can't just
	 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
	 * as it always frees the mbuf chain passed to it, even in case
	 * of error.
	 */
	available = sbspace(&so->so_snd);

	/*
	 * Notify the socket upcall that we don't need wakeups
	 * for the time being.
	 */
	so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
	SOCKBUF_UNLOCK(&so->so_snd);

	while (!STAILQ_EMPTY(queue)) {
		request = STAILQ_FIRST(queue);
		size = icl_pdu_size(request);
		if (available < size) {

			/*
			 * Set the low watermark, to be checked by
			 * sowriteable() in icl_soupcall_send()
			 * to avoid unneccessary wakeups until there
			 * is enough space for the PDU to fit.
913 */ 914 SOCKBUF_LOCK(&so->so_snd); 915 available = sbspace(&so->so_snd); 916 if (available < size) { 917 #if 1 918 ICL_DEBUG("no space to send; " 919 "have %zd, need %zd", 920 available, size); 921 #endif 922 so->so_snd.sb_lowat = size; 923 SOCKBUF_UNLOCK(&so->so_snd); 924 return; 925 } 926 SOCKBUF_UNLOCK(&so->so_snd); 927 } 928 STAILQ_REMOVE_HEAD(queue, ip_next); 929 error = icl_pdu_finalize(request); 930 if (error != 0) { 931 ICL_DEBUG("failed to finalize PDU; " 932 "dropping connection"); 933 icl_conn_fail(ic); 934 icl_pdu_free(request); 935 return; 936 } 937 if (coalesce) { 938 coalesced = 1; 939 for (;;) { 940 request2 = STAILQ_FIRST(queue); 941 if (request2 == NULL) 942 break; 943 size2 = icl_pdu_size(request2); 944 if (available < size + size2) 945 break; 946 STAILQ_REMOVE_HEAD(queue, ip_next); 947 error = icl_pdu_finalize(request2); 948 if (error != 0) { 949 ICL_DEBUG("failed to finalize PDU; " 950 "dropping connection"); 951 icl_conn_fail(ic); 952 icl_pdu_free(request); 953 icl_pdu_free(request2); 954 return; 955 } 956 m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf); 957 request2->ip_bhs_mbuf = NULL; 958 request->ip_bhs_mbuf->m_pkthdr.len += size2; 959 size += size2; 960 STAILQ_REMOVE_AFTER(queue, request, ip_next); 961 icl_pdu_free(request2); 962 coalesced++; 963 } 964 #if 0 965 if (coalesced > 1) { 966 ICL_DEBUG("coalesced %d PDUs into %zd bytes", 967 coalesced, size); 968 } 969 #endif 970 } 971 available -= size; 972 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, 973 NULL, MSG_DONTWAIT, curthread); 974 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. 
 */
		if (error != 0) {
			ICL_DEBUG("failed to send PDU, error %d; "
			    "dropping connection", error);
			icl_conn_fail(ic);
			icl_pdu_free(request);
			return;
		}
		icl_pdu_free(request);
	}
}

/*
 * Per-connection send kthread: moves PDUs from the shared ic_to_send
 * queue to a thread-local one and transmits them, sleeping on
 * ic_send_cv when there is nothing to do or no socket buffer space.
 */
static void
icl_send_thread(void *arg)
{
	struct icl_conn *ic;
	struct icl_pdu_stailq queue;

	ic = arg;

	STAILQ_INIT(&queue);

	ICL_CONN_LOCK(ic);
	ic->ic_send_running = true;

	for (;;) {
		for (;;) {
			/*
			 * If the local queue is empty, populate it from
			 * the main one.  This way the icl_conn_send_pdus()
			 * can go through all the queued PDUs without holding
			 * any locks.
			 */
			if (STAILQ_EMPTY(&queue))
				STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);

			ic->ic_check_send_space = false;
			ICL_CONN_UNLOCK(ic);
			icl_conn_send_pdus(ic, &queue);
			ICL_CONN_LOCK(ic);

			/*
			 * The icl_soupcall_send() was called since the last
			 * call to sbspace(); go around;
			 */
			if (ic->ic_check_send_space)
				continue;

			/*
			 * Local queue is empty, but we still have PDUs
			 * in the main one; go around.
			 */
			if (STAILQ_EMPTY(&queue) &&
			    !STAILQ_EMPTY(&ic->ic_to_send))
				continue;

			/*
			 * There might be some stuff in the local queue,
			 * which didn't get sent due to not having enough send
			 * space.  Wait for socket upcall.
			 */
			break;
		}

		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/*
	 * We're exiting; move PDUs back to the main queue, so they can
	 * get freed properly.  At this point ordering doesn't matter.
1049 */ 1050 STAILQ_CONCAT(&ic->ic_to_send, &queue); 1051 1052 ic->ic_send_running = false; 1053 cv_signal(&ic->ic_send_cv); 1054 ICL_CONN_UNLOCK(ic); 1055 kthread_exit(); 1056 } 1057 1058 static int 1059 icl_soupcall_send(struct socket *so, void *arg, int waitflag) 1060 { 1061 struct icl_conn *ic; 1062 1063 if (!sowriteable(so)) 1064 return (SU_OK); 1065 1066 ic = arg; 1067 1068 ICL_CONN_LOCK(ic); 1069 ic->ic_check_send_space = true; 1070 ICL_CONN_UNLOCK(ic); 1071 1072 cv_signal(&ic->ic_send_cv); 1073 1074 return (SU_OK); 1075 } 1076 1077 static int 1078 icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, 1079 int flags) 1080 { 1081 struct mbuf *mb, *newmb; 1082 size_t copylen, off = 0; 1083 1084 KASSERT(len > 0, ("len == 0")); 1085 1086 newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR); 1087 if (newmb == NULL) { 1088 ICL_WARN("failed to allocate mbuf for %zd bytes", len); 1089 return (ENOMEM); 1090 } 1091 1092 for (mb = newmb; mb != NULL; mb = mb->m_next) { 1093 copylen = min(M_TRAILINGSPACE(mb), len - off); 1094 memcpy(mtod(mb, char *), (const char *)addr + off, copylen); 1095 mb->m_len = copylen; 1096 off += copylen; 1097 } 1098 KASSERT(off == len, ("%s: off != len", __func__)); 1099 1100 if (request->ip_data_mbuf == NULL) { 1101 request->ip_data_mbuf = newmb; 1102 request->ip_data_len = len; 1103 } else { 1104 m_cat(request->ip_data_mbuf, newmb); 1105 request->ip_data_len += len; 1106 } 1107 1108 return (0); 1109 } 1110 1111 int 1112 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, 1113 const void *addr, size_t len, int flags) 1114 { 1115 1116 return (icl_pdu_append_data(request, addr, len, flags)); 1117 } 1118 1119 static void 1120 icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) 1121 { 1122 1123 m_copydata(ip->ip_data_mbuf, off, len, addr); 1124 } 1125 1126 void 1127 icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, 1128 size_t off, void *addr, size_t len) 1129 { 
1130 1131 return (icl_pdu_get_data(ip, off, addr, len)); 1132 } 1133 1134 static void 1135 icl_pdu_queue(struct icl_pdu *ip) 1136 { 1137 struct icl_conn *ic; 1138 1139 ic = ip->ip_conn; 1140 1141 ICL_CONN_LOCK_ASSERT(ic); 1142 1143 if (ic->ic_disconnecting || ic->ic_socket == NULL) { 1144 ICL_DEBUG("icl_pdu_queue on closed connection"); 1145 icl_pdu_free(ip); 1146 return; 1147 } 1148 1149 if (!STAILQ_EMPTY(&ic->ic_to_send)) { 1150 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1151 /* 1152 * If the queue is not empty, someone else had already 1153 * signaled the send thread; no need to do that again, 1154 * just return. 1155 */ 1156 return; 1157 } 1158 1159 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1160 cv_signal(&ic->ic_send_cv); 1161 } 1162 1163 void 1164 icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) 1165 { 1166 1167 icl_pdu_queue(ip); 1168 } 1169 1170 static struct icl_conn * 1171 icl_soft_new_conn(const char *name, struct mtx *lock) 1172 { 1173 struct icl_conn *ic; 1174 1175 refcount_acquire(&icl_ncons); 1176 1177 ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO); 1178 1179 STAILQ_INIT(&ic->ic_to_send); 1180 ic->ic_lock = lock; 1181 cv_init(&ic->ic_send_cv, "icl_tx"); 1182 cv_init(&ic->ic_receive_cv, "icl_rx"); 1183 #ifdef DIAGNOSTIC 1184 refcount_init(&ic->ic_outstanding_pdus, 0); 1185 #endif 1186 ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; 1187 ic->ic_name = name; 1188 ic->ic_offload = "none"; 1189 1190 return (ic); 1191 } 1192 1193 void 1194 icl_soft_conn_free(struct icl_conn *ic) 1195 { 1196 1197 cv_destroy(&ic->ic_send_cv); 1198 cv_destroy(&ic->ic_receive_cv); 1199 kobj_delete((struct kobj *)ic, M_ICL_SOFT); 1200 refcount_release(&icl_ncons); 1201 } 1202 1203 static int 1204 icl_conn_start(struct icl_conn *ic) 1205 { 1206 size_t minspace; 1207 struct sockopt opt; 1208 int error, one = 1; 1209 1210 ICL_CONN_LOCK(ic); 1211 1212 /* 1213 * XXX: Ugly hack. 
 */
	if (ic->ic_socket == NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EINVAL);
	}

	/* Reset the receive state machine to expect a BHS first. */
	ic->ic_receive_state = ICL_CONN_STATE_BHS;
	ic->ic_receive_len = sizeof(struct iscsi_bhs);
	ic->ic_disconnecting = false;

	ICL_CONN_UNLOCK(ic);

	/*
	 * For sendspace, this is required because the current code cannot
	 * send a PDU in pieces; thus, the minimum buffer size is equal
	 * to the maximum PDU size.  "+4" is to account for possible padding.
	 *
	 * What we should actually do here is to use autoscaling, but set
	 * some minimal buffer size to "minspace".  I don't know a way to do
	 * that, though.
	 */
	minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
	if (sendspace < minspace) {
		ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
		    minspace);
		sendspace = minspace;
	}
	if (recvspace < minspace) {
		ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
		    minspace);
		recvspace = minspace;
	}

	error = soreserve(ic->ic_socket, sendspace, recvspace);
	if (error != 0) {
		ICL_WARN("soreserve failed with error %d", error);
		icl_conn_close(ic);
		return (error);
	}
	ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
	ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;

	/*
	 * Disable Nagle.
	 */
	bzero(&opt, sizeof(opt));
	opt.sopt_dir = SOPT_SET;
	opt.sopt_level = IPPROTO_TCP;
	opt.sopt_name = TCP_NODELAY;
	opt.sopt_val = &one;
	opt.sopt_valsize = sizeof(one);
	error = sosetopt(ic->ic_socket, &opt);
	if (error != 0) {
		ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
		icl_conn_close(ic);
		return (error);
	}

	/*
	 * Start threads.
1275 */ 1276 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", 1277 ic->ic_name); 1278 if (error != 0) { 1279 ICL_WARN("kthread_add(9) failed with error %d", error); 1280 icl_conn_close(ic); 1281 return (error); 1282 } 1283 1284 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", 1285 ic->ic_name); 1286 if (error != 0) { 1287 ICL_WARN("kthread_add(9) failed with error %d", error); 1288 icl_conn_close(ic); 1289 return (error); 1290 } 1291 1292 /* 1293 * Register socket upcall, to get notified about incoming PDUs 1294 * and free space to send outgoing ones. 1295 */ 1296 SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1297 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); 1298 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1299 SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1300 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); 1301 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1302 1303 return (0); 1304 } 1305 1306 int 1307 icl_soft_conn_handoff(struct icl_conn *ic, int fd) 1308 { 1309 struct file *fp; 1310 struct socket *so; 1311 cap_rights_t rights; 1312 int error; 1313 1314 ICL_CONN_LOCK_ASSERT_NOT(ic); 1315 1316 /* 1317 * Steal the socket from userland. 
1318 */ 1319 error = fget(curthread, fd, 1320 cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); 1321 if (error != 0) 1322 return (error); 1323 if (fp->f_type != DTYPE_SOCKET) { 1324 fdrop(fp, curthread); 1325 return (EINVAL); 1326 } 1327 so = fp->f_data; 1328 if (so->so_type != SOCK_STREAM) { 1329 fdrop(fp, curthread); 1330 return (EINVAL); 1331 } 1332 1333 ICL_CONN_LOCK(ic); 1334 1335 if (ic->ic_socket != NULL) { 1336 ICL_CONN_UNLOCK(ic); 1337 fdrop(fp, curthread); 1338 return (EBUSY); 1339 } 1340 1341 ic->ic_socket = fp->f_data; 1342 fp->f_ops = &badfileops; 1343 fp->f_data = NULL; 1344 fdrop(fp, curthread); 1345 ICL_CONN_UNLOCK(ic); 1346 1347 error = icl_conn_start(ic); 1348 1349 return (error); 1350 } 1351 1352 void 1353 icl_conn_close(struct icl_conn *ic) 1354 { 1355 struct icl_pdu *pdu; 1356 1357 ICL_CONN_LOCK_ASSERT_NOT(ic); 1358 1359 ICL_CONN_LOCK(ic); 1360 if (ic->ic_socket == NULL) { 1361 ICL_CONN_UNLOCK(ic); 1362 return; 1363 } 1364 1365 /* 1366 * Deregister socket upcalls. 1367 */ 1368 ICL_CONN_UNLOCK(ic); 1369 SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1370 if (ic->ic_socket->so_snd.sb_upcall != NULL) 1371 soupcall_clear(ic->ic_socket, SO_SND); 1372 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1373 SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1374 if (ic->ic_socket->so_rcv.sb_upcall != NULL) 1375 soupcall_clear(ic->ic_socket, SO_RCV); 1376 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1377 ICL_CONN_LOCK(ic); 1378 1379 ic->ic_disconnecting = true; 1380 1381 /* 1382 * Wake up the threads, so they can properly terminate. 
1383 */ 1384 while (ic->ic_receive_running || ic->ic_send_running) { 1385 //ICL_DEBUG("waiting for send/receive threads to terminate"); 1386 cv_signal(&ic->ic_receive_cv); 1387 cv_signal(&ic->ic_send_cv); 1388 cv_wait(&ic->ic_send_cv, ic->ic_lock); 1389 } 1390 //ICL_DEBUG("send/receive threads terminated"); 1391 1392 ICL_CONN_UNLOCK(ic); 1393 soclose(ic->ic_socket); 1394 ICL_CONN_LOCK(ic); 1395 ic->ic_socket = NULL; 1396 1397 if (ic->ic_receive_pdu != NULL) { 1398 //ICL_DEBUG("freeing partially received PDU"); 1399 icl_pdu_free(ic->ic_receive_pdu); 1400 ic->ic_receive_pdu = NULL; 1401 } 1402 1403 /* 1404 * Remove any outstanding PDUs from the send queue. 1405 */ 1406 while (!STAILQ_EMPTY(&ic->ic_to_send)) { 1407 pdu = STAILQ_FIRST(&ic->ic_to_send); 1408 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); 1409 icl_pdu_free(pdu); 1410 } 1411 1412 KASSERT(STAILQ_EMPTY(&ic->ic_to_send), 1413 ("destroying session with non-empty send queue")); 1414 #ifdef DIAGNOSTIC 1415 KASSERT(ic->ic_outstanding_pdus == 0, 1416 ("destroying session with %d outstanding PDUs", 1417 ic->ic_outstanding_pdus)); 1418 #endif 1419 ICL_CONN_UNLOCK(ic); 1420 } 1421 1422 void 1423 icl_soft_conn_close(struct icl_conn *ic) 1424 { 1425 1426 icl_conn_close(ic); 1427 } 1428 1429 bool 1430 icl_soft_conn_connected(struct icl_conn *ic) 1431 { 1432 ICL_CONN_LOCK_ASSERT_NOT(ic); 1433 1434 ICL_CONN_LOCK(ic); 1435 if (ic->ic_socket == NULL) { 1436 ICL_CONN_UNLOCK(ic); 1437 return (false); 1438 } 1439 if (ic->ic_socket->so_error != 0) { 1440 ICL_CONN_UNLOCK(ic); 1441 return (false); 1442 } 1443 ICL_CONN_UNLOCK(ic); 1444 return (true); 1445 } 1446 1447 static int 1448 icl_soft_limits(size_t *limitp) 1449 { 1450 1451 *limitp = 128 * 1024; 1452 1453 return (0); 1454 } 1455 1456 #ifdef ICL_KERNEL_PROXY 1457 int 1458 icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so) 1459 { 1460 int error; 1461 1462 ICL_CONN_LOCK_ASSERT_NOT(ic); 1463 1464 if (so->so_type != SOCK_STREAM) 1465 return (EINVAL); 1466 1467 
ICL_CONN_LOCK(ic); 1468 if (ic->ic_socket != NULL) { 1469 ICL_CONN_UNLOCK(ic); 1470 return (EBUSY); 1471 } 1472 ic->ic_socket = so; 1473 ICL_CONN_UNLOCK(ic); 1474 1475 error = icl_conn_start(ic); 1476 1477 return (error); 1478 } 1479 #endif /* ICL_KERNEL_PROXY */ 1480 1481 static int 1482 icl_soft_load(void) 1483 { 1484 int error; 1485 1486 icl_pdu_zone = uma_zcreate("icl_pdu", 1487 sizeof(struct icl_pdu), NULL, NULL, NULL, NULL, 1488 UMA_ALIGN_PTR, 0); 1489 refcount_init(&icl_ncons, 0); 1490 1491 /* 1492 * The reason we call this "none" is that to the user, 1493 * it's known as "offload driver"; "offload driver: soft" 1494 * doesn't make much sense. 1495 */ 1496 error = icl_register("none", 0, icl_soft_limits, icl_soft_new_conn); 1497 KASSERT(error == 0, ("failed to register")); 1498 1499 return (error); 1500 } 1501 1502 static int 1503 icl_soft_unload(void) 1504 { 1505 1506 if (icl_ncons != 0) 1507 return (EBUSY); 1508 1509 icl_unregister("none"); 1510 1511 uma_zdestroy(icl_pdu_zone); 1512 1513 return (0); 1514 } 1515 1516 static int 1517 icl_soft_modevent(module_t mod, int what, void *arg) 1518 { 1519 1520 switch (what) { 1521 case MOD_LOAD: 1522 return (icl_soft_load()); 1523 case MOD_UNLOAD: 1524 return (icl_soft_unload()); 1525 default: 1526 return (EINVAL); 1527 } 1528 } 1529 1530 moduledata_t icl_soft_data = { 1531 "icl_soft", 1532 icl_soft_modevent, 1533 0 1534 }; 1535 1536 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); 1537 MODULE_DEPEND(icl_soft, icl, 1, 1, 1); 1538 MODULE_VERSION(icl_soft, 1); 1539