1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/conf.h> 27 #include <sys/stat.h> 28 #include <sys/file.h> 29 #include <sys/ddi.h> 30 #include <sys/sunddi.h> 31 #include <sys/modctl.h> 32 #include <sys/priv.h> 33 #include <sys/cpuvar.h> 34 #include <sys/socket.h> 35 #include <sys/strsubr.h> 36 #include <sys/sysmacros.h> 37 #include <sys/sdt.h> 38 #include <netinet/tcp.h> 39 #include <inet/tcp.h> 40 #include <sys/socketvar.h> 41 #include <sys/pathname.h> 42 #include <sys/fs/snode.h> 43 #include <sys/fs/dv_node.h> 44 #include <sys/vnode.h> 45 #include <netinet/in.h> 46 #include <net/if.h> 47 #include <sys/sockio.h> 48 #include <sys/ksocket.h> 49 #include <sys/iscsi_protocol.h> 50 #include <sys/idm/idm.h> 51 #include <sys/idm/idm_so.h> 52 #include <sys/idm/idm_text.h> 53 54 #define IN_PROGRESS_DELAY 1 55 56 /* 57 * in6addr_any is currently all zeroes, but use the macro in case this 58 * ever changes. 59 */ 60 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; 61 62 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status); 63 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status); 64 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status); 65 66 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so); 67 static void idm_so_conn_destroy_common(idm_conn_t *ic); 68 static void idm_so_conn_connect_common(idm_conn_t *ic); 69 70 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc); 71 static void idm_set_ini_postconnect_options(idm_so_conn_t *sc); 72 static void idm_set_tgt_connect_options(ksocket_t so); 73 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu); 74 75 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu); 76 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, 77 idm_buf_t *idb, uint32_t offset, uint32_t length); 78 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb); 79 static idm_status_t idm_so_send_buf_region(idm_task_t *idt, 80 idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length); 81 82 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, 83 uint32_t ro, uint32_t dlength); 84 85 static idm_status_t idm_so_handle_digest(idm_conn_t *it, 86 nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx); 87 88 static void idm_so_socket_set_nonblock(struct sonode *node); 89 static void idm_so_socket_set_block(struct sonode *node); 90 91 /* 92 * Transport ops prototypes 93 */ 94 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu); 95 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb); 96 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb); 97 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu); 98 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu); 99 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu); 100 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt); 101 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it, 102 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl); 103 static void idm_so_notice_key_values(idm_conn_t *it, 104 nvlist_t *negotiated_nvl); 105 static kv_status_t idm_so_declare_key_values(idm_conn_t *it, 106 nvlist_t *config_nvl, nvlist_t *outgoing_nvl); 107 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic, 108 idm_transport_caps_t *caps); 109 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen); 110 static void idm_so_buf_free(idm_buf_t *idb); 111 static idm_status_t idm_so_buf_setup(idm_buf_t *idb); 112 static void idm_so_buf_teardown(idm_buf_t *idb); 113 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is); 114 static void idm_so_tgt_svc_destroy(idm_svc_t *is); 115 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is); 116 static void idm_so_tgt_svc_offline(idm_svc_t *is); 117 static void idm_so_tgt_conn_destroy(idm_conn_t *ic); 118 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic); 119 static void idm_so_conn_disconnect(idm_conn_t *ic); 120 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic); 121 static void idm_so_ini_conn_destroy(idm_conn_t *ic); 122 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic); 123 124 /* 125 * IDM Native Sockets transport operations 126 */ 127 static 128 idm_transport_ops_t idm_so_transport_ops = { 129 idm_so_tx, /* it_tx_pdu */ 130 idm_so_buf_tx_to_ini, /* it_buf_tx_to_ini */ 131 idm_so_buf_rx_from_ini, /* it_buf_rx_from_ini */ 132 idm_so_rx_datain, /* it_rx_datain */ 133 idm_so_rx_rtt, /* it_rx_rtt */ 134 idm_so_rx_dataout, /* it_rx_dataout */ 135 NULL, /* it_alloc_conn_rsrc */ 136 NULL, /* it_free_conn_rsrc */ 137 NULL, /* it_tgt_enable_datamover */ 138 NULL, /* it_ini_enable_datamover */ 139 NULL, /* it_conn_terminate */ 140 idm_so_free_task_rsrc, /* it_free_task_rsrc */ 141 idm_so_negotiate_key_values, /* it_negotiate_key_values */ 142 idm_so_notice_key_values, /* it_notice_key_values */ 143 idm_so_conn_is_capable, /* it_conn_is_capable */ 144 idm_so_buf_alloc, /* it_buf_alloc */ 145 idm_so_buf_free, /* it_buf_free */ 146 idm_so_buf_setup, /* it_buf_setup */ 147 idm_so_buf_teardown, /* it_buf_teardown */ 148 idm_so_tgt_svc_create, /* it_tgt_svc_create */ 149 idm_so_tgt_svc_destroy, /* it_tgt_svc_destroy */ 150 idm_so_tgt_svc_online, /* it_tgt_svc_online */ 151 idm_so_tgt_svc_offline, /* it_tgt_svc_offline */ 152 idm_so_tgt_conn_destroy, /* it_tgt_conn_destroy */ 153 idm_so_tgt_conn_connect, /* it_tgt_conn_connect */ 154 idm_so_conn_disconnect, /* it_tgt_conn_disconnect */ 155 idm_so_ini_conn_create, /* it_ini_conn_create */ 156 idm_so_ini_conn_destroy, /* it_ini_conn_destroy */ 157 idm_so_ini_conn_connect, /* it_ini_conn_connect */ 158 idm_so_conn_disconnect, /* it_ini_conn_disconnect */ 159 idm_so_declare_key_values /* it_declare_key_values */ 160 }; 161 162 /* 163 * idm_so_init() 164 * Sockets transport initialization 165 */ 166 void 167 idm_so_init(idm_transport_t *it) 168 { 169 /* Cache for IDM Data and R2T Transmit PDU's */ 170 idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache", 171 sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8, 172 &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP); 173 174 /* Cache for IDM Receive PDU's */ 175 idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache", 176 sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8, 177 &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP); 178 179 /* 128k buffer cache */ 180 idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache", 181 IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP); 182 183 /* Set the sockets transport ops */ 184 it->it_ops = &idm_so_transport_ops; 185 } 186 187 /* 188 * idm_so_fini() 189 * Sockets transport teardown 190 */ 191 void 192 idm_so_fini(void) 193 { 194 kmem_cache_destroy(idm.idm_so_128k_buf_cache); 195 kmem_cache_destroy(idm.idm_sotx_pdu_cache); 196 kmem_cache_destroy(idm.idm_sorx_pdu_cache); 197 } 198 199 ksocket_t 200 idm_socreate(int domain, int type, int protocol) 201 { 202 ksocket_t ks; 203 204 if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP, 205 CRED())) { 206 return (ks); 207 } else { 208 return (NULL); 209 } 210 } 211 212 /* 213 * idm_soshutdown will disconnect the socket and prevent subsequent PDU 214 * reception and transmission. The sonode still exists but its state 215 * gets modified to indicate it is no longer connected. Calls to 216 * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used 217 * regain control of a thread stuck in idm_sorecv. 218 */ 219 void 220 idm_soshutdown(ksocket_t so) 221 { 222 (void) ksocket_shutdown(so, SHUT_RDWR, CRED()); 223 } 224 225 /* 226 * idm_sodestroy releases all resources associated with a socket previously 227 * created with idm_socreate. The socket must be shutdown using 228 * idm_soshutdown before the socket is destroyed with idm_sodestroy, 229 * otherwise undefined behavior will result. 230 */ 231 void 232 idm_sodestroy(ksocket_t ks) 233 { 234 (void) ksocket_close(ks, CRED()); 235 } 236 237 /* 238 * Function to compare two addresses in sockaddr_storage format 239 */ 240 241 int 242 idm_ss_compare(const struct sockaddr_storage *cmp_ss1, 243 const struct sockaddr_storage *cmp_ss2, 244 boolean_t v4_mapped_as_v4) 245 { 246 struct sockaddr_storage mapped_v4_ss1, mapped_v4_ss2; 247 const struct sockaddr_storage *ss1, *ss2; 248 struct in_addr *in1, *in2; 249 struct in6_addr *in61, *in62; 250 int i; 251 252 /* 253 * Normalize V4-mapped IPv6 addresses into V4 format if 254 * v4_mapped_as_v4 is B_TRUE. 255 */ 256 ss1 = cmp_ss1; 257 ss2 = cmp_ss2; 258 if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) { 259 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr; 260 if (IN6_IS_ADDR_V4MAPPED(in61)) { 261 bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1)); 262 mapped_v4_ss1.ss_family = AF_INET; 263 ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port = 264 ((struct sockaddr_in *)ss1)->sin_port; 265 IN6_V4MAPPED_TO_INADDR(in61, 266 &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr); 267 ss1 = &mapped_v4_ss1; 268 } 269 } 270 ss2 = cmp_ss2; 271 if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) { 272 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr; 273 if (IN6_IS_ADDR_V4MAPPED(in62)) { 274 bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2)); 275 mapped_v4_ss2.ss_family = AF_INET; 276 ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port = 277 ((struct sockaddr_in *)ss2)->sin_port; 278 IN6_V4MAPPED_TO_INADDR(in62, 279 &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr); 280 ss2 = &mapped_v4_ss2; 281 } 282 } 283 284 /* 285 * Compare ports, then address family, then ip address 286 */ 287 if (((struct sockaddr_in *)ss1)->sin_port != 288 ((struct sockaddr_in *)ss2)->sin_port) { 289 if (((struct sockaddr_in *)ss1)->sin_port > 290 ((struct sockaddr_in *)ss2)->sin_port) 291 return (1); 292 else 293 return (-1); 294 } 295 296 /* 297 * ports are the same 298 */ 299 if (ss1->ss_family != ss2->ss_family) { 300 if (ss1->ss_family == AF_INET) 301 return (1); 302 else 303 return (-1); 304 } 305 306 /* 307 * address families are the same 308 */ 309 if (ss1->ss_family == AF_INET) { 310 in1 = &((struct sockaddr_in *)ss1)->sin_addr; 311 in2 = &((struct sockaddr_in *)ss2)->sin_addr; 312 313 if (in1->s_addr > in2->s_addr) 314 return (1); 315 else if (in1->s_addr < in2->s_addr) 316 return (-1); 317 else 318 return (0); 319 } else if (ss1->ss_family == AF_INET6) { 320 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr; 321 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr; 322 323 for (i = 0; i < 4; i++) { 324 if (in61->s6_addr32[i] > in62->s6_addr32[i]) 325 return (1); 326 else if (in61->s6_addr32[i] < in62->s6_addr32[i]) 327 return (-1); 328 } 329 return (0); 330 } 331 332 return (1); 333 } 334 335 /* 336 * IP address filter functions to flag addresses that should not 337 * go out to initiators through discovery. 338 */ 339 static boolean_t 340 idm_v4_addr_okay(struct in_addr *in_addr) 341 { 342 in_addr_t addr = ntohl(in_addr->s_addr); 343 344 if ((INADDR_NONE == addr) || 345 (IN_MULTICAST(addr)) || 346 ((addr >> IN_CLASSA_NSHIFT) == 0) || 347 ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) { 348 return (B_FALSE); 349 } 350 return (B_TRUE); 351 } 352 353 static boolean_t 354 idm_v6_addr_okay(struct in6_addr *addr6) 355 { 356 357 if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) || 358 (IN6_IS_ADDR_LOOPBACK(addr6)) || 359 (IN6_IS_ADDR_MULTICAST(addr6)) || 360 (IN6_IS_ADDR_V4MAPPED(addr6)) || 361 (IN6_IS_ADDR_V4COMPAT(addr6)) || 362 (IN6_IS_ADDR_LINKLOCAL(addr6))) { 363 return (B_FALSE); 364 } 365 return (B_TRUE); 366 } 367 368 /* 369 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is 370 * configured with by sending down a sequence of kernel ioctl to IP STREAMS. 371 */ 372 int 373 idm_get_ipaddr(idm_addr_list_t **ipaddr_p) 374 { 375 ksocket_t so4, so6; 376 struct lifnum lifn; 377 struct lifconf lifc; 378 struct lifreq *lp; 379 int rval; 380 int numifs; 381 int bufsize; 382 void *buf; 383 int i, j, n, rc; 384 struct sockaddr_storage ss; 385 struct sockaddr_in *sin; 386 struct sockaddr_in6 *sin6; 387 idm_addr_t *ip; 388 idm_addr_list_t *ipaddr; 389 int size_ipaddr; 390 391 *ipaddr_p = NULL; 392 size_ipaddr = 0; 393 buf = NULL; 394 395 /* create an ipv4 and ipv6 UDP socket */ 396 if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL) 397 return (0); 398 if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) { 399 idm_sodestroy(so6); 400 return (0); 401 } 402 403 404 retry_count: 405 /* snapshot the current number of interfaces */ 406 lifn.lifn_family = PF_UNSPEC; 407 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES; 408 lifn.lifn_count = 0; 409 /* use vp6 for ioctls with unspecified families by default */ 410 if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED()) 411 != 0) { 412 goto cleanup; 413 } 414 415 numifs = lifn.lifn_count; 416 if (numifs <= 0) { 417 goto cleanup; 418 } 419 420 /* allocate extra room in case more interfaces appear */ 421 numifs += 10; 422 423 /* get the interface names and ip addresses */ 424 bufsize = numifs * sizeof (struct lifreq); 425 buf = kmem_alloc(bufsize, KM_SLEEP); 426 427 lifc.lifc_family = AF_UNSPEC; 428 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES; 429 lifc.lifc_len = bufsize; 430 lifc.lifc_buf = buf; 431 rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED()); 432 if (rc != 0) { 433 goto cleanup; 434 } 435 /* if our extra room is used up, try again */ 436 if (bufsize <= lifc.lifc_len) { 437 kmem_free(buf, bufsize); 438 buf = NULL; 439 goto retry_count; 440 } 441 /* calc actual number of ifconfs */ 442 n = lifc.lifc_len / sizeof (struct lifreq); 443 444 /* get ip address */ 445 if (n > 0) { 446 size_ipaddr = sizeof (idm_addr_list_t) + 447 (n - 1) * sizeof (idm_addr_t); 448 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP); 449 } else { 450 goto cleanup; 451 } 452 453 /* 454 * Examine the array of interfaces and filter uninteresting ones 455 */ 456 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) { 457 458 /* 459 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive 460 */ 461 ss = lp->lifr_addr; 462 /* 463 * fetch the flags using the socket of the correct family 464 */ 465 switch (ss.ss_family) { 466 case AF_INET: 467 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp, 468 &rval, CRED()); 469 break; 470 case AF_INET6: 471 rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp, 472 &rval, CRED()); 473 break; 474 default: 475 continue; 476 } 477 if (rc == 0) { 478 /* 479 * If we got the flags, skip uninteresting 480 * interfaces based on flags 481 */ 482 if ((lp->lifr_flags & IFF_UP) != IFF_UP) 483 continue; 484 if (lp->lifr_flags & 485 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED)) 486 continue; 487 } 488 489 /* save ip address */ 490 ip = &ipaddr->al_addrs[j]; 491 switch (ss.ss_family) { 492 case AF_INET: 493 sin = (struct sockaddr_in *)&ss; 494 if (!idm_v4_addr_okay(&sin->sin_addr)) 495 continue; 496 ip->a_addr.i_addr.in4 = sin->sin_addr; 497 ip->a_addr.i_insize = sizeof (struct in_addr); 498 break; 499 case AF_INET6: 500 sin6 = (struct sockaddr_in6 *)&ss; 501 if (!idm_v6_addr_okay(&sin6->sin6_addr)) 502 continue; 503 ip->a_addr.i_addr.in6 = sin6->sin6_addr; 504 ip->a_addr.i_insize = sizeof (struct in6_addr); 505 break; 506 default: 507 continue; 508 } 509 j++; 510 } 511 512 if (j == 0) { 513 /* no valid ifaddr */ 514 kmem_free(ipaddr, size_ipaddr); 515 size_ipaddr = 0; 516 ipaddr = NULL; 517 } else { 518 ipaddr->al_out_cnt = j; 519 } 520 521 522 cleanup: 523 idm_sodestroy(so6); 524 idm_sodestroy(so4); 525 526 if (buf != NULL) 527 kmem_free(buf, bufsize); 528 529 *ipaddr_p = ipaddr; 530 return (size_ipaddr); 531 } 532 533 int 534 idm_sorecv(ksocket_t so, void *msg, size_t len) 535 { 536 iovec_t iov; 537 538 ASSERT(so != NULL); 539 ASSERT(len != 0); 540 541 /* 542 * Fill in iovec and receive data 543 */ 544 iov.iov_base = msg; 545 iov.iov_len = len; 546 547 return (idm_iov_sorecv(so, &iov, 1, len)); 548 } 549 550 /* 551 * idm_sosendto - Sends a buffered data on a non-connected socket. 552 * 553 * This function puts the data provided on the wire by calling sosendmsg. 554 * It will return only when all the data has been sent or if an error 555 * occurs. 556 * 557 * Returns 0 for success, the socket errno value if sosendmsg fails, and 558 * -1 if sosendmsg returns success but uio_resid != 0 559 */ 560 int 561 idm_sosendto(ksocket_t so, void *buff, size_t len, 562 struct sockaddr *name, socklen_t namelen) 563 { 564 struct msghdr msg; 565 struct iovec iov[1]; 566 int error; 567 size_t sent = 0; 568 569 iov[0].iov_base = buff; 570 iov[0].iov_len = len; 571 572 /* Initialization of the message header. */ 573 bzero(&msg, sizeof (msg)); 574 msg.msg_iov = iov; 575 msg.msg_iovlen = 1; 576 msg.msg_name = name; 577 msg.msg_namelen = namelen; 578 579 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) { 580 /* Data sent */ 581 if (sent == len) { 582 /* All data sent. Success. */ 583 return (0); 584 } else { 585 /* Not all data was sent. Failure */ 586 return (-1); 587 } 588 } 589 590 /* Send failed */ 591 return (error); 592 } 593 594 /* 595 * idm_iov_sosend - Sends an iovec on a connection. 596 * 597 * This function puts the data provided on the wire by calling sosendmsg. 598 * It will return only when all the data has been sent or if an error 599 * occurs. 600 * 601 * Returns 0 for success, the socket errno value if sosendmsg fails, and 602 * -1 if sosendmsg returns success but uio_resid != 0 603 */ 604 int 605 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len) 606 { 607 struct msghdr msg; 608 int error; 609 size_t sent = 0; 610 611 ASSERT(iop != NULL); 612 613 /* Initialization of the message header. */ 614 bzero(&msg, sizeof (msg)); 615 msg.msg_iov = iop; 616 msg.msg_iovlen = iovlen; 617 618 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) 619 == 0) { 620 /* Data sent */ 621 if (sent == total_len) { 622 /* All data sent. Success. */ 623 return (0); 624 } else { 625 /* Not all data was sent. Failure */ 626 return (-1); 627 } 628 } 629 630 /* Send failed */ 631 return (error); 632 } 633 634 /* 635 * idm_iov_sorecv - Receives an iovec from a connection 636 * 637 * This function gets the data asked for from the socket. It will return 638 * only when all the requested data has been retrieved or if an error 639 * occurs. 640 * 641 * Returns 0 for success, the socket errno value if sorecvmsg fails, and 642 * -1 if sorecvmsg returns success but uio_resid != 0 643 */ 644 int 645 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len) 646 { 647 struct msghdr msg; 648 int error; 649 size_t recv; 650 int flags; 651 652 ASSERT(iop != NULL); 653 654 /* Initialization of the message header. */ 655 bzero(&msg, sizeof (msg)); 656 msg.msg_iov = iop; 657 msg.msg_iovlen = iovlen; 658 flags = MSG_WAITALL; 659 660 if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED())) 661 == 0) { 662 /* Received data */ 663 if (recv == total_len) { 664 /* All requested data received. Success */ 665 return (0); 666 } else { 667 /* 668 * Not all data was received. The connection has 669 * probably failed. 670 */ 671 return (-1); 672 } 673 } 674 675 /* Receive failed */ 676 return (error); 677 } 678 679 static void 680 idm_set_ini_preconnect_options(idm_so_conn_t *sc) 681 { 682 int conn_abort = 10000; 683 int conn_notify = 2000; 684 int abort = 30000; 685 686 /* Pre-connect socket options */ 687 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, 688 TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int), 689 CRED()); 690 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, 691 TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int), 692 CRED()); 693 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_ABORT_THRESHOLD, 694 (char *)&abort, sizeof (int), CRED()); 695 } 696 697 static void 698 idm_set_ini_postconnect_options(idm_so_conn_t *sc) 699 { 700 int32_t rcvbuf = IDM_RCVBUF_SIZE; 701 int32_t sndbuf = IDM_SNDBUF_SIZE; 702 const int on = 1; 703 704 /* Set postconnect options */ 705 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY, 706 (char *)&on, sizeof (int), CRED()); 707 (void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF, 708 (char *)&rcvbuf, sizeof (int), CRED()); 709 (void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF, 710 (char *)&sndbuf, sizeof (int), CRED()); 711 } 712 713 static void 714 idm_set_tgt_connect_options(ksocket_t ks) 715 { 716 int32_t rcvbuf = IDM_RCVBUF_SIZE; 717 int32_t sndbuf = IDM_SNDBUF_SIZE; 718 const int on = 1; 719 720 /* Set connect options */ 721 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF, 722 (char *)&rcvbuf, sizeof (int), CRED()); 723 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF, 724 (char *)&sndbuf, sizeof (int), CRED()); 725 (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY, 726 (char *)&on, sizeof (on), CRED()); 727 } 728 729 static uint32_t 730 n2h24(const uchar_t *ptr) 731 { 732 return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]); 733 } 734 735 736 static idm_status_t 737 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu) 738 { 739 iscsi_hdr_t *bhs; 740 uint32_t hdr_digest_crc; 741 uint32_t crc_calculated; 742 void *new_hdr; 743 int ahslen = 0; 744 int total_len = 0; 745 int iovlen = 0; 746 struct iovec iov[2]; 747 idm_so_conn_t *so_conn; 748 int rc; 749 750 so_conn = ic->ic_transport_private; 751 752 /* 753 * Read BHS 754 */ 755 bhs = pdu->isp_hdr; 756 rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t)); 757 if (rc != IDM_STATUS_SUCCESS) { 758 return (IDM_STATUS_FAIL); 759 } 760 761 /* 762 * Check actual AHS length against the amount available in the buffer 763 */ 764 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) + 765 (bhs->hlength * sizeof (uint32_t)); 766 pdu->isp_datalen = n2h24(bhs->dlength); 767 if (ic->ic_conn_type == CONN_TYPE_TGT && 768 pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) { 769 IDM_CONN_LOG(CE_WARN, 770 "idm_sorecvhdr: exceeded the max data segment length"); 771 return (IDM_STATUS_FAIL); 772 } 773 if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) { 774 /* Allocate a new header segment and change the callback */ 775 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP); 776 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t)); 777 pdu->isp_hdr = new_hdr; 778 pdu->isp_flags |= IDM_PDU_ADDL_HDR; 779 780 /* 781 * This callback will restore the expected values after 782 * the RX PDU has been processed. 783 */ 784 pdu->isp_callback = idm_sorx_addl_pdu_cb; 785 } 786 787 /* 788 * Setup receipt of additional header and header digest (if enabled). 789 */ 790 if (bhs->hlength > 0) { 791 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1); 792 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t); 793 iov[iovlen].iov_len = ahslen; 794 total_len += iov[iovlen].iov_len; 795 iovlen++; 796 } 797 798 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) { 799 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc; 800 iov[iovlen].iov_len = sizeof (hdr_digest_crc); 801 total_len += iov[iovlen].iov_len; 802 iovlen++; 803 } 804 805 if ((iovlen != 0) && 806 (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen, 807 total_len) != 0)) { 808 return (IDM_STATUS_FAIL); 809 } 810 811 /* 812 * Validate header digest if enabled 813 */ 814 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) { 815 crc_calculated = idm_crc32c(pdu->isp_hdr, 816 sizeof (iscsi_hdr_t) + ahslen); 817 if (crc_calculated != hdr_digest_crc) { 818 /* Invalid Header Digest */ 819 return (IDM_STATUS_HEADER_DIGEST); 820 } 821 } 822 823 return (0); 824 } 825 826 /* 827 * idm_so_ini_conn_create() 828 * Allocate the sockets transport connection resources. 829 */ 830 static idm_status_t 831 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic) 832 { 833 ksocket_t so; 834 idm_so_conn_t *so_conn; 835 idm_status_t idmrc; 836 837 so = idm_socreate(cr->cr_domain, cr->cr_type, 838 cr->cr_protocol); 839 if (so == NULL) { 840 return (IDM_STATUS_FAIL); 841 } 842 843 /* Bind the socket if configured to do so */ 844 if (cr->cr_bound) { 845 if (ksocket_bind(so, &cr->cr_bound_addr.sin, 846 SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) { 847 idm_sodestroy(so); 848 return (IDM_STATUS_FAIL); 849 } 850 } 851 852 idmrc = idm_so_conn_create_common(ic, so); 853 if (idmrc != IDM_STATUS_SUCCESS) { 854 idm_soshutdown(so); 855 idm_sodestroy(so); 856 return (IDM_STATUS_FAIL); 857 } 858 859 so_conn = ic->ic_transport_private; 860 /* Set up socket options */ 861 idm_set_ini_preconnect_options(so_conn); 862 863 return (IDM_STATUS_SUCCESS); 864 } 865 866 /* 867 * idm_so_ini_conn_destroy() 868 * Tear down the sockets transport connection resources. 869 */ 870 static void 871 idm_so_ini_conn_destroy(idm_conn_t *ic) 872 { 873 idm_so_conn_destroy_common(ic); 874 } 875 876 /* 877 * idm_so_ini_conn_connect() 878 * Establish the connection referred to by the handle previously allocated via 879 * idm_so_ini_conn_create(). 880 */ 881 static idm_status_t 882 idm_so_ini_conn_connect(idm_conn_t *ic) 883 { 884 idm_so_conn_t *so_conn; 885 struct sonode *node = NULL; 886 int rc; 887 clock_t lbolt, conn_login_max, conn_login_interval; 888 boolean_t nonblock; 889 890 so_conn = ic->ic_transport_private; 891 nonblock = ic->ic_conn_params.nonblock_socket; 892 conn_login_max = ic->ic_conn_params.conn_login_max; 893 conn_login_interval = ddi_get_lbolt() + 894 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval); 895 896 if (nonblock == B_TRUE) { 897 node = ((struct sonode *)(so_conn->ic_so)); 898 /* Set to none block socket mode */ 899 idm_so_socket_set_nonblock(node); 900 do { 901 rc = ksocket_connect(so_conn->ic_so, 902 &ic->ic_ini_dst_addr.sin, 903 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), 904 CRED()); 905 if (rc == 0 || rc == EISCONN) { 906 /* socket success or already success */ 907 rc = IDM_STATUS_SUCCESS; 908 break; 909 } 910 if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) || 911 (rc == ECONNRESET)) { 912 /* socket connection timeout or refuse */ 913 break; 914 } 915 lbolt = ddi_get_lbolt(); 916 if (lbolt > conn_login_max) { 917 /* 918 * Connection retry timeout, 919 * failed connect to target. 920 */ 921 break; 922 } 923 if (lbolt < conn_login_interval) { 924 if ((rc == EINPROGRESS) || (rc == EALREADY)) { 925 /* TCP connect still in progress */ 926 delay(SEC_TO_TICK(IN_PROGRESS_DELAY)); 927 continue; 928 } else { 929 delay(conn_login_interval - lbolt); 930 } 931 } 932 conn_login_interval = ddi_get_lbolt() + 933 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval); 934 } while (rc != 0); 935 /* resume to nonblock mode */ 936 if (rc == IDM_STATUS_SUCCESS) { 937 idm_so_socket_set_block(node); 938 } 939 } else { 940 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin, 941 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED()); 942 } 943 944 if (rc != 0) { 945 idm_soshutdown(so_conn->ic_so); 946 return (IDM_STATUS_FAIL); 947 } 948 949 idm_so_conn_connect_common(ic); 950 951 idm_set_ini_postconnect_options(so_conn); 952 953 return (IDM_STATUS_SUCCESS); 954 } 955 956 idm_status_t 957 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so) 958 { 959 idm_status_t idmrc; 960 961 idmrc = idm_so_conn_create_common(ic, new_so); 962 963 return (idmrc); 964 } 965 966 static void 967 idm_so_tgt_conn_destroy(idm_conn_t *ic) 968 { 969 idm_so_conn_destroy_common(ic); 970 } 971 972 /* 973 * idm_so_tgt_conn_connect() 974 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which 975 * is invoked from the SM as a result of an inbound connection request. 976 */ 977 static idm_status_t 978 idm_so_tgt_conn_connect(idm_conn_t *ic) 979 { 980 idm_so_conn_connect_common(ic); 981 982 return (IDM_STATUS_SUCCESS); 983 } 984 985 static idm_status_t 986 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so) 987 { 988 idm_so_conn_t *so_conn; 989 990 so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP); 991 so_conn->ic_so = new_so; 992 993 ic->ic_transport_private = so_conn; 994 ic->ic_transport_hdrlen = 0; 995 996 /* Set the scoreboarding flag on this connection */ 997 ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD; 998 ic->ic_conn_params.max_recv_dataseglen = 999 ISCSI_DEFAULT_MAX_RECV_SEG_LEN; 1000 ic->ic_conn_params.max_xmit_dataseglen = 1001 ISCSI_DEFAULT_MAX_XMIT_SEG_LEN; 1002 1003 /* 1004 * Initialize tx thread mutex and list 1005 */ 1006 mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL); 1007 cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL); 1008 list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t), 1009 offsetof(idm_pdu_t, idm_tx_link)); 1010 1011 return (IDM_STATUS_SUCCESS); 1012 } 1013 1014 static void 1015 idm_so_conn_destroy_common(idm_conn_t *ic) 1016 { 1017 idm_so_conn_t *so_conn = ic->ic_transport_private; 1018 1019 ic->ic_transport_private = NULL; 1020 idm_sodestroy(so_conn->ic_so); 1021 list_destroy(&so_conn->ic_tx_list); 1022 mutex_destroy(&so_conn->ic_tx_mutex); 1023 cv_destroy(&so_conn->ic_tx_cv); 1024 1025 kmem_free(so_conn, sizeof (idm_so_conn_t)); 1026 } 1027 1028 static void 1029 idm_so_conn_connect_common(idm_conn_t *ic) 1030 { 1031 idm_so_conn_t *so_conn; 1032 struct sockaddr_in6 t_addr; 1033 socklen_t t_addrlen = 0; 1034 1035 so_conn = ic->ic_transport_private; 1036 bzero(&t_addr, sizeof (struct sockaddr_in6)); 1037 t_addrlen = sizeof (struct sockaddr_in6); 1038 1039 /* Set the local and remote addresses in the idm conn handle */ 1040 ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr, 1041 &t_addrlen, CRED()); 1042 bcopy(&t_addr, &ic->ic_laddr, t_addrlen); 1043 ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr, 1044 &t_addrlen, CRED()); 1045 bcopy(&t_addr, &ic->ic_raddr, t_addrlen); 1046 1047 mutex_enter(&ic->ic_mutex); 1048 so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0, 1049 &p0, TS_RUN, minclsyspri); 1050 so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0, 1051 &p0, TS_RUN, minclsyspri); 1052 1053 while (!so_conn->ic_rx_thread_running || !so_conn->ic_tx_thread_running) 1054 cv_wait(&ic->ic_cv, &ic->ic_mutex); 1055 mutex_exit(&ic->ic_mutex); 1056 } 1057 1058 /* 1059 * idm_so_conn_disconnect() 1060 * Shutdown the socket connection and stop the thread 1061 */ 1062 static void 1063 idm_so_conn_disconnect(idm_conn_t *ic) 1064 { 1065 idm_so_conn_t *so_conn; 1066 1067 so_conn = ic->ic_transport_private; 1068 1069 mutex_enter(&ic->ic_mutex); 1070 so_conn->ic_rx_thread_running = B_FALSE; 1071 so_conn->ic_tx_thread_running = B_FALSE; 1072 /* We need to wakeup the TX thread */ 1073 mutex_enter(&so_conn->ic_tx_mutex); 1074 cv_signal(&so_conn->ic_tx_cv); 1075 mutex_exit(&so_conn->ic_tx_mutex); 1076 mutex_exit(&ic->ic_mutex); 1077 1078 /* This should wakeup the RX thread if it is sleeping */ 1079 idm_soshutdown(so_conn->ic_so); 1080 1081 thread_join(so_conn->ic_tx_thread_did); 1082 thread_join(so_conn->ic_rx_thread_did); 1083 } 1084 1085 /* 1086 * idm_so_tgt_svc_create() 1087 * Establish a service on an IP address and port. idm_svc_req_t contains 1088 * the service parameters. 1089 */ 1090 /*ARGSUSED*/ 1091 static idm_status_t 1092 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is) 1093 { 1094 idm_so_svc_t *so_svc; 1095 1096 so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP); 1097 1098 /* Set the new sockets service in svc handle */ 1099 is->is_so_svc = (void *)so_svc; 1100 1101 return (IDM_STATUS_SUCCESS); 1102 } 1103 1104 /* 1105 * idm_so_tgt_svc_destroy() 1106 * Teardown sockets resources allocated in idm_so_tgt_svc_create() 1107 */ 1108 static void 1109 idm_so_tgt_svc_destroy(idm_svc_t *is) 1110 { 1111 /* the socket will have been torn down; free the service */ 1112 kmem_free(is->is_so_svc, sizeof (idm_so_svc_t)); 1113 } 1114 1115 /* 1116 * idm_so_tgt_svc_online() 1117 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create() 1118 */ 1119 1120 static idm_status_t 1121 idm_so_tgt_svc_online(idm_svc_t *is) 1122 { 1123 idm_so_svc_t *so_svc; 1124 idm_svc_req_t *sr = &is->is_svc_req; 1125 struct sockaddr_in6 sin6_ip; 1126 const uint32_t on = 1; 1127 const uint32_t off = 0; 1128 1129 mutex_enter(&is->is_mutex); 1130 so_svc = (idm_so_svc_t *)is->is_so_svc; 1131 1132 /* 1133 * Try creating an IPv6 socket first 1134 */ 1135 if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) { 1136 mutex_exit(&is->is_mutex); 1137 return (IDM_STATUS_FAIL); 1138 } else { 1139 bzero(&sin6_ip, sizeof (sin6_ip)); 1140 sin6_ip.sin6_family = AF_INET6; 1141 sin6_ip.sin6_port = htons(sr->sr_port); 1142 sin6_ip.sin6_addr = in6addr_any; 1143 1144 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET, 1145 SO_REUSEADDR, (char *)&on, sizeof (on), CRED()); 1146 /* 1147 * Turn off SO_MAC_EXEMPT so future sobinds succeed 1148 */ 1149 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET, 1150 SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED()); 1151 1152 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip, 1153 sizeof (sin6_ip), CRED()) != 0) { 1154 mutex_exit(&is->is_mutex); 1155 idm_sodestroy(so_svc->is_so); 1156 return (IDM_STATUS_FAIL); 1157 } 1158 } 1159 1160 idm_set_tgt_connect_options(so_svc->is_so); 1161 1162 if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) { 1163 mutex_exit(&is->is_mutex); 1164 idm_soshutdown(so_svc->is_so); 1165 idm_sodestroy(so_svc->is_so); 1166 return (IDM_STATUS_FAIL); 1167 } 1168 1169 /* Launch a watch thread */ 1170 so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher, 1171 is, 0, &p0, TS_RUN, minclsyspri); 1172 1173 if (so_svc->is_thread == NULL) { 1174 /* Failure to launch; teardown the socket */ 1175 mutex_exit(&is->is_mutex); 1176 idm_soshutdown(so_svc->is_so); 1177 idm_sodestroy(so_svc->is_so); 1178 return (IDM_STATUS_FAIL); 1179 } 1180 ksocket_hold(so_svc->is_so); 1181 /* Wait for the port watcher thread to start */ 1182 while (!so_svc->is_thread_running) 1183 cv_wait(&is->is_cv, &is->is_mutex); 1184 mutex_exit(&is->is_mutex); 1185 1186 return (IDM_STATUS_SUCCESS); 1187 } 1188 1189 /* 1190 * idm_so_tgt_svc_offline 1191 * 1192 * Stop listening on the IP address and port identified by idm_svc_t. 1193 */ 1194 static void 1195 idm_so_tgt_svc_offline(idm_svc_t *is) 1196 { 1197 idm_so_svc_t *so_svc; 1198 mutex_enter(&is->is_mutex); 1199 so_svc = (idm_so_svc_t *)is->is_so_svc; 1200 so_svc->is_thread_running = B_FALSE; 1201 mutex_exit(&is->is_mutex); 1202 1203 /* 1204 * Teardown socket 1205 */ 1206 idm_sodestroy(so_svc->is_so); 1207 1208 /* 1209 * Now we expect the port watcher thread to terminate 1210 */ 1211 thread_join(so_svc->is_thread_did); 1212 } 1213 1214 /* 1215 * Watch thread for target service connection establishment. 1216 */ 1217 void 1218 idm_so_svc_port_watcher(void *arg) 1219 { 1220 idm_svc_t *svc = arg; 1221 ksocket_t new_so; 1222 idm_conn_t *ic; 1223 idm_status_t idmrc; 1224 idm_so_svc_t *so_svc; 1225 int rc; 1226 const uint32_t off = 0; 1227 struct sockaddr_in6 t_addr; 1228 socklen_t t_addrlen; 1229 1230 bzero(&t_addr, sizeof (struct sockaddr_in6)); 1231 t_addrlen = sizeof (struct sockaddr_in6); 1232 mutex_enter(&svc->is_mutex); 1233 1234 so_svc = svc->is_so_svc; 1235 so_svc->is_thread_running = B_TRUE; 1236 so_svc->is_thread_did = so_svc->is_thread->t_did; 1237 1238 cv_signal(&svc->is_cv); 1239 1240 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc, 1241 svc->is_svc_req.sr_port); 1242 1243 while (so_svc->is_thread_running) { 1244 mutex_exit(&svc->is_mutex); 1245 1246 if ((rc = ksocket_accept(so_svc->is_so, 1247 (struct sockaddr *)&t_addr, &t_addrlen, 1248 &new_so, CRED())) != 0) { 1249 mutex_enter(&svc->is_mutex); 1250 if (rc == ECONNABORTED) 1251 continue; 1252 /* Connection problem */ 1253 break; 1254 } 1255 /* 1256 * Turn off SO_MAC_EXEMPT so future sobinds succeed 1257 */ 1258 (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT, 1259 (char *)&off, sizeof (off), CRED()); 1260 1261 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS, 1262 &ic); 1263 if (idmrc != IDM_STATUS_SUCCESS) { 1264 /* Drop connection */ 1265 idm_soshutdown(new_so); 1266 idm_sodestroy(new_so); 1267 mutex_enter(&svc->is_mutex); 1268 continue; 1269 } 1270 1271 idmrc = idm_so_tgt_conn_create(ic, new_so); 1272 if (idmrc != IDM_STATUS_SUCCESS) { 1273 idm_svc_conn_destroy(ic); 1274 idm_soshutdown(new_so); 1275 idm_sodestroy(new_so); 1276 mutex_enter(&svc->is_mutex); 1277 continue; 1278 } 1279 1280 /* 1281 * Kick the state machine. At CS_S3_XPT_UP the state machine 1282 * will notify the client (target) about the new connection. 1283 */ 1284 idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL); 1285 1286 mutex_enter(&svc->is_mutex); 1287 } 1288 ksocket_rele(so_svc->is_so); 1289 so_svc->is_thread_running = B_FALSE; 1290 mutex_exit(&svc->is_mutex); 1291 1292 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc, 1293 svc->is_svc_req.sr_port); 1294 1295 thread_exit(); 1296 } 1297 1298 /* 1299 * idm_so_free_task_rsrc() stops any ongoing processing of the task and 1300 * frees resources associated with the task. 1301 * 1302 * It's not clear that this should return idm_status_t. What do we do 1303 * if it fails? 1304 */ 1305 static idm_status_t 1306 idm_so_free_task_rsrc(idm_task_t *idt) 1307 { 1308 idm_buf_t *idb; 1309 1310 /* 1311 * There is nothing to cleanup on initiator connections 1312 */ 1313 if (IDM_CONN_ISINI(idt->idt_ic)) 1314 return (IDM_STATUS_SUCCESS); 1315 1316 /* 1317 * If this is a target connection, call idm_buf_rx_from_ini_done for 1318 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE. 1319 * 1320 * In addition, remove any buffers associated with this task from 1321 * the ic_tx_list. We'll do this by walking the idt_inbufv list, but 1322 * items don't actually get removed from that list (and completion 1323 * routines called) until idm_task_cleanup. 1324 */ 1325 mutex_enter(&idt->idt_mutex); 1326 1327 for (idb = list_head(&idt->idt_outbufv); idb != NULL; 1328 idb = list_next(&idt->idt_outbufv, idb)) { 1329 if (idb->idb_in_transport) { 1330 /* 1331 * idm_buf_rx_from_ini_done releases idt->idt_mutex 1332 */ 1333 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 1334 uintptr_t, idb->idb_buf, 1335 uint32_t, idb->idb_bufoffset, 1336 uint64_t, 0, uint32_t, 0, uint32_t, 0, 1337 uint32_t, idb->idb_xfer_len, 1338 int, XFER_BUF_RX_FROM_INI); 1339 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED); 1340 mutex_enter(&idt->idt_mutex); 1341 } 1342 } 1343 1344 for (idb = list_head(&idt->idt_inbufv); idb != NULL; 1345 idb = list_next(&idt->idt_inbufv, idb)) { 1346 /* 1347 * We want to remove these items from the tx_list as well, 1348 * but knowing it's in the idt_inbufv list is not a guarantee 1349 * that it's in the tx_list. If it's on the tx list then 1350 * let idm_sotx_thread() clean it up. 1351 */ 1352 if (idb->idb_in_transport && !idb->idb_tx_thread) { 1353 /* 1354 * idm_buf_tx_to_ini_done releases idt->idt_mutex 1355 */ 1356 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 1357 uintptr_t, idb->idb_buf, 1358 uint32_t, idb->idb_bufoffset, 1359 uint64_t, 0, uint32_t, 0, uint32_t, 0, 1360 uint32_t, idb->idb_xfer_len, 1361 int, XFER_BUF_TX_TO_INI); 1362 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED); 1363 mutex_enter(&idt->idt_mutex); 1364 } 1365 } 1366 1367 mutex_exit(&idt->idt_mutex); 1368 1369 return (IDM_STATUS_SUCCESS); 1370 } 1371 1372 /* 1373 * idm_so_negotiate_key_values() validates the key values for this connection 1374 */ 1375 /* ARGSUSED */ 1376 static kv_status_t 1377 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl, 1378 nvlist_t *response_nvl, nvlist_t *negotiated_nvl) 1379 { 1380 /* All parameters are negotiated at the iscsit level */ 1381 return (KV_HANDLED); 1382 } 1383 1384 /* 1385 * idm_so_notice_key_values() activates the negotiated key values for 1386 * this connection. 1387 */ 1388 static void 1389 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl) 1390 { 1391 char *nvp_name; 1392 nvpair_t *nvp; 1393 nvpair_t *next_nvp; 1394 int nvrc; 1395 idm_status_t idm_status; 1396 const idm_kv_xlate_t *ikvx; 1397 uint64_t num_val; 1398 1399 for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL); 1400 nvp != NULL; nvp = next_nvp) { 1401 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp); 1402 nvp_name = nvpair_name(nvp); 1403 1404 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name)); 1405 switch (ikvx->ik_key_id) { 1406 case KI_HEADER_DIGEST: 1407 case KI_DATA_DIGEST: 1408 idm_status = idm_so_handle_digest(it, nvp, ikvx); 1409 ASSERT(idm_status == 0); 1410 1411 /* Remove processed item from negotiated_nvl list */ 1412 nvrc = nvlist_remove_all( 1413 negotiated_nvl, ikvx->ik_key_name); 1414 ASSERT(nvrc == 0); 1415 break; 1416 case KI_MAX_RECV_DATA_SEGMENT_LENGTH: 1417 /* 1418 * Just pass the value down to idm layer. 1419 * No need to remove it from negotiated_nvl list here. 1420 */ 1421 nvrc = nvpair_value_uint64(nvp, &num_val); 1422 ASSERT(nvrc == 0); 1423 it->ic_conn_params.max_xmit_dataseglen = 1424 (uint32_t)num_val; 1425 break; 1426 default: 1427 break; 1428 } 1429 } 1430 } 1431 1432 /* 1433 * idm_so_declare_key_values() declares the key values for this connection 1434 */ 1435 /* ARGSUSED */ 1436 static kv_status_t 1437 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl, 1438 nvlist_t *outgoing_nvl) 1439 { 1440 char *nvp_name; 1441 nvpair_t *nvp; 1442 nvpair_t *next_nvp; 1443 kv_status_t kvrc; 1444 int nvrc = 0; 1445 const idm_kv_xlate_t *ikvx; 1446 uint64_t num_val; 1447 1448 for (nvp = nvlist_next_nvpair(config_nvl, NULL); 1449 nvp != NULL && nvrc == 0; nvp = next_nvp) { 1450 next_nvp = nvlist_next_nvpair(config_nvl, nvp); 1451 nvp_name = nvpair_name(nvp); 1452 1453 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name)); 1454 switch (ikvx->ik_key_id) { 1455 case KI_MAX_RECV_DATA_SEGMENT_LENGTH: 1456 if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) { 1457 break; 1458 } 1459 if (outgoing_nvl && 1460 (nvrc = nvlist_add_uint64(outgoing_nvl, 1461 nvp_name, num_val)) != 0) { 1462 break; 1463 } 1464 it->ic_conn_params.max_recv_dataseglen = 1465 (uint32_t)num_val; 1466 break; 1467 default: 1468 break; 1469 } 1470 } 1471 kvrc = idm_nvstat_to_kvstat(nvrc); 1472 return (kvrc); 1473 } 1474 1475 static idm_status_t 1476 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice, 1477 const idm_kv_xlate_t *ikvx) 1478 { 1479 int nvrc; 1480 char *digest_choice_string; 1481 1482 nvrc = nvpair_value_string(digest_choice, 1483 &digest_choice_string); 1484 ASSERT(nvrc == 0); 1485 if (strcasecmp(digest_choice_string, "crc32c") == 0) { 1486 switch (ikvx->ik_key_id) { 1487 case KI_HEADER_DIGEST: 1488 it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST; 1489 break; 1490 case KI_DATA_DIGEST: 1491 it->ic_conn_flags |= IDM_CONN_DATA_DIGEST; 1492 break; 1493 default: 1494 ASSERT(0); 1495 break; 1496 } 1497 } else if (strcasecmp(digest_choice_string, "none") == 0) { 1498 switch (ikvx->ik_key_id) { 1499 case KI_HEADER_DIGEST: 1500 it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST; 1501 break; 1502 case KI_DATA_DIGEST: 1503 it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST; 1504 break; 1505 default: 1506 ASSERT(0); 1507 break; 1508 } 1509 } else { 1510 ASSERT(0); 1511 } 1512 1513 return (IDM_STATUS_SUCCESS); 1514 } 1515 1516 1517 /* 1518 * idm_so_conn_is_capable() verifies that the passed connection is provided 1519 * for by the sockets interface. 1520 */ 1521 /* ARGSUSED */ 1522 static boolean_t 1523 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps) 1524 { 1525 return (B_TRUE); 1526 } 1527 1528 /* 1529 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The 1530 * idm_sorecv_scsidata() function invoked earlier actually reads the data 1531 * off the socket into the appropriate buffers. 1532 */ 1533 static void 1534 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu) 1535 { 1536 iscsi_data_hdr_t *bhs; 1537 idm_task_t *idt; 1538 idm_buf_t *idb; 1539 uint32_t datasn; 1540 size_t offset; 1541 iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr; 1542 iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp; 1543 1544 ASSERT(ic != NULL); 1545 ASSERT(pdu != NULL); 1546 1547 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr; 1548 datasn = ntohl(bhs->datasn); 1549 offset = ntohl(bhs->offset); 1550 1551 ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP); 1552 1553 /* 1554 * Look up the task corresponding to the initiator task tag 1555 * to get the buffers affiliated with the task. 1556 */ 1557 idt = idm_task_find(ic, bhs->itt, bhs->ttt); 1558 if (idt == NULL) { 1559 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task"); 1560 idm_pdu_rx_protocol_error(ic, pdu); 1561 return; 1562 } 1563 1564 idb = pdu->isp_sorx_buf; 1565 if (idb == NULL) { 1566 IDM_CONN_LOG(CE_WARN, 1567 "idm_so_rx_datain: failed to find buffer"); 1568 idm_task_rele(idt); 1569 idm_pdu_rx_protocol_error(ic, pdu); 1570 return; 1571 } 1572 1573 /* 1574 * DataSN values should be sequential and should not have any gaps or 1575 * repetitions. Check the DataSN with the one stored in the task. 1576 */ 1577 if (datasn == idt->idt_exp_datasn) { 1578 idt->idt_exp_datasn++; /* keep track of DataSN received */ 1579 } else { 1580 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order"); 1581 idm_task_rele(idt); 1582 idm_pdu_rx_protocol_error(ic, pdu); 1583 return; 1584 } 1585 1586 /* 1587 * PDUs in a sequence should be in continuously increasing 1588 * address offset 1589 */ 1590 if (offset != idb->idb_exp_offset) { 1591 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset"); 1592 idm_task_rele(idt); 1593 idm_pdu_rx_protocol_error(ic, pdu); 1594 return; 1595 } 1596 /* Expected next relative buffer offset */ 1597 idb->idb_exp_offset += n2h24(bhs->dlength); 1598 idt->idt_rx_bytes += n2h24(bhs->dlength); 1599 1600 idm_task_rele(idt); 1601 1602 /* 1603 * For now call scsi_rsp which will process the data rsp 1604 * Revisit, need to provide an explicit client entry point for 1605 * phase collapse completions. 1606 */ 1607 if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) && 1608 (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) { 1609 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu); 1610 } 1611 1612 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1613 } 1614 1615 /* 1616 * The idm_so_rx_dataout() function is used by the iSCSI target to read 1617 * data from the Data-Out PDU sent by the iSCSI initiator. 1618 * 1619 * This function gets the Initiator Task Tag from the PDU BHS and looks up the 1620 * task to get the buffers associated with the PDU. A PDU might span buffers. 1621 * The data is then read into the respective buffer. 1622 */ 1623 static void 1624 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu) 1625 { 1626 1627 iscsi_data_hdr_t *bhs; 1628 idm_task_t *idt; 1629 idm_buf_t *idb; 1630 size_t offset; 1631 1632 ASSERT(ic != NULL); 1633 ASSERT(pdu != NULL); 1634 1635 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr; 1636 offset = ntohl(bhs->offset); 1637 ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA); 1638 1639 /* 1640 * Look up the task corresponding to the initiator task tag 1641 * to get the buffers affiliated with the task. 1642 */ 1643 idt = idm_task_find(ic, bhs->itt, bhs->ttt); 1644 if (idt == NULL) { 1645 IDM_CONN_LOG(CE_WARN, 1646 "idm_so_rx_dataout: failed to find task"); 1647 idm_pdu_rx_protocol_error(ic, pdu); 1648 return; 1649 } 1650 1651 idb = pdu->isp_sorx_buf; 1652 if (idb == NULL) { 1653 IDM_CONN_LOG(CE_WARN, 1654 "idm_so_rx_dataout: failed to find buffer"); 1655 idm_task_rele(idt); 1656 idm_pdu_rx_protocol_error(ic, pdu); 1657 return; 1658 } 1659 1660 /* Keep track of data transferred - check data offsets */ 1661 if (offset != idb->idb_exp_offset) { 1662 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: " 1663 "%ld, %d", offset, idb->idb_exp_offset); 1664 idm_task_rele(idt); 1665 idm_pdu_rx_protocol_error(ic, pdu); 1666 return; 1667 } 1668 /* Expected next relative offset */ 1669 idb->idb_exp_offset += ntoh24(bhs->dlength); 1670 idt->idt_rx_bytes += n2h24(bhs->dlength); 1671 1672 /* 1673 * Call the buffer callback when the transfer is complete 1674 * 1675 * The connection state machine should only abort tasks after 1676 * shutting down the connection so we are assured that there 1677 * won't be a simultaneous attempt to abort this task at the 1678 * same time as we are processing this PDU (due to a connection 1679 * state change). 1680 */ 1681 if (bhs->flags & ISCSI_FLAG_FINAL) { 1682 /* 1683 * We only want to call idm_buf_rx_from_ini_done once 1684 * per transfer. It's possible that this task has 1685 * already been aborted in which case 1686 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done 1687 * for each buffer with idb_in_transport==B_TRUE. To 1688 * close this window and ensure that this doesn't happen, 1689 * we'll clear idb->idb_in_transport now while holding 1690 * the task mutex. This is only really an issue for 1691 * SCSI task abort -- if tasks were being aborted because 1692 * of a connection state change the state machine would 1693 * have already stopped the receive thread. 1694 */ 1695 mutex_enter(&idt->idt_mutex); 1696 1697 /* 1698 * Release the task hold here (obtained in idm_task_find) 1699 * because the task may complete synchronously during 1700 * idm_buf_rx_from_ini_done. Since we still have an active 1701 * buffer we know there is at least one additional hold on idt. 1702 */ 1703 idm_task_rele(idt); 1704 1705 /* 1706 * idm_buf_rx_from_ini_done releases idt->idt_mutex 1707 */ 1708 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 1709 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 1710 uint64_t, 0, uint32_t, 0, uint32_t, 0, 1711 uint32_t, idb->idb_xfer_len, 1712 int, XFER_BUF_RX_FROM_INI); 1713 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS); 1714 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1715 return; 1716 } 1717 1718 idm_task_rele(idt); 1719 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1720 } 1721 1722 /* 1723 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle 1724 * the R2T PDU sent by the iSCSI target indicating that it is ready to 1725 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS 1726 * and looks up the task in the task tree using the itt to get the output 1727 * buffers associated the task. The R2T PDU contains the offset of the 1728 * requested data and the data length. This function then constructs a 1729 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out 1730 * PDU is associated with the R2T by the Target Transfer Tag (ttt). 1731 */ 1732 1733 static void 1734 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu) 1735 { 1736 idm_task_t *idt; 1737 idm_buf_t *idb; 1738 iscsi_rtt_hdr_t *rtt_hdr; 1739 uint32_t data_offset; 1740 uint32_t data_length; 1741 1742 ASSERT(ic != NULL); 1743 ASSERT(pdu != NULL); 1744 1745 rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr; 1746 data_offset = ntohl(rtt_hdr->data_offset); 1747 data_length = ntohl(rtt_hdr->data_length); 1748 idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt); 1749 1750 if (idt == NULL) { 1751 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task"); 1752 idm_pdu_rx_protocol_error(ic, pdu); 1753 return; 1754 } 1755 1756 /* Find the buffer bound to the task by the iSCSI initiator */ 1757 mutex_enter(&idt->idt_mutex); 1758 idb = idm_buf_find(&idt->idt_outbufv, data_offset); 1759 if (idb == NULL) { 1760 mutex_exit(&idt->idt_mutex); 1761 idm_task_rele(idt); 1762 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer"); 1763 idm_pdu_rx_protocol_error(ic, pdu); 1764 return; 1765 } 1766 1767 /* return buffer contains this data */ 1768 if (data_offset + data_length > idb->idb_buflen) { 1769 /* Overflow */ 1770 mutex_exit(&idt->idt_mutex); 1771 idm_task_rele(idt); 1772 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside " 1773 "buffer"); 1774 idm_pdu_rx_protocol_error(ic, pdu); 1775 return; 1776 } 1777 1778 idt->idt_r2t_ttt = rtt_hdr->ttt; 1779 idt->idt_exp_datasn = 0; 1780 1781 idm_so_send_rtt_data(ic, idt, idb, data_offset, 1782 ntohl(rtt_hdr->data_length)); 1783 mutex_exit(&idt->idt_mutex); 1784 1785 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1786 idm_task_rele(idt); 1787 1788 } 1789 1790 idm_status_t 1791 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu) 1792 { 1793 uint8_t pad[ISCSI_PAD_WORD_LEN]; 1794 int pad_len; 1795 uint32_t data_digest_crc; 1796 uint32_t crc_calculated; 1797 int total_len; 1798 idm_so_conn_t *so_conn; 1799 1800 so_conn = ic->ic_transport_private; 1801 1802 pad_len = ((ISCSI_PAD_WORD_LEN - 1803 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) & 1804 (ISCSI_PAD_WORD_LEN - 1)); 1805 1806 ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */ 1807 1808 total_len = pdu->isp_datalen; 1809 1810 if (pad_len) { 1811 pdu->isp_iov[pdu->isp_iovlen].iov_base = (char *)&pad; 1812 pdu->isp_iov[pdu->isp_iovlen].iov_len = pad_len; 1813 total_len += pad_len; 1814 pdu->isp_iovlen++; 1815 } 1816 1817 /* setup data digest */ 1818 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) { 1819 pdu->isp_iov[pdu->isp_iovlen].iov_base = 1820 (char *)&data_digest_crc; 1821 pdu->isp_iov[pdu->isp_iovlen].iov_len = 1822 sizeof (data_digest_crc); 1823 total_len += sizeof (data_digest_crc); 1824 pdu->isp_iovlen++; 1825 } 1826 1827 pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base; 1828 1829 if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0], 1830 pdu->isp_iovlen, total_len) != 0) { 1831 return (IDM_STATUS_IO); 1832 } 1833 1834 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) { 1835 crc_calculated = idm_crc32c(pdu->isp_data, 1836 pdu->isp_datalen); 1837 if (pad_len) { 1838 crc_calculated = idm_crc32c_continued((char *)&pad, 1839 pad_len, crc_calculated); 1840 } 1841 if (crc_calculated != data_digest_crc) { 1842 IDM_CONN_LOG(CE_WARN, 1843 "idm_sorecvdata: " 1844 "CRC error: actual 0x%x, calc 0x%x", 1845 data_digest_crc, crc_calculated); 1846 1847 /* Invalid Data Digest */ 1848 return (IDM_STATUS_DATA_DIGEST); 1849 } 1850 } 1851 1852 return (IDM_STATUS_SUCCESS); 1853 } 1854 1855 /* 1856 * idm_sorecv_scsidata() is used to receive scsi data from the socket. The 1857 * Data-type PDU header must be read into the idm_pdu_t structure prior to 1858 * calling this function. 1859 */ 1860 idm_status_t 1861 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu) 1862 { 1863 iscsi_data_hdr_t *bhs; 1864 idm_task_t *task; 1865 uint32_t offset; 1866 uint8_t opcode; 1867 uint32_t dlength; 1868 list_t *buflst; 1869 uint32_t xfer_bytes; 1870 idm_status_t status; 1871 1872 ASSERT(ic != NULL); 1873 ASSERT(pdu != NULL); 1874 1875 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr; 1876 1877 offset = ntohl(bhs->offset); 1878 opcode = bhs->opcode; 1879 dlength = n2h24(bhs->dlength); 1880 1881 ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) || 1882 (opcode == ISCSI_OP_SCSI_DATA)); 1883 1884 /* 1885 * Successful lookup implicitly gets a "hold" on the task. This 1886 * hold must be released before leaving this function. At one 1887 * point we were caching this task context and retaining the hold 1888 * but it turned out to be very difficult to release the hold properly. 1889 * The task can be aborted and the connection shutdown between this 1890 * call and the subsequent expected call to idm_so_rx_datain/ 1891 * idm_so_rx_dataout (in which case those functions are not called). 1892 * Releasing the hold in the PDU callback doesn't work well either 1893 * because the whole task may be completed by then at which point 1894 * it is too late to release the hold -- for better or worse this 1895 * code doesn't wait on the refcnts during normal operation. 1896 * idm_task_find() is very fast and it is not a huge burden if we 1897 * have to do it twice. 1898 */ 1899 task = idm_task_find(ic, bhs->itt, bhs->ttt); 1900 if (task == NULL) { 1901 IDM_CONN_LOG(CE_WARN, 1902 "idm_sorecv_scsidata: could not find task"); 1903 return (IDM_STATUS_FAIL); 1904 } 1905 1906 mutex_enter(&task->idt_mutex); 1907 buflst = (opcode == ISCSI_OP_SCSI_DATA_RSP) ? 1908 &task->idt_inbufv : &task->idt_outbufv; 1909 pdu->isp_sorx_buf = idm_buf_find(buflst, offset); 1910 mutex_exit(&task->idt_mutex); 1911 1912 if (pdu->isp_sorx_buf == NULL) { 1913 idm_task_rele(task); 1914 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find " 1915 "buffer for offset %x opcode=%x", 1916 offset, opcode); 1917 return (IDM_STATUS_FAIL); 1918 } 1919 1920 xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength); 1921 ASSERT(xfer_bytes != 0); 1922 if (xfer_bytes != dlength) { 1923 idm_task_rele(task); 1924 /* 1925 * Buffer overflow, connection error. The PDU data is still 1926 * sitting in the socket so we can't use the connection 1927 * again until that data is drained. 1928 */ 1929 return (IDM_STATUS_FAIL); 1930 } 1931 1932 status = idm_sorecvdata(ic, pdu); 1933 1934 idm_task_rele(task); 1935 1936 return (status); 1937 } 1938 1939 static uint32_t 1940 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength) 1941 { 1942 uint32_t buf_ro = ro - idb->idb_bufoffset; 1943 uint32_t xfer_len = min(dlength, idb->idb_buflen - buf_ro); 1944 1945 ASSERT(ro >= idb->idb_bufoffset); 1946 1947 pdu->isp_iov[pdu->isp_iovlen].iov_base = 1948 (caddr_t)idb->idb_buf + buf_ro; 1949 pdu->isp_iov[pdu->isp_iovlen].iov_len = xfer_len; 1950 pdu->isp_iovlen++; 1951 1952 return (xfer_len); 1953 } 1954 1955 int 1956 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu) 1957 { 1958 pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP); 1959 ASSERT(pdu->isp_data != NULL); 1960 1961 pdu->isp_databuflen = pdu->isp_datalen; 1962 pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data; 1963 pdu->isp_iov[0].iov_len = pdu->isp_datalen; 1964 pdu->isp_iovlen = 1; 1965 /* 1966 * Since we are associating a new data buffer with this received 1967 * PDU we need to set a specific callback to free the data 1968 * after the PDU is processed. 1969 */ 1970 pdu->isp_flags |= IDM_PDU_ADDL_DATA; 1971 pdu->isp_callback = idm_sorx_addl_pdu_cb; 1972 1973 return (idm_sorecvdata(ic, pdu)); 1974 } 1975 1976 void 1977 idm_sorx_thread(void *arg) 1978 { 1979 boolean_t conn_failure = B_FALSE; 1980 idm_conn_t *ic = (idm_conn_t *)arg; 1981 idm_so_conn_t *so_conn; 1982 idm_pdu_t *pdu; 1983 idm_status_t rc; 1984 1985 idm_conn_hold(ic); 1986 1987 mutex_enter(&ic->ic_mutex); 1988 1989 so_conn = ic->ic_transport_private; 1990 so_conn->ic_rx_thread_running = B_TRUE; 1991 so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did; 1992 cv_signal(&ic->ic_cv); 1993 1994 while (so_conn->ic_rx_thread_running) { 1995 mutex_exit(&ic->ic_mutex); 1996 1997 /* 1998 * Get PDU with default header size (large enough for 1999 * BHS plus any anticipated AHS). PDU from 2000 * the cache will have all values set correctly 2001 * for sockets RX including callback. 2002 */ 2003 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP); 2004 pdu->isp_ic = ic; 2005 pdu->isp_flags = 0; 2006 pdu->isp_transport_hdrlen = 0; 2007 2008 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) { 2009 /* 2010 * Call idm_pdu_complete so that we call the callback 2011 * and ensure any memory allocated in idm_sorecvhdr 2012 * gets freed up. 2013 */ 2014 idm_pdu_complete(pdu, IDM_STATUS_FAIL); 2015 2016 /* 2017 * If ic_rx_thread_running is still set then 2018 * this is some kind of connection problem 2019 * on the socket. In this case we want to 2020 * generate an event. Otherwise some other 2021 * thread closed the socket due to another 2022 * issue in which case we don't need to 2023 * generate an event. 2024 */ 2025 mutex_enter(&ic->ic_mutex); 2026 if (so_conn->ic_rx_thread_running) { 2027 conn_failure = B_TRUE; 2028 so_conn->ic_rx_thread_running = B_FALSE; 2029 } 2030 2031 continue; 2032 } 2033 2034 /* 2035 * Header has been read and validated. Now we need 2036 * to read the PDU data payload (if present). SCSI data 2037 * need to be transferred from the socket directly into 2038 * the associated transfer buffer for the SCSI task. 2039 */ 2040 if (pdu->isp_datalen != 0) { 2041 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) || 2042 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) { 2043 rc = idm_sorecv_scsidata(ic, pdu); 2044 /* 2045 * All SCSI errors are fatal to the 2046 * connection right now since we have no 2047 * place to put the data. What we need 2048 * is some kind of sink to dispose of unwanted 2049 * SCSI data. For example an invalid task tag 2050 * should not kill the connection (although 2051 * we may want to drop the connection). 2052 */ 2053 } else { 2054 /* 2055 * Not data PDUs so allocate a buffer for the 2056 * data segment and read the remaining data. 2057 */ 2058 rc = idm_sorecv_nonscsidata(ic, pdu); 2059 } 2060 if (rc != 0) { 2061 /* 2062 * Call idm_pdu_complete so that we call the 2063 * callback and ensure any memory allocated 2064 * in idm_sorecvhdr gets freed up. 2065 */ 2066 idm_pdu_complete(pdu, IDM_STATUS_FAIL); 2067 2068 /* 2069 * If ic_rx_thread_running is still set then 2070 * this is some kind of connection problem 2071 * on the socket. In this case we want to 2072 * generate an event. Otherwise some other 2073 * thread closed the socket due to another 2074 * issue in which case we don't need to 2075 * generate an event. 2076 */ 2077 mutex_enter(&ic->ic_mutex); 2078 if (so_conn->ic_rx_thread_running) { 2079 conn_failure = B_TRUE; 2080 so_conn->ic_rx_thread_running = B_FALSE; 2081 } 2082 continue; 2083 } 2084 } 2085 2086 /* 2087 * Process RX PDU 2088 */ 2089 idm_pdu_rx(ic, pdu); 2090 2091 mutex_enter(&ic->ic_mutex); 2092 } 2093 2094 mutex_exit(&ic->ic_mutex); 2095 2096 /* 2097 * If we dropped out of the RX processing loop because of 2098 * a socket problem or other connection failure (including 2099 * digest errors) then we need to generate a state machine 2100 * event to shut the connection down. 2101 * If the state machine is already in, for example, INIT_ERROR, this 2102 * event will get dropped, and the TX thread will never be notified 2103 * to shut down. To be safe, we'll just notify it here. 2104 */ 2105 if (conn_failure) { 2106 if (so_conn->ic_tx_thread_running) { 2107 so_conn->ic_tx_thread_running = B_FALSE; 2108 mutex_enter(&so_conn->ic_tx_mutex); 2109 cv_signal(&so_conn->ic_tx_cv); 2110 mutex_exit(&so_conn->ic_tx_mutex); 2111 } 2112 2113 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc); 2114 } 2115 2116 idm_conn_rele(ic); 2117 2118 thread_exit(); 2119 } 2120 2121 /* 2122 * idm_so_tx 2123 * 2124 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry 2125 * point. By definition, it is supposed to be fast. So, simply queue 2126 * the entry and return. The real work is done by idm_i_so_tx() via 2127 * idm_sotx_thread(). 2128 */ 2129 2130 static void 2131 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu) 2132 { 2133 idm_so_conn_t *so_conn = ic->ic_transport_private; 2134 2135 ASSERT(pdu->isp_ic == ic); 2136 mutex_enter(&so_conn->ic_tx_mutex); 2137 2138 if (!so_conn->ic_tx_thread_running) { 2139 mutex_exit(&so_conn->ic_tx_mutex); 2140 idm_pdu_complete(pdu, IDM_STATUS_ABORTED); 2141 return; 2142 } 2143 2144 list_insert_tail(&so_conn->ic_tx_list, (void *)pdu); 2145 cv_signal(&so_conn->ic_tx_cv); 2146 mutex_exit(&so_conn->ic_tx_mutex); 2147 } 2148 2149 static idm_status_t 2150 idm_i_so_tx(idm_pdu_t *pdu) 2151 { 2152 idm_conn_t *ic = pdu->isp_ic; 2153 idm_status_t status = IDM_STATUS_SUCCESS; 2154 uint8_t pad[ISCSI_PAD_WORD_LEN]; 2155 int pad_len; 2156 uint32_t hdr_digest_crc; 2157 uint32_t data_digest_crc = 0; 2158 int total_len = 0; 2159 int iovlen = 0; 2160 struct iovec iov[6]; 2161 idm_so_conn_t *so_conn; 2162 2163 so_conn = ic->ic_transport_private; 2164 2165 /* Setup BHS */ 2166 iov[iovlen].iov_base = (caddr_t)pdu->isp_hdr; 2167 iov[iovlen].iov_len = pdu->isp_hdrlen; 2168 total_len += iov[iovlen].iov_len; 2169 iovlen++; 2170 2171 /* Setup header digest */ 2172 if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) && 2173 (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) { 2174 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen); 2175 2176 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc; 2177 iov[iovlen].iov_len = sizeof (hdr_digest_crc); 2178 total_len += iov[iovlen].iov_len; 2179 iovlen++; 2180 } 2181 2182 /* Setup the data */ 2183 if (pdu->isp_datalen) { 2184 idm_task_t *idt; 2185 idm_buf_t *idb; 2186 iscsi_data_hdr_t *ihp; 2187 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr; 2188 /* Write of immediate data */ 2189 if (ic->ic_ffp && 2190 (ihp->opcode == ISCSI_OP_SCSI_CMD || 2191 ihp->opcode == ISCSI_OP_SCSI_DATA)) { 2192 idt = idm_task_find(ic, ihp->itt, ihp->ttt); 2193 if (idt) { 2194 mutex_enter(&idt->idt_mutex); 2195 idb = idm_buf_find(&idt->idt_outbufv, 0); 2196 mutex_exit(&idt->idt_mutex); 2197 /* 2198 * If the initiator call to idm_buf_alloc 2199 * failed then we can get to this point 2200 * without a bound buffer. The associated 2201 * connection failure will clean things up 2202 * later. It would be nice to come up with 2203 * a cleaner way to handle this. In 2204 * particular it seems absurd to look up 2205 * the task and the buffer just to update 2206 * this counter. 2207 */ 2208 if (idb) 2209 idb->idb_xfer_len += pdu->isp_datalen; 2210 idm_task_rele(idt); 2211 } 2212 } 2213 2214 iov[iovlen].iov_base = (caddr_t)pdu->isp_data; 2215 iov[iovlen].iov_len = pdu->isp_datalen; 2216 total_len += iov[iovlen].iov_len; 2217 iovlen++; 2218 } 2219 2220 /* Setup the data pad if necessary */ 2221 pad_len = ((ISCSI_PAD_WORD_LEN - 2222 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) & 2223 (ISCSI_PAD_WORD_LEN - 1)); 2224 2225 if (pad_len) { 2226 bzero(pad, sizeof (pad)); 2227 iov[iovlen].iov_base = (void *)&pad; 2228 iov[iovlen].iov_len = pad_len; 2229 total_len += iov[iovlen].iov_len; 2230 iovlen++; 2231 } 2232 2233 /* 2234 * Setup the data digest if enabled. Data-digest is not sent 2235 * for login-phase PDUs. 2236 */ 2237 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) && 2238 ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) && 2239 (pdu->isp_datalen || pad_len)) { 2240 /* 2241 * RFC3720/10.2.3: A zero-length Data Segment also 2242 * implies a zero-length data digest. 2243 */ 2244 if (pdu->isp_datalen) { 2245 data_digest_crc = idm_crc32c(pdu->isp_data, 2246 pdu->isp_datalen); 2247 } 2248 if (pad_len) { 2249 data_digest_crc = idm_crc32c_continued(&pad, 2250 pad_len, data_digest_crc); 2251 } 2252 2253 iov[iovlen].iov_base = (caddr_t)&data_digest_crc; 2254 iov[iovlen].iov_len = sizeof (data_digest_crc); 2255 total_len += iov[iovlen].iov_len; 2256 iovlen++; 2257 } 2258 2259 /* Transmit the PDU */ 2260 if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen, 2261 total_len) != 0) { 2262 /* Set error status */ 2263 IDM_CONN_LOG(CE_WARN, 2264 "idm_so_tx: failed to transmit the PDU, so: %p ic: %p " 2265 "data: %p", (void *) so_conn->ic_so, (void *) ic, 2266 (void *) pdu->isp_data); 2267 status = IDM_STATUS_IO; 2268 } 2269 2270 /* 2271 * Success does not mean that the PDU actually reached the 2272 * remote node since it could get dropped along the way. 2273 */ 2274 idm_pdu_complete(pdu, status); 2275 2276 return (status); 2277 } 2278 2279 /* 2280 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the 2281 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength, 2282 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN. 2283 * A target can invoke this function multiple times for a single read command 2284 * (identified by the same ITT) to split the input into several sequences. 2285 * 2286 * DataSN starts with 0 for the first data PDU of an input command and advances 2287 * by 1 for each subsequent data PDU. Each sequence will have its own F bit, 2288 * which is set to 1 for the last data PDU of a sequence. 2289 * 2290 * Scope for Prototype build: 2291 * The data PDUs within a sequence will be sent in order with the buffer offset 2292 * in increasing order. i.e. initiator and target must have negotiated the 2293 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced. 2294 * 2295 * Caller holds idt->idt_mutex 2296 */ 2297 static idm_status_t 2298 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb) 2299 { 2300 idm_so_conn_t *so_conn = idb->idb_ic->ic_transport_private; 2301 idm_pdu_t tmppdu; 2302 2303 ASSERT(mutex_owned(&idt->idt_mutex)); 2304 2305 /* 2306 * Put the idm_buf_t on the tx queue. It will be transmitted by 2307 * idm_sotx_thread. 2308 */ 2309 mutex_enter(&so_conn->ic_tx_mutex); 2310 2311 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic, 2312 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 2313 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2314 uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI); 2315 2316 if (!so_conn->ic_tx_thread_running) { 2317 mutex_exit(&so_conn->ic_tx_mutex); 2318 /* 2319 * Don't release idt->idt_mutex since we're supposed to hold 2320 * in when calling idm_buf_tx_to_ini_done 2321 */ 2322 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 2323 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 2324 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2325 uint32_t, idb->idb_xfer_len, 2326 int, XFER_BUF_TX_TO_INI); 2327 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED); 2328 return (IDM_STATUS_FAIL); 2329 } 2330 2331 /* 2332 * Build a template for the data PDU headers we will use so that 2333 * the SN values will stay consistent with other PDU's we are 2334 * transmitting like R2T and SCSI status. 2335 */ 2336 bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t)); 2337 tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl; 2338 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu, 2339 ISCSI_OP_SCSI_DATA_RSP); 2340 idb->idb_tx_thread = B_TRUE; 2341 list_insert_tail(&so_conn->ic_tx_list, (void *)idb); 2342 cv_signal(&so_conn->ic_tx_cv); 2343 mutex_exit(&so_conn->ic_tx_mutex); 2344 mutex_exit(&idt->idt_mutex); 2345 2346 /* 2347 * Returning success here indicates the transfer was successfully 2348 * dispatched -- it does not mean that the transfer completed 2349 * successfully. 2350 */ 2351 return (IDM_STATUS_SUCCESS); 2352 } 2353 2354 /* 2355 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the 2356 * data blocks it is ready to receive from the initiator in response to a WRITE 2357 * SCSI command. The target iSCSI layer passes the information about the desired 2358 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer 2359 * offset and datalen are passed via the 'idb' argument. 2360 * 2361 * Scope for Prototype build: 2362 * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have 2363 * negotiated the "InitialR2T" to "Yes". 2364 * 2365 * Caller holds idt->idt_mutex 2366 */ 2367 static idm_status_t 2368 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb) 2369 { 2370 idm_pdu_t *pdu; 2371 iscsi_rtt_hdr_t *rtt; 2372 2373 ASSERT(mutex_owned(&idt->idt_mutex)); 2374 2375 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic, 2376 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 2377 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2378 uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI); 2379 2380 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP); 2381 pdu->isp_ic = idt->idt_ic; 2382 bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t)); 2383 2384 /* iSCSI layer fills the TTT, ITT, StatSN, ExpCmdSN, MaxCmdSN */ 2385 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP); 2386 2387 /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */ 2388 rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr); 2389 2390 rtt->opcode = ISCSI_OP_RTT_RSP; 2391 rtt->flags = ISCSI_FLAG_FINAL; 2392 rtt->data_offset = htonl(idb->idb_bufoffset); 2393 rtt->data_length = htonl(idb->idb_xfer_len); 2394 rtt->rttsn = htonl(idt->idt_exp_rttsn++); 2395 2396 /* Keep track of buffer offsets */ 2397 idb->idb_exp_offset = idb->idb_bufoffset; 2398 mutex_exit(&idt->idt_mutex); 2399 2400 /* 2401 * Transmit the PDU. 2402 */ 2403 idm_pdu_tx(pdu); 2404 2405 return (IDM_STATUS_SUCCESS); 2406 } 2407 2408 static idm_status_t 2409 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen) 2410 { 2411 if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) { 2412 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache, 2413 KM_NOSLEEP); 2414 idb->idb_buf_private = idm.idm_so_128k_buf_cache; 2415 } else { 2416 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP); 2417 idb->idb_buf_private = NULL; 2418 } 2419 2420 if (idb->idb_buf == NULL) { 2421 IDM_CONN_LOG(CE_NOTE, 2422 "idm_so_buf_alloc: failed buffer allocation"); 2423 return (IDM_STATUS_FAIL); 2424 } 2425 2426 return (IDM_STATUS_SUCCESS); 2427 } 2428 2429 /* ARGSUSED */ 2430 static idm_status_t 2431 idm_so_buf_setup(idm_buf_t *idb) 2432 { 2433 /* Ensure bufalloc'd flag is unset */ 2434 idb->idb_bufalloc = B_FALSE; 2435 2436 return (IDM_STATUS_SUCCESS); 2437 } 2438 2439 /* ARGSUSED */ 2440 static void 2441 idm_so_buf_teardown(idm_buf_t *idb) 2442 { 2443 /* nothing to do here */ 2444 } 2445 2446 static void 2447 idm_so_buf_free(idm_buf_t *idb) 2448 { 2449 if (idb->idb_buf_private == NULL) { 2450 kmem_free(idb->idb_buf, idb->idb_buflen); 2451 } else { 2452 kmem_cache_free(idb->idb_buf_private, idb->idb_buf); 2453 } 2454 } 2455 2456 static void 2457 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb, 2458 uint32_t offset, uint32_t length) 2459 { 2460 idm_so_conn_t *so_conn = ic->ic_transport_private; 2461 idm_pdu_t tmppdu; 2462 idm_buf_t *rtt_buf; 2463 2464 ASSERT(mutex_owned(&idt->idt_mutex)); 2465 2466 /* 2467 * Allocate a buffer to represent the RTT transfer. We could further 2468 * optimize this by allocating the buffers internally from an rtt 2469 * specific buffer cache since this is socket-specific code but for 2470 * now we will keep it simple. 2471 */ 2472 rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length); 2473 if (rtt_buf == NULL) { 2474 /* 2475 * If we're in FFP then the failure was likely a resource 2476 * allocation issue and we should close the connection by 2477 * sending a CE_TRANSPORT_FAIL event. 2478 * 2479 * If we're not in FFP then idm_buf_alloc will always 2480 * fail and the state is transitioning to "complete" anyway 2481 * so we won't bother to send an event. 2482 */ 2483 mutex_enter(&ic->ic_state_mutex); 2484 if (ic->ic_ffp) 2485 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, 2486 NULL, CT_NONE); 2487 mutex_exit(&ic->ic_state_mutex); 2488 return; 2489 } 2490 2491 rtt_buf->idb_buf_cb = NULL; 2492 rtt_buf->idb_cb_arg = NULL; 2493 rtt_buf->idb_bufoffset = offset; 2494 rtt_buf->idb_xfer_len = length; 2495 rtt_buf->idb_ic = idt->idt_ic; 2496 rtt_buf->idb_task_binding = idt; 2497 2498 /* 2499 * Put the idm_buf_t on the tx queue. It will be transmitted by 2500 * idm_sotx_thread. 2501 */ 2502 mutex_enter(&so_conn->ic_tx_mutex); 2503 2504 if (!so_conn->ic_tx_thread_running) { 2505 idm_buf_free(rtt_buf); 2506 mutex_exit(&so_conn->ic_tx_mutex); 2507 return; 2508 } 2509 2510 /* 2511 * This new buffer represents an additional reference on the task 2512 */ 2513 idm_task_hold(idt); 2514 2515 /* 2516 * Build a template for the data PDU headers we will use so that 2517 * the SN values will stay consistent with other PDU's we are 2518 * transmitting like R2T and SCSI status. 2519 */ 2520 bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t)); 2521 tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl; 2522 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu, 2523 ISCSI_OP_SCSI_DATA); 2524 rtt_buf->idb_tx_thread = B_TRUE; 2525 rtt_buf->idb_in_transport = B_TRUE; 2526 list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf); 2527 cv_signal(&so_conn->ic_tx_cv); 2528 mutex_exit(&so_conn->ic_tx_mutex); 2529 } 2530 2531 static void 2532 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb) 2533 { 2534 /* 2535 * Don't worry about status -- we assume any error handling 2536 * is performed by the caller (idm_sotx_thread). 2537 */ 2538 idb->idb_in_transport = B_FALSE; 2539 idm_task_rele(idt); 2540 idm_buf_free(idb); 2541 } 2542 2543 static idm_status_t 2544 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb, 2545 uint32_t buf_region_offset, uint32_t buf_region_length) 2546 { 2547 idm_conn_t *ic; 2548 uint32_t max_dataseglen; 2549 size_t remainder, chunk; 2550 uint32_t data_offset = buf_region_offset; 2551 iscsi_data_hdr_t *bhs; 2552 idm_pdu_t *pdu; 2553 idm_status_t tx_status; 2554 2555 ASSERT(mutex_owned(&idt->idt_mutex)); 2556 2557 ic = idt->idt_ic; 2558 2559 max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen; 2560 remainder = buf_region_length; 2561 2562 while (remainder) { 2563 if (idt->idt_state != TASK_ACTIVE) { 2564 ASSERT((idt->idt_state != TASK_IDLE) && 2565 (idt->idt_state != TASK_COMPLETE)); 2566 return (IDM_STATUS_ABORTED); 2567 } 2568 2569 /* check to see if we need to chunk the data */ 2570 if (remainder > max_dataseglen) { 2571 chunk = max_dataseglen; 2572 } else { 2573 chunk = remainder; 2574 } 2575 2576 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */ 2577 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP); 2578 pdu->isp_ic = ic; 2579 2580 /* 2581 * We've already built a build a header template 2582 * to use during the transfer. Use this template so that 2583 * the SN values stay consistent with any unrelated PDU's 2584 * being transmitted. 2585 */ 2586 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr, 2587 sizeof (iscsi_hdr_t)); 2588 2589 /* 2590 * Set DataSN, data offset, and flags in BHS 2591 * For the prototype build, A = 0, S = 0, U = 0 2592 */ 2593 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr); 2594 2595 bhs->datasn = htonl(idt->idt_exp_datasn++); 2596 2597 hton24(bhs->dlength, chunk); 2598 bhs->offset = htonl(idb->idb_bufoffset + data_offset); 2599 2600 if (chunk == remainder) { 2601 bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */ 2602 } 2603 2604 /* Instrument the data-send DTrace probe. */ 2605 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) { 2606 DTRACE_ISCSI_2(data__send, 2607 idm_conn_t *, idt->idt_ic, 2608 iscsi_data_rsp_hdr_t *, 2609 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr); 2610 } 2611 /* setup data */ 2612 pdu->isp_data = (uint8_t *)idb->idb_buf + data_offset; 2613 pdu->isp_datalen = (uint_t)chunk; 2614 remainder -= chunk; 2615 data_offset += chunk; 2616 2617 /* 2618 * Now that we're done working with idt_exp_datasn, 2619 * idt->idt_state and idb->idb_bufoffset we can release 2620 * the task lock -- don't want to hold it across the 2621 * call to idm_i_so_tx since we could block. 2622 */ 2623 mutex_exit(&idt->idt_mutex); 2624 2625 /* 2626 * Transmit the PDU. Call the internal routine directly 2627 * as there is already implicit ordering. 2628 */ 2629 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) { 2630 mutex_enter(&idt->idt_mutex); 2631 return (tx_status); 2632 } 2633 2634 mutex_enter(&idt->idt_mutex); 2635 idt->idt_tx_bytes += chunk; 2636 } 2637 2638 return (IDM_STATUS_SUCCESS); 2639 } 2640 2641 /* 2642 * TX PDU cache 2643 */ 2644 /* ARGSUSED */ 2645 int 2646 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags) 2647 { 2648 idm_pdu_t *pdu = hdl; 2649 2650 bzero(pdu, sizeof (idm_pdu_t)); 2651 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */ 2652 pdu->isp_hdrlen = sizeof (iscsi_hdr_t); 2653 pdu->isp_callback = idm_sotx_cache_pdu_cb; 2654 pdu->isp_magic = IDM_PDU_MAGIC; 2655 bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t)); 2656 2657 return (0); 2658 } 2659 2660 /* ARGSUSED */ 2661 void 2662 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 2663 { 2664 /* reset values between use */ 2665 pdu->isp_datalen = 0; 2666 2667 kmem_cache_free(idm.idm_sotx_pdu_cache, pdu); 2668 } 2669 2670 /* 2671 * RX PDU cache 2672 */ 2673 /* ARGSUSED */ 2674 int 2675 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags) 2676 { 2677 idm_pdu_t *pdu = hdl; 2678 2679 bzero(pdu, sizeof (idm_pdu_t)); 2680 pdu->isp_magic = IDM_PDU_MAGIC; 2681 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */ 2682 pdu->isp_callback = idm_sorx_cache_pdu_cb; 2683 2684 return (0); 2685 } 2686 2687 /* ARGSUSED */ 2688 static void 2689 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 2690 { 2691 pdu->isp_iovlen = 0; 2692 pdu->isp_sorx_buf = 0; 2693 kmem_cache_free(idm.idm_sorx_pdu_cache, pdu); 2694 } 2695 2696 static void 2697 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 2698 { 2699 /* 2700 * We had to modify our cached RX PDU with a longer header buffer 2701 * and/or a longer data buffer. Release the new buffers and fix 2702 * the fields back to what we would expect for a cached RX PDU. 2703 */ 2704 if (pdu->isp_flags & IDM_PDU_ADDL_HDR) { 2705 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen); 2706 } 2707 if (pdu->isp_flags & IDM_PDU_ADDL_DATA) { 2708 kmem_free(pdu->isp_data, pdu->isp_datalen); 2709 } 2710 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); 2711 pdu->isp_hdrlen = sizeof (iscsi_hdr_t); 2712 pdu->isp_data = NULL; 2713 pdu->isp_datalen = 0; 2714 pdu->isp_sorx_buf = 0; 2715 pdu->isp_callback = idm_sorx_cache_pdu_cb; 2716 idm_sorx_cache_pdu_cb(pdu, status); 2717 } 2718 2719 /* 2720 * This thread is only active when I/O is queued for transmit 2721 * because the socket is busy. 2722 */ 2723 void 2724 idm_sotx_thread(void *arg) 2725 { 2726 idm_conn_t *ic = arg; 2727 idm_tx_obj_t *object, *next; 2728 idm_so_conn_t *so_conn; 2729 idm_status_t status = IDM_STATUS_SUCCESS; 2730 2731 idm_conn_hold(ic); 2732 2733 mutex_enter(&ic->ic_mutex); 2734 so_conn = ic->ic_transport_private; 2735 so_conn->ic_tx_thread_running = B_TRUE; 2736 so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did; 2737 cv_signal(&ic->ic_cv); 2738 mutex_exit(&ic->ic_mutex); 2739 2740 mutex_enter(&so_conn->ic_tx_mutex); 2741 2742 while (so_conn->ic_tx_thread_running) { 2743 while (list_is_empty(&so_conn->ic_tx_list)) { 2744 DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic); 2745 cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex); 2746 DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic); 2747 2748 if (!so_conn->ic_tx_thread_running) { 2749 goto tx_bail; 2750 } 2751 } 2752 2753 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list); 2754 list_remove(&so_conn->ic_tx_list, object); 2755 mutex_exit(&so_conn->ic_tx_mutex); 2756 2757 switch (object->idm_tx_obj_magic) { 2758 case IDM_PDU_MAGIC: 2759 DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic, 2760 idm_pdu_t *, (idm_pdu_t *)object); 2761 2762 status = idm_i_so_tx((idm_pdu_t *)object); 2763 break; 2764 2765 case IDM_BUF_MAGIC: { 2766 idm_buf_t *idb = (idm_buf_t *)object; 2767 idm_task_t *idt = idb->idb_task_binding; 2768 2769 DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic, 2770 idm_buf_t *, idb); 2771 2772 mutex_enter(&idt->idt_mutex); 2773 status = idm_so_send_buf_region(idt, 2774 idb, 0, idb->idb_xfer_len); 2775 2776 /* 2777 * TX thread owns the buffer so we expect it to 2778 * be "in transport" 2779 */ 2780 ASSERT(idb->idb_in_transport); 2781 if (IDM_CONN_ISTGT(ic)) { 2782 /* 2783 * idm_buf_tx_to_ini_done releases 2784 * idt->idt_mutex 2785 */ 2786 DTRACE_ISCSI_8(xfer__done, 2787 idm_conn_t *, idt->idt_ic, 2788 uintptr_t, idb->idb_buf, 2789 uint32_t, idb->idb_bufoffset, 2790 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2791 uint32_t, idb->idb_xfer_len, 2792 int, XFER_BUF_TX_TO_INI); 2793 idm_buf_tx_to_ini_done(idt, idb, status); 2794 } else { 2795 idm_so_send_rtt_data_done(idt, idb); 2796 mutex_exit(&idt->idt_mutex); 2797 } 2798 break; 2799 } 2800 2801 default: 2802 IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic " 2803 "(0x%08x)", object->idm_tx_obj_magic); 2804 status = IDM_STATUS_FAIL; 2805 } 2806 2807 mutex_enter(&so_conn->ic_tx_mutex); 2808 2809 if (status != IDM_STATUS_SUCCESS) { 2810 so_conn->ic_tx_thread_running = B_FALSE; 2811 idm_conn_event(ic, CE_TRANSPORT_FAIL, status); 2812 } 2813 } 2814 2815 /* 2816 * Before we leave, we need to abort every item remaining in the 2817 * TX list. 2818 */ 2819 2820 tx_bail: 2821 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list); 2822 2823 while (object != NULL) { 2824 next = list_next(&so_conn->ic_tx_list, object); 2825 2826 list_remove(&so_conn->ic_tx_list, object); 2827 switch (object->idm_tx_obj_magic) { 2828 case IDM_PDU_MAGIC: 2829 idm_pdu_complete((idm_pdu_t *)object, 2830 IDM_STATUS_ABORTED); 2831 break; 2832 2833 case IDM_BUF_MAGIC: { 2834 idm_buf_t *idb = (idm_buf_t *)object; 2835 idm_task_t *idt = idb->idb_task_binding; 2836 mutex_exit(&so_conn->ic_tx_mutex); 2837 mutex_enter(&idt->idt_mutex); 2838 /* 2839 * TX thread owns the buffer so we expect it to 2840 * be "in transport" 2841 */ 2842 ASSERT(idb->idb_in_transport); 2843 if (IDM_CONN_ISTGT(ic)) { 2844 /* 2845 * idm_buf_tx_to_ini_done releases 2846 * idt->idt_mutex 2847 */ 2848 DTRACE_ISCSI_8(xfer__done, 2849 idm_conn_t *, idt->idt_ic, 2850 uintptr_t, idb->idb_buf, 2851 uint32_t, idb->idb_bufoffset, 2852 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2853 uint32_t, idb->idb_xfer_len, 2854 int, XFER_BUF_TX_TO_INI); 2855 idm_buf_tx_to_ini_done(idt, idb, 2856 IDM_STATUS_ABORTED); 2857 } else { 2858 idm_so_send_rtt_data_done(idt, idb); 2859 mutex_exit(&idt->idt_mutex); 2860 } 2861 mutex_enter(&so_conn->ic_tx_mutex); 2862 break; 2863 } 2864 default: 2865 IDM_CONN_LOG(CE_WARN, 2866 "idm_sotx_thread: Unexpected magic " 2867 "(0x%08x)", object->idm_tx_obj_magic); 2868 } 2869 2870 object = next; 2871 } 2872 2873 mutex_exit(&so_conn->ic_tx_mutex); 2874 idm_conn_rele(ic); 2875 thread_exit(); 2876 /*NOTREACHED*/ 2877 } 2878 2879 static void 2880 idm_so_socket_set_nonblock(struct sonode *node) 2881 { 2882 (void) VOP_SETFL(node->so_vnode, node->so_flag, 2883 (node->so_state | FNONBLOCK), CRED(), NULL); 2884 } 2885 2886 static void 2887 idm_so_socket_set_block(struct sonode *node) 2888 { 2889 (void) VOP_SETFL(node->so_vnode, node->so_flag, 2890 (node->so_state & (~FNONBLOCK)), CRED(), NULL); 2891 } 2892