1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2022 RackTop Systems, Inc. 23 * Copyright 2011-2021 Tintri by DDN, Inc. All rights reserved. 24 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/ddi.h> 31 #include <sys/sunddi.h> 32 #include <sys/time.h> 33 #include <sys/varargs.h> 34 #include <sys/modctl.h> 35 #include <sys/pathname.h> 36 #include <sys/vnode.h> 37 #include <sys/socket.h> 38 #include <sys/ksocket.h> 39 #include <sys/stream.h> 40 #include <sys/strsubr.h> 41 42 #include <smbsrv/smb_vops.h> 43 #include <smbsrv/smb.h> 44 #include <smbsrv/smb_kproto.h> 45 #include <smbsrv/smb_kstat.h> 46 47 /* 48 * How many iovec we'll handle as a local array (no allocation) 49 * See also IOV_MAX_STACK <sys/limits.h> but we need this to 50 * work also with _FAKE_KERNEL 51 */ 52 #define SMB_LOCAL_IOV_MAX 16 53 54 /* 55 * SMB Network Socket API 56 * 57 * smb_socreate: Creates an socket based on domain/type. 58 * smb_soshutdown: Disconnect a socket created with smb_socreate 59 * smb_sodestroy: Release resources associated with a socket 60 * smb_sosend: Send the contents of a buffer on a socket 61 * smb_sorecv: Receive data into a buffer from a socket 62 * smb_iov_sosend: Send the contents of an iovec on a socket 63 * smb_iov_sorecv: Receive data into an iovec from a socket 64 */ 65 66 ksocket_t 67 smb_socreate(int domain, int type, int protocol) 68 { 69 ksocket_t sock; 70 int err = 0; 71 72 err = ksocket_socket(&sock, domain, type, protocol, KSOCKET_SLEEP, 73 CRED()); 74 75 if (err != 0) 76 return (NULL); 77 else 78 return (sock); 79 } 80 81 /* 82 * smb_soshutdown will disconnect the socket and prevent subsequent PDU 83 * reception and transmission. The sonode still exists but its state 84 * gets modified to indicate it is no longer connected. Calls to 85 * smb_sorecv/smb_iov_sorecv will return so smb_soshutdown can be used 86 * regain control of a thread stuck in smb_sorecv. 87 */ 88 void 89 smb_soshutdown(ksocket_t so) 90 { 91 (void) ksocket_shutdown(so, SHUT_RDWR, CRED()); 92 } 93 94 /* 95 * smb_sodestroy releases all resources associated with a socket previously 96 * created with smb_socreate. The socket must be shutdown using smb_soshutdown 97 * before the socket is destroyed with smb_sodestroy, otherwise undefined 98 * behavior will result. 99 */ 100 void 101 smb_sodestroy(ksocket_t so) 102 { 103 (void) ksocket_close(so, CRED()); 104 } 105 106 int 107 smb_sorecv(ksocket_t so, void *msg, size_t len) 108 { 109 size_t recvd; 110 int err; 111 112 ASSERT(so != NULL); 113 ASSERT(len != 0); 114 115 if ((err = ksocket_recv(so, msg, len, MSG_WAITALL, &recvd, 116 CRED())) != 0) { 117 return (err); 118 } 119 120 /* Successful receive */ 121 return ((recvd == len) ? 0 : -1); 122 } 123 124 /* 125 * Receive a message as an mbuf chain (returned in *mpp) 126 * where the length requested is len. 127 * 128 * Some day hopefully this will be able to receive an actual 129 * mblk chain from the network stack (without copying), and 130 * either wrap those to create mbufs, or use mblks directly. 131 * For now, we allocate buffers here to recv into. 132 */ 133 int 134 smb_net_recv_mbufs(smb_session_t *s, mbuf_t **mpp, size_t len) 135 { 136 struct nmsghdr msg; 137 uio_t uio; 138 iovec_t iov[SMB_LOCAL_IOV_MAX]; 139 mbuf_t *mhead = NULL; 140 size_t rlen; 141 int rc; 142 143 bzero(&msg, sizeof (msg)); 144 bzero(&uio, sizeof (uio)); 145 ASSERT(len > 0); 146 147 mhead = smb_mbuf_alloc_chain(len); 148 149 uio.uio_resid = len; 150 uio.uio_iov = iov; 151 uio.uio_iovcnt = SMB_LOCAL_IOV_MAX; 152 153 rc = smb_mbuf_mkuio(mhead, &uio); 154 if (rc != 0) 155 goto errout; 156 157 msg.msg_iov = uio.uio_iov; 158 msg.msg_iovlen = uio.uio_iovcnt; 159 rlen = len; 160 rc = ksocket_recvmsg(s->sock, &msg, MSG_WAITALL, &rlen, CRED()); 161 if (rc != 0) 162 goto errout; 163 if (rlen != len) { 164 rc = SET_ERROR(EIO); 165 goto errout; 166 } 167 168 *mpp = mhead; 169 return (rc); 170 171 errout: 172 m_freem(mhead); 173 return (rc); 174 } 175 176 /* 177 * smb_net_txl_constructor 178 * 179 * Transmit list constructor 180 */ 181 void 182 smb_net_txl_constructor(smb_txlst_t *txl) 183 { 184 ASSERT(txl->tl_magic != SMB_TXLST_MAGIC); 185 186 mutex_init(&txl->tl_mutex, NULL, MUTEX_DEFAULT, NULL); 187 cv_init(&txl->tl_wait_cv, NULL, CV_DEFAULT, NULL); 188 txl->tl_active = B_FALSE; 189 txl->tl_magic = SMB_TXLST_MAGIC; 190 } 191 192 /* 193 * smb_net_txl_destructor 194 * 195 * Transmit list destructor 196 */ 197 void 198 smb_net_txl_destructor(smb_txlst_t *txl) 199 { 200 ASSERT(txl->tl_magic == SMB_TXLST_MAGIC); 201 202 txl->tl_magic = 0; 203 cv_destroy(&txl->tl_wait_cv); 204 mutex_destroy(&txl->tl_mutex); 205 } 206 207 static void 208 smb_net_send_free(void *arg) 209 { 210 mbuf_t *m = arg; 211 (void) m_free(m); 212 } 213 214 /* 215 * Create an mblk that wraps the passed mbuf 216 * 217 * Note we need a place to store a frtn_t for each mbuf. 218 * For M_EXT packets (most are) we have lots of unused space 219 * after the headers: M_dat.MH.MH_dat.MH_ext (a.k.a. m_ext) 220 * If not M_EXT but there's enough trailing space, just use 221 * the trailing space, otherwise convert to external type 222 * (which means copying the data, so do only if necessary). 223 * 224 * To simplify the code, the frtn_t is always located at the 225 * end of the mbuf (in space we make sure is unused). 226 */ 227 static mblk_t * 228 smb_net_wrap_mbuf(mbuf_t *mbuf) 229 { 230 frtn_t *frtn; 231 mblk_t *mblk; 232 233 if ((mbuf->m_flags & M_EXT) == 0 && 234 M_TRAILINGSPACE(mbuf) < sizeof (*frtn)) { 235 /* 236 * Convert to M_EXT type, like MCLGET(), 237 * but copy before updating mbuf->m_ext, 238 * which would otherwise overwrite data. 239 */ 240 caddr_t buf = smb_mbufcl_alloc(); 241 ASSERT(mbuf->m_len <= MLEN); 242 bcopy(mbuf->m_data, buf, mbuf->m_len); 243 mbuf->m_ext.ext_buf = buf; 244 mbuf->m_data = buf; 245 mbuf->m_flags |= M_EXT; 246 mbuf->m_ext.ext_size = MCLBYTES; 247 mbuf->m_ext.ext_free = smb_mbufcl_free; 248 } 249 250 /* 251 * Store frtn_t at the end of the mbuf data area. 252 * Note: This is the _internal_ data area (unused) 253 * not the external data pointed to by m_data. 254 */ 255 frtn = (void *) &mbuf->m_dat[MLEN - sizeof (*frtn)]; 256 257 frtn->free_func = smb_net_send_free; 258 frtn->free_arg = (caddr_t)mbuf; 259 260 mblk = esballoca_wait((void *)mbuf->m_data, mbuf->m_len, 261 BPRI_MED, frtn); 262 if (mblk != NULL) { 263 mblk->b_wptr += mbuf->m_len; 264 mblk->b_datap->db_type = M_DATA; 265 } 266 267 return (mblk); 268 } 269 270 /* 271 * This routine sends an mbuf chain by encapsulating each segment 272 * with an mblk_t setup with external storage (zero-copy). 273 * 274 * Note: the mbufs passed in are free'd via smb_net_send_free. 275 */ 276 static int 277 smb_net_send_mblks(smb_session_t *s, mbuf_t *mbuf_head) 278 { 279 struct nmsghdr msg; 280 mblk_t *mblk_head; 281 mblk_t *mblk_prev; 282 mblk_t *mblk; 283 mbuf_t *mbuf_prev; 284 mbuf_t *mbuf; 285 smb_txlst_t *txl; 286 int rc = 0; 287 288 bzero(&msg, sizeof (msg)); 289 290 mblk_prev = NULL; 291 mblk_head = NULL; 292 mbuf_prev = NULL; 293 mbuf = mbuf_head; 294 while (mbuf != NULL) { 295 mblk = smb_net_wrap_mbuf(mbuf); 296 if (mblk == NULL) { 297 rc = ENOSR; 298 break; 299 } 300 if (mblk_head == NULL) 301 mblk_head = mblk; 302 if (mblk_prev != NULL) 303 mblk_prev->b_cont = mblk; 304 305 mblk_prev = mblk; 306 mbuf_prev = mbuf; 307 mbuf = mbuf->m_next; 308 } 309 if (rc != 0) { 310 /* Bailed with ENOSR. Cleanup */ 311 if (mbuf != NULL) { 312 if (mbuf_prev != NULL) 313 mbuf_prev->m_next = NULL; 314 m_freem(mbuf); 315 } 316 if (mblk_head != NULL) 317 freemsg(mblk_head); 318 return (rc); 319 } 320 321 /* 322 * Wait for our turn to send. 323 */ 324 DTRACE_PROBE1(send__wait__start, struct smb_session_t *, s); 325 txl = &s->s_txlst; 326 mutex_enter(&txl->tl_mutex); 327 while (txl->tl_active) 328 cv_wait(&txl->tl_wait_cv, &txl->tl_mutex); 329 txl->tl_active = B_TRUE; 330 mutex_exit(&txl->tl_mutex); 331 DTRACE_PROBE1(send__wait__done, struct smb_session_t *, s); 332 333 /* 334 * OK, send it. 335 */ 336 rc = ksocket_sendmblk(s->sock, &msg, 0, &mblk_head, CRED()); 337 if (rc != 0) { 338 if (mblk_head != NULL) { 339 freemsg(mblk_head); 340 mblk_head = NULL; 341 } 342 } 343 344 mutex_enter(&txl->tl_mutex); 345 txl->tl_active = B_FALSE; 346 cv_signal(&txl->tl_wait_cv); 347 mutex_exit(&txl->tl_mutex); 348 349 return (rc); 350 } 351 352 /* 353 * This routine sends an mbuf chain by copying its segments 354 * (scatter/gather) via UIO. 355 * 356 * The mbuf chain is always free'd (error or not) 357 */ 358 static int 359 smb_net_send_uio(smb_session_t *s, mbuf_t *mbuf_head) 360 { 361 struct nmsghdr msg; 362 uio_t uio; 363 iovec_t iov_local[SMB_LOCAL_IOV_MAX]; 364 mbuf_t *mbuf; 365 smb_txlst_t *txl; 366 smb_vdb_t *vdb = NULL; 367 size_t sent; 368 int len, nseg, rc; 369 370 bzero(&msg, sizeof (msg)); 371 bzero(&uio, sizeof (uio)); 372 373 len = nseg = 0; 374 for (mbuf = mbuf_head; 375 mbuf != NULL; 376 mbuf = mbuf->m_next) { 377 nseg++; 378 len += mbuf->m_len; 379 } 380 381 if (nseg <= SMB_LOCAL_IOV_MAX) { 382 uio.uio_iov = iov_local; 383 uio.uio_iovcnt = SMB_LOCAL_IOV_MAX; 384 } else { 385 vdb = kmem_alloc(sizeof (*vdb), KM_SLEEP); 386 uio.uio_iov = &vdb->vdb_iovec[0]; 387 uio.uio_iovcnt = MAX_IOVEC; 388 } 389 uio.uio_resid = len; 390 391 rc = smb_mbuf_mkuio(mbuf_head, &uio); 392 if (rc != 0) 393 goto out; 394 395 DTRACE_PROBE1(send__wait__start, struct smb_session_t *, s); 396 397 /* 398 * Wait for our turn to send. 399 */ 400 txl = &s->s_txlst; 401 mutex_enter(&txl->tl_mutex); 402 while (txl->tl_active) 403 cv_wait(&txl->tl_wait_cv, &txl->tl_mutex); 404 txl->tl_active = B_TRUE; 405 mutex_exit(&txl->tl_mutex); 406 407 DTRACE_PROBE1(send__wait__done, struct smb_session_t *, s); 408 409 /* 410 * OK, try to send. 411 * 412 * This should block until we've sent it all, 413 * or given up due to errors (socket closed). 414 */ 415 msg.msg_iov = uio.uio_iov; 416 msg.msg_iovlen = uio.uio_iovcnt; 417 while (uio.uio_resid > 0) { 418 rc = ksocket_sendmsg(s->sock, &msg, 0, &sent, CRED()); 419 if (rc != 0) 420 break; 421 uio.uio_resid -= sent; 422 } 423 424 mutex_enter(&txl->tl_mutex); 425 txl->tl_active = B_FALSE; 426 cv_signal(&txl->tl_wait_cv); 427 mutex_exit(&txl->tl_mutex); 428 429 out: 430 if (vdb != NULL) 431 kmem_free(vdb, sizeof (*vdb)); 432 m_freem(mbuf_head); 433 return (rc); 434 } 435 436 /* 437 * This has an optional code path calling ksocket_sendmblk, 438 * which is faster than ksocket_sendmsg (UIO copying) in some 439 * configurations, but needs work before it's uniformly faster. 440 * In particular, the ksocket_sendmblk code path probably needs 441 * to do more like socopyinuio etc, checking the send socket 442 * SO_SND_BUFINFO, SO_SND_COPYAVOID, etc. to find out what is 443 * the preferred MSS, header space, copying preference, etc. 444 * 445 * As it is, this works well with some NIC drivers, particularly 446 * with MTU=9000 as is typical in high performance setups, so 447 * this remains available via this tunable for now. 448 */ 449 int smb_send_mblks = 0; 450 451 /* 452 * smb_net_send_mbufs 453 * 454 * Send the buf chain using either mblk encapsulation (zero-copy) 455 * or via scatter/gather UIO vector, based on the setting. 456 */ 457 int 458 smb_net_send_mbufs(smb_session_t *s, mbuf_t *mbuf_head) 459 { 460 int rc; 461 462 if (smb_send_mblks != 0) { 463 rc = smb_net_send_mblks(s, mbuf_head); 464 } else { 465 rc = smb_net_send_uio(s, mbuf_head); 466 } 467 return (rc); 468 } 469