xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_net.c (revision 11994f6f6fa6fc668363b92c6b6ef60b2e75ebd6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2022 RackTop Systems, Inc.
23  * Copyright 2011-2021 Tintri by DDN, Inc. All rights reserved.
24  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/time.h>
33 #include <sys/varargs.h>
34 #include <sys/modctl.h>
35 #include <sys/pathname.h>
36 #include <sys/vnode.h>
37 #include <sys/socket.h>
38 #include <sys/ksocket.h>
39 #include <sys/stream.h>
40 #include <sys/strsubr.h>
41 
42 #include <smbsrv/smb_vops.h>
43 #include <smbsrv/smb.h>
44 #include <smbsrv/smb_kproto.h>
45 #include <smbsrv/smb_kstat.h>
46 
/*
 * Number of iovec entries kept in an on-stack array, so that an mbuf
 * chain with few segments can be described without allocating memory.
 * IOV_MAX_STACK in <sys/limits.h> is the analogous system constant,
 * but this file must also build under _FAKE_KERNEL, hence the local
 * definition.
 */
#define	SMB_LOCAL_IOV_MAX	16
53 
/*
 * SMB Network Socket API
 *
 * smb_socreate:	Creates a socket based on domain/type.
 * smb_soshutdown:	Disconnect a socket created with smb_socreate
 * smb_sodestroy:	Release resources associated with a socket
 * smb_sosend:		Send the contents of a buffer on a socket
 * smb_sorecv:		Receive data into a buffer from a socket
 * smb_iov_sosend:	Send the contents of an iovec on a socket
 * smb_iov_sorecv:	Receive data into an iovec from a socket
 */
65 
66 ksocket_t
67 smb_socreate(int domain, int type, int protocol)
68 {
69 	ksocket_t	sock;
70 	int		err = 0;
71 
72 	err = ksocket_socket(&sock, domain, type, protocol, KSOCKET_SLEEP,
73 	    CRED());
74 
75 	if (err != 0)
76 		return (NULL);
77 	else
78 		return (sock);
79 }
80 
81 /*
82  * smb_soshutdown will disconnect the socket and prevent subsequent PDU
83  * reception and transmission.  The sonode still exists but its state
84  * gets modified to indicate it is no longer connected.  Calls to
85  * smb_sorecv/smb_iov_sorecv will return so smb_soshutdown can be used
86  * regain control of a thread stuck in smb_sorecv.
87  */
88 void
89 smb_soshutdown(ksocket_t so)
90 {
91 	(void) ksocket_shutdown(so, SHUT_RDWR, CRED());
92 }
93 
94 /*
95  * smb_sodestroy releases all resources associated with a socket previously
96  * created with smb_socreate.  The socket must be shutdown using smb_soshutdown
97  * before the socket is destroyed with smb_sodestroy, otherwise undefined
98  * behavior will result.
99  */
100 void
101 smb_sodestroy(ksocket_t so)
102 {
103 	(void) ksocket_close(so, CRED());
104 }
105 
106 int
107 smb_sorecv(ksocket_t so, void *msg, size_t len)
108 {
109 	size_t recvd;
110 	int err;
111 
112 	ASSERT(so != NULL);
113 	ASSERT(len != 0);
114 
115 	if ((err = ksocket_recv(so, msg, len, MSG_WAITALL, &recvd,
116 	    CRED())) != 0) {
117 		return (err);
118 	}
119 
120 	/* Successful receive */
121 	return ((recvd == len) ? 0 : -1);
122 }
123 
124 /*
125  * Receive a message as an mbuf chain (returned in *mpp)
126  * where the length requested is len.
127  *
128  * Some day hopefully this will be able to receive an actual
129  * mblk chain from the network stack (without copying), and
130  * either wrap those to create mbufs, or use mblks directly.
131  * For now, we allocate buffers here to recv into.
132  */
133 int
134 smb_net_recv_mbufs(smb_session_t *s, mbuf_t **mpp, size_t len)
135 {
136 	struct nmsghdr	msg;
137 	uio_t	uio;
138 	iovec_t iov[SMB_LOCAL_IOV_MAX];
139 	mbuf_t	*mhead = NULL;
140 	size_t	rlen;
141 	int	rc;
142 
143 	bzero(&msg, sizeof (msg));
144 	bzero(&uio, sizeof (uio));
145 	ASSERT(len > 0);
146 
147 	mhead = smb_mbuf_alloc_chain(len);
148 
149 	uio.uio_resid = len;
150 	uio.uio_iov = iov;
151 	uio.uio_iovcnt = SMB_LOCAL_IOV_MAX;
152 
153 	rc = smb_mbuf_mkuio(mhead, &uio);
154 	if (rc != 0)
155 		goto errout;
156 
157 	msg.msg_iov = uio.uio_iov;
158 	msg.msg_iovlen = uio.uio_iovcnt;
159 	rlen = len;
160 	rc = ksocket_recvmsg(s->sock, &msg, MSG_WAITALL, &rlen, CRED());
161 	if (rc != 0)
162 		goto errout;
163 	if (rlen != len) {
164 		rc = SET_ERROR(EIO);
165 		goto errout;
166 	}
167 
168 	*mpp = mhead;
169 	return (rc);
170 
171 errout:
172 	m_freem(mhead);
173 	return (rc);
174 }
175 
176 /*
177  * smb_net_txl_constructor
178  *
179  *	Transmit list constructor
180  */
181 void
182 smb_net_txl_constructor(smb_txlst_t *txl)
183 {
184 	ASSERT(txl->tl_magic != SMB_TXLST_MAGIC);
185 
186 	mutex_init(&txl->tl_mutex, NULL, MUTEX_DEFAULT, NULL);
187 	cv_init(&txl->tl_wait_cv, NULL, CV_DEFAULT, NULL);
188 	txl->tl_active = B_FALSE;
189 	txl->tl_magic = SMB_TXLST_MAGIC;
190 }
191 
192 /*
193  * smb_net_txl_destructor
194  *
195  *	Transmit list destructor
196  */
197 void
198 smb_net_txl_destructor(smb_txlst_t *txl)
199 {
200 	ASSERT(txl->tl_magic == SMB_TXLST_MAGIC);
201 
202 	txl->tl_magic = 0;
203 	cv_destroy(&txl->tl_wait_cv);
204 	mutex_destroy(&txl->tl_mutex);
205 }
206 
207 static void
208 smb_net_send_free(void *arg)
209 {
210 	mbuf_t *m = arg;
211 	(void) m_free(m);
212 }
213 
214 /*
215  * Create an mblk that wraps the passed mbuf
216  *
217  * Note we need a place to store a frtn_t for each mbuf.
218  * For M_EXT packets (most are) we have lots of unused space
219  * after the headers: M_dat.MH.MH_dat.MH_ext (a.k.a. m_ext)
220  * If not M_EXT but there's enough trailing space, just use
221  * the trailing space, otherwise convert to external type
222  * (which means copying the data, so do only if necessary).
223  *
224  * To simplify the code, the frtn_t is always located at the
225  * end of the mbuf (in space we make sure is unused).
226  */
227 static mblk_t *
228 smb_net_wrap_mbuf(mbuf_t *mbuf)
229 {
230 	frtn_t		*frtn;
231 	mblk_t		*mblk;
232 
233 	if ((mbuf->m_flags & M_EXT) == 0 &&
234 	    M_TRAILINGSPACE(mbuf) < sizeof (*frtn)) {
235 		/*
236 		 * Convert to M_EXT type, like MCLGET(),
237 		 * but copy before updating mbuf->m_ext,
238 		 * which would otherwise overwrite data.
239 		 */
240 		caddr_t buf = smb_mbufcl_alloc();
241 		ASSERT(mbuf->m_len <= MLEN);
242 		bcopy(mbuf->m_data, buf, mbuf->m_len);
243 		mbuf->m_ext.ext_buf = buf;
244 		mbuf->m_data = buf;
245 		mbuf->m_flags |= M_EXT;
246 		mbuf->m_ext.ext_size = MCLBYTES;
247 		mbuf->m_ext.ext_free = smb_mbufcl_free;
248 	}
249 
250 	/*
251 	 * Store frtn_t at the end of the mbuf data area.
252 	 * Note: This is the _internal_ data area (unused)
253 	 * not the external data pointed to by m_data.
254 	 */
255 	frtn = (void *) &mbuf->m_dat[MLEN - sizeof (*frtn)];
256 
257 	frtn->free_func = smb_net_send_free;
258 	frtn->free_arg = (caddr_t)mbuf;
259 
260 	mblk = esballoca_wait((void *)mbuf->m_data, mbuf->m_len,
261 	    BPRI_MED, frtn);
262 	if (mblk != NULL) {
263 		mblk->b_wptr += mbuf->m_len;
264 		mblk->b_datap->db_type = M_DATA;
265 	}
266 
267 	return (mblk);
268 }
269 
270 /*
271  * This routine sends an mbuf chain by encapsulating each segment
272  * with an mblk_t setup with external storage (zero-copy).
273  *
274  * Note: the mbufs passed in are free'd via smb_net_send_free.
275  */
276 static int
277 smb_net_send_mblks(smb_session_t *s, mbuf_t *mbuf_head)
278 {
279 	struct nmsghdr	msg;
280 	mblk_t	*mblk_head;
281 	mblk_t	*mblk_prev;
282 	mblk_t	*mblk;
283 	mbuf_t	*mbuf_prev;
284 	mbuf_t	*mbuf;
285 	smb_txlst_t *txl;
286 	int	rc = 0;
287 
288 	bzero(&msg, sizeof (msg));
289 
290 	mblk_prev = NULL;
291 	mblk_head = NULL;
292 	mbuf_prev = NULL;
293 	mbuf = mbuf_head;
294 	while (mbuf != NULL) {
295 		mblk = smb_net_wrap_mbuf(mbuf);
296 		if (mblk == NULL) {
297 			rc = ENOSR;
298 			break;
299 		}
300 		if (mblk_head == NULL)
301 			mblk_head = mblk;
302 		if (mblk_prev != NULL)
303 			mblk_prev->b_cont = mblk;
304 
305 		mblk_prev = mblk;
306 		mbuf_prev = mbuf;
307 		mbuf = mbuf->m_next;
308 	}
309 	if (rc != 0) {
310 		/* Bailed with ENOSR. Cleanup */
311 		if (mbuf != NULL) {
312 			if (mbuf_prev != NULL)
313 				mbuf_prev->m_next = NULL;
314 			m_freem(mbuf);
315 		}
316 		if (mblk_head != NULL)
317 			freemsg(mblk_head);
318 		return (rc);
319 	}
320 
321 	/*
322 	 * Wait for our turn to send.
323 	 */
324 	DTRACE_PROBE1(send__wait__start, struct smb_session_t *, s);
325 	txl = &s->s_txlst;
326 	mutex_enter(&txl->tl_mutex);
327 	while (txl->tl_active)
328 		cv_wait(&txl->tl_wait_cv, &txl->tl_mutex);
329 	txl->tl_active = B_TRUE;
330 	mutex_exit(&txl->tl_mutex);
331 	DTRACE_PROBE1(send__wait__done, struct smb_session_t *, s);
332 
333 	/*
334 	 * OK, send it.
335 	 */
336 	rc = ksocket_sendmblk(s->sock, &msg, 0, &mblk_head, CRED());
337 	if (rc != 0) {
338 		if (mblk_head != NULL) {
339 			freemsg(mblk_head);
340 			mblk_head = NULL;
341 		}
342 	}
343 
344 	mutex_enter(&txl->tl_mutex);
345 	txl->tl_active = B_FALSE;
346 	cv_signal(&txl->tl_wait_cv);
347 	mutex_exit(&txl->tl_mutex);
348 
349 	return (rc);
350 }
351 
352 /*
353  * This routine sends an mbuf chain by copying its segments
354  * (scatter/gather) via UIO.
355  *
356  * The mbuf chain is always free'd (error or not)
357  */
358 static int
359 smb_net_send_uio(smb_session_t *s, mbuf_t *mbuf_head)
360 {
361 	struct nmsghdr	msg;
362 	uio_t	uio;
363 	iovec_t iov_local[SMB_LOCAL_IOV_MAX];
364 	mbuf_t	*mbuf;
365 	smb_txlst_t *txl;
366 	smb_vdb_t *vdb = NULL;
367 	size_t sent;
368 	int	len, nseg, rc;
369 
370 	bzero(&msg, sizeof (msg));
371 	bzero(&uio, sizeof (uio));
372 
373 	len = nseg = 0;
374 	for (mbuf = mbuf_head;
375 	     mbuf != NULL;
376 	     mbuf = mbuf->m_next) {
377 		nseg++;
378 		len += mbuf->m_len;
379 	}
380 
381 	if (nseg <= SMB_LOCAL_IOV_MAX) {
382 		uio.uio_iov = iov_local;
383 		uio.uio_iovcnt = SMB_LOCAL_IOV_MAX;
384 	} else {
385 		vdb = kmem_alloc(sizeof (*vdb), KM_SLEEP);
386 		uio.uio_iov = &vdb->vdb_iovec[0];
387 		uio.uio_iovcnt = MAX_IOVEC;
388 	}
389 	uio.uio_resid = len;
390 
391 	rc = smb_mbuf_mkuio(mbuf_head, &uio);
392 	if (rc != 0)
393 		goto out;
394 
395 	DTRACE_PROBE1(send__wait__start, struct smb_session_t *, s);
396 
397 	/*
398 	 * Wait for our turn to send.
399 	 */
400 	txl = &s->s_txlst;
401 	mutex_enter(&txl->tl_mutex);
402 	while (txl->tl_active)
403 		cv_wait(&txl->tl_wait_cv, &txl->tl_mutex);
404 	txl->tl_active = B_TRUE;
405 	mutex_exit(&txl->tl_mutex);
406 
407 	DTRACE_PROBE1(send__wait__done, struct smb_session_t *, s);
408 
409 	/*
410 	 * OK, try to send.
411 	 *
412 	 * This should block until we've sent it all,
413 	 * or given up due to errors (socket closed).
414 	 */
415 	msg.msg_iov = uio.uio_iov;
416 	msg.msg_iovlen = uio.uio_iovcnt;
417 	while (uio.uio_resid > 0) {
418 		rc = ksocket_sendmsg(s->sock, &msg, 0, &sent, CRED());
419 		if (rc != 0)
420 			break;
421 		uio.uio_resid -= sent;
422 	}
423 
424 	mutex_enter(&txl->tl_mutex);
425 	txl->tl_active = B_FALSE;
426 	cv_signal(&txl->tl_wait_cv);
427 	mutex_exit(&txl->tl_mutex);
428 
429 out:
430 	if (vdb != NULL)
431 		kmem_free(vdb, sizeof (*vdb));
432 	m_freem(mbuf_head);
433 	return (rc);
434 }
435 
436 /*
437  * This has an optional code path calling ksocket_sendmblk,
438  * which is faster than ksocket_sendmsg (UIO copying) in some
439  * configurations, but needs work before it's uniformly faster.
440  * In particular, the ksocket_sendmblk code path probably needs
441  * to do more like socopyinuio etc, checking the send socket
442  * SO_SND_BUFINFO, SO_SND_COPYAVOID, etc. to find out what is
443  * the preferred MSS, header space, copying preference, etc.
444  *
445  * As it is, this works well with some NIC drivers, particularly
446  * with MTU=9000 as is typical in high performance setups, so
447  * this remains available via this tunable for now.
448  */
449 int smb_send_mblks = 0;
450 
451 /*
452  * smb_net_send_mbufs
453  *
454  * Send the buf chain using either mblk encapsulation (zero-copy)
455  * or via scatter/gather UIO vector, based on the setting.
456  */
457 int
458 smb_net_send_mbufs(smb_session_t *s, mbuf_t *mbuf_head)
459 {
460 	int rc;
461 
462 	if (smb_send_mblks != 0) {
463 		rc = smb_net_send_mblks(s, mbuf_head);
464 	} else {
465 		rc = smb_net_send_uio(s, mbuf_head);
466 	}
467 	return (rc);
468 }
469