1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2022 RackTop Systems, Inc.
23 * Copyright 2011-2021 Tintri by DDN, Inc. All rights reserved.
24 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
26 */
27
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/time.h>
33 #include <sys/varargs.h>
34 #include <sys/modctl.h>
35 #include <sys/pathname.h>
36 #include <sys/vnode.h>
37 #include <sys/socket.h>
38 #include <sys/ksocket.h>
39 #include <sys/stream.h>
40 #include <sys/strsubr.h>
41
42 #include <smbsrv/smb_vops.h>
43 #include <smbsrv/smb.h>
44 #include <smbsrv/smb_kproto.h>
45 #include <smbsrv/smb_kstat.h>
46
47 /*
48 * How many iovec we'll handle as a local array (no allocation)
49 * See also IOV_MAX_STACK <sys/limits.h> but we need this to
50 * work also with _FAKE_KERNEL
51 */
#define	SMB_LOCAL_IOV_MAX	16	/* iovec slots kept in a local array */
53
/*
 * SMB Network Socket API
 *
 * smb_socreate:	Creates a socket based on domain/type.
 * smb_soshutdown:	Disconnect a socket created with smb_socreate
 * smb_sodestroy:	Release resources associated with a socket
 * smb_sosend:		Send the contents of a buffer on a socket
 * smb_sorecv:		Receive data into a buffer from a socket
 * smb_iov_sosend:	Send the contents of an iovec on a socket
 * smb_iov_sorecv:	Receive data into an iovec from a socket
 */
65
66 ksocket_t
smb_socreate(int domain,int type,int protocol)67 smb_socreate(int domain, int type, int protocol)
68 {
69 ksocket_t sock;
70 int err = 0;
71
72 err = ksocket_socket(&sock, domain, type, protocol, KSOCKET_SLEEP,
73 CRED());
74
75 if (err != 0)
76 return (NULL);
77 else
78 return (sock);
79 }
80
/*
 * smb_soshutdown will disconnect the socket and prevent subsequent PDU
 * reception and transmission.  The sonode still exists but its state
 * gets modified to indicate it is no longer connected.  Calls to
 * smb_sorecv/smb_iov_sorecv will return so smb_soshutdown can be used
 * to regain control of a thread stuck in smb_sorecv.
 */
88 void
smb_soshutdown(ksocket_t so)89 smb_soshutdown(ksocket_t so)
90 {
91 (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
92 }
93
/*
 * smb_sodestroy releases all resources associated with a socket previously
 * created with smb_socreate.  The socket must be shut down using
 * smb_soshutdown before the socket is destroyed with smb_sodestroy,
 * otherwise undefined behavior will result.
 */
100 void
smb_sodestroy(ksocket_t so)101 smb_sodestroy(ksocket_t so)
102 {
103 (void) ksocket_close(so, CRED());
104 }
105
106 int
smb_sorecv(ksocket_t so,void * msg,size_t len)107 smb_sorecv(ksocket_t so, void *msg, size_t len)
108 {
109 size_t recvd;
110 int err;
111
112 ASSERT(so != NULL);
113 ASSERT(len != 0);
114
115 if ((err = ksocket_recv(so, msg, len, MSG_WAITALL, &recvd,
116 CRED())) != 0) {
117 return (err);
118 }
119
120 /* Successful receive */
121 return ((recvd == len) ? 0 : -1);
122 }
123
124 /*
125 * Receive a message as an mbuf chain (returned in *mpp)
126 * where the length requested is len.
127 *
128 * Some day hopefully this will be able to receive an actual
129 * mblk chain from the network stack (without copying), and
130 * either wrap those to create mbufs, or use mblks directly.
131 * For now, we allocate buffers here to recv into.
132 */
133 int
smb_net_recv_mbufs(smb_session_t * s,mbuf_t ** mpp,size_t len)134 smb_net_recv_mbufs(smb_session_t *s, mbuf_t **mpp, size_t len)
135 {
136 struct nmsghdr msg;
137 uio_t uio;
138 iovec_t iov[SMB_LOCAL_IOV_MAX];
139 mbuf_t *mhead = NULL;
140 size_t rlen;
141 int rc;
142
143 bzero(&msg, sizeof (msg));
144 bzero(&uio, sizeof (uio));
145 ASSERT(len > 0);
146
147 mhead = smb_mbuf_alloc_chain(len);
148
149 uio.uio_resid = len;
150 uio.uio_iov = iov;
151 uio.uio_iovcnt = SMB_LOCAL_IOV_MAX;
152
153 rc = smb_mbuf_mkuio(mhead, &uio);
154 if (rc != 0)
155 goto errout;
156
157 msg.msg_iov = uio.uio_iov;
158 msg.msg_iovlen = uio.uio_iovcnt;
159 rlen = len;
160 rc = ksocket_recvmsg(s->sock, &msg, MSG_WAITALL, &rlen, CRED());
161 if (rc != 0)
162 goto errout;
163 if (rlen != len) {
164 rc = SET_ERROR(EIO);
165 goto errout;
166 }
167
168 *mpp = mhead;
169 return (rc);
170
171 errout:
172 m_freem(mhead);
173 return (rc);
174 }
175
176 /*
177 * smb_net_txl_constructor
178 *
179 * Transmit list constructor
180 */
181 void
smb_net_txl_constructor(smb_txlst_t * txl)182 smb_net_txl_constructor(smb_txlst_t *txl)
183 {
184 ASSERT(txl->tl_magic != SMB_TXLST_MAGIC);
185
186 mutex_init(&txl->tl_mutex, NULL, MUTEX_DEFAULT, NULL);
187 cv_init(&txl->tl_wait_cv, NULL, CV_DEFAULT, NULL);
188 txl->tl_active = B_FALSE;
189 txl->tl_magic = SMB_TXLST_MAGIC;
190 }
191
192 /*
193 * smb_net_txl_destructor
194 *
195 * Transmit list destructor
196 */
197 void
smb_net_txl_destructor(smb_txlst_t * txl)198 smb_net_txl_destructor(smb_txlst_t *txl)
199 {
200 ASSERT(txl->tl_magic == SMB_TXLST_MAGIC);
201
202 txl->tl_magic = 0;
203 cv_destroy(&txl->tl_wait_cv);
204 mutex_destroy(&txl->tl_mutex);
205 }
206
207 static void
smb_net_send_free(void * arg)208 smb_net_send_free(void *arg)
209 {
210 mbuf_t *m = arg;
211 (void) m_free(m);
212 }
213
214 /*
215 * Create an mblk that wraps the passed mbuf
216 *
217 * Note we need a place to store a frtn_t for each mbuf.
218 * For M_EXT packets (most are) we have lots of unused space
219 * after the headers: M_dat.MH.MH_dat.MH_ext (a.k.a. m_ext)
220 * If not M_EXT but there's enough trailing space, just use
221 * the trailing space, otherwise convert to external type
222 * (which means copying the data, so do only if necessary).
223 *
224 * To simplify the code, the frtn_t is always located at the
225 * end of the mbuf (in space we make sure is unused).
226 */
227 static mblk_t *
smb_net_wrap_mbuf(mbuf_t * mbuf)228 smb_net_wrap_mbuf(mbuf_t *mbuf)
229 {
230 frtn_t *frtn;
231 mblk_t *mblk;
232
233 if ((mbuf->m_flags & M_EXT) == 0 &&
234 M_TRAILINGSPACE(mbuf) < sizeof (*frtn)) {
235 /*
236 * Convert to M_EXT type, like MCLGET(),
237 * but copy before updating mbuf->m_ext,
238 * which would otherwise overwrite data.
239 */
240 caddr_t buf = smb_mbufcl_alloc();
241 ASSERT(mbuf->m_len <= MLEN);
242 bcopy(mbuf->m_data, buf, mbuf->m_len);
243 mbuf->m_ext.ext_buf = buf;
244 mbuf->m_data = buf;
245 mbuf->m_flags |= M_EXT;
246 mbuf->m_ext.ext_size = MCLBYTES;
247 mbuf->m_ext.ext_free = smb_mbufcl_free;
248 }
249
250 /*
251 * Store frtn_t at the end of the mbuf data area.
252 * Note: This is the _internal_ data area (unused)
253 * not the external data pointed to by m_data.
254 */
255 frtn = (void *) &mbuf->m_dat[MLEN - sizeof (*frtn)];
256
257 frtn->free_func = smb_net_send_free;
258 frtn->free_arg = (caddr_t)mbuf;
259
260 mblk = esballoca_wait((void *)mbuf->m_data, mbuf->m_len,
261 BPRI_MED, frtn);
262 if (mblk != NULL) {
263 mblk->b_wptr += mbuf->m_len;
264 mblk->b_datap->db_type = M_DATA;
265 }
266
267 return (mblk);
268 }
269
270 /*
271 * This routine sends an mbuf chain by encapsulating each segment
272 * with an mblk_t setup with external storage (zero-copy).
273 *
274 * Note: the mbufs passed in are free'd via smb_net_send_free.
275 */
276 static int
smb_net_send_mblks(smb_session_t * s,mbuf_t * mbuf_head)277 smb_net_send_mblks(smb_session_t *s, mbuf_t *mbuf_head)
278 {
279 struct nmsghdr msg;
280 mblk_t *mblk_head;
281 mblk_t *mblk_prev;
282 mblk_t *mblk;
283 mbuf_t *mbuf_prev;
284 mbuf_t *mbuf;
285 smb_txlst_t *txl;
286 int rc = 0;
287
288 bzero(&msg, sizeof (msg));
289
290 mblk_prev = NULL;
291 mblk_head = NULL;
292 mbuf_prev = NULL;
293 mbuf = mbuf_head;
294 while (mbuf != NULL) {
295 mblk = smb_net_wrap_mbuf(mbuf);
296 if (mblk == NULL) {
297 rc = ENOSR;
298 break;
299 }
300 if (mblk_head == NULL)
301 mblk_head = mblk;
302 if (mblk_prev != NULL)
303 mblk_prev->b_cont = mblk;
304
305 mblk_prev = mblk;
306 mbuf_prev = mbuf;
307 mbuf = mbuf->m_next;
308 }
309 if (rc != 0) {
310 /* Bailed with ENOSR. Cleanup */
311 if (mbuf != NULL) {
312 if (mbuf_prev != NULL)
313 mbuf_prev->m_next = NULL;
314 m_freem(mbuf);
315 }
316 if (mblk_head != NULL)
317 freemsg(mblk_head);
318 return (rc);
319 }
320
321 /*
322 * Wait for our turn to send.
323 */
324 DTRACE_PROBE1(send__wait__start, struct smb_session_t *, s);
325 txl = &s->s_txlst;
326 mutex_enter(&txl->tl_mutex);
327 while (txl->tl_active)
328 cv_wait(&txl->tl_wait_cv, &txl->tl_mutex);
329 txl->tl_active = B_TRUE;
330 mutex_exit(&txl->tl_mutex);
331 DTRACE_PROBE1(send__wait__done, struct smb_session_t *, s);
332
333 /*
334 * OK, send it.
335 */
336 rc = ksocket_sendmblk(s->sock, &msg, 0, &mblk_head, CRED());
337 if (rc != 0) {
338 if (mblk_head != NULL) {
339 freemsg(mblk_head);
340 mblk_head = NULL;
341 }
342 }
343
344 mutex_enter(&txl->tl_mutex);
345 txl->tl_active = B_FALSE;
346 cv_signal(&txl->tl_wait_cv);
347 mutex_exit(&txl->tl_mutex);
348
349 return (rc);
350 }
351
352 /*
353 * This routine sends an mbuf chain by copying its segments
354 * (scatter/gather) via UIO.
355 *
356 * The mbuf chain is always free'd (error or not)
357 */
358 static int
smb_net_send_uio(smb_session_t * s,mbuf_t * mbuf_head)359 smb_net_send_uio(smb_session_t *s, mbuf_t *mbuf_head)
360 {
361 struct nmsghdr msg;
362 uio_t uio;
363 iovec_t iov_local[SMB_LOCAL_IOV_MAX];
364 mbuf_t *mbuf;
365 smb_txlst_t *txl;
366 smb_vdb_t *vdb = NULL;
367 size_t sent;
368 int len, nseg, rc;
369
370 bzero(&msg, sizeof (msg));
371 bzero(&uio, sizeof (uio));
372
373 len = nseg = 0;
374 for (mbuf = mbuf_head;
375 mbuf != NULL;
376 mbuf = mbuf->m_next) {
377 nseg++;
378 len += mbuf->m_len;
379 }
380
381 if (nseg <= SMB_LOCAL_IOV_MAX) {
382 uio.uio_iov = iov_local;
383 uio.uio_iovcnt = SMB_LOCAL_IOV_MAX;
384 } else {
385 vdb = kmem_alloc(sizeof (*vdb), KM_SLEEP);
386 uio.uio_iov = &vdb->vdb_iovec[0];
387 uio.uio_iovcnt = MAX_IOVEC;
388 }
389 uio.uio_resid = len;
390
391 rc = smb_mbuf_mkuio(mbuf_head, &uio);
392 if (rc != 0)
393 goto out;
394
395 DTRACE_PROBE1(send__wait__start, struct smb_session_t *, s);
396
397 /*
398 * Wait for our turn to send.
399 */
400 txl = &s->s_txlst;
401 mutex_enter(&txl->tl_mutex);
402 while (txl->tl_active)
403 cv_wait(&txl->tl_wait_cv, &txl->tl_mutex);
404 txl->tl_active = B_TRUE;
405 mutex_exit(&txl->tl_mutex);
406
407 DTRACE_PROBE1(send__wait__done, struct smb_session_t *, s);
408
409 /*
410 * OK, try to send.
411 *
412 * This should block until we've sent it all,
413 * or given up due to errors (socket closed).
414 */
415 msg.msg_iov = uio.uio_iov;
416 msg.msg_iovlen = uio.uio_iovcnt;
417 while (uio.uio_resid > 0) {
418 rc = ksocket_sendmsg(s->sock, &msg, 0, &sent, CRED());
419 if (rc != 0)
420 break;
421 uio.uio_resid -= sent;
422 }
423
424 mutex_enter(&txl->tl_mutex);
425 txl->tl_active = B_FALSE;
426 cv_signal(&txl->tl_wait_cv);
427 mutex_exit(&txl->tl_mutex);
428
429 out:
430 if (vdb != NULL)
431 kmem_free(vdb, sizeof (*vdb));
432 m_freem(mbuf_head);
433 return (rc);
434 }
435
436 /*
437 * This has an optional code path calling ksocket_sendmblk,
438 * which is faster than ksocket_sendmsg (UIO copying) in some
439 * configurations, but needs work before it's uniformly faster.
440 * In particular, the ksocket_sendmblk code path probably needs
441 * to do more like socopyinuio etc, checking the send socket
442 * SO_SND_BUFINFO, SO_SND_COPYAVOID, etc. to find out what is
443 * the preferred MSS, header space, copying preference, etc.
444 *
445 * As it is, this works well with some NIC drivers, particularly
446 * with MTU=9000 as is typical in high performance setups, so
447 * this remains available via this tunable for now.
448 */
int smb_send_mblks = 0;	/* non-zero: send via ksocket_sendmblk; 0: UIO */
450
451 /*
452 * smb_net_send_mbufs
453 *
454 * Send the buf chain using either mblk encapsulation (zero-copy)
455 * or via scatter/gather UIO vector, based on the setting.
456 */
457 int
smb_net_send_mbufs(smb_session_t * s,mbuf_t * mbuf_head)458 smb_net_send_mbufs(smb_session_t *s, mbuf_t *mbuf_head)
459 {
460 int rc;
461
462 if (smb_send_mblks != 0) {
463 rc = smb_net_send_mblks(s, mbuf_head);
464 } else {
465 rc = smb_net_send_uio(s, mbuf_head);
466 }
467 return (rc);
468 }
469