1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2017 Sebastian Wiedenroth
25 */
26
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/sysmacros.h>
31 #include <sys/debug.h>
32 #include <sys/cmn_err.h>
33 #include <sys/vfs.h>
34 #include <sys/policy.h>
35 #include <sys/modctl.h>
36
37 #include <sys/sunddi.h>
38
39 #include <sys/strsun.h>
40 #include <sys/stropts.h>
41 #include <sys/strsubr.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/uio.h>
45
46 #include <inet/ipclassifier.h>
47 #include <fs/sockfs/sockcommon.h>
48 #include <fs/sockfs/sockfilter_impl.h>
49 #include <fs/sockfs/nl7c.h>
50 #include <fs/sockfs/socktpi.h>
51 #include <fs/sockfs/sodirect.h>
52 #include <inet/ip.h>
53
54 extern int xnet_skip_checks, xnet_check_print, xnet_truncate_print;
55
56 /*
57 * Common socket access functions.
58 *
59 * Instead of accessing the sonode switch directly (i.e., SOP_xxx()),
60 * the socket_xxx() function should be used.
61 */
62
63 /*
64 * Try to create a new sonode of the requested <family, type, protocol>.
65 */
66 /* ARGSUSED */
67 struct sonode *
socket_create(int family,int type,int protocol,char * devpath,char * mod,int flags,int version,struct cred * cr,int * errorp)68 socket_create(int family, int type, int protocol, char *devpath, char *mod,
69 int flags, int version, struct cred *cr, int *errorp)
70 {
71 struct sonode *so;
72 struct sockparams *sp = NULL;
73 int saved_error;
74
75 /*
76 * Look for a sockparams entry that match the given criteria.
77 * solookup() returns with the entry held.
78 */
79 *errorp = solookup(family, type, protocol, &sp);
80 saved_error = *errorp;
81 if (sp == NULL) {
82 int kmflags = (flags == SOCKET_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
83 /*
84 * There is no matching sockparams entry. An ephemeral entry is
85 * created if the caller specifies a device or a socket module.
86 */
87 if (devpath != NULL) {
88 saved_error = 0;
89 sp = sockparams_hold_ephemeral_bydev(family, type,
90 protocol, devpath, kmflags, errorp);
91 } else if (mod != NULL) {
92 saved_error = 0;
93 sp = sockparams_hold_ephemeral_bymod(family, type,
94 protocol, mod, kmflags, errorp);
95 } else {
96 *errorp = solookup(family, type, 0, &sp);
97 }
98
99 if (sp == NULL) {
100 if (saved_error && (*errorp == EPROTONOSUPPORT ||
101 *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT))
102 *errorp = saved_error;
103 return (NULL);
104 }
105 }
106
107 ASSERT(sp->sp_smod_info != NULL);
108 ASSERT(flags == SOCKET_SLEEP || flags == SOCKET_NOSLEEP);
109 sp->sp_stats.sps_ncreate.value.ui64++;
110 so = sp->sp_smod_info->smod_sock_create_func(sp, family, type,
111 protocol, version, flags, errorp, cr);
112 if (so == NULL) {
113 SOCKPARAMS_DEC_REF(sp);
114 } else {
115 if ((*errorp = SOP_INIT(so, NULL, cr, flags)) == 0) {
116 /* Cannot fail, only bumps so_count */
117 (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL);
118 } else {
119 if (saved_error && (*errorp == EPROTONOSUPPORT ||
120 *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT))
121 *errorp = saved_error;
122 socket_destroy(so);
123 so = NULL;
124 }
125 }
126 return (so);
127 }
128
129 struct sonode *
socket_newconn(struct sonode * parent,sock_lower_handle_t lh,sock_downcalls_t * dc,int flags,int * errorp)130 socket_newconn(struct sonode *parent, sock_lower_handle_t lh,
131 sock_downcalls_t *dc, int flags, int *errorp)
132 {
133 struct sonode *so;
134 struct sockparams *sp;
135 struct cred *cr;
136
137 if ((cr = CRED()) == NULL)
138 cr = kcred;
139
140 sp = parent->so_sockparams;
141 ASSERT(sp != NULL);
142
143 sp->sp_stats.sps_ncreate.value.ui64++;
144 so = sp->sp_smod_info->smod_sock_create_func(sp, parent->so_family,
145 parent->so_type, parent->so_protocol, parent->so_version, flags,
146 errorp, cr);
147 if (so != NULL) {
148 SOCKPARAMS_INC_REF(sp);
149
150 so->so_proto_handle = lh;
151 so->so_downcalls = dc;
152 /*
153 * This function may be called in interrupt context, and CRED()
154 * will be NULL. In this case, pass in kcred.
155 */
156 if ((*errorp = SOP_INIT(so, parent, cr, flags)) == 0) {
157 /* Cannot fail, only bumps so_count */
158 (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL);
159 } else {
160 socket_destroy(so);
161 so = NULL;
162 }
163 }
164
165 return (so);
166 }
167
168 /*
169 * Bind local endpoint.
170 */
171 int
socket_bind(struct sonode * so,struct sockaddr * name,socklen_t namelen,int flags,cred_t * cr)172 socket_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
173 int flags, cred_t *cr)
174 {
175 return (SOP_BIND(so, name, namelen, flags, cr));
176 }
177
178 /*
179 * Turn socket into a listen socket.
180 */
181 int
socket_listen(struct sonode * so,int backlog,cred_t * cr)182 socket_listen(struct sonode *so, int backlog, cred_t *cr)
183 {
184 if (backlog < 0) {
185 backlog = 0;
186 }
187
188 /*
189 * Use the same qlimit as in BSD. BSD checks the qlimit
190 * before queuing the next connection implying that a
191 * listen(sock, 0) allows one connection to be queued.
192 * BSD also uses 1.5 times the requested backlog.
193 *
194 * XNS Issue 4 required a strict interpretation of the backlog.
195 * This has been waived subsequently for Issue 4 and the change
196 * incorporated in XNS Issue 5. So we aren't required to do
197 * anything special for XPG apps.
198 */
199 if (backlog >= (INT_MAX - 1) / 3)
200 backlog = INT_MAX;
201 else
202 backlog = backlog * 3 / 2 + 1;
203
204 return (SOP_LISTEN(so, backlog, cr));
205 }
206
207 /*
208 * Accept incoming connection.
209 */
210 int
socket_accept(struct sonode * lso,int fflag,cred_t * cr,struct sonode ** nsop)211 socket_accept(struct sonode *lso, int fflag, cred_t *cr, struct sonode **nsop)
212 {
213 return (SOP_ACCEPT(lso, fflag, cr, nsop));
214 }
215
216 /*
217 * Active open.
218 */
219 int
socket_connect(struct sonode * so,struct sockaddr * name,socklen_t namelen,int fflag,int flags,cred_t * cr)220 socket_connect(struct sonode *so, struct sockaddr *name,
221 socklen_t namelen, int fflag, int flags, cred_t *cr)
222 {
223 int error;
224
225 /*
226 * Handle a connect to a name parameter of type AF_UNSPEC like a
227 * connect to a null address. This is the portable method to
228 * unconnect a socket.
229 */
230 if ((namelen >= sizeof (sa_family_t)) &&
231 (name->sa_family == AF_UNSPEC)) {
232 name = NULL;
233 namelen = 0;
234 }
235
236 error = SOP_CONNECT(so, name, namelen, fflag, flags, cr);
237
238 if (error == EHOSTUNREACH && flags & _SOCONNECT_XPG4_2) {
239 /*
240 * X/Open specification contains a requirement that
241 * ENETUNREACH be returned but does not require
242 * EHOSTUNREACH. In order to keep the test suite
243 * happy we mess with the errno here.
244 */
245 error = ENETUNREACH;
246 }
247
248 return (error);
249 }
250
251 /*
252 * Get address of remote node.
253 */
254 int
socket_getpeername(struct sonode * so,struct sockaddr * addr,socklen_t * addrlen,boolean_t accept,cred_t * cr)255 socket_getpeername(struct sonode *so, struct sockaddr *addr,
256 socklen_t *addrlen, boolean_t accept, cred_t *cr)
257 {
258 ASSERT(*addrlen > 0);
259 return (SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
260
261 }
262
263 /*
264 * Get local address.
265 */
266 int
socket_getsockname(struct sonode * so,struct sockaddr * addr,socklen_t * addrlen,cred_t * cr)267 socket_getsockname(struct sonode *so, struct sockaddr *addr,
268 socklen_t *addrlen, cred_t *cr)
269 {
270 return (SOP_GETSOCKNAME(so, addr, addrlen, cr));
271
272 }
273
274 /*
275 * Called from shutdown().
276 */
277 int
socket_shutdown(struct sonode * so,int how,cred_t * cr)278 socket_shutdown(struct sonode *so, int how, cred_t *cr)
279 {
280 return (SOP_SHUTDOWN(so, how, cr));
281 }
282
283 /*
284 * Get socket options.
285 */
286 /*ARGSUSED*/
287 int
socket_getsockopt(struct sonode * so,int level,int option_name,void * optval,socklen_t * optlenp,int flags,cred_t * cr)288 socket_getsockopt(struct sonode *so, int level, int option_name,
289 void *optval, socklen_t *optlenp, int flags, cred_t *cr)
290 {
291 return (SOP_GETSOCKOPT(so, level, option_name, optval,
292 optlenp, flags, cr));
293 }
294
295 /*
296 * Set socket options
297 */
298 int
socket_setsockopt(struct sonode * so,int level,int option_name,const void * optval,t_uscalar_t optlen,cred_t * cr)299 socket_setsockopt(struct sonode *so, int level, int option_name,
300 const void *optval, t_uscalar_t optlen, cred_t *cr)
301 {
302 int val = 1;
303 /* Caller allocates aligned optval, or passes null */
304 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0);
305 /* If optval is null optlen is 0, and vice-versa */
306 ASSERT(optval != NULL || optlen == 0);
307 ASSERT(optlen != 0 || optval == NULL);
308
309 if (optval == NULL && optlen == 0)
310 optval = &val;
311
312 return (SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
313 }
314
315 int
socket_sendmsg(struct sonode * so,struct nmsghdr * msg,struct uio * uiop,cred_t * cr)316 socket_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
317 cred_t *cr)
318 {
319 int error = 0;
320 ssize_t orig_resid = uiop->uio_resid;
321
322 /*
323 * Do not bypass the cache if we are doing a local (AF_UNIX) write.
324 */
325 if (so->so_family == AF_UNIX)
326 uiop->uio_extflg |= UIO_COPY_CACHED;
327 else
328 uiop->uio_extflg &= ~UIO_COPY_CACHED;
329
330 error = SOP_SENDMSG(so, msg, uiop, cr);
331 switch (error) {
332 default:
333 break;
334 case EINTR:
335 case ENOMEM:
336 /* EAGAIN is EWOULDBLOCK */
337 case EWOULDBLOCK:
338 /* We did a partial send */
339 if (uiop->uio_resid != orig_resid)
340 error = 0;
341 break;
342 case EPIPE:
343 if (((so->so_mode & SM_KERNEL) == 0) &&
344 ((msg->msg_flags & MSG_NOSIGNAL) == 0)) {
345 tsignal(curthread, SIGPIPE);
346 }
347 break;
348 }
349
350 return (error);
351 }
352
353 int
socket_sendmblk(struct sonode * so,struct nmsghdr * msg,int fflag,struct cred * cr,mblk_t ** mpp)354 socket_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
355 struct cred *cr, mblk_t **mpp)
356 {
357 int error = 0;
358
359 error = SOP_SENDMBLK(so, msg, fflag, cr, mpp);
360 if (error == EPIPE) {
361 tsignal(curthread, SIGPIPE);
362 }
363 return (error);
364 }
365
366 int
socket_recvmsg(struct sonode * so,struct nmsghdr * msg,struct uio * uiop,cred_t * cr)367 socket_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
368 cred_t *cr)
369 {
370 int error;
371 ssize_t orig_resid = uiop->uio_resid;
372
373 /*
374 * Do not bypass the cache when reading data, as the application
375 * is likely to access the data shortly.
376 */
377 uiop->uio_extflg |= UIO_COPY_CACHED;
378
379 error = SOP_RECVMSG(so, msg, uiop, cr);
380
381 switch (error) {
382 case EINTR:
383 /* EAGAIN is EWOULDBLOCK */
384 case EWOULDBLOCK:
385 /* We did a partial read */
386 if (uiop->uio_resid != orig_resid)
387 error = 0;
388 break;
389 default:
390 break;
391 }
392 return (error);
393 }
394
/*
 * Perform an ioctl on `so'.
 *
 * Dispatches to SOP_IOCTL() and returns its result unchanged.
 */
int
socket_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	int error;

	error = SOP_IOCTL(so, cmd, arg, mode, cr, rvalp);

	return (error);
}
401
/*
 * Poll `so' for `events'.
 *
 * Dispatches to SOP_POLL(), passing `reventsp' and `phpp' through, and
 * returns its result unchanged.
 */
int
socket_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int error;

	error = SOP_POLL(so, events, anyyet, reventsp, phpp);

	return (error);
}
408
/*
 * Close `so' by way of its vnode.
 *
 * Calls VOP_CLOSE() on the socket's vnode with a count of 1 and an offset
 * of 0, and returns its result unchanged.
 */
int
socket_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	error = VOP_CLOSE(SOTOV(so), flag, 1, 0, cr, NULL);

	return (error);
}
414
415 int
socket_close_internal(struct sonode * so,int flag,cred_t * cr)416 socket_close_internal(struct sonode *so, int flag, cred_t *cr)
417 {
418 ASSERT(so->so_count == 0);
419
420 return (SOP_CLOSE(so, flag, cr));
421 }
422
/*
 * Destroy `so' by way of its vnode: the vnode is marked invalid with
 * vn_invalid() and the caller's hold on it is released with VN_RELE().
 */
void
socket_destroy(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	vn_invalid(vp);
	VN_RELE(vp);
}
429
430 /* ARGSUSED */
431 void
socket_destroy_internal(struct sonode * so,cred_t * cr)432 socket_destroy_internal(struct sonode *so, cred_t *cr)
433 {
434 struct sockparams *sp = so->so_sockparams;
435 ASSERT(so->so_count == 0 && sp != NULL);
436
437 sp->sp_smod_info->smod_sock_destroy_func(so);
438
439 SOCKPARAMS_DEC_REF(sp);
440 }
441
442 /*
443 * TODO Once the common vnode ops is available, then the vnops argument
444 * should be removed.
445 */
446 /*ARGSUSED*/
447 int
sonode_constructor(void * buf,void * cdrarg,int kmflags)448 sonode_constructor(void *buf, void *cdrarg, int kmflags)
449 {
450 struct sonode *so = buf;
451 struct vnode *vp;
452
453 vp = so->so_vnode = vn_alloc(kmflags);
454 if (vp == NULL) {
455 return (-1);
456 }
457 vp->v_data = so;
458 vn_setops(vp, socket_vnodeops);
459
460 so->so_priv = NULL;
461 so->so_oobmsg = NULL;
462
463 so->so_proto_handle = NULL;
464
465 so->so_peercred = NULL;
466
467 so->so_rcv_queued = 0;
468 so->so_rcv_q_head = NULL;
469 so->so_rcv_q_last_head = NULL;
470 so->so_rcv_head = NULL;
471 so->so_rcv_last_head = NULL;
472 so->so_rcv_wanted = 0;
473 so->so_rcv_timer_interval = SOCKET_NO_RCVTIMER;
474 so->so_rcv_timer_tid = 0;
475 so->so_rcv_thresh = 0;
476
477 list_create(&so->so_acceptq_list, sizeof (struct sonode),
478 offsetof(struct sonode, so_acceptq_node));
479 list_create(&so->so_acceptq_defer, sizeof (struct sonode),
480 offsetof(struct sonode, so_acceptq_node));
481 list_link_init(&so->so_acceptq_node);
482 so->so_acceptq_len = 0;
483 so->so_backlog = 0;
484 so->so_listener = NULL;
485
486 so->so_snd_qfull = B_FALSE;
487
488 so->so_filter_active = 0;
489 so->so_filter_tx = 0;
490 so->so_filter_defertime = 0;
491 so->so_filter_top = NULL;
492 so->so_filter_bottom = NULL;
493
494 mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL);
495 mutex_init(&so->so_acceptq_lock, NULL, MUTEX_DEFAULT, NULL);
496 rw_init(&so->so_fallback_rwlock, NULL, RW_DEFAULT, NULL);
497 cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL);
498 cv_init(&so->so_single_cv, NULL, CV_DEFAULT, NULL);
499 cv_init(&so->so_read_cv, NULL, CV_DEFAULT, NULL);
500
501 cv_init(&so->so_acceptq_cv, NULL, CV_DEFAULT, NULL);
502 cv_init(&so->so_snd_cv, NULL, CV_DEFAULT, NULL);
503 cv_init(&so->so_rcv_cv, NULL, CV_DEFAULT, NULL);
504 cv_init(&so->so_copy_cv, NULL, CV_DEFAULT, NULL);
505 cv_init(&so->so_closing_cv, NULL, CV_DEFAULT, NULL);
506
507 return (0);
508 }
509
510 /*ARGSUSED*/
511 void
sonode_destructor(void * buf,void * cdrarg)512 sonode_destructor(void *buf, void *cdrarg)
513 {
514 struct sonode *so = buf;
515 struct vnode *vp = SOTOV(so);
516
517 ASSERT(so->so_priv == NULL);
518 ASSERT(so->so_peercred == NULL);
519
520 ASSERT(so->so_oobmsg == NULL);
521
522 ASSERT(so->so_rcv_q_head == NULL);
523
524 list_destroy(&so->so_acceptq_list);
525 list_destroy(&so->so_acceptq_defer);
526 ASSERT(!list_link_active(&so->so_acceptq_node));
527 ASSERT(so->so_listener == NULL);
528
529 ASSERT(so->so_filter_active == 0);
530 ASSERT(so->so_filter_tx == 0);
531 ASSERT(so->so_filter_top == NULL);
532 ASSERT(so->so_filter_bottom == NULL);
533
534 ASSERT(vp->v_data == so);
535 ASSERT(vn_matchops(vp, socket_vnodeops));
536
537 vn_free(vp);
538
539 mutex_destroy(&so->so_lock);
540 mutex_destroy(&so->so_acceptq_lock);
541 rw_destroy(&so->so_fallback_rwlock);
542
543 cv_destroy(&so->so_state_cv);
544 cv_destroy(&so->so_single_cv);
545 cv_destroy(&so->so_read_cv);
546 cv_destroy(&so->so_acceptq_cv);
547 cv_destroy(&so->so_snd_cv);
548 cv_destroy(&so->so_rcv_cv);
549 cv_destroy(&so->so_closing_cv);
550 }
551
552 void
sonode_init(struct sonode * so,struct sockparams * sp,int family,int type,int protocol,sonodeops_t * sops)553 sonode_init(struct sonode *so, struct sockparams *sp, int family,
554 int type, int protocol, sonodeops_t *sops)
555 {
556 vnode_t *vp;
557
558 vp = SOTOV(so);
559
560 so->so_flag = 0;
561
562 so->so_state = 0;
563 so->so_mode = 0;
564
565 so->so_count = 0;
566
567 so->so_family = family;
568 so->so_type = type;
569 so->so_protocol = protocol;
570
571 SOCK_CONNID_INIT(so->so_proto_connid);
572
573 so->so_options = 0;
574 so->so_linger.l_onoff = 0;
575 so->so_linger.l_linger = 0;
576 so->so_sndbuf = 0;
577 so->so_error = 0;
578 so->so_rcvtimeo = 0;
579 so->so_sndtimeo = 0;
580 so->so_xpg_rcvbuf = 0;
581
582 ASSERT(so->so_oobmsg == NULL);
583 so->so_oobmark = 0;
584 so->so_pgrp = 0;
585
586 ASSERT(so->so_peercred == NULL);
587
588 so->so_zoneid = getzoneid();
589
590 so->so_sockparams = sp;
591
592 so->so_ops = sops;
593
594 so->so_not_str = (sops != &sotpi_sonodeops);
595
596 so->so_proto_handle = NULL;
597
598 so->so_downcalls = NULL;
599
600 so->so_copyflag = 0;
601
602 vn_reinit(vp);
603 vp->v_vfsp = rootvfs;
604 vp->v_type = VSOCK;
605 vp->v_rdev = sockdev;
606
607 so->so_snd_qfull = B_FALSE;
608 so->so_minpsz = 0;
609
610 so->so_rcv_wakeup = B_FALSE;
611 so->so_snd_wakeup = B_FALSE;
612 so->so_flowctrld = B_FALSE;
613
614 so->so_pollev = 0;
615 bzero(&so->so_poll_list, sizeof (so->so_poll_list));
616 bzero(&so->so_proto_props, sizeof (struct sock_proto_props));
617
618 bzero(&(so->so_ksock_callbacks), sizeof (ksocket_callbacks_t));
619 so->so_ksock_cb_arg = NULL;
620
621 so->so_max_addr_len = sizeof (struct sockaddr_storage);
622
623 so->so_direct = NULL;
624
625 vn_exists(vp);
626 }
627
628 void
sonode_fini(struct sonode * so)629 sonode_fini(struct sonode *so)
630 {
631 vnode_t *vp;
632
633 ASSERT(so->so_count == 0);
634
635 if (so->so_rcv_timer_tid) {
636 ASSERT(MUTEX_NOT_HELD(&so->so_lock));
637 (void) untimeout(so->so_rcv_timer_tid);
638 so->so_rcv_timer_tid = 0;
639 }
640
641 if (so->so_poll_list.ph_list != NULL) {
642 pollwakeup(&so->so_poll_list, POLLERR);
643 pollhead_clean(&so->so_poll_list);
644 }
645
646 if (so->so_direct != NULL)
647 sod_sock_fini(so);
648
649 vp = SOTOV(so);
650 vn_invalid(vp);
651
652 if (so->so_peercred != NULL) {
653 crfree(so->so_peercred);
654 so->so_peercred = NULL;
655 }
656 /* Detach and destroy filters */
657 if (so->so_filter_top != NULL)
658 sof_sonode_cleanup(so);
659
660 ASSERT(list_is_empty(&so->so_acceptq_list));
661 ASSERT(list_is_empty(&so->so_acceptq_defer));
662 ASSERT(!list_link_active(&so->so_acceptq_node));
663
664 ASSERT(so->so_rcv_queued == 0);
665 ASSERT(so->so_rcv_q_head == NULL);
666 ASSERT(so->so_rcv_q_last_head == NULL);
667 ASSERT(so->so_rcv_head == NULL);
668 ASSERT(so->so_rcv_last_head == NULL);
669 }
670