10f1702c5SYu Xiangning /* 20f1702c5SYu Xiangning * CDDL HEADER START 30f1702c5SYu Xiangning * 40f1702c5SYu Xiangning * The contents of this file are subject to the terms of the 50f1702c5SYu Xiangning * Common Development and Distribution License (the "License"). 60f1702c5SYu Xiangning * You may not use this file except in compliance with the License. 70f1702c5SYu Xiangning * 80f1702c5SYu Xiangning * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90f1702c5SYu Xiangning * or http://www.opensolaris.org/os/licensing. 100f1702c5SYu Xiangning * See the License for the specific language governing permissions 110f1702c5SYu Xiangning * and limitations under the License. 120f1702c5SYu Xiangning * 130f1702c5SYu Xiangning * When distributing Covered Code, include this CDDL HEADER in each 140f1702c5SYu Xiangning * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150f1702c5SYu Xiangning * If applicable, add the following below this CDDL HEADER, with the 160f1702c5SYu Xiangning * fields enclosed by brackets "[]" replaced with your own identifying 170f1702c5SYu Xiangning * information: Portions Copyright [yyyy] [name of copyright owner] 180f1702c5SYu Xiangning * 190f1702c5SYu Xiangning * CDDL HEADER END 200f1702c5SYu Xiangning */ 210f1702c5SYu Xiangning 220f1702c5SYu Xiangning /* 233e95bd4aSAnders Persson * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 240f1702c5SYu Xiangning */ 25d690b62cSDan McDonald /* 26d690b62cSDan McDonald * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. 27d690b62cSDan McDonald */ 280f1702c5SYu Xiangning 290f1702c5SYu Xiangning #include <sys/types.h> 300f1702c5SYu Xiangning #include <sys/param.h> 310f1702c5SYu Xiangning #include <sys/signal.h> 320f1702c5SYu Xiangning #include <sys/cmn_err.h> 330f1702c5SYu Xiangning 340f1702c5SYu Xiangning #include <sys/stropts.h> 350f1702c5SYu Xiangning #include <sys/socket.h> 360f1702c5SYu Xiangning #include <sys/socketvar.h> 370f1702c5SYu Xiangning #include <sys/sockio.h> 380f1702c5SYu Xiangning #include <sys/strsubr.h> 390f1702c5SYu Xiangning #include <sys/strsun.h> 400f1702c5SYu Xiangning #include <sys/atomic.h> 4141174437SAnders Persson #include <sys/tihdr.h> 420f1702c5SYu Xiangning 430f1702c5SYu Xiangning #include <fs/sockfs/sockcommon.h> 443e95bd4aSAnders Persson #include <fs/sockfs/sockfilter_impl.h> 450f1702c5SYu Xiangning #include <fs/sockfs/socktpi.h> 46bbc000e5SAnders Persson #include <fs/sockfs/sodirect.h> 470f1702c5SYu Xiangning #include <sys/ddi.h> 480f1702c5SYu Xiangning #include <inet/ip.h> 490f1702c5SYu Xiangning #include <sys/time.h> 500f1702c5SYu Xiangning #include <sys/cmn_err.h> 510f1702c5SYu Xiangning 520f1702c5SYu Xiangning #ifdef SOCK_TEST 530f1702c5SYu Xiangning extern int do_useracc; 540f1702c5SYu Xiangning extern clock_t sock_test_timelimit; 550f1702c5SYu Xiangning #endif /* SOCK_TEST */ 560f1702c5SYu Xiangning 570f1702c5SYu Xiangning #define MBLK_PULL_LEN 64 580f1702c5SYu Xiangning uint32_t so_mblk_pull_len = MBLK_PULL_LEN; 590f1702c5SYu Xiangning 600f1702c5SYu Xiangning #ifdef DEBUG 610f1702c5SYu Xiangning boolean_t so_debug_length = B_FALSE; 620f1702c5SYu Xiangning static boolean_t so_check_length(sonode_t *so); 630f1702c5SYu Xiangning #endif 640f1702c5SYu Xiangning 650f1702c5SYu Xiangning static int 660f1702c5SYu Xiangning so_acceptq_dequeue_locked(struct sonode *so, boolean_t dontblock, 670f1702c5SYu Xiangning struct sonode **nsop) 680f1702c5SYu Xiangning { 690f1702c5SYu Xiangning struct sonode *nso = NULL; 700f1702c5SYu Xiangning 710f1702c5SYu Xiangning *nsop = NULL; 720f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_acceptq_lock)); 733e95bd4aSAnders Persson while ((nso = list_remove_head(&so->so_acceptq_list)) == NULL) { 740f1702c5SYu Xiangning /* 750f1702c5SYu Xiangning * No need to check so_error here, because it is not 760f1702c5SYu Xiangning * possible for a listening socket to be reset or otherwise 770f1702c5SYu Xiangning * disconnected. 780f1702c5SYu Xiangning * 790f1702c5SYu Xiangning * So now we just need check if it's ok to wait. 800f1702c5SYu Xiangning */ 810f1702c5SYu Xiangning if (dontblock) 820f1702c5SYu Xiangning return (EWOULDBLOCK); 830f1702c5SYu Xiangning if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING)) 840f1702c5SYu Xiangning return (EINTR); 850f1702c5SYu Xiangning 860f1702c5SYu Xiangning if (cv_wait_sig_swap(&so->so_acceptq_cv, 870f1702c5SYu Xiangning &so->so_acceptq_lock) == 0) 880f1702c5SYu Xiangning return (EINTR); 890f1702c5SYu Xiangning } 900f1702c5SYu Xiangning 910f1702c5SYu Xiangning ASSERT(nso != NULL); 920f1702c5SYu Xiangning ASSERT(so->so_acceptq_len > 0); 933e95bd4aSAnders Persson so->so_acceptq_len--; 943e95bd4aSAnders Persson nso->so_listener = NULL; 950f1702c5SYu Xiangning 960f1702c5SYu Xiangning *nsop = nso; 970f1702c5SYu Xiangning 980f1702c5SYu Xiangning return (0); 990f1702c5SYu Xiangning } 1000f1702c5SYu Xiangning 1010f1702c5SYu Xiangning /* 1020f1702c5SYu Xiangning * int so_acceptq_dequeue(struct sonode *, boolean_t, struct sonode **) 1030f1702c5SYu Xiangning * 1040f1702c5SYu Xiangning * Pulls a connection off of the accept queue. 1050f1702c5SYu Xiangning * 1060f1702c5SYu Xiangning * Arguments: 1070f1702c5SYu Xiangning * so - listening socket 1080f1702c5SYu Xiangning * dontblock - indicate whether it's ok to sleep if there are no 1090f1702c5SYu Xiangning * connections on the queue 1100f1702c5SYu Xiangning * nsop - Value-return argument 1110f1702c5SYu Xiangning * 1120f1702c5SYu Xiangning * Return values: 1130f1702c5SYu Xiangning * 0 when a connection is successfully dequeued, in which case nsop 1140f1702c5SYu Xiangning * is set to point to the new connection. Upon failure a non-zero 1150f1702c5SYu Xiangning * value is returned, and the value of nsop is set to NULL. 1160f1702c5SYu Xiangning * 1170f1702c5SYu Xiangning * Note: 1180f1702c5SYu Xiangning * so_acceptq_dequeue() may return prematurly if the socket is falling 1190f1702c5SYu Xiangning * back to TPI. 1200f1702c5SYu Xiangning */ 1210f1702c5SYu Xiangning int 1220f1702c5SYu Xiangning so_acceptq_dequeue(struct sonode *so, boolean_t dontblock, 1230f1702c5SYu Xiangning struct sonode **nsop) 1240f1702c5SYu Xiangning { 1250f1702c5SYu Xiangning int error; 1260f1702c5SYu Xiangning 1270f1702c5SYu Xiangning mutex_enter(&so->so_acceptq_lock); 1280f1702c5SYu Xiangning error = so_acceptq_dequeue_locked(so, dontblock, nsop); 1290f1702c5SYu Xiangning mutex_exit(&so->so_acceptq_lock); 1300f1702c5SYu Xiangning 1310f1702c5SYu Xiangning return (error); 1320f1702c5SYu Xiangning } 1330f1702c5SYu Xiangning 1343e95bd4aSAnders Persson static void 1353e95bd4aSAnders Persson so_acceptq_flush_impl(struct sonode *so, list_t *list, boolean_t doclose) 1363e95bd4aSAnders Persson { 1373e95bd4aSAnders Persson struct sonode *nso; 1383e95bd4aSAnders Persson 1393e95bd4aSAnders Persson while ((nso = list_remove_head(list)) != NULL) { 1403e95bd4aSAnders Persson nso->so_listener = NULL; 1413e95bd4aSAnders Persson if (doclose) { 1423e95bd4aSAnders Persson (void) socket_close(nso, 0, CRED()); 1433e95bd4aSAnders Persson } else { 1440f1702c5SYu Xiangning /* 1453e95bd4aSAnders Persson * Only used for fallback - not possible when filters 1463e95bd4aSAnders Persson * are present. 1473e95bd4aSAnders Persson */ 1483e95bd4aSAnders Persson ASSERT(so->so_filter_active == 0); 1493e95bd4aSAnders Persson /* 1503e95bd4aSAnders Persson * Since the socket is on the accept queue, there can 1513e95bd4aSAnders Persson * only be one reference. We drop the reference and 1523e95bd4aSAnders Persson * just blow off the socket. 1533e95bd4aSAnders Persson */ 1543e95bd4aSAnders Persson ASSERT(nso->so_count == 1); 1553e95bd4aSAnders Persson nso->so_count--; 1563e95bd4aSAnders Persson /* drop the proto ref */ 1573e95bd4aSAnders Persson VN_RELE(SOTOV(nso)); 1583e95bd4aSAnders Persson } 1593e95bd4aSAnders Persson socket_destroy(nso); 1603e95bd4aSAnders Persson } 1613e95bd4aSAnders Persson } 1623e95bd4aSAnders Persson /* 1633e95bd4aSAnders Persson * void so_acceptq_flush(struct sonode *so) 1640f1702c5SYu Xiangning * 1650f1702c5SYu Xiangning * Removes all pending connections from a listening socket, and 1660f1702c5SYu Xiangning * frees the associated resources. 1670f1702c5SYu Xiangning * 1680f1702c5SYu Xiangning * Arguments 1690f1702c5SYu Xiangning * so - listening socket 1702320a8c1SAnders Persson * doclose - make a close downcall for each socket on the accept queue 1710f1702c5SYu Xiangning * 1720f1702c5SYu Xiangning * Return values: 1730f1702c5SYu Xiangning * None. 1740f1702c5SYu Xiangning * 1750f1702c5SYu Xiangning * Note: 1760f1702c5SYu Xiangning * The caller has to ensure that no calls to so_acceptq_enqueue() or 1770f1702c5SYu Xiangning * so_acceptq_dequeue() occur while the accept queue is being flushed. 1780f1702c5SYu Xiangning * So either the socket needs to be in a state where no operations 1790f1702c5SYu Xiangning * would come in, or so_lock needs to be obtained. 1800f1702c5SYu Xiangning */ 1810f1702c5SYu Xiangning void 1822320a8c1SAnders Persson so_acceptq_flush(struct sonode *so, boolean_t doclose) 1830f1702c5SYu Xiangning { 1843e95bd4aSAnders Persson so_acceptq_flush_impl(so, &so->so_acceptq_list, doclose); 1853e95bd4aSAnders Persson so_acceptq_flush_impl(so, &so->so_acceptq_defer, doclose); 1860f1702c5SYu Xiangning 1870f1702c5SYu Xiangning so->so_acceptq_len = 0; 1880f1702c5SYu Xiangning } 1890f1702c5SYu Xiangning 1900f1702c5SYu Xiangning int 1910f1702c5SYu Xiangning so_wait_connected_locked(struct sonode *so, boolean_t nonblock, 1920f1702c5SYu Xiangning sock_connid_t id) 1930f1702c5SYu Xiangning { 1940f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 1950f1702c5SYu Xiangning 1960f1702c5SYu Xiangning /* 1970f1702c5SYu Xiangning * The protocol has notified us that a connection attempt is being 1980f1702c5SYu Xiangning * made, so before we wait for a notification to arrive we must 1990f1702c5SYu Xiangning * clear out any errors associated with earlier connection attempts. 2000f1702c5SYu Xiangning */ 2010f1702c5SYu Xiangning if (so->so_error != 0 && SOCK_CONNID_LT(so->so_proto_connid, id)) 2020f1702c5SYu Xiangning so->so_error = 0; 2030f1702c5SYu Xiangning 2040f1702c5SYu Xiangning while (SOCK_CONNID_LT(so->so_proto_connid, id)) { 2050f1702c5SYu Xiangning if (nonblock) 2060f1702c5SYu Xiangning return (EINPROGRESS); 2070f1702c5SYu Xiangning 2080f1702c5SYu Xiangning if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING)) 2090f1702c5SYu Xiangning return (EINTR); 2100f1702c5SYu Xiangning 2110f1702c5SYu Xiangning if (cv_wait_sig_swap(&so->so_state_cv, &so->so_lock) == 0) 2120f1702c5SYu Xiangning return (EINTR); 2130f1702c5SYu Xiangning } 2140f1702c5SYu Xiangning 2150f1702c5SYu Xiangning if (so->so_error != 0) 2160f1702c5SYu Xiangning return (sogeterr(so, B_TRUE)); 2170f1702c5SYu Xiangning /* 2180f1702c5SYu Xiangning * Under normal circumstances, so_error should contain an error 2190f1702c5SYu Xiangning * in case the connect failed. However, it is possible for another 2200f1702c5SYu Xiangning * thread to come in a consume the error, so generate a sensible 2210f1702c5SYu Xiangning * error in that case. 2220f1702c5SYu Xiangning */ 2230f1702c5SYu Xiangning if ((so->so_state & SS_ISCONNECTED) == 0) 2240f1702c5SYu Xiangning return (ECONNREFUSED); 2250f1702c5SYu Xiangning 2260f1702c5SYu Xiangning return (0); 2270f1702c5SYu Xiangning } 2280f1702c5SYu Xiangning 2290f1702c5SYu Xiangning /* 2300f1702c5SYu Xiangning * int so_wait_connected(struct sonode *so, boolean_t nonblock, 2310f1702c5SYu Xiangning * sock_connid_t id) 2320f1702c5SYu Xiangning * 2330f1702c5SYu Xiangning * Wait until the socket is connected or an error has occured. 2340f1702c5SYu Xiangning * 2350f1702c5SYu Xiangning * Arguments: 2360f1702c5SYu Xiangning * so - socket 2370f1702c5SYu Xiangning * nonblock - indicate whether it's ok to sleep if the connection has 2380f1702c5SYu Xiangning * not yet been established 2390f1702c5SYu Xiangning * gen - generation number that was returned by the protocol 2400f1702c5SYu Xiangning * when the operation was started 2410f1702c5SYu Xiangning * 2420f1702c5SYu Xiangning * Returns: 2430f1702c5SYu Xiangning * 0 if the connection attempt was successful, or an error indicating why 2440f1702c5SYu Xiangning * the connection attempt failed. 2450f1702c5SYu Xiangning */ 2460f1702c5SYu Xiangning int 2470f1702c5SYu Xiangning so_wait_connected(struct sonode *so, boolean_t nonblock, sock_connid_t id) 2480f1702c5SYu Xiangning { 2490f1702c5SYu Xiangning int error; 2500f1702c5SYu Xiangning 2510f1702c5SYu Xiangning mutex_enter(&so->so_lock); 2520f1702c5SYu Xiangning error = so_wait_connected_locked(so, nonblock, id); 2530f1702c5SYu Xiangning mutex_exit(&so->so_lock); 2540f1702c5SYu Xiangning 2550f1702c5SYu Xiangning return (error); 2560f1702c5SYu Xiangning } 2570f1702c5SYu Xiangning 2580f1702c5SYu Xiangning int 2590f1702c5SYu Xiangning so_snd_wait_qnotfull_locked(struct sonode *so, boolean_t dontblock) 2600f1702c5SYu Xiangning { 2610f1702c5SYu Xiangning int error; 2620f1702c5SYu Xiangning 2630f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 2643e95bd4aSAnders Persson while (SO_SND_FLOWCTRLD(so)) { 2650f1702c5SYu Xiangning if (so->so_state & SS_CANTSENDMORE) 2660f1702c5SYu Xiangning return (EPIPE); 2670f1702c5SYu Xiangning if (dontblock) 2680f1702c5SYu Xiangning return (EWOULDBLOCK); 2690f1702c5SYu Xiangning 2700f1702c5SYu Xiangning if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING)) 2710f1702c5SYu Xiangning return (EINTR); 2720f1702c5SYu Xiangning 2730f1702c5SYu Xiangning if (so->so_sndtimeo == 0) { 2740f1702c5SYu Xiangning /* 2750f1702c5SYu Xiangning * Zero means disable timeout. 2760f1702c5SYu Xiangning */ 2770f1702c5SYu Xiangning error = cv_wait_sig(&so->so_snd_cv, &so->so_lock); 2780f1702c5SYu Xiangning } else { 279d3d50737SRafael Vanoni error = cv_reltimedwait_sig(&so->so_snd_cv, 280d3d50737SRafael Vanoni &so->so_lock, so->so_sndtimeo, TR_CLOCK_TICK); 2810f1702c5SYu Xiangning } 2820f1702c5SYu Xiangning if (error == 0) 2830f1702c5SYu Xiangning return (EINTR); 2840f1702c5SYu Xiangning else if (error == -1) 28534dfe683Sshenjian return (EAGAIN); 2860f1702c5SYu Xiangning } 2870f1702c5SYu Xiangning return (0); 2880f1702c5SYu Xiangning } 2890f1702c5SYu Xiangning 2900f1702c5SYu Xiangning /* 2910f1702c5SYu Xiangning * int so_wait_sendbuf(struct sonode *so, boolean_t dontblock) 2920f1702c5SYu Xiangning * 2930f1702c5SYu Xiangning * Wait for the transport to notify us about send buffers becoming 2940f1702c5SYu Xiangning * available. 2950f1702c5SYu Xiangning */ 2960f1702c5SYu Xiangning int 2970f1702c5SYu Xiangning so_snd_wait_qnotfull(struct sonode *so, boolean_t dontblock) 2980f1702c5SYu Xiangning { 2990f1702c5SYu Xiangning int error = 0; 3000f1702c5SYu Xiangning 3010f1702c5SYu Xiangning mutex_enter(&so->so_lock); 3020f1702c5SYu Xiangning so->so_snd_wakeup = B_TRUE; 3030f1702c5SYu Xiangning error = so_snd_wait_qnotfull_locked(so, dontblock); 3040f1702c5SYu Xiangning so->so_snd_wakeup = B_FALSE; 3050f1702c5SYu Xiangning mutex_exit(&so->so_lock); 3060f1702c5SYu Xiangning 3070f1702c5SYu Xiangning return (error); 3080f1702c5SYu Xiangning } 3090f1702c5SYu Xiangning 3100f1702c5SYu Xiangning void 3110f1702c5SYu Xiangning so_snd_qfull(struct sonode *so) 3120f1702c5SYu Xiangning { 3130f1702c5SYu Xiangning mutex_enter(&so->so_lock); 3140f1702c5SYu Xiangning so->so_snd_qfull = B_TRUE; 3150f1702c5SYu Xiangning mutex_exit(&so->so_lock); 3160f1702c5SYu Xiangning } 3170f1702c5SYu Xiangning 3180f1702c5SYu Xiangning void 3190f1702c5SYu Xiangning so_snd_qnotfull(struct sonode *so) 3200f1702c5SYu Xiangning { 3210f1702c5SYu Xiangning mutex_enter(&so->so_lock); 3220f1702c5SYu Xiangning so->so_snd_qfull = B_FALSE; 3230f1702c5SYu Xiangning /* wake up everyone waiting for buffers */ 3240f1702c5SYu Xiangning cv_broadcast(&so->so_snd_cv); 3250f1702c5SYu Xiangning mutex_exit(&so->so_lock); 3260f1702c5SYu Xiangning } 3270f1702c5SYu Xiangning 3280f1702c5SYu Xiangning /* 3290f1702c5SYu Xiangning * Change the process/process group to which SIGIO is sent. 3300f1702c5SYu Xiangning */ 3310f1702c5SYu Xiangning int 3320f1702c5SYu Xiangning socket_chgpgrp(struct sonode *so, pid_t pid) 3330f1702c5SYu Xiangning { 3340f1702c5SYu Xiangning int error; 3350f1702c5SYu Xiangning 3360f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 3370f1702c5SYu Xiangning if (pid != 0) { 3380f1702c5SYu Xiangning /* 3390f1702c5SYu Xiangning * Permissions check by sending signal 0. 3400f1702c5SYu Xiangning * Note that when kill fails it does a 3410f1702c5SYu Xiangning * set_errno causing the system call to fail. 3420f1702c5SYu Xiangning */ 3430f1702c5SYu Xiangning error = kill(pid, 0); 3440f1702c5SYu Xiangning if (error != 0) { 3450f1702c5SYu Xiangning return (error); 3460f1702c5SYu Xiangning } 3470f1702c5SYu Xiangning } 3480f1702c5SYu Xiangning so->so_pgrp = pid; 3490f1702c5SYu Xiangning return (0); 3500f1702c5SYu Xiangning } 3510f1702c5SYu Xiangning 3520f1702c5SYu Xiangning 3530f1702c5SYu Xiangning /* 3540f1702c5SYu Xiangning * Generate a SIGIO, for 'writable' events include siginfo structure, 3550f1702c5SYu Xiangning * for read events just send the signal. 3560f1702c5SYu Xiangning */ 3570f1702c5SYu Xiangning /*ARGSUSED*/ 3580f1702c5SYu Xiangning static void 3590f1702c5SYu Xiangning socket_sigproc(proc_t *proc, int event) 3600f1702c5SYu Xiangning { 3610f1702c5SYu Xiangning k_siginfo_t info; 3620f1702c5SYu Xiangning 3630f1702c5SYu Xiangning ASSERT(event & (SOCKETSIG_WRITE | SOCKETSIG_READ | SOCKETSIG_URG)); 3640f1702c5SYu Xiangning 3650f1702c5SYu Xiangning if (event & SOCKETSIG_WRITE) { 3660f1702c5SYu Xiangning info.si_signo = SIGPOLL; 3670f1702c5SYu Xiangning info.si_code = POLL_OUT; 3680f1702c5SYu Xiangning info.si_errno = 0; 3690f1702c5SYu Xiangning info.si_fd = 0; 3700f1702c5SYu Xiangning info.si_band = 0; 3710f1702c5SYu Xiangning sigaddq(proc, NULL, &info, KM_NOSLEEP); 3720f1702c5SYu Xiangning } 3730f1702c5SYu Xiangning if (event & SOCKETSIG_READ) { 3740f1702c5SYu Xiangning sigtoproc(proc, NULL, SIGPOLL); 3750f1702c5SYu Xiangning } 3760f1702c5SYu Xiangning if (event & SOCKETSIG_URG) { 3770f1702c5SYu Xiangning sigtoproc(proc, NULL, SIGURG); 3780f1702c5SYu Xiangning } 3790f1702c5SYu Xiangning } 3800f1702c5SYu Xiangning 3810f1702c5SYu Xiangning void 3820f1702c5SYu Xiangning socket_sendsig(struct sonode *so, int event) 3830f1702c5SYu Xiangning { 3840f1702c5SYu Xiangning proc_t *proc; 3850f1702c5SYu Xiangning 3860f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 3870f1702c5SYu Xiangning 3880f1702c5SYu Xiangning if (so->so_pgrp == 0 || (!(so->so_state & SS_ASYNC) && 3890f1702c5SYu Xiangning event != SOCKETSIG_URG)) { 3900f1702c5SYu Xiangning return; 3910f1702c5SYu Xiangning } 3920f1702c5SYu Xiangning 3930f1702c5SYu Xiangning dprint(3, ("sending sig %d to %d\n", event, so->so_pgrp)); 3940f1702c5SYu Xiangning 3950f1702c5SYu Xiangning if (so->so_pgrp > 0) { 3960f1702c5SYu Xiangning /* 3970f1702c5SYu Xiangning * XXX This unfortunately still generates 3980f1702c5SYu Xiangning * a signal when a fd is closed but 3990f1702c5SYu Xiangning * the proc is active. 4000f1702c5SYu Xiangning */ 4010f1702c5SYu Xiangning mutex_enter(&pidlock); 402d690b62cSDan McDonald /* 403d690b62cSDan McDonald * Even if the thread started in another zone, we're receiving 404d690b62cSDan McDonald * on behalf of this socket's zone, so find the proc using the 405d690b62cSDan McDonald * socket's zone ID. 406d690b62cSDan McDonald */ 407d690b62cSDan McDonald proc = prfind_zone(so->so_pgrp, so->so_zoneid); 4080f1702c5SYu Xiangning if (proc == NULL) { 4090f1702c5SYu Xiangning mutex_exit(&pidlock); 4100f1702c5SYu Xiangning return; 4110f1702c5SYu Xiangning } 4120f1702c5SYu Xiangning mutex_enter(&proc->p_lock); 4130f1702c5SYu Xiangning mutex_exit(&pidlock); 4140f1702c5SYu Xiangning socket_sigproc(proc, event); 4150f1702c5SYu Xiangning mutex_exit(&proc->p_lock); 4160f1702c5SYu Xiangning } else { 4170f1702c5SYu Xiangning /* 4180f1702c5SYu Xiangning * Send to process group. Hold pidlock across 4190f1702c5SYu Xiangning * calls to socket_sigproc(). 4200f1702c5SYu Xiangning */ 4210f1702c5SYu Xiangning pid_t pgrp = -so->so_pgrp; 4220f1702c5SYu Xiangning 4230f1702c5SYu Xiangning mutex_enter(&pidlock); 424d690b62cSDan McDonald /* 425d690b62cSDan McDonald * Even if the thread started in another zone, we're receiving 426d690b62cSDan McDonald * on behalf of this socket's zone, so find the pgrp using the 427d690b62cSDan McDonald * socket's zone ID. 428d690b62cSDan McDonald */ 429d690b62cSDan McDonald proc = pgfind_zone(pgrp, so->so_zoneid); 4300f1702c5SYu Xiangning while (proc != NULL) { 4310f1702c5SYu Xiangning mutex_enter(&proc->p_lock); 4320f1702c5SYu Xiangning socket_sigproc(proc, event); 4330f1702c5SYu Xiangning mutex_exit(&proc->p_lock); 4340f1702c5SYu Xiangning proc = proc->p_pglink; 4350f1702c5SYu Xiangning } 4360f1702c5SYu Xiangning mutex_exit(&pidlock); 4370f1702c5SYu Xiangning } 4380f1702c5SYu Xiangning } 4390f1702c5SYu Xiangning 4400f1702c5SYu Xiangning #define MIN(a, b) ((a) < (b) ? (a) : (b)) 4410f1702c5SYu Xiangning /* Copy userdata into a new mblk_t */ 4420f1702c5SYu Xiangning mblk_t * 4430f1702c5SYu Xiangning socopyinuio(uio_t *uiop, ssize_t iosize, size_t wroff, ssize_t maxblk, 444bd670b35SErik Nordmark size_t tail_len, int *errorp) 4450f1702c5SYu Xiangning { 4460f1702c5SYu Xiangning mblk_t *head = NULL, **tail = &head; 4470f1702c5SYu Xiangning 4480f1702c5SYu Xiangning ASSERT(iosize == INFPSZ || iosize > 0); 4490f1702c5SYu Xiangning 4500f1702c5SYu Xiangning if (iosize == INFPSZ || iosize > uiop->uio_resid) 4510f1702c5SYu Xiangning iosize = uiop->uio_resid; 4520f1702c5SYu Xiangning 4530f1702c5SYu Xiangning if (maxblk == INFPSZ) 4540f1702c5SYu Xiangning maxblk = iosize; 4550f1702c5SYu Xiangning 4560f1702c5SYu Xiangning /* Nothing to do in these cases, so we're done */ 4570f1702c5SYu Xiangning if (iosize < 0 || maxblk < 0 || (maxblk == 0 && iosize > 0)) 4580f1702c5SYu Xiangning goto done; 4590f1702c5SYu Xiangning 4600f1702c5SYu Xiangning /* 4610f1702c5SYu Xiangning * We will enter the loop below if iosize is 0; it will allocate an 4620f1702c5SYu Xiangning * empty message block and call uiomove(9F) which will just return. 4630f1702c5SYu Xiangning * We could avoid that with an extra check but would only slow 4640f1702c5SYu Xiangning * down the much more likely case where iosize is larger than 0. 4650f1702c5SYu Xiangning */ 4660f1702c5SYu Xiangning do { 4670f1702c5SYu Xiangning ssize_t blocksize; 4680f1702c5SYu Xiangning mblk_t *mp; 4690f1702c5SYu Xiangning 4700f1702c5SYu Xiangning blocksize = MIN(iosize, maxblk); 4710f1702c5SYu Xiangning ASSERT(blocksize >= 0); 472de8c4a14SErik Nordmark mp = allocb(wroff + blocksize + tail_len, BPRI_MED); 473de8c4a14SErik Nordmark if (mp == NULL) { 4740f1702c5SYu Xiangning *errorp = ENOMEM; 4750f1702c5SYu Xiangning return (head); 4760f1702c5SYu Xiangning } 4770f1702c5SYu Xiangning mp->b_rptr += wroff; 4780f1702c5SYu Xiangning mp->b_wptr = mp->b_rptr + blocksize; 4790f1702c5SYu Xiangning 4800f1702c5SYu Xiangning *tail = mp; 4810f1702c5SYu Xiangning tail = &mp->b_cont; 4820f1702c5SYu Xiangning 4830f1702c5SYu Xiangning /* uiomove(9F) either returns 0 or EFAULT */ 4840f1702c5SYu Xiangning if ((*errorp = uiomove(mp->b_rptr, (size_t)blocksize, 4850f1702c5SYu Xiangning UIO_WRITE, uiop)) != 0) { 4860f1702c5SYu Xiangning ASSERT(*errorp != ENOMEM); 4870f1702c5SYu Xiangning freemsg(head); 4880f1702c5SYu Xiangning return (NULL); 4890f1702c5SYu Xiangning } 4900f1702c5SYu Xiangning 4910f1702c5SYu Xiangning iosize -= blocksize; 4920f1702c5SYu Xiangning } while (iosize > 0); 4930f1702c5SYu Xiangning 4940f1702c5SYu Xiangning done: 4950f1702c5SYu Xiangning *errorp = 0; 4960f1702c5SYu Xiangning return (head); 4970f1702c5SYu Xiangning } 4980f1702c5SYu Xiangning 4990f1702c5SYu Xiangning mblk_t * 5000f1702c5SYu Xiangning socopyoutuio(mblk_t *mp, struct uio *uiop, ssize_t max_read, int *errorp) 5010f1702c5SYu Xiangning { 5020f1702c5SYu Xiangning int error; 5030f1702c5SYu Xiangning ptrdiff_t n; 5040f1702c5SYu Xiangning mblk_t *nmp; 5050f1702c5SYu Xiangning 5060f1702c5SYu Xiangning ASSERT(mp->b_wptr >= mp->b_rptr); 5070f1702c5SYu Xiangning 5080f1702c5SYu Xiangning /* 5090f1702c5SYu Xiangning * max_read is the offset of the oobmark and read can not go pass 5100f1702c5SYu Xiangning * the oobmark. 5110f1702c5SYu Xiangning */ 5120f1702c5SYu Xiangning if (max_read == INFPSZ || max_read > uiop->uio_resid) 5130f1702c5SYu Xiangning max_read = uiop->uio_resid; 5140f1702c5SYu Xiangning 5150f1702c5SYu Xiangning do { 5160f1702c5SYu Xiangning if ((n = MIN(max_read, MBLKL(mp))) != 0) { 5170f1702c5SYu Xiangning ASSERT(n > 0); 5180f1702c5SYu Xiangning 5190f1702c5SYu Xiangning error = uiomove(mp->b_rptr, n, UIO_READ, uiop); 5200f1702c5SYu Xiangning if (error != 0) { 5210f1702c5SYu Xiangning freemsg(mp); 5220f1702c5SYu Xiangning *errorp = error; 5230f1702c5SYu Xiangning return (NULL); 5240f1702c5SYu Xiangning } 5250f1702c5SYu Xiangning } 5260f1702c5SYu Xiangning 5270f1702c5SYu Xiangning mp->b_rptr += n; 5280f1702c5SYu Xiangning max_read -= n; 5290f1702c5SYu Xiangning while (mp != NULL && (mp->b_rptr >= mp->b_wptr)) { 5300f1702c5SYu Xiangning /* 5310f1702c5SYu Xiangning * get rid of zero length mblks 5320f1702c5SYu Xiangning */ 5330f1702c5SYu Xiangning nmp = mp; 5340f1702c5SYu Xiangning mp = mp->b_cont; 5350f1702c5SYu Xiangning freeb(nmp); 5360f1702c5SYu Xiangning } 5370f1702c5SYu Xiangning } while (mp != NULL && max_read > 0); 5380f1702c5SYu Xiangning 5390f1702c5SYu Xiangning *errorp = 0; 5400f1702c5SYu Xiangning return (mp); 5410f1702c5SYu Xiangning } 5420f1702c5SYu Xiangning 5430f1702c5SYu Xiangning static void 5440f1702c5SYu Xiangning so_prepend_msg(struct sonode *so, mblk_t *mp, mblk_t *last_tail) 5450f1702c5SYu Xiangning { 5460f1702c5SYu Xiangning ASSERT(last_tail != NULL); 5470f1702c5SYu Xiangning mp->b_next = so->so_rcv_q_head; 5480f1702c5SYu Xiangning mp->b_prev = last_tail; 5490f1702c5SYu Xiangning ASSERT(!(DB_FLAGS(mp) & DBLK_UIOA)); 5500f1702c5SYu Xiangning 5510f1702c5SYu Xiangning if (so->so_rcv_q_head == NULL) { 5520f1702c5SYu Xiangning ASSERT(so->so_rcv_q_last_head == NULL); 5530f1702c5SYu Xiangning so->so_rcv_q_last_head = mp; 5540f1702c5SYu Xiangning #ifdef DEBUG 5550f1702c5SYu Xiangning } else { 5560f1702c5SYu Xiangning ASSERT(!(DB_FLAGS(so->so_rcv_q_head) & DBLK_UIOA)); 5570f1702c5SYu Xiangning #endif 5580f1702c5SYu Xiangning } 5590f1702c5SYu Xiangning so->so_rcv_q_head = mp; 5600f1702c5SYu Xiangning 5610f1702c5SYu Xiangning #ifdef DEBUG 5620f1702c5SYu Xiangning if (so_debug_length) { 5630f1702c5SYu Xiangning mutex_enter(&so->so_lock); 5640f1702c5SYu Xiangning ASSERT(so_check_length(so)); 5650f1702c5SYu Xiangning mutex_exit(&so->so_lock); 5660f1702c5SYu Xiangning } 5670f1702c5SYu Xiangning #endif 5680f1702c5SYu Xiangning } 5690f1702c5SYu Xiangning 570e4b767e8SAnders Persson /* 571e4b767e8SAnders Persson * Move a mblk chain (mp_head, mp_last_head) to the sonode's rcv queue so it 572e4b767e8SAnders Persson * can be processed by so_dequeue_msg(). 573e4b767e8SAnders Persson */ 574e4b767e8SAnders Persson void 575e4b767e8SAnders Persson so_process_new_message(struct sonode *so, mblk_t *mp_head, mblk_t *mp_last_head) 5760f1702c5SYu Xiangning { 5773e95bd4aSAnders Persson if (so->so_filter_active > 0 && 5783e95bd4aSAnders Persson (mp_head = sof_filter_data_in_proc(so, mp_head, 5793e95bd4aSAnders Persson &mp_last_head)) == NULL) 5803e95bd4aSAnders Persson return; 5813e95bd4aSAnders Persson 5820f1702c5SYu Xiangning ASSERT(mp_head->b_prev != NULL); 5830f1702c5SYu Xiangning if (so->so_rcv_q_head == NULL) { 5840f1702c5SYu Xiangning so->so_rcv_q_head = mp_head; 5850f1702c5SYu Xiangning so->so_rcv_q_last_head = mp_last_head; 5860f1702c5SYu Xiangning ASSERT(so->so_rcv_q_last_head->b_prev != NULL); 5870f1702c5SYu Xiangning } else { 5880f1702c5SYu Xiangning boolean_t flag_equal = ((DB_FLAGS(mp_head) & DBLK_UIOA) == 5890f1702c5SYu Xiangning (DB_FLAGS(so->so_rcv_q_last_head) & DBLK_UIOA)); 5900f1702c5SYu Xiangning 5910f1702c5SYu Xiangning if (mp_head->b_next == NULL && 5920f1702c5SYu Xiangning DB_TYPE(mp_head) == M_DATA && 5930f1702c5SYu Xiangning DB_TYPE(so->so_rcv_q_last_head) == M_DATA && flag_equal) { 5940f1702c5SYu Xiangning so->so_rcv_q_last_head->b_prev->b_cont = mp_head; 5950f1702c5SYu Xiangning so->so_rcv_q_last_head->b_prev = mp_head->b_prev; 5960f1702c5SYu Xiangning mp_head->b_prev = NULL; 5970f1702c5SYu Xiangning } else if (flag_equal && (DB_FLAGS(mp_head) & DBLK_UIOA)) { 5980f1702c5SYu Xiangning /* 5990f1702c5SYu Xiangning * Append to last_head if more than one mblks, and both 6000f1702c5SYu Xiangning * mp_head and last_head are I/OAT mblks. 6010f1702c5SYu Xiangning */ 6020f1702c5SYu Xiangning ASSERT(mp_head->b_next != NULL); 6030f1702c5SYu Xiangning so->so_rcv_q_last_head->b_prev->b_cont = mp_head; 6040f1702c5SYu Xiangning so->so_rcv_q_last_head->b_prev = mp_head->b_prev; 6050f1702c5SYu Xiangning mp_head->b_prev = NULL; 6060f1702c5SYu Xiangning 6070f1702c5SYu Xiangning so->so_rcv_q_last_head->b_next = mp_head->b_next; 6080f1702c5SYu Xiangning mp_head->b_next = NULL; 6090f1702c5SYu Xiangning so->so_rcv_q_last_head = mp_last_head; 6100f1702c5SYu Xiangning } else { 6110f1702c5SYu Xiangning #ifdef DEBUG 6120f1702c5SYu Xiangning { 6130f1702c5SYu Xiangning mblk_t *tmp_mblk; 6140f1702c5SYu Xiangning tmp_mblk = mp_head; 6150f1702c5SYu Xiangning while (tmp_mblk != NULL) { 6160f1702c5SYu Xiangning ASSERT(tmp_mblk->b_prev != NULL); 6170f1702c5SYu Xiangning tmp_mblk = tmp_mblk->b_next; 6180f1702c5SYu Xiangning } 6190f1702c5SYu Xiangning } 6200f1702c5SYu Xiangning #endif 6210f1702c5SYu Xiangning so->so_rcv_q_last_head->b_next = mp_head; 6220f1702c5SYu Xiangning so->so_rcv_q_last_head = mp_last_head; 6230f1702c5SYu Xiangning } 6240f1702c5SYu Xiangning } 6250f1702c5SYu Xiangning } 6260f1702c5SYu Xiangning 6275795faa4SRao Shoaib /* 6285795faa4SRao Shoaib * Check flow control on a given sonode. Must have so_lock held, and 629a215d4ebSKacheong Poon * this function will release the hold. Return true if flow control 630a215d4ebSKacheong Poon * is cleared. 6315795faa4SRao Shoaib */ 632a215d4ebSKacheong Poon boolean_t 6335795faa4SRao Shoaib so_check_flow_control(struct sonode *so) 6345795faa4SRao Shoaib { 6355795faa4SRao Shoaib ASSERT(MUTEX_HELD(&so->so_lock)); 6365795faa4SRao Shoaib 6373e95bd4aSAnders Persson if (so->so_flowctrld && (so->so_rcv_queued < so->so_rcvlowat && 6383e95bd4aSAnders Persson !(so->so_state & SS_FIL_RCV_FLOWCTRL))) { 6395795faa4SRao Shoaib so->so_flowctrld = B_FALSE; 6405795faa4SRao Shoaib mutex_exit(&so->so_lock); 6415795faa4SRao Shoaib /* 6425795faa4SRao Shoaib * Open up flow control. SCTP does not have any downcalls, and 6435795faa4SRao Shoaib * it will clr flow ctrl in sosctp_recvmsg(). 6445795faa4SRao Shoaib */ 6455795faa4SRao Shoaib if (so->so_downcalls != NULL && 6465795faa4SRao Shoaib so->so_downcalls->sd_clr_flowctrl != NULL) { 6475795faa4SRao Shoaib (*so->so_downcalls->sd_clr_flowctrl) 6485795faa4SRao Shoaib (so->so_proto_handle); 6495795faa4SRao Shoaib } 6503e95bd4aSAnders Persson /* filters can start injecting data */ 6513e95bd4aSAnders Persson sof_sonode_notify_filters(so, SOF_EV_INJECT_DATA_IN_OK, 0); 652a215d4ebSKacheong Poon return (B_TRUE); 6535795faa4SRao Shoaib } else { 6545795faa4SRao Shoaib mutex_exit(&so->so_lock); 655a215d4ebSKacheong Poon return (B_FALSE); 6565795faa4SRao Shoaib } 6575795faa4SRao Shoaib } 6585795faa4SRao Shoaib 6590f1702c5SYu Xiangning int 6600f1702c5SYu Xiangning so_dequeue_msg(struct sonode *so, mblk_t **mctlp, struct uio *uiop, 6610f1702c5SYu Xiangning rval_t *rvalp, int flags) 6620f1702c5SYu Xiangning { 6630f1702c5SYu Xiangning mblk_t *mp, *nmp; 6640f1702c5SYu Xiangning mblk_t *savemp, *savemptail; 6650f1702c5SYu Xiangning mblk_t *new_msg_head; 6660f1702c5SYu Xiangning mblk_t *new_msg_last_head; 6670f1702c5SYu Xiangning mblk_t *last_tail; 6680f1702c5SYu Xiangning boolean_t partial_read; 6690f1702c5SYu Xiangning boolean_t reset_atmark = B_FALSE; 6700f1702c5SYu Xiangning int more = 0; 6710f1702c5SYu Xiangning int error; 6720f1702c5SYu Xiangning ssize_t oobmark; 6730f1702c5SYu Xiangning sodirect_t *sodp = so->so_direct; 6740f1702c5SYu Xiangning 6750f1702c5SYu Xiangning partial_read = B_FALSE; 6760f1702c5SYu Xiangning *mctlp = NULL; 6770f1702c5SYu Xiangning again: 6780f1702c5SYu Xiangning mutex_enter(&so->so_lock); 6790f1702c5SYu Xiangning again1: 6800f1702c5SYu Xiangning #ifdef DEBUG 6810f1702c5SYu Xiangning if (so_debug_length) { 6820f1702c5SYu Xiangning ASSERT(so_check_length(so)); 6830f1702c5SYu Xiangning } 6840f1702c5SYu Xiangning #endif 6858591a19aSAnders Persson if (so->so_state & SS_RCVATMARK) { 6868591a19aSAnders Persson /* Check whether the caller is OK to read past the mark */ 6878591a19aSAnders Persson if (flags & MSG_NOMARK) { 6888591a19aSAnders Persson mutex_exit(&so->so_lock); 6898591a19aSAnders Persson return (EWOULDBLOCK); 6908591a19aSAnders Persson } 6918591a19aSAnders Persson reset_atmark = B_TRUE; 6928591a19aSAnders Persson } 6930f1702c5SYu Xiangning /* 6940f1702c5SYu Xiangning * First move messages from the dump area to processing area 6950f1702c5SYu Xiangning */ 6960f1702c5SYu Xiangning if (sodp != NULL) { 697bbc000e5SAnders Persson if (sodp->sod_enabled) { 6980f1702c5SYu Xiangning if (sodp->sod_uioa.uioa_state & UIOA_ALLOC) { 6990f1702c5SYu Xiangning /* nothing to uioamove */ 7000f1702c5SYu Xiangning sodp = NULL; 7010f1702c5SYu Xiangning } else if (sodp->sod_uioa.uioa_state & UIOA_INIT) { 7020f1702c5SYu Xiangning sodp->sod_uioa.uioa_state &= UIOA_CLR; 7030f1702c5SYu Xiangning sodp->sod_uioa.uioa_state |= UIOA_ENABLED; 7040f1702c5SYu Xiangning /* 7050f1702c5SYu Xiangning * try to uioamove() the data that 7060f1702c5SYu Xiangning * has already queued. 7070f1702c5SYu Xiangning */ 7080f1702c5SYu Xiangning sod_uioa_so_init(so, sodp, uiop); 7090f1702c5SYu Xiangning } 7100f1702c5SYu Xiangning } else { 7110f1702c5SYu Xiangning sodp = NULL; 7120f1702c5SYu Xiangning } 7130f1702c5SYu Xiangning } 7140f1702c5SYu Xiangning new_msg_head = so->so_rcv_head; 7150f1702c5SYu Xiangning new_msg_last_head = so->so_rcv_last_head; 7160f1702c5SYu Xiangning so->so_rcv_head = NULL; 7170f1702c5SYu Xiangning so->so_rcv_last_head = NULL; 7180f1702c5SYu Xiangning oobmark = so->so_oobmark; 7190f1702c5SYu Xiangning /* 7200f1702c5SYu Xiangning * We can release the lock as there can only be one reader 7210f1702c5SYu Xiangning */ 7220f1702c5SYu Xiangning mutex_exit(&so->so_lock); 7230f1702c5SYu Xiangning 7240f1702c5SYu Xiangning if (new_msg_head != NULL) { 725e4b767e8SAnders Persson so_process_new_message(so, new_msg_head, new_msg_last_head); 7260f1702c5SYu Xiangning } 7270f1702c5SYu Xiangning savemp = savemptail = NULL; 728a215d4ebSKacheong Poon rvalp->r_vals = 0; 7290f1702c5SYu Xiangning error = 0; 7300f1702c5SYu Xiangning mp = so->so_rcv_q_head; 7310f1702c5SYu Xiangning 7320f1702c5SYu Xiangning if (mp != NULL && 7330f1702c5SYu Xiangning (so->so_rcv_timer_tid == 0 || 7340f1702c5SYu Xiangning so->so_rcv_queued >= so->so_rcv_thresh)) { 7350f1702c5SYu Xiangning partial_read = B_FALSE; 7360f1702c5SYu Xiangning 7370f1702c5SYu Xiangning if (flags & MSG_PEEK) { 7380f1702c5SYu Xiangning if ((nmp = dupmsg(mp)) == NULL && 7390f1702c5SYu Xiangning (nmp = copymsg(mp)) == NULL) { 7400f1702c5SYu Xiangning size_t size = msgsize(mp); 7410f1702c5SYu Xiangning 7420f1702c5SYu Xiangning error = strwaitbuf(size, BPRI_HI); 7430f1702c5SYu Xiangning if (error) { 7440f1702c5SYu Xiangning return (error); 7450f1702c5SYu Xiangning } 7460f1702c5SYu Xiangning goto again; 7470f1702c5SYu Xiangning } 7480f1702c5SYu Xiangning mp = nmp; 7490f1702c5SYu Xiangning } else { 7500f1702c5SYu Xiangning ASSERT(mp->b_prev != NULL); 7510f1702c5SYu Xiangning last_tail = mp->b_prev; 7520f1702c5SYu Xiangning mp->b_prev = NULL; 7530f1702c5SYu Xiangning so->so_rcv_q_head = mp->b_next; 7540f1702c5SYu Xiangning if (so->so_rcv_q_head == NULL) { 7550f1702c5SYu Xiangning so->so_rcv_q_last_head = NULL; 7560f1702c5SYu Xiangning } 7570f1702c5SYu Xiangning mp->b_next = NULL; 7580f1702c5SYu Xiangning } 7590f1702c5SYu Xiangning 7600f1702c5SYu Xiangning ASSERT(mctlp != NULL); 7610f1702c5SYu Xiangning /* 7620f1702c5SYu Xiangning * First process PROTO or PCPROTO blocks, if any. 7630f1702c5SYu Xiangning */ 7640f1702c5SYu Xiangning if (DB_TYPE(mp) != M_DATA) { 7650f1702c5SYu Xiangning *mctlp = mp; 7660f1702c5SYu Xiangning savemp = mp; 7670f1702c5SYu Xiangning savemptail = mp; 7680f1702c5SYu Xiangning ASSERT(DB_TYPE(mp) == M_PROTO || 7690f1702c5SYu Xiangning DB_TYPE(mp) == M_PCPROTO); 7700f1702c5SYu Xiangning while (mp->b_cont != NULL && 7710f1702c5SYu Xiangning DB_TYPE(mp->b_cont) != M_DATA) { 7720f1702c5SYu Xiangning ASSERT(DB_TYPE(mp->b_cont) == M_PROTO || 7730f1702c5SYu Xiangning DB_TYPE(mp->b_cont) == M_PCPROTO); 7740f1702c5SYu Xiangning mp = mp->b_cont; 7750f1702c5SYu Xiangning savemptail = mp; 7760f1702c5SYu Xiangning } 7770f1702c5SYu Xiangning mp = savemptail->b_cont; 7780f1702c5SYu Xiangning savemptail->b_cont = NULL; 7790f1702c5SYu Xiangning } 7800f1702c5SYu Xiangning 7810f1702c5SYu Xiangning ASSERT(DB_TYPE(mp) == M_DATA); 7820f1702c5SYu Xiangning /* 7830f1702c5SYu Xiangning * Now process DATA blocks, if any. Note that for sodirect 7840f1702c5SYu Xiangning * enabled socket, uio_resid can be 0. 7850f1702c5SYu Xiangning */ 7860f1702c5SYu Xiangning if (uiop->uio_resid >= 0) { 7870f1702c5SYu Xiangning ssize_t copied = 0; 7880f1702c5SYu Xiangning 7890f1702c5SYu Xiangning if (sodp != NULL && (DB_FLAGS(mp) & DBLK_UIOA)) { 790bbc000e5SAnders Persson mutex_enter(&so->so_lock); 7910f1702c5SYu Xiangning ASSERT(uiop == (uio_t *)&sodp->sod_uioa); 7920f1702c5SYu Xiangning copied = sod_uioa_mblk(so, mp); 7930f1702c5SYu Xiangning if (copied > 0) 7940f1702c5SYu Xiangning partial_read = B_TRUE; 795bbc000e5SAnders Persson mutex_exit(&so->so_lock); 7960f1702c5SYu Xiangning /* mark this mblk as processed */ 7970f1702c5SYu Xiangning mp = NULL; 7980f1702c5SYu Xiangning } else { 7990f1702c5SYu Xiangning ssize_t oldresid = uiop->uio_resid; 8000f1702c5SYu Xiangning 8010f1702c5SYu Xiangning if (MBLKL(mp) < so_mblk_pull_len) { 8020f1702c5SYu Xiangning if (pullupmsg(mp, -1) == 1) { 8030f1702c5SYu Xiangning last_tail = mp; 8040f1702c5SYu Xiangning } 8050f1702c5SYu Xiangning } 8060f1702c5SYu Xiangning /* 8070f1702c5SYu Xiangning * Can not read beyond the oobmark 8080f1702c5SYu Xiangning */ 8090f1702c5SYu Xiangning mp = socopyoutuio(mp, uiop, 8100f1702c5SYu Xiangning oobmark == 0 ? INFPSZ : oobmark, &error); 8110f1702c5SYu Xiangning if (error != 0) { 8120f1702c5SYu Xiangning freemsg(*mctlp); 8130f1702c5SYu Xiangning *mctlp = NULL; 8140f1702c5SYu Xiangning more = 0; 8150f1702c5SYu Xiangning goto done; 8160f1702c5SYu Xiangning } 8170f1702c5SYu Xiangning ASSERT(oldresid >= uiop->uio_resid); 8180f1702c5SYu Xiangning copied = oldresid - uiop->uio_resid; 8190f1702c5SYu Xiangning if (oldresid > uiop->uio_resid) 8200f1702c5SYu Xiangning partial_read = B_TRUE; 8210f1702c5SYu Xiangning } 8220f1702c5SYu Xiangning ASSERT(copied >= 0); 8230f1702c5SYu Xiangning if (copied > 0 && !(flags & MSG_PEEK)) { 8240f1702c5SYu Xiangning mutex_enter(&so->so_lock); 8250f1702c5SYu Xiangning so->so_rcv_queued -= copied; 8260f1702c5SYu Xiangning ASSERT(so->so_oobmark >= 0); 8270f1702c5SYu Xiangning if (so->so_oobmark > 0) { 8280f1702c5SYu Xiangning so->so_oobmark -= copied; 8290f1702c5SYu Xiangning ASSERT(so->so_oobmark >= 0); 8300f1702c5SYu Xiangning if (so->so_oobmark == 0) { 8310f1702c5SYu Xiangning ASSERT(so->so_state & 8320f1702c5SYu Xiangning SS_OOBPEND); 8330f1702c5SYu Xiangning so->so_oobmark = 0; 8340f1702c5SYu Xiangning so->so_state |= SS_RCVATMARK; 8350f1702c5SYu Xiangning } 8360f1702c5SYu Xiangning } 8370f1702c5SYu Xiangning /* 8385795faa4SRao Shoaib * so_check_flow_control() will drop 8395795faa4SRao Shoaib * so->so_lock. 8400f1702c5SYu Xiangning */ 841a215d4ebSKacheong Poon rvalp->r_val2 = so_check_flow_control(so); 8420f1702c5SYu Xiangning } 8430f1702c5SYu Xiangning } 8440f1702c5SYu Xiangning if (mp != NULL) { /* more data blocks in msg */ 8450f1702c5SYu Xiangning more |= MOREDATA; 8460f1702c5SYu Xiangning if ((flags & (MSG_PEEK|MSG_TRUNC))) { 8475795faa4SRao Shoaib if (flags & MSG_PEEK) { 8480f1702c5SYu Xiangning freemsg(mp); 8495795faa4SRao Shoaib } else { 8505795faa4SRao Shoaib unsigned int msize = msgdsize(mp); 8515795faa4SRao Shoaib 8525795faa4SRao Shoaib freemsg(mp); 8535795faa4SRao Shoaib mutex_enter(&so->so_lock); 8545795faa4SRao Shoaib so->so_rcv_queued -= msize; 8555795faa4SRao Shoaib /* 8565795faa4SRao Shoaib * so_check_flow_control() will drop 8575795faa4SRao Shoaib * so->so_lock. 8585795faa4SRao Shoaib */ 859a215d4ebSKacheong Poon rvalp->r_val2 = 8605795faa4SRao Shoaib so_check_flow_control(so); 8615795faa4SRao Shoaib } 8620f1702c5SYu Xiangning } else if (partial_read && !somsghasdata(mp)) { 8630f1702c5SYu Xiangning /* 8640f1702c5SYu Xiangning * Avoid queuing a zero-length tail part of 8650f1702c5SYu Xiangning * a message. partial_read == 1 indicates that 8660f1702c5SYu Xiangning * we read some of the message. 8670f1702c5SYu Xiangning */ 8680f1702c5SYu Xiangning freemsg(mp); 8690f1702c5SYu Xiangning more &= ~MOREDATA; 8700f1702c5SYu Xiangning } else { 8710f1702c5SYu Xiangning if (savemp != NULL && 8720f1702c5SYu Xiangning (flags & MSG_DUPCTRL)) { 8730f1702c5SYu Xiangning mblk_t *nmp; 8740f1702c5SYu Xiangning /* 8750f1702c5SYu Xiangning * There should only be non data mblks 8760f1702c5SYu Xiangning */ 8770f1702c5SYu Xiangning ASSERT(DB_TYPE(savemp) != M_DATA && 8780f1702c5SYu Xiangning DB_TYPE(savemptail) != M_DATA); 8790f1702c5SYu Xiangning try_again: 8800f1702c5SYu Xiangning if ((nmp = dupmsg(savemp)) == NULL && 8810f1702c5SYu Xiangning (nmp = copymsg(savemp)) == NULL) { 8820f1702c5SYu Xiangning 8830f1702c5SYu Xiangning size_t size = msgsize(savemp); 8840f1702c5SYu Xiangning 8850f1702c5SYu Xiangning error = strwaitbuf(size, 8860f1702c5SYu Xiangning BPRI_HI); 8870f1702c5SYu Xiangning if (error != 0) { 8880f1702c5SYu Xiangning /* 8890f1702c5SYu Xiangning * In case we 8900f1702c5SYu Xiangning * cannot copy 8910f1702c5SYu Xiangning * control data 8920f1702c5SYu Xiangning * free the remaining 8930f1702c5SYu Xiangning * data. 8940f1702c5SYu Xiangning */ 8950f1702c5SYu Xiangning freemsg(mp); 8960f1702c5SYu Xiangning goto done; 8970f1702c5SYu Xiangning } 8980f1702c5SYu Xiangning goto try_again; 8990f1702c5SYu Xiangning } 9000f1702c5SYu Xiangning 9010f1702c5SYu Xiangning ASSERT(nmp != NULL); 9020f1702c5SYu Xiangning ASSERT(DB_TYPE(nmp) != M_DATA); 9030f1702c5SYu Xiangning savemptail->b_cont = mp; 9040f1702c5SYu Xiangning *mctlp = nmp; 9050f1702c5SYu Xiangning mp = savemp; 9060f1702c5SYu Xiangning } 9070f1702c5SYu Xiangning /* 9080f1702c5SYu Xiangning * putback mp 9090f1702c5SYu Xiangning */ 9100f1702c5SYu Xiangning so_prepend_msg(so, mp, last_tail); 9110f1702c5SYu Xiangning } 9120f1702c5SYu Xiangning } 9130f1702c5SYu Xiangning 9140f1702c5SYu Xiangning /* fast check so_rcv_head if there is more data */ 9150f1702c5SYu Xiangning if (partial_read && !(so->so_state & SS_RCVATMARK) && 9160f1702c5SYu Xiangning *mctlp == NULL && uiop->uio_resid > 0 && 9170f1702c5SYu Xiangning !(flags & MSG_PEEK) && so->so_rcv_head != NULL) { 9180f1702c5SYu Xiangning goto again; 9190f1702c5SYu Xiangning } 9200f1702c5SYu Xiangning } else if (!partial_read) { 9210f1702c5SYu Xiangning mutex_enter(&so->so_lock); 9220f1702c5SYu Xiangning if (so->so_error != 0) { 9230f1702c5SYu Xiangning error = sogeterr(so, !(flags & MSG_PEEK)); 9240f1702c5SYu Xiangning mutex_exit(&so->so_lock); 9250f1702c5SYu Xiangning return (error); 9260f1702c5SYu Xiangning } 927*dd572c32SArne Jansen 928*dd572c32SArne Jansen /* See if new data has arrived in the meantime */ 929*dd572c32SArne Jansen if (so->so_rcv_head != NULL) 930*dd572c32SArne Jansen goto again1; 931*dd572c32SArne Jansen 9320f1702c5SYu Xiangning /* 9330f1702c5SYu Xiangning * No pending data. Return right away for nonblocking 9340f1702c5SYu Xiangning * socket, otherwise sleep waiting for data. 9350f1702c5SYu Xiangning */ 9362caa659dSMike Cheng if (!(so->so_state & SS_CANTRCVMORE) && uiop->uio_resid > 0) { 9370f1702c5SYu Xiangning if ((uiop->uio_fmode & (FNDELAY|FNONBLOCK)) || 9380f1702c5SYu Xiangning (flags & MSG_DONTWAIT)) { 9390f1702c5SYu Xiangning error = EWOULDBLOCK; 9400f1702c5SYu Xiangning } else { 9410f1702c5SYu Xiangning if (so->so_state & (SS_CLOSING | 9420f1702c5SYu Xiangning SS_FALLBACK_PENDING)) { 9430f1702c5SYu Xiangning mutex_exit(&so->so_lock); 9440f1702c5SYu Xiangning error = EINTR; 9450f1702c5SYu Xiangning goto done; 9460f1702c5SYu Xiangning } 9470f1702c5SYu Xiangning 9480f1702c5SYu Xiangning so->so_rcv_wakeup = B_TRUE; 9490f1702c5SYu Xiangning so->so_rcv_wanted = uiop->uio_resid; 9500f1702c5SYu Xiangning if (so->so_rcvtimeo == 0) { 9510f1702c5SYu Xiangning /* 9520f1702c5SYu Xiangning * Zero means disable timeout. 9530f1702c5SYu Xiangning */ 9540f1702c5SYu Xiangning error = cv_wait_sig(&so->so_rcv_cv, 9550f1702c5SYu Xiangning &so->so_lock); 9560f1702c5SYu Xiangning } else { 957d3d50737SRafael Vanoni error = cv_reltimedwait_sig( 958d3d50737SRafael Vanoni &so->so_rcv_cv, &so->so_lock, 959d3d50737SRafael Vanoni so->so_rcvtimeo, TR_CLOCK_TICK); 9600f1702c5SYu Xiangning } 9610f1702c5SYu Xiangning so->so_rcv_wakeup = B_FALSE; 9620f1702c5SYu Xiangning so->so_rcv_wanted = 0; 9630f1702c5SYu Xiangning 9640f1702c5SYu Xiangning if (error == 0) { 9650f1702c5SYu Xiangning error = EINTR; 9660f1702c5SYu Xiangning } else if (error == -1) { 96734dfe683Sshenjian error = EAGAIN; 9680f1702c5SYu Xiangning } else { 9690f1702c5SYu Xiangning goto again1; 9700f1702c5SYu Xiangning } 9710f1702c5SYu Xiangning } 9720f1702c5SYu Xiangning } 9730f1702c5SYu Xiangning mutex_exit(&so->so_lock); 9740f1702c5SYu Xiangning } 9750f1702c5SYu Xiangning if (reset_atmark && partial_read && !(flags & MSG_PEEK)) { 9760f1702c5SYu Xiangning /* 9770f1702c5SYu Xiangning * We are passed the mark, update state 9780f1702c5SYu Xiangning * 4.3BSD and 4.4BSD clears the mark when peeking across it. 9790f1702c5SYu Xiangning * The draft Posix socket spec states that the mark should 9800f1702c5SYu Xiangning * not be cleared when peeking. We follow the latter. 9810f1702c5SYu Xiangning */ 9820f1702c5SYu Xiangning mutex_enter(&so->so_lock); 9830f1702c5SYu Xiangning ASSERT(so_verify_oobstate(so)); 9840f1702c5SYu Xiangning so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 9850f1702c5SYu Xiangning freemsg(so->so_oobmsg); 9860f1702c5SYu Xiangning so->so_oobmsg = NULL; 9870f1702c5SYu Xiangning ASSERT(so_verify_oobstate(so)); 9880f1702c5SYu Xiangning mutex_exit(&so->so_lock); 9890f1702c5SYu Xiangning } 9900f1702c5SYu Xiangning ASSERT(so->so_rcv_wakeup == B_FALSE); 9910f1702c5SYu Xiangning done: 9920f1702c5SYu Xiangning if (sodp != NULL) { 993bbc000e5SAnders Persson mutex_enter(&so->so_lock); 994bbc000e5SAnders Persson if (sodp->sod_enabled && 9950f1702c5SYu Xiangning (sodp->sod_uioa.uioa_state & UIOA_ENABLED)) { 9960f1702c5SYu Xiangning SOD_UIOAFINI(sodp); 9970f1702c5SYu Xiangning if (sodp->sod_uioa.uioa_mbytes > 0) { 9980f1702c5SYu Xiangning ASSERT(so->so_rcv_q_head != NULL || 9990f1702c5SYu Xiangning so->so_rcv_head != NULL); 10000f1702c5SYu Xiangning so->so_rcv_queued -= sod_uioa_mblk(so, NULL); 10010f1702c5SYu Xiangning if (error == EWOULDBLOCK) 10020f1702c5SYu Xiangning error = 0; 10030f1702c5SYu Xiangning } 10040f1702c5SYu Xiangning } 1005bbc000e5SAnders Persson mutex_exit(&so->so_lock); 10060f1702c5SYu Xiangning } 10070f1702c5SYu Xiangning #ifdef DEBUG 10080f1702c5SYu Xiangning if (so_debug_length) { 10090f1702c5SYu Xiangning mutex_enter(&so->so_lock); 10100f1702c5SYu Xiangning ASSERT(so_check_length(so)); 10110f1702c5SYu Xiangning mutex_exit(&so->so_lock); 10120f1702c5SYu Xiangning } 10130f1702c5SYu Xiangning #endif 10140f1702c5SYu Xiangning rvalp->r_val1 = more; 10155795faa4SRao Shoaib ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 10160f1702c5SYu Xiangning return (error); 10170f1702c5SYu Xiangning } 10180f1702c5SYu Xiangning 1019e4b767e8SAnders Persson /* 1020e4b767e8SAnders Persson * Enqueue data from the protocol on the socket's rcv queue. 1021e4b767e8SAnders Persson * 1022e4b767e8SAnders Persson * We try to hook new M_DATA mblks onto an existing chain, however, 1023e4b767e8SAnders Persson * that cannot be done if the existing chain has already been 1024e4b767e8SAnders Persson * processed by I/OAT. Non-M_DATA mblks are just linked together via 1025e4b767e8SAnders Persson * b_next. In all cases the b_prev of the enqueued mblk is set to 1026e4b767e8SAnders Persson * point to the last mblk in its b_cont chain. 1027e4b767e8SAnders Persson */ 10280f1702c5SYu Xiangning void 10290f1702c5SYu Xiangning so_enqueue_msg(struct sonode *so, mblk_t *mp, size_t msg_size) 10300f1702c5SYu Xiangning { 10310f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 10320f1702c5SYu Xiangning 10330f1702c5SYu Xiangning #ifdef DEBUG 10340f1702c5SYu Xiangning if (so_debug_length) { 10350f1702c5SYu Xiangning ASSERT(so_check_length(so)); 10360f1702c5SYu Xiangning } 10370f1702c5SYu Xiangning #endif 10380f1702c5SYu Xiangning so->so_rcv_queued += msg_size; 10390f1702c5SYu Xiangning 10400f1702c5SYu Xiangning if (so->so_rcv_head == NULL) { 10410f1702c5SYu Xiangning ASSERT(so->so_rcv_last_head == NULL); 10420f1702c5SYu Xiangning so->so_rcv_head = mp; 10430f1702c5SYu Xiangning so->so_rcv_last_head = mp; 10440f1702c5SYu Xiangning } else if ((DB_TYPE(mp) == M_DATA && 10450f1702c5SYu Xiangning DB_TYPE(so->so_rcv_last_head) == M_DATA) && 10460f1702c5SYu Xiangning ((DB_FLAGS(mp) & DBLK_UIOA) == 10470f1702c5SYu Xiangning (DB_FLAGS(so->so_rcv_last_head) & DBLK_UIOA))) { 10480f1702c5SYu Xiangning /* Added to the end */ 10490f1702c5SYu Xiangning ASSERT(so->so_rcv_last_head != NULL); 10500f1702c5SYu Xiangning ASSERT(so->so_rcv_last_head->b_prev != NULL); 10510f1702c5SYu Xiangning so->so_rcv_last_head->b_prev->b_cont = mp; 10520f1702c5SYu Xiangning } else { 10530f1702c5SYu Xiangning /* Start a new end */ 10540f1702c5SYu Xiangning so->so_rcv_last_head->b_next = mp; 10550f1702c5SYu Xiangning so->so_rcv_last_head = mp; 10560f1702c5SYu Xiangning } 10570f1702c5SYu Xiangning while (mp->b_cont != NULL) 10580f1702c5SYu Xiangning mp = mp->b_cont; 10590f1702c5SYu Xiangning 10600f1702c5SYu Xiangning so->so_rcv_last_head->b_prev = mp; 10610f1702c5SYu Xiangning #ifdef DEBUG 10620f1702c5SYu Xiangning if (so_debug_length) { 10630f1702c5SYu Xiangning ASSERT(so_check_length(so)); 10640f1702c5SYu Xiangning } 10650f1702c5SYu Xiangning #endif 10660f1702c5SYu Xiangning } 10670f1702c5SYu Xiangning 10680f1702c5SYu Xiangning /* 10690f1702c5SYu Xiangning * Return B_TRUE if there is data in the message, B_FALSE otherwise. 10700f1702c5SYu Xiangning */ 10710f1702c5SYu Xiangning boolean_t 10720f1702c5SYu Xiangning somsghasdata(mblk_t *mp) 10730f1702c5SYu Xiangning { 10740f1702c5SYu Xiangning for (; mp; mp = mp->b_cont) 10750f1702c5SYu Xiangning if (mp->b_datap->db_type == M_DATA) { 10760f1702c5SYu Xiangning ASSERT(mp->b_wptr >= mp->b_rptr); 10770f1702c5SYu Xiangning if (mp->b_wptr > mp->b_rptr) 10780f1702c5SYu Xiangning return (B_TRUE); 10790f1702c5SYu Xiangning } 10800f1702c5SYu Xiangning return (B_FALSE); 10810f1702c5SYu Xiangning } 10820f1702c5SYu Xiangning 10830f1702c5SYu Xiangning /* 10840f1702c5SYu Xiangning * Flush the read side of sockfs. 10850f1702c5SYu Xiangning * 10860f1702c5SYu Xiangning * The caller must be sure that a reader is not already active when the 10870f1702c5SYu Xiangning * buffer is being flushed. 10880f1702c5SYu Xiangning */ 10890f1702c5SYu Xiangning void 10900f1702c5SYu Xiangning so_rcv_flush(struct sonode *so) 10910f1702c5SYu Xiangning { 10920f1702c5SYu Xiangning mblk_t *mp; 10930f1702c5SYu Xiangning 10940f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 10950f1702c5SYu Xiangning 10960f1702c5SYu Xiangning if (so->so_oobmsg != NULL) { 10970f1702c5SYu Xiangning freemsg(so->so_oobmsg); 10980f1702c5SYu Xiangning so->so_oobmsg = NULL; 10990f1702c5SYu Xiangning so->so_oobmark = 0; 11000f1702c5SYu Xiangning so->so_state &= 11010f1702c5SYu Xiangning ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|SS_RCVATMARK); 11020f1702c5SYu Xiangning } 11030f1702c5SYu Xiangning 11040f1702c5SYu Xiangning /* 11053e95bd4aSAnders Persson * Free messages sitting in the recv queues 11060f1702c5SYu Xiangning */ 11070f1702c5SYu Xiangning while (so->so_rcv_q_head != NULL) { 11080f1702c5SYu Xiangning mp = so->so_rcv_q_head; 11090f1702c5SYu Xiangning so->so_rcv_q_head = mp->b_next; 11100f1702c5SYu Xiangning mp->b_next = mp->b_prev = NULL; 11110f1702c5SYu Xiangning freemsg(mp); 11120f1702c5SYu Xiangning } 11130f1702c5SYu Xiangning while (so->so_rcv_head != NULL) { 11140f1702c5SYu Xiangning mp = so->so_rcv_head; 11150f1702c5SYu Xiangning so->so_rcv_head = mp->b_next; 11160f1702c5SYu Xiangning mp->b_next = mp->b_prev = NULL; 11170f1702c5SYu Xiangning freemsg(mp); 11180f1702c5SYu Xiangning } 11190f1702c5SYu Xiangning so->so_rcv_queued = 0; 11200f1702c5SYu Xiangning so->so_rcv_q_head = NULL; 11210f1702c5SYu Xiangning so->so_rcv_q_last_head = NULL; 11220f1702c5SYu Xiangning so->so_rcv_head = NULL; 11230f1702c5SYu Xiangning so->so_rcv_last_head = NULL; 11240f1702c5SYu Xiangning } 11250f1702c5SYu Xiangning 11260f1702c5SYu Xiangning /* 11270f1702c5SYu Xiangning * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 11280f1702c5SYu Xiangning */ 11290f1702c5SYu Xiangning int 11300f1702c5SYu Xiangning sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags, 11310f1702c5SYu Xiangning boolean_t oob_inline) 11320f1702c5SYu Xiangning { 11330f1702c5SYu Xiangning mblk_t *mp, *nmp; 11340f1702c5SYu Xiangning int error; 11350f1702c5SYu Xiangning 11360f1702c5SYu Xiangning dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", (void *)so, (void *)msg, 11370f1702c5SYu Xiangning flags)); 11380f1702c5SYu Xiangning 11390f1702c5SYu Xiangning if (msg != NULL) { 11400f1702c5SYu Xiangning /* 11410f1702c5SYu Xiangning * There is never any oob data with addresses or control since 11420f1702c5SYu Xiangning * the T_EXDATA_IND does not carry any options. 11430f1702c5SYu Xiangning */ 11440f1702c5SYu Xiangning msg->msg_controllen = 0; 11450f1702c5SYu Xiangning msg->msg_namelen = 0; 11460f1702c5SYu Xiangning msg->msg_flags = 0; 11470f1702c5SYu Xiangning } 11480f1702c5SYu Xiangning 11490f1702c5SYu Xiangning mutex_enter(&so->so_lock); 11500f1702c5SYu Xiangning ASSERT(so_verify_oobstate(so)); 11510f1702c5SYu Xiangning if (oob_inline || 11520f1702c5SYu Xiangning (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 11530f1702c5SYu Xiangning dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 11540f1702c5SYu Xiangning mutex_exit(&so->so_lock); 11550f1702c5SYu Xiangning return (EINVAL); 11560f1702c5SYu Xiangning } 11570f1702c5SYu Xiangning if (!(so->so_state & SS_HAVEOOBDATA)) { 11580f1702c5SYu Xiangning dprintso(so, 1, ("sorecvoob: no data yet\n")); 11590f1702c5SYu Xiangning mutex_exit(&so->so_lock); 11600f1702c5SYu Xiangning return (EWOULDBLOCK); 11610f1702c5SYu Xiangning } 11620f1702c5SYu Xiangning ASSERT(so->so_oobmsg != NULL); 11630f1702c5SYu Xiangning mp = so->so_oobmsg; 11640f1702c5SYu Xiangning if (flags & MSG_PEEK) { 11650f1702c5SYu Xiangning /* 11660f1702c5SYu Xiangning * Since recv* can not return ENOBUFS we can not use dupmsg. 11670f1702c5SYu Xiangning * Instead we revert to the consolidation private 11680f1702c5SYu Xiangning * allocb_wait plus bcopy. 11690f1702c5SYu Xiangning */ 11700f1702c5SYu Xiangning mblk_t *mp1; 11710f1702c5SYu Xiangning 11720f1702c5SYu Xiangning mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 11730f1702c5SYu Xiangning ASSERT(mp1); 11740f1702c5SYu Xiangning 11750f1702c5SYu Xiangning while (mp != NULL) { 11760f1702c5SYu Xiangning ssize_t size; 11770f1702c5SYu Xiangning 11780f1702c5SYu Xiangning size = MBLKL(mp); 11790f1702c5SYu Xiangning bcopy(mp->b_rptr, mp1->b_wptr, size); 11800f1702c5SYu Xiangning mp1->b_wptr += size; 11810f1702c5SYu Xiangning ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 11820f1702c5SYu Xiangning mp = mp->b_cont; 11830f1702c5SYu Xiangning } 11840f1702c5SYu Xiangning mp = mp1; 11850f1702c5SYu Xiangning } else { 11860f1702c5SYu Xiangning /* 11870f1702c5SYu Xiangning * Update the state indicating that the data has been consumed. 11880f1702c5SYu Xiangning * Keep SS_OOBPEND set until data is consumed past the mark. 11890f1702c5SYu Xiangning */ 11900f1702c5SYu Xiangning so->so_oobmsg = NULL; 11910f1702c5SYu Xiangning so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 11920f1702c5SYu Xiangning } 11930f1702c5SYu Xiangning ASSERT(so_verify_oobstate(so)); 11940f1702c5SYu Xiangning mutex_exit(&so->so_lock); 11950f1702c5SYu Xiangning 11960f1702c5SYu Xiangning error = 0; 11970f1702c5SYu Xiangning nmp = mp; 11980f1702c5SYu Xiangning while (nmp != NULL && uiop->uio_resid > 0) { 11990f1702c5SYu Xiangning ssize_t n = MBLKL(nmp); 12000f1702c5SYu Xiangning 12010f1702c5SYu Xiangning n = MIN(n, uiop->uio_resid); 12020f1702c5SYu Xiangning if (n > 0) 12030f1702c5SYu Xiangning error = uiomove(nmp->b_rptr, n, 12040f1702c5SYu Xiangning UIO_READ, uiop); 12050f1702c5SYu Xiangning if (error) 12060f1702c5SYu Xiangning break; 12070f1702c5SYu Xiangning nmp = nmp->b_cont; 12080f1702c5SYu Xiangning } 12090f1702c5SYu Xiangning ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 12100f1702c5SYu Xiangning freemsg(mp); 12110f1702c5SYu Xiangning return (error); 12120f1702c5SYu Xiangning } 12130f1702c5SYu Xiangning 12140f1702c5SYu Xiangning /* 12150f1702c5SYu Xiangning * Allocate and initializ sonode 12160f1702c5SYu Xiangning */ 12170f1702c5SYu Xiangning /* ARGSUSED */ 12180f1702c5SYu Xiangning struct sonode * 12190f1702c5SYu Xiangning socket_sonode_create(struct sockparams *sp, int family, int type, 12200f1702c5SYu Xiangning int protocol, int version, int sflags, int *errorp, struct cred *cr) 12210f1702c5SYu Xiangning { 12220f1702c5SYu Xiangning sonode_t *so; 12230f1702c5SYu Xiangning int kmflags; 12240f1702c5SYu Xiangning 12250f1702c5SYu Xiangning /* 12260f1702c5SYu Xiangning * Choose the right set of sonodeops based on the upcall and 12270f1702c5SYu Xiangning * down call version that the protocol has provided 12280f1702c5SYu Xiangning */ 12290f1702c5SYu Xiangning if (SOCK_UC_VERSION != sp->sp_smod_info->smod_uc_version || 12300f1702c5SYu Xiangning SOCK_DC_VERSION != sp->sp_smod_info->smod_dc_version) { 12310f1702c5SYu Xiangning /* 12320f1702c5SYu Xiangning * mismatch 12330f1702c5SYu Xiangning */ 12340f1702c5SYu Xiangning #ifdef DEBUG 12350f1702c5SYu Xiangning cmn_err(CE_CONT, "protocol and socket module version mismatch"); 12360f1702c5SYu Xiangning #endif 12370f1702c5SYu Xiangning *errorp = EINVAL; 12380f1702c5SYu Xiangning return (NULL); 12390f1702c5SYu Xiangning } 12400f1702c5SYu Xiangning 12410f1702c5SYu Xiangning kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP; 12420f1702c5SYu Xiangning 12430f1702c5SYu Xiangning so = kmem_cache_alloc(socket_cache, kmflags); 12440f1702c5SYu Xiangning if (so == NULL) { 12450f1702c5SYu Xiangning *errorp = ENOMEM; 12460f1702c5SYu Xiangning return (NULL); 12470f1702c5SYu Xiangning } 12480f1702c5SYu Xiangning 12490f1702c5SYu Xiangning sonode_init(so, sp, family, type, protocol, &so_sonodeops); 12500f1702c5SYu Xiangning 12510f1702c5SYu Xiangning if (version == SOV_DEFAULT) 12520f1702c5SYu Xiangning version = so_default_version; 12530f1702c5SYu Xiangning 12540f1702c5SYu Xiangning so->so_version = (short)version; 12550f1702c5SYu Xiangning 12560f1702c5SYu Xiangning /* 12570f1702c5SYu Xiangning * set the default values to be INFPSZ 12580f1702c5SYu Xiangning * if a protocol desires it can change the value later 12590f1702c5SYu Xiangning */ 12600f1702c5SYu Xiangning so->so_proto_props.sopp_rxhiwat = SOCKET_RECVHIWATER; 12610f1702c5SYu Xiangning so->so_proto_props.sopp_rxlowat = SOCKET_RECVLOWATER; 12620f1702c5SYu Xiangning so->so_proto_props.sopp_maxpsz = INFPSZ; 12630f1702c5SYu Xiangning so->so_proto_props.sopp_maxblk = INFPSZ; 12640f1702c5SYu Xiangning 12650f1702c5SYu Xiangning return (so); 12660f1702c5SYu Xiangning } 12670f1702c5SYu Xiangning 12680f1702c5SYu Xiangning int 12690f1702c5SYu Xiangning socket_init_common(struct sonode *so, struct sonode *pso, int flags, cred_t *cr) 12700f1702c5SYu Xiangning { 12710f1702c5SYu Xiangning int error = 0; 12720f1702c5SYu Xiangning 12730f1702c5SYu Xiangning if (pso != NULL) { 12740f1702c5SYu Xiangning /* 12750f1702c5SYu Xiangning * We have a passive open, so inherit basic state from 12760f1702c5SYu Xiangning * the parent (listener). 12770f1702c5SYu Xiangning * 12780f1702c5SYu Xiangning * No need to grab the new sonode's lock, since there is no 12790f1702c5SYu Xiangning * one that can have a reference to it. 12800f1702c5SYu Xiangning */ 12810f1702c5SYu Xiangning mutex_enter(&pso->so_lock); 12820f1702c5SYu Xiangning 12830f1702c5SYu Xiangning so->so_state |= SS_ISCONNECTED | (pso->so_state & SS_ASYNC); 12840f1702c5SYu Xiangning so->so_pgrp = pso->so_pgrp; 12850f1702c5SYu Xiangning so->so_rcvtimeo = pso->so_rcvtimeo; 12860f1702c5SYu Xiangning so->so_sndtimeo = pso->so_sndtimeo; 1287a5adac4dSYu Xiangning so->so_xpg_rcvbuf = pso->so_xpg_rcvbuf; 12880f1702c5SYu Xiangning /* 12890f1702c5SYu Xiangning * Make note of the socket level options. TCP and IP level 12900f1702c5SYu Xiangning * options are already inherited. We could do all this after 12910f1702c5SYu Xiangning * accept is successful but doing it here simplifies code and 12920f1702c5SYu Xiangning * no harm done for error case. 12930f1702c5SYu Xiangning */ 12940f1702c5SYu Xiangning so->so_options = pso->so_options & (SO_DEBUG|SO_REUSEADDR| 12950f1702c5SYu Xiangning SO_KEEPALIVE|SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 12960f1702c5SYu Xiangning SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 12970f1702c5SYu Xiangning so->so_proto_props = pso->so_proto_props; 12980f1702c5SYu Xiangning so->so_mode = pso->so_mode; 1299f0267584Sanders so->so_pollev = pso->so_pollev & SO_POLLEV_ALWAYS; 13000f1702c5SYu Xiangning 13010f1702c5SYu Xiangning mutex_exit(&pso->so_lock); 13023e95bd4aSAnders Persson 13033e95bd4aSAnders Persson /* 13043e95bd4aSAnders Persson * If the parent has any filters, try to inherit them. 13053e95bd4aSAnders Persson */ 13063e95bd4aSAnders Persson if (pso->so_filter_active > 0 && 13073e95bd4aSAnders Persson (error = sof_sonode_inherit_filters(so, pso)) != 0) 13083e95bd4aSAnders Persson return (error); 13093e95bd4aSAnders Persson 13100f1702c5SYu Xiangning } else { 13110f1702c5SYu Xiangning struct sockparams *sp = so->so_sockparams; 13120f1702c5SYu Xiangning sock_upcalls_t *upcalls_to_use; 13130f1702c5SYu Xiangning 13140f1702c5SYu Xiangning /* 13153e95bd4aSAnders Persson * Attach automatic filters, if there are any. 13163e95bd4aSAnders Persson */ 13173e95bd4aSAnders Persson if (!list_is_empty(&sp->sp_auto_filters) && 13183e95bd4aSAnders Persson (error = sof_sonode_autoattach_filters(so, cr)) != 0) 13193e95bd4aSAnders Persson return (error); 13203e95bd4aSAnders Persson 13213e95bd4aSAnders Persson /* OK to attach filters */ 13223e95bd4aSAnders Persson so->so_state |= SS_FILOP_OK; 13233e95bd4aSAnders Persson 13243e95bd4aSAnders Persson /* 13250f1702c5SYu Xiangning * Based on the version number select the right upcalls to 13260f1702c5SYu Xiangning * pass down. Currently we only have one version so choose 13270f1702c5SYu Xiangning * default 13280f1702c5SYu Xiangning */ 13290f1702c5SYu Xiangning upcalls_to_use = &so_upcalls; 13300f1702c5SYu Xiangning 13310f1702c5SYu Xiangning /* active open, so create a lower handle */ 13320f1702c5SYu Xiangning so->so_proto_handle = 13330f1702c5SYu Xiangning sp->sp_smod_info->smod_proto_create_func(so->so_family, 13340f1702c5SYu Xiangning so->so_type, so->so_protocol, &so->so_downcalls, 13350f1702c5SYu Xiangning &so->so_mode, &error, flags, cr); 13360f1702c5SYu Xiangning 13370f1702c5SYu Xiangning if (so->so_proto_handle == NULL) { 13380f1702c5SYu Xiangning ASSERT(error != 0); 13390f1702c5SYu Xiangning /* 13400f1702c5SYu Xiangning * To be safe; if a lower handle cannot be created, and 13410f1702c5SYu Xiangning * the proto does not give a reason why, assume there 13420f1702c5SYu Xiangning * was a lack of memory. 13430f1702c5SYu Xiangning */ 13440f1702c5SYu Xiangning return ((error == 0) ? ENOMEM : error); 13450f1702c5SYu Xiangning } 13460f1702c5SYu Xiangning ASSERT(so->so_downcalls != NULL); 13470f1702c5SYu Xiangning ASSERT(so->so_downcalls->sd_send != NULL || 13480f1702c5SYu Xiangning so->so_downcalls->sd_send_uio != NULL); 13490f1702c5SYu Xiangning if (so->so_downcalls->sd_recv_uio != NULL) { 13500f1702c5SYu Xiangning ASSERT(so->so_downcalls->sd_poll != NULL); 13510f1702c5SYu Xiangning so->so_pollev |= SO_POLLEV_ALWAYS; 13520f1702c5SYu Xiangning } 13530f1702c5SYu Xiangning 13540f1702c5SYu Xiangning (*so->so_downcalls->sd_activate)(so->so_proto_handle, 13550f1702c5SYu Xiangning (sock_upper_handle_t)so, upcalls_to_use, 0, cr); 13560f1702c5SYu Xiangning 13570f1702c5SYu Xiangning /* Wildcard */ 13580f1702c5SYu Xiangning 13590f1702c5SYu Xiangning /* 13600f1702c5SYu Xiangning * FIXME No need for this, the protocol can deal with it in 13610f1702c5SYu Xiangning * sd_create(). Should update ICMP. 13620f1702c5SYu Xiangning */ 13630f1702c5SYu Xiangning if (so->so_protocol != so->so_sockparams->sp_protocol) { 13640f1702c5SYu Xiangning int protocol = so->so_protocol; 13650f1702c5SYu Xiangning int error; 13660f1702c5SYu Xiangning /* 13670f1702c5SYu Xiangning * Issue SO_PROTOTYPE setsockopt. 13680f1702c5SYu Xiangning */ 13690f1702c5SYu Xiangning error = socket_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 13700f1702c5SYu Xiangning &protocol, (t_uscalar_t)sizeof (protocol), cr); 13710f1702c5SYu Xiangning if (error) { 13720f1702c5SYu Xiangning (void) (*so->so_downcalls->sd_close) 13730f1702c5SYu Xiangning (so->so_proto_handle, 0, cr); 13740f1702c5SYu Xiangning 13750f1702c5SYu Xiangning mutex_enter(&so->so_lock); 13760f1702c5SYu Xiangning so_rcv_flush(so); 13770f1702c5SYu Xiangning mutex_exit(&so->so_lock); 13780f1702c5SYu Xiangning /* 13790f1702c5SYu Xiangning * Setsockopt often fails with ENOPROTOOPT but 13800f1702c5SYu Xiangning * socket() should fail with 13810f1702c5SYu Xiangning * EPROTONOSUPPORT/EPROTOTYPE. 13820f1702c5SYu Xiangning */ 13830f1702c5SYu Xiangning return (EPROTONOSUPPORT); 13840f1702c5SYu Xiangning } 13850f1702c5SYu Xiangning } 13860f1702c5SYu Xiangning } 1387bbc000e5SAnders Persson 1388bbc000e5SAnders Persson if (uioasync.enabled) 1389bbc000e5SAnders Persson sod_sock_init(so); 1390bbc000e5SAnders Persson 13913e95bd4aSAnders Persson /* put an extra reference on the socket for the protocol */ 13923e95bd4aSAnders Persson VN_HOLD(SOTOV(so)); 13933e95bd4aSAnders Persson 1394bbc000e5SAnders Persson return (0); 13950f1702c5SYu Xiangning } 13960f1702c5SYu Xiangning 13970f1702c5SYu Xiangning /* 13980f1702c5SYu Xiangning * int socket_ioctl_common(struct sonode *so, int cmd, intptr_t arg, int mode, 13990f1702c5SYu Xiangning * struct cred *cr, int32_t *rvalp) 14000f1702c5SYu Xiangning * 14010f1702c5SYu Xiangning * Handle ioctls that manipulate basic socket state; non-blocking, 14020f1702c5SYu Xiangning * async, etc. 14030f1702c5SYu Xiangning * 14040f1702c5SYu Xiangning * Returns: 14050f1702c5SYu Xiangning * < 0 - ioctl was not handle 14060f1702c5SYu Xiangning * >= 0 - ioctl was handled, if > 0, then it is an errno 14070f1702c5SYu Xiangning * 14080f1702c5SYu Xiangning * Notes: 14090f1702c5SYu Xiangning * Assumes the standard receive buffer is used to obtain info for 14100f1702c5SYu Xiangning * NREAD. 14110f1702c5SYu Xiangning */ 14120f1702c5SYu Xiangning /* ARGSUSED */ 14130f1702c5SYu Xiangning int 14140f1702c5SYu Xiangning socket_ioctl_common(struct sonode *so, int cmd, intptr_t arg, int mode, 14150f1702c5SYu Xiangning struct cred *cr, int32_t *rvalp) 14160f1702c5SYu Xiangning { 14170f1702c5SYu Xiangning switch (cmd) { 1418bfcb55b8SRao Shoaib case SIOCSQPTR: 1419bfcb55b8SRao Shoaib /* 1420bfcb55b8SRao Shoaib * SIOCSQPTR is valid only when helper stream is created 1421bfcb55b8SRao Shoaib * by the protocol. 1422bfcb55b8SRao Shoaib */ 1423bfcb55b8SRao Shoaib 1424bfcb55b8SRao Shoaib return (EOPNOTSUPP); 14250f1702c5SYu Xiangning case FIONBIO: { 14260f1702c5SYu Xiangning int32_t value; 14270f1702c5SYu Xiangning 14280f1702c5SYu Xiangning if (so_copyin((void *)arg, &value, sizeof (int32_t), 14290f1702c5SYu Xiangning (mode & (int)FKIOCTL))) 14300f1702c5SYu Xiangning return (EFAULT); 14310f1702c5SYu Xiangning 14320f1702c5SYu Xiangning mutex_enter(&so->so_lock); 14330f1702c5SYu Xiangning if (value) { 14340f1702c5SYu Xiangning so->so_state |= SS_NDELAY; 14350f1702c5SYu Xiangning } else { 14360f1702c5SYu Xiangning so->so_state &= ~SS_NDELAY; 14370f1702c5SYu Xiangning } 14380f1702c5SYu Xiangning mutex_exit(&so->so_lock); 14390f1702c5SYu Xiangning return (0); 14400f1702c5SYu Xiangning } 14410f1702c5SYu Xiangning case FIOASYNC: { 14420f1702c5SYu Xiangning int32_t value; 14430f1702c5SYu Xiangning 14440f1702c5SYu Xiangning if (so_copyin((void *)arg, &value, sizeof (int32_t), 14450f1702c5SYu Xiangning (mode & (int)FKIOCTL))) 14460f1702c5SYu Xiangning return (EFAULT); 14470f1702c5SYu Xiangning 14480f1702c5SYu Xiangning mutex_enter(&so->so_lock); 14490f1702c5SYu Xiangning 14500f1702c5SYu Xiangning if (value) { 14510f1702c5SYu Xiangning /* Turn on SIGIO */ 14520f1702c5SYu Xiangning so->so_state |= SS_ASYNC; 14530f1702c5SYu Xiangning } else { 14540f1702c5SYu Xiangning /* Turn off SIGIO */ 14550f1702c5SYu Xiangning so->so_state &= ~SS_ASYNC; 14560f1702c5SYu Xiangning } 14570f1702c5SYu Xiangning mutex_exit(&so->so_lock); 14580f1702c5SYu Xiangning 14590f1702c5SYu Xiangning return (0); 14600f1702c5SYu Xiangning } 14610f1702c5SYu Xiangning 14620f1702c5SYu Xiangning case SIOCSPGRP: 14630f1702c5SYu Xiangning case FIOSETOWN: { 14640f1702c5SYu Xiangning int error; 14650f1702c5SYu Xiangning pid_t pid; 14660f1702c5SYu Xiangning 14670f1702c5SYu Xiangning if (so_copyin((void *)arg, &pid, sizeof (pid_t), 14680f1702c5SYu Xiangning (mode & (int)FKIOCTL))) 14690f1702c5SYu Xiangning return (EFAULT); 14700f1702c5SYu Xiangning 14710f1702c5SYu Xiangning mutex_enter(&so->so_lock); 14720f1702c5SYu Xiangning error = (pid != so->so_pgrp) ? socket_chgpgrp(so, pid) : 0; 14730f1702c5SYu Xiangning mutex_exit(&so->so_lock); 14740f1702c5SYu Xiangning return (error); 14750f1702c5SYu Xiangning } 14760f1702c5SYu Xiangning case SIOCGPGRP: 14770f1702c5SYu Xiangning case FIOGETOWN: 14780f1702c5SYu Xiangning if (so_copyout(&so->so_pgrp, (void *)arg, 14790f1702c5SYu Xiangning sizeof (pid_t), (mode & (int)FKIOCTL))) 14800f1702c5SYu Xiangning return (EFAULT); 14810f1702c5SYu Xiangning 14820f1702c5SYu Xiangning return (0); 14830f1702c5SYu Xiangning case SIOCATMARK: { 14840f1702c5SYu Xiangning int retval; 14850f1702c5SYu Xiangning 14860f1702c5SYu Xiangning /* 14870f1702c5SYu Xiangning * Only protocols that support urgent data can handle ATMARK. 14880f1702c5SYu Xiangning */ 14890f1702c5SYu Xiangning if ((so->so_mode & SM_EXDATA) == 0) 14900f1702c5SYu Xiangning return (EINVAL); 14910f1702c5SYu Xiangning 14920f1702c5SYu Xiangning /* 14930f1702c5SYu Xiangning * If the protocol is maintaining its own buffer, then the 14940f1702c5SYu Xiangning * request must be passed down. 14950f1702c5SYu Xiangning */ 14960f1702c5SYu Xiangning if (so->so_downcalls->sd_recv_uio != NULL) 14970f1702c5SYu Xiangning return (-1); 14980f1702c5SYu Xiangning 14990f1702c5SYu Xiangning retval = (so->so_state & SS_RCVATMARK) != 0; 15000f1702c5SYu Xiangning 15010f1702c5SYu Xiangning if (so_copyout(&retval, (void *)arg, sizeof (int), 15020f1702c5SYu Xiangning (mode & (int)FKIOCTL))) { 15030f1702c5SYu Xiangning return (EFAULT); 15040f1702c5SYu Xiangning } 15050f1702c5SYu Xiangning return (0); 15060f1702c5SYu Xiangning } 15070f1702c5SYu Xiangning 15080f1702c5SYu Xiangning case FIONREAD: { 15090f1702c5SYu Xiangning int retval; 15100f1702c5SYu Xiangning 15110f1702c5SYu Xiangning /* 15120f1702c5SYu Xiangning * If the protocol is maintaining its own buffer, then the 15130f1702c5SYu Xiangning * request must be passed down. 15140f1702c5SYu Xiangning */ 15150f1702c5SYu Xiangning if (so->so_downcalls->sd_recv_uio != NULL) 15160f1702c5SYu Xiangning return (-1); 15170f1702c5SYu Xiangning 15180f1702c5SYu Xiangning retval = MIN(so->so_rcv_queued, INT_MAX); 15190f1702c5SYu Xiangning 15200f1702c5SYu Xiangning if (so_copyout(&retval, (void *)arg, 15210f1702c5SYu Xiangning sizeof (retval), (mode & (int)FKIOCTL))) { 15220f1702c5SYu Xiangning return (EFAULT); 15230f1702c5SYu Xiangning } 15240f1702c5SYu Xiangning return (0); 15250f1702c5SYu Xiangning } 15260f1702c5SYu Xiangning 15270f1702c5SYu Xiangning case _I_GETPEERCRED: { 15280f1702c5SYu Xiangning int error = 0; 15290f1702c5SYu Xiangning 15300f1702c5SYu Xiangning if ((mode & FKIOCTL) == 0) 15310f1702c5SYu Xiangning return (EINVAL); 15320f1702c5SYu Xiangning 15330f1702c5SYu Xiangning mutex_enter(&so->so_lock); 15340f1702c5SYu Xiangning if ((so->so_mode & SM_CONNREQUIRED) == 0) { 15350f1702c5SYu Xiangning error = ENOTSUP; 15360f1702c5SYu Xiangning } else if ((so->so_state & SS_ISCONNECTED) == 0) { 15370f1702c5SYu Xiangning error = ENOTCONN; 15380f1702c5SYu Xiangning } else if (so->so_peercred != NULL) { 15390f1702c5SYu Xiangning k_peercred_t *kp = (k_peercred_t *)arg; 15400f1702c5SYu Xiangning kp->pc_cr = so->so_peercred; 15410f1702c5SYu Xiangning kp->pc_cpid = so->so_cpid; 15420f1702c5SYu Xiangning crhold(so->so_peercred); 15430f1702c5SYu Xiangning } else { 15440f1702c5SYu Xiangning error = EINVAL; 15450f1702c5SYu Xiangning } 15460f1702c5SYu Xiangning mutex_exit(&so->so_lock); 15470f1702c5SYu Xiangning return (error); 15480f1702c5SYu Xiangning } 15490f1702c5SYu Xiangning default: 15500f1702c5SYu Xiangning return (-1); 15510f1702c5SYu Xiangning } 15520f1702c5SYu Xiangning } 15530f1702c5SYu Xiangning 15540f1702c5SYu Xiangning /* 155541174437SAnders Persson * Handle the I_NREAD STREAM ioctl. 155641174437SAnders Persson */ 155741174437SAnders Persson static int 155841174437SAnders Persson so_strioc_nread(struct sonode *so, intptr_t arg, int mode, int32_t *rvalp) 155941174437SAnders Persson { 156041174437SAnders Persson size_t size = 0; 156141174437SAnders Persson int retval; 156241174437SAnders Persson int count = 0; 156341174437SAnders Persson mblk_t *mp; 1564d3d50737SRafael Vanoni clock_t wakeup = drv_usectohz(10); 156541174437SAnders Persson 156641174437SAnders Persson if (so->so_downcalls == NULL || 156741174437SAnders Persson so->so_downcalls->sd_recv_uio != NULL) 156841174437SAnders Persson return (EINVAL); 156941174437SAnders Persson 157041174437SAnders Persson mutex_enter(&so->so_lock); 157141174437SAnders Persson /* Wait for reader to get out of the way. */ 157241174437SAnders Persson while (so->so_flag & SOREADLOCKED) { 157341174437SAnders Persson /* 157441174437SAnders Persson * If reader is waiting for data, then there should be nothing 157541174437SAnders Persson * on the rcv queue. 157641174437SAnders Persson */ 157741174437SAnders Persson if (so->so_rcv_wakeup) 157841174437SAnders Persson goto out; 157941174437SAnders Persson 158041174437SAnders Persson /* Do a timed sleep, in case the reader goes to sleep. */ 1581decd6ccdSAnders Persson (void) cv_reltimedwait(&so->so_read_cv, &so->so_lock, wakeup, 1582d3d50737SRafael Vanoni TR_CLOCK_TICK); 158341174437SAnders Persson } 158441174437SAnders Persson 158541174437SAnders Persson /* 158641174437SAnders Persson * Since we are holding so_lock no new reader will come in, and the 158741174437SAnders Persson * protocol will not be able to enqueue data. So it's safe to walk 158841174437SAnders Persson * both rcv queues. 158941174437SAnders Persson */ 159041174437SAnders Persson mp = so->so_rcv_q_head; 159141174437SAnders Persson if (mp != NULL) { 159241174437SAnders Persson size = msgdsize(so->so_rcv_q_head); 159341174437SAnders Persson for (; mp != NULL; mp = mp->b_next) 159441174437SAnders Persson count++; 159541174437SAnders Persson } else { 159641174437SAnders Persson /* 159741174437SAnders Persson * In case the processing list was empty, get the size of the 159841174437SAnders Persson * next msg in line. 159941174437SAnders Persson */ 160041174437SAnders Persson size = msgdsize(so->so_rcv_head); 160141174437SAnders Persson } 160241174437SAnders Persson 160341174437SAnders Persson for (mp = so->so_rcv_head; mp != NULL; mp = mp->b_next) 160441174437SAnders Persson count++; 160541174437SAnders Persson out: 160641174437SAnders Persson mutex_exit(&so->so_lock); 160741174437SAnders Persson 160841174437SAnders Persson /* 160941174437SAnders Persson * Drop down from size_t to the "int" required by the 161041174437SAnders Persson * interface. Cap at INT_MAX. 161141174437SAnders Persson */ 161241174437SAnders Persson retval = MIN(size, INT_MAX); 161341174437SAnders Persson if (so_copyout(&retval, (void *)arg, sizeof (retval), 161441174437SAnders Persson (mode & (int)FKIOCTL))) { 161541174437SAnders Persson return (EFAULT); 161641174437SAnders Persson } else { 161741174437SAnders Persson *rvalp = count; 161841174437SAnders Persson return (0); 161941174437SAnders Persson } 162041174437SAnders Persson } 162141174437SAnders Persson 162241174437SAnders Persson /* 162341174437SAnders Persson * Process STREAM ioctls. 16240f1702c5SYu Xiangning * 16250f1702c5SYu Xiangning * Returns: 16260f1702c5SYu Xiangning * < 0 - ioctl was not handle 16270f1702c5SYu Xiangning * >= 0 - ioctl was handled, if > 0, then it is an errno 16280f1702c5SYu Xiangning */ 16290f1702c5SYu Xiangning int 16300f1702c5SYu Xiangning socket_strioc_common(struct sonode *so, int cmd, intptr_t arg, int mode, 16310f1702c5SYu Xiangning struct cred *cr, int32_t *rvalp) 16320f1702c5SYu Xiangning { 16330f1702c5SYu Xiangning int retval; 16340f1702c5SYu Xiangning 163541174437SAnders Persson /* Only STREAM iotcls are handled here */ 163641174437SAnders Persson if ((cmd & 0xffffff00U) != STR) 163741174437SAnders Persson return (-1); 163841174437SAnders Persson 163941174437SAnders Persson switch (cmd) { 164041174437SAnders Persson case I_CANPUT: 164141174437SAnders Persson /* 164241174437SAnders Persson * We return an error for I_CANPUT so that isastream(3C) will 164341174437SAnders Persson * not report the socket as being a STREAM. 164441174437SAnders Persson */ 164541174437SAnders Persson return (EOPNOTSUPP); 164641174437SAnders Persson case I_NREAD: 164741174437SAnders Persson /* Avoid doing a fallback for I_NREAD. */ 164841174437SAnders Persson return (so_strioc_nread(so, arg, mode, rvalp)); 16490f1702c5SYu Xiangning case I_LOOK: 165041174437SAnders Persson /* Avoid doing a fallback for I_LOOK. */ 16510f1702c5SYu Xiangning if (so_copyout("sockmod", (void *)arg, strlen("sockmod") + 1, 16520f1702c5SYu Xiangning (mode & (int)FKIOCTL))) { 16530f1702c5SYu Xiangning return (EFAULT); 16540f1702c5SYu Xiangning } 16550f1702c5SYu Xiangning return (0); 16560f1702c5SYu Xiangning default: 165741174437SAnders Persson break; 165841174437SAnders Persson } 165941174437SAnders Persson 166041174437SAnders Persson /* 166141174437SAnders Persson * Try to fall back to TPI, and if successful, reissue the ioctl. 166241174437SAnders Persson */ 166341174437SAnders Persson if ((retval = so_tpi_fallback(so, cr)) == 0) { 166441174437SAnders Persson /* Reissue the ioctl */ 166541174437SAnders Persson ASSERT(so->so_rcv_q_head == NULL); 166641174437SAnders Persson return (SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 166741174437SAnders Persson } else { 166841174437SAnders Persson return (retval); 16690f1702c5SYu Xiangning } 16700f1702c5SYu Xiangning } 16710f1702c5SYu Xiangning 16722c632ad5SAnders Persson /* 16732c632ad5SAnders Persson * This is called for all socket types to verify that the buffer size is large 16742c632ad5SAnders Persson * enough for the option, and if we can, handle the request as well. Most 16752c632ad5SAnders Persson * options will be forwarded to the protocol. 16762c632ad5SAnders Persson */ 16770f1702c5SYu Xiangning int 16780f1702c5SYu Xiangning socket_getopt_common(struct sonode *so, int level, int option_name, 1679a5adac4dSYu Xiangning void *optval, socklen_t *optlenp, int flags) 16800f1702c5SYu Xiangning { 16810f1702c5SYu Xiangning if (level != SOL_SOCKET) 16820f1702c5SYu Xiangning return (-1); 16830f1702c5SYu Xiangning 16840f1702c5SYu Xiangning switch (option_name) { 16850f1702c5SYu Xiangning case SO_ERROR: 16860f1702c5SYu Xiangning case SO_DOMAIN: 16870f1702c5SYu Xiangning case SO_TYPE: 16880f1702c5SYu Xiangning case SO_ACCEPTCONN: { 16890f1702c5SYu Xiangning int32_t value; 16900f1702c5SYu Xiangning socklen_t optlen = *optlenp; 16910f1702c5SYu Xiangning 16920f1702c5SYu Xiangning if (optlen < (t_uscalar_t)sizeof (int32_t)) { 16930f1702c5SYu Xiangning return (EINVAL); 16940f1702c5SYu Xiangning } 16950f1702c5SYu Xiangning 16960f1702c5SYu Xiangning switch (option_name) { 16970f1702c5SYu Xiangning case SO_ERROR: 16980f1702c5SYu Xiangning mutex_enter(&so->so_lock); 16990f1702c5SYu Xiangning value = sogeterr(so, B_TRUE); 17000f1702c5SYu Xiangning mutex_exit(&so->so_lock); 17010f1702c5SYu Xiangning break; 17020f1702c5SYu Xiangning case SO_DOMAIN: 17030f1702c5SYu Xiangning value = so->so_family; 17040f1702c5SYu Xiangning break; 17050f1702c5SYu Xiangning case SO_TYPE: 17060f1702c5SYu Xiangning value = so->so_type; 17070f1702c5SYu Xiangning break; 17080f1702c5SYu Xiangning case SO_ACCEPTCONN: 17090f1702c5SYu Xiangning if (so->so_state & SS_ACCEPTCONN) 17100f1702c5SYu Xiangning value = SO_ACCEPTCONN; 17110f1702c5SYu Xiangning else 17120f1702c5SYu Xiangning value = 0; 17130f1702c5SYu Xiangning break; 17140f1702c5SYu Xiangning } 17150f1702c5SYu Xiangning 17160f1702c5SYu Xiangning bcopy(&value, optval, sizeof (value)); 17170f1702c5SYu Xiangning *optlenp = sizeof (value); 17180f1702c5SYu Xiangning 17190f1702c5SYu Xiangning return (0); 17200f1702c5SYu Xiangning } 17210f1702c5SYu Xiangning case SO_SNDTIMEO: 17220f1702c5SYu Xiangning case SO_RCVTIMEO: { 17230f1702c5SYu Xiangning clock_t value; 17240f1702c5SYu Xiangning socklen_t optlen = *optlenp; 1725e5083e81Sshenjian 1726e5083e81Sshenjian if (get_udatamodel() == DATAMODEL_NONE || 1727e5083e81Sshenjian get_udatamodel() == DATAMODEL_NATIVE) { 172822238f73Sshenjian if (optlen < sizeof (struct timeval)) 172922238f73Sshenjian return (EINVAL); 173022238f73Sshenjian } else { 173122238f73Sshenjian if (optlen < sizeof (struct timeval32)) 17320f1702c5SYu Xiangning return (EINVAL); 17330f1702c5SYu Xiangning } 17340f1702c5SYu Xiangning if (option_name == SO_RCVTIMEO) 17350f1702c5SYu Xiangning value = drv_hztousec(so->so_rcvtimeo); 17360f1702c5SYu Xiangning else 17370f1702c5SYu Xiangning value = drv_hztousec(so->so_sndtimeo); 173822238f73Sshenjian 1739e5083e81Sshenjian if (get_udatamodel() == DATAMODEL_NONE || 1740e5083e81Sshenjian get_udatamodel() == DATAMODEL_NATIVE) { 174122238f73Sshenjian ((struct timeval *)(optval))->tv_sec = 174222238f73Sshenjian value / (1000 * 1000); 174322238f73Sshenjian ((struct timeval *)(optval))->tv_usec = 174422238f73Sshenjian value % (1000 * 1000); 17450f1702c5SYu Xiangning *optlenp = sizeof (struct timeval); 174622238f73Sshenjian } else { 174722238f73Sshenjian ((struct timeval32 *)(optval))->tv_sec = 174822238f73Sshenjian value / (1000 * 1000); 174922238f73Sshenjian ((struct timeval32 *)(optval))->tv_usec = 175022238f73Sshenjian value % (1000 * 1000); 175122238f73Sshenjian *optlenp = sizeof (struct timeval32); 175222238f73Sshenjian } 17530f1702c5SYu Xiangning return (0); 17540f1702c5SYu Xiangning } 17550f1702c5SYu Xiangning case SO_DEBUG: 17560f1702c5SYu Xiangning case SO_REUSEADDR: 175778918900SArne Jansen case SO_REUSEPORT: 17580f1702c5SYu Xiangning case SO_KEEPALIVE: 17590f1702c5SYu Xiangning case SO_DONTROUTE: 17600f1702c5SYu Xiangning case SO_BROADCAST: 17610f1702c5SYu Xiangning case SO_USELOOPBACK: 17620f1702c5SYu Xiangning case SO_OOBINLINE: 17630f1702c5SYu Xiangning case SO_SNDBUF: 17640f1702c5SYu Xiangning #ifdef notyet 17650f1702c5SYu Xiangning case SO_SNDLOWAT: 17660f1702c5SYu Xiangning case SO_RCVLOWAT: 17670f1702c5SYu Xiangning #endif /* notyet */ 17680f1702c5SYu Xiangning case SO_DGRAM_ERRIND: { 17690f1702c5SYu Xiangning socklen_t optlen = *optlenp; 17700f1702c5SYu Xiangning 17710f1702c5SYu Xiangning if (optlen < (t_uscalar_t)sizeof (int32_t)) 17720f1702c5SYu Xiangning return (EINVAL); 17730f1702c5SYu Xiangning break; 17740f1702c5SYu Xiangning } 1775a5adac4dSYu Xiangning case SO_RCVBUF: { 1776a5adac4dSYu Xiangning socklen_t optlen = *optlenp; 1777a5adac4dSYu Xiangning 1778a5adac4dSYu Xiangning if (optlen < (t_uscalar_t)sizeof (int32_t)) 1779a5adac4dSYu Xiangning return (EINVAL); 1780a5adac4dSYu Xiangning 1781a5adac4dSYu Xiangning if ((flags & _SOGETSOCKOPT_XPG4_2) && so->so_xpg_rcvbuf != 0) { 1782a5adac4dSYu Xiangning /* 1783a5adac4dSYu Xiangning * XXX If SO_RCVBUF has been set and this is an 1784a5adac4dSYu Xiangning * XPG 4.2 application then do not ask the transport 1785a5adac4dSYu Xiangning * since the transport might adjust the value and not 1786a5adac4dSYu Xiangning * return exactly what was set by the application. 1787a5adac4dSYu Xiangning * For non-XPG 4.2 application we return the value 1788a5adac4dSYu Xiangning * that the transport is actually using. 1789a5adac4dSYu Xiangning */ 1790a5adac4dSYu Xiangning *(int32_t *)optval = so->so_xpg_rcvbuf; 1791a5adac4dSYu Xiangning *optlenp = sizeof (so->so_xpg_rcvbuf); 1792a5adac4dSYu Xiangning return (0); 1793a5adac4dSYu Xiangning } 1794a5adac4dSYu Xiangning /* 1795a5adac4dSYu Xiangning * If the option has not been set then get a default 1796a5adac4dSYu Xiangning * value from the transport. 1797a5adac4dSYu Xiangning */ 1798a5adac4dSYu Xiangning break; 1799a5adac4dSYu Xiangning } 18000f1702c5SYu Xiangning case SO_LINGER: { 18010f1702c5SYu Xiangning socklen_t optlen = *optlenp; 18020f1702c5SYu Xiangning 18030f1702c5SYu Xiangning if (optlen < (t_uscalar_t)sizeof (struct linger)) 18040f1702c5SYu Xiangning return (EINVAL); 18050f1702c5SYu Xiangning break; 18060f1702c5SYu Xiangning } 18070f1702c5SYu Xiangning case SO_SND_BUFINFO: { 18080f1702c5SYu Xiangning socklen_t optlen = *optlenp; 18090f1702c5SYu Xiangning 18100f1702c5SYu Xiangning if (optlen < (t_uscalar_t)sizeof (struct so_snd_bufinfo)) 18110f1702c5SYu Xiangning return (EINVAL); 18120f1702c5SYu Xiangning ((struct so_snd_bufinfo *)(optval))->sbi_wroff = 18130f1702c5SYu Xiangning (so->so_proto_props).sopp_wroff; 18140f1702c5SYu Xiangning ((struct so_snd_bufinfo *)(optval))->sbi_maxblk = 18150f1702c5SYu Xiangning (so->so_proto_props).sopp_maxblk; 18160f1702c5SYu Xiangning ((struct so_snd_bufinfo *)(optval))->sbi_maxpsz = 18170f1702c5SYu Xiangning (so->so_proto_props).sopp_maxpsz; 18180f1702c5SYu Xiangning ((struct so_snd_bufinfo *)(optval))->sbi_tail = 18190f1702c5SYu Xiangning (so->so_proto_props).sopp_tail; 18200f1702c5SYu Xiangning *optlenp = sizeof (struct so_snd_bufinfo); 18210f1702c5SYu Xiangning return (0); 18220f1702c5SYu Xiangning } 18233e95bd4aSAnders Persson case SO_SND_COPYAVOID: { 18243e95bd4aSAnders Persson sof_instance_t *inst; 18253e95bd4aSAnders Persson 18263e95bd4aSAnders Persson /* 18273e95bd4aSAnders Persson * Avoid zero-copy if there is a filter with a data_out 18283e95bd4aSAnders Persson * callback. We could let the operation succeed, but then 18293e95bd4aSAnders Persson * the filter would have to copy the data anyway. 18303e95bd4aSAnders Persson */ 18313e95bd4aSAnders Persson for (inst = so->so_filter_top; inst != NULL; 18323e95bd4aSAnders Persson inst = inst->sofi_next) { 18333e95bd4aSAnders Persson if (SOF_INTERESTED(inst, data_out)) 18343e95bd4aSAnders Persson return (EOPNOTSUPP); 18353e95bd4aSAnders Persson } 18363e95bd4aSAnders Persson break; 18373e95bd4aSAnders Persson } 18383e95bd4aSAnders Persson 18390f1702c5SYu Xiangning default: 18400f1702c5SYu Xiangning break; 18410f1702c5SYu Xiangning } 18420f1702c5SYu Xiangning 18430f1702c5SYu Xiangning /* Unknown Option */ 18440f1702c5SYu Xiangning return (-1); 18450f1702c5SYu Xiangning } 18460f1702c5SYu Xiangning 18470f1702c5SYu Xiangning void 18480f1702c5SYu Xiangning socket_sonode_destroy(struct sonode *so) 18490f1702c5SYu Xiangning { 18500f1702c5SYu Xiangning sonode_fini(so); 18510f1702c5SYu Xiangning kmem_cache_free(socket_cache, so); 18520f1702c5SYu Xiangning } 18530f1702c5SYu Xiangning 18540f1702c5SYu Xiangning int 18550f1702c5SYu Xiangning so_zcopy_wait(struct sonode *so) 18560f1702c5SYu Xiangning { 18570f1702c5SYu Xiangning int error = 0; 18580f1702c5SYu Xiangning 18590f1702c5SYu Xiangning mutex_enter(&so->so_lock); 18600f1702c5SYu Xiangning while (!(so->so_copyflag & STZCNOTIFY)) { 18610f1702c5SYu Xiangning if (so->so_state & SS_CLOSING) { 18620f1702c5SYu Xiangning mutex_exit(&so->so_lock); 18630f1702c5SYu Xiangning return (EINTR); 18640f1702c5SYu Xiangning } 18650f1702c5SYu Xiangning if (cv_wait_sig(&so->so_copy_cv, &so->so_lock) == 0) { 18660f1702c5SYu Xiangning error = EINTR; 18670f1702c5SYu Xiangning break; 18680f1702c5SYu Xiangning } 18690f1702c5SYu Xiangning } 18700f1702c5SYu Xiangning so->so_copyflag &= ~STZCNOTIFY; 18710f1702c5SYu Xiangning mutex_exit(&so->so_lock); 18720f1702c5SYu Xiangning return (error); 18730f1702c5SYu Xiangning } 18740f1702c5SYu Xiangning 18750f1702c5SYu Xiangning void 18760f1702c5SYu Xiangning so_timer_callback(void *arg) 18770f1702c5SYu Xiangning { 18780f1702c5SYu Xiangning struct sonode *so = (struct sonode *)arg; 18790f1702c5SYu Xiangning 18800f1702c5SYu Xiangning mutex_enter(&so->so_lock); 18810f1702c5SYu Xiangning 18820f1702c5SYu Xiangning so->so_rcv_timer_tid = 0; 18830f1702c5SYu Xiangning if (so->so_rcv_queued > 0) { 18840f1702c5SYu Xiangning so_notify_data(so, so->so_rcv_queued); 18850f1702c5SYu Xiangning } else { 18860f1702c5SYu Xiangning mutex_exit(&so->so_lock); 18870f1702c5SYu Xiangning } 18880f1702c5SYu Xiangning } 18890f1702c5SYu Xiangning 18900f1702c5SYu Xiangning #ifdef DEBUG 18910f1702c5SYu Xiangning /* 18920f1702c5SYu Xiangning * Verify that the length stored in so_rcv_queued and the length of data blocks 18930f1702c5SYu Xiangning * queued is same. 18940f1702c5SYu Xiangning */ 18950f1702c5SYu Xiangning static boolean_t 18960f1702c5SYu Xiangning so_check_length(sonode_t *so) 18970f1702c5SYu Xiangning { 18980f1702c5SYu Xiangning mblk_t *mp = so->so_rcv_q_head; 18990f1702c5SYu Xiangning int len = 0; 19000f1702c5SYu Xiangning 19010f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 19020f1702c5SYu Xiangning 19030f1702c5SYu Xiangning if (mp != NULL) { 19040f1702c5SYu Xiangning len = msgdsize(mp); 19050f1702c5SYu Xiangning while ((mp = mp->b_next) != NULL) 19060f1702c5SYu Xiangning len += msgdsize(mp); 19070f1702c5SYu Xiangning } 19080f1702c5SYu Xiangning mp = so->so_rcv_head; 19090f1702c5SYu Xiangning if (mp != NULL) { 19100f1702c5SYu Xiangning len += msgdsize(mp); 19110f1702c5SYu Xiangning while ((mp = mp->b_next) != NULL) 19120f1702c5SYu Xiangning len += msgdsize(mp); 19130f1702c5SYu Xiangning } 19140f1702c5SYu Xiangning return ((len == so->so_rcv_queued) ? B_TRUE : B_FALSE); 19150f1702c5SYu Xiangning } 19160f1702c5SYu Xiangning #endif 19170f1702c5SYu Xiangning 19180f1702c5SYu Xiangning int 19190f1702c5SYu Xiangning so_get_mod_version(struct sockparams *sp) 19200f1702c5SYu Xiangning { 19210f1702c5SYu Xiangning ASSERT(sp != NULL && sp->sp_smod_info != NULL); 19220f1702c5SYu Xiangning return (sp->sp_smod_info->smod_version); 19230f1702c5SYu Xiangning } 19240f1702c5SYu Xiangning 19250f1702c5SYu Xiangning /* 19260f1702c5SYu Xiangning * so_start_fallback() 19270f1702c5SYu Xiangning * 19280f1702c5SYu Xiangning * Block new socket operations from coming in, and wait for active operations 19290f1702c5SYu Xiangning * to complete. Threads that are sleeping will be woken up so they can get 19300f1702c5SYu Xiangning * out of the way. 19310f1702c5SYu Xiangning * 19320f1702c5SYu Xiangning * The caller must be a reader on so_fallback_rwlock. 19330f1702c5SYu Xiangning */ 19340f1702c5SYu Xiangning static boolean_t 19350f1702c5SYu Xiangning so_start_fallback(struct sonode *so) 19360f1702c5SYu Xiangning { 19370f1702c5SYu Xiangning ASSERT(RW_READ_HELD(&so->so_fallback_rwlock)); 19380f1702c5SYu Xiangning 19390f1702c5SYu Xiangning mutex_enter(&so->so_lock); 19400f1702c5SYu Xiangning if (so->so_state & SS_FALLBACK_PENDING) { 19410f1702c5SYu Xiangning mutex_exit(&so->so_lock); 19420f1702c5SYu Xiangning return (B_FALSE); 19430f1702c5SYu Xiangning } 19440f1702c5SYu Xiangning so->so_state |= SS_FALLBACK_PENDING; 19450f1702c5SYu Xiangning /* 19460f1702c5SYu Xiangning * Poke all threads that might be sleeping. Any operation that comes 19470f1702c5SYu Xiangning * in after the cv_broadcast will observe the fallback pending flag 19480f1702c5SYu Xiangning * which cause the call to return where it would normally sleep. 19490f1702c5SYu Xiangning */ 19500f1702c5SYu Xiangning cv_broadcast(&so->so_state_cv); /* threads in connect() */ 19510f1702c5SYu Xiangning cv_broadcast(&so->so_rcv_cv); /* threads in recvmsg() */ 19520f1702c5SYu Xiangning cv_broadcast(&so->so_snd_cv); /* threads in sendmsg() */ 19530f1702c5SYu Xiangning mutex_enter(&so->so_acceptq_lock); 19540f1702c5SYu Xiangning cv_broadcast(&so->so_acceptq_cv); /* threads in accept() */ 19550f1702c5SYu Xiangning mutex_exit(&so->so_acceptq_lock); 19560f1702c5SYu Xiangning mutex_exit(&so->so_lock); 19570f1702c5SYu Xiangning 19580f1702c5SYu Xiangning /* 19590f1702c5SYu Xiangning * The main reason for the rw_tryupgrade call is to provide 19600f1702c5SYu Xiangning * observability during the fallback process. We want to 19610f1702c5SYu Xiangning * be able to see if there are pending operations. 19620f1702c5SYu Xiangning */ 19630f1702c5SYu Xiangning if (rw_tryupgrade(&so->so_fallback_rwlock) == 0) { 19640f1702c5SYu Xiangning /* 19650f1702c5SYu Xiangning * It is safe to drop and reaquire the fallback lock, because 19660f1702c5SYu Xiangning * we are guaranteed that another fallback cannot take place. 19670f1702c5SYu Xiangning */ 19680f1702c5SYu Xiangning rw_exit(&so->so_fallback_rwlock); 19690f1702c5SYu Xiangning DTRACE_PROBE1(pending__ops__wait, (struct sonode *), so); 19700f1702c5SYu Xiangning rw_enter(&so->so_fallback_rwlock, RW_WRITER); 19710f1702c5SYu Xiangning DTRACE_PROBE1(pending__ops__complete, (struct sonode *), so); 19720f1702c5SYu Xiangning } 19730f1702c5SYu Xiangning 19740f1702c5SYu Xiangning return (B_TRUE); 19750f1702c5SYu Xiangning } 19760f1702c5SYu Xiangning 19770f1702c5SYu Xiangning /* 19780f1702c5SYu Xiangning * so_end_fallback() 19790f1702c5SYu Xiangning * 19800f1702c5SYu Xiangning * Allow socket opertions back in. 19810f1702c5SYu Xiangning * 19820f1702c5SYu Xiangning * The caller must be a writer on so_fallback_rwlock. 19830f1702c5SYu Xiangning */ 19840f1702c5SYu Xiangning static void 19850f1702c5SYu Xiangning so_end_fallback(struct sonode *so) 19860f1702c5SYu Xiangning { 19870f1702c5SYu Xiangning ASSERT(RW_ISWRITER(&so->so_fallback_rwlock)); 19880f1702c5SYu Xiangning 19890f1702c5SYu Xiangning mutex_enter(&so->so_lock); 199041174437SAnders Persson so->so_state &= ~(SS_FALLBACK_PENDING|SS_FALLBACK_DRAIN); 19910f1702c5SYu Xiangning mutex_exit(&so->so_lock); 19920f1702c5SYu Xiangning 19930f1702c5SYu Xiangning rw_downgrade(&so->so_fallback_rwlock); 19940f1702c5SYu Xiangning } 19950f1702c5SYu Xiangning 19960f1702c5SYu Xiangning /* 19970f1702c5SYu Xiangning * so_quiesced_cb() 19980f1702c5SYu Xiangning * 19990f1702c5SYu Xiangning * Callback passed to the protocol during fallback. It is called once 20000f1702c5SYu Xiangning * the endpoint is quiescent. 20010f1702c5SYu Xiangning * 20020f1702c5SYu Xiangning * No requests from the user, no notifications from the protocol, so it 20030f1702c5SYu Xiangning * is safe to synchronize the state. Data can also be moved without 20040f1702c5SYu Xiangning * risk for reordering. 20050f1702c5SYu Xiangning * 20060f1702c5SYu Xiangning * We do not need to hold so_lock, since there can be only one thread 20070f1702c5SYu Xiangning * operating on the sonode. 20080f1702c5SYu Xiangning */ 20093e95bd4aSAnders Persson static mblk_t * 20103e95bd4aSAnders Persson so_quiesced_cb(sock_upper_handle_t sock_handle, sock_quiesce_arg_t *arg, 20113e95bd4aSAnders Persson struct T_capability_ack *tcap, 20123e95bd4aSAnders Persson struct sockaddr *laddr, socklen_t laddrlen, 20130f1702c5SYu Xiangning struct sockaddr *faddr, socklen_t faddrlen, short opts) 20140f1702c5SYu Xiangning { 20150f1702c5SYu Xiangning struct sonode *so = (struct sonode *)sock_handle; 201641174437SAnders Persson boolean_t atmark; 20173e95bd4aSAnders Persson mblk_t *retmp = NULL, **tailmpp = &retmp; 20180f1702c5SYu Xiangning 20193e95bd4aSAnders Persson if (tcap != NULL) 20203e95bd4aSAnders Persson sotpi_update_state(so, tcap, laddr, laddrlen, faddr, faddrlen, 20213e95bd4aSAnders Persson opts); 20220f1702c5SYu Xiangning 202341174437SAnders Persson /* 202441174437SAnders Persson * Some protocols do not quiece the data path during fallback. Once 202541174437SAnders Persson * we set the SS_FALLBACK_DRAIN flag any attempt to queue data will 202641174437SAnders Persson * fail and the protocol is responsible for saving the data for later 202741174437SAnders Persson * delivery (i.e., once the fallback has completed). 202841174437SAnders Persson */ 20290f1702c5SYu Xiangning mutex_enter(&so->so_lock); 203041174437SAnders Persson so->so_state |= SS_FALLBACK_DRAIN; 20310f1702c5SYu Xiangning SOCKET_TIMER_CANCEL(so); 20320f1702c5SYu Xiangning mutex_exit(&so->so_lock); 203341174437SAnders Persson 20340f1702c5SYu Xiangning if (so->so_rcv_head != NULL) { 20350f1702c5SYu Xiangning if (so->so_rcv_q_last_head == NULL) 20360f1702c5SYu Xiangning so->so_rcv_q_head = so->so_rcv_head; 20370f1702c5SYu Xiangning else 20380f1702c5SYu Xiangning so->so_rcv_q_last_head->b_next = so->so_rcv_head; 20390f1702c5SYu Xiangning so->so_rcv_q_last_head = so->so_rcv_last_head; 20400f1702c5SYu Xiangning } 20410f1702c5SYu Xiangning 204241174437SAnders Persson atmark = (so->so_state & SS_RCVATMARK) != 0; 204341174437SAnders Persson /* 204441174437SAnders Persson * Clear any OOB state having to do with pending data. The TPI 204541174437SAnders Persson * code path will set the appropriate oob state when we move the 204641174437SAnders Persson * oob data to the STREAM head. We leave SS_HADOOBDATA since the oob 204741174437SAnders Persson * data has already been consumed. 204841174437SAnders Persson */ 204941174437SAnders Persson so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA); 205041174437SAnders Persson 205141174437SAnders Persson ASSERT(so->so_oobmsg != NULL || so->so_oobmark <= so->so_rcv_queued); 205241174437SAnders Persson 205341174437SAnders Persson /* 205441174437SAnders Persson * Move data to the STREAM head. 205541174437SAnders Persson */ 20560f1702c5SYu Xiangning while (so->so_rcv_q_head != NULL) { 20570f1702c5SYu Xiangning mblk_t *mp = so->so_rcv_q_head; 20580f1702c5SYu Xiangning size_t mlen = msgdsize(mp); 20590f1702c5SYu Xiangning 20600f1702c5SYu Xiangning so->so_rcv_q_head = mp->b_next; 20610f1702c5SYu Xiangning mp->b_next = NULL; 20620f1702c5SYu Xiangning mp->b_prev = NULL; 206341174437SAnders Persson 206441174437SAnders Persson /* 206541174437SAnders Persson * Send T_EXDATA_IND if we are at the oob mark. 206641174437SAnders Persson */ 206741174437SAnders Persson if (atmark) { 206841174437SAnders Persson struct T_exdata_ind *tei; 20693e95bd4aSAnders Persson mblk_t *mp1 = arg->soqa_exdata_mp; 207041174437SAnders Persson 20713e95bd4aSAnders Persson arg->soqa_exdata_mp = NULL; 207241174437SAnders Persson ASSERT(mp1 != NULL); 207341174437SAnders Persson mp1->b_datap->db_type = M_PROTO; 207441174437SAnders Persson tei = (struct T_exdata_ind *)mp1->b_rptr; 207541174437SAnders Persson tei->PRIM_type = T_EXDATA_IND; 207641174437SAnders Persson tei->MORE_flag = 0; 207741174437SAnders Persson mp1->b_wptr = (uchar_t *)&tei[1]; 207841174437SAnders Persson 207941174437SAnders Persson if (IS_SO_OOB_INLINE(so)) { 208041174437SAnders Persson mp1->b_cont = mp; 208141174437SAnders Persson } else { 208241174437SAnders Persson ASSERT(so->so_oobmsg != NULL); 208341174437SAnders Persson mp1->b_cont = so->so_oobmsg; 208441174437SAnders Persson so->so_oobmsg = NULL; 208541174437SAnders Persson 208641174437SAnders Persson /* process current mp next time around */ 208741174437SAnders Persson mp->b_next = so->so_rcv_q_head; 208841174437SAnders Persson so->so_rcv_q_head = mp; 208941174437SAnders Persson mlen = 0; 209041174437SAnders Persson } 209141174437SAnders Persson mp = mp1; 209241174437SAnders Persson 209341174437SAnders Persson /* we have consumed the oob mark */ 209441174437SAnders Persson atmark = B_FALSE; 209541174437SAnders Persson } else if (so->so_oobmark > 0) { 209641174437SAnders Persson /* 209741174437SAnders Persson * Check if the OOB mark is within the current 209841174437SAnders Persson * mblk chain. In that case we have to split it up. 209941174437SAnders Persson */ 210041174437SAnders Persson if (so->so_oobmark < mlen) { 210141174437SAnders Persson mblk_t *urg_mp = mp; 210241174437SAnders Persson 210341174437SAnders Persson atmark = B_TRUE; 210441174437SAnders Persson mp = NULL; 210541174437SAnders Persson mlen = so->so_oobmark; 210641174437SAnders Persson 210741174437SAnders Persson /* 210841174437SAnders Persson * It is assumed that the OOB mark does 210941174437SAnders Persson * not land within a mblk. 211041174437SAnders Persson */ 211141174437SAnders Persson do { 211241174437SAnders Persson so->so_oobmark -= MBLKL(urg_mp); 211341174437SAnders Persson mp = urg_mp; 211441174437SAnders Persson urg_mp = urg_mp->b_cont; 211541174437SAnders Persson } while (so->so_oobmark > 0); 211641174437SAnders Persson mp->b_cont = NULL; 211741174437SAnders Persson if (urg_mp != NULL) { 211841174437SAnders Persson urg_mp->b_next = so->so_rcv_q_head; 211941174437SAnders Persson so->so_rcv_q_head = urg_mp; 212041174437SAnders Persson } 212141174437SAnders Persson } else { 212241174437SAnders Persson so->so_oobmark -= mlen; 212341174437SAnders Persson if (so->so_oobmark == 0) 212441174437SAnders Persson atmark = B_TRUE; 212541174437SAnders Persson } 212641174437SAnders Persson } 212741174437SAnders Persson 212841174437SAnders Persson /* 212941174437SAnders Persson * Queue data on the STREAM head. 213041174437SAnders Persson */ 21310f1702c5SYu Xiangning so->so_rcv_queued -= mlen; 21323e95bd4aSAnders Persson *tailmpp = mp; 21333e95bd4aSAnders Persson tailmpp = &mp->b_next; 21340f1702c5SYu Xiangning } 21350f1702c5SYu Xiangning so->so_rcv_head = NULL; 21360f1702c5SYu Xiangning so->so_rcv_last_head = NULL; 21370f1702c5SYu Xiangning so->so_rcv_q_head = NULL; 21380f1702c5SYu Xiangning so->so_rcv_q_last_head = NULL; 21390f1702c5SYu Xiangning 214041174437SAnders Persson /* 214141174437SAnders Persson * Check if the oob byte is at the end of the data stream, or if the 214241174437SAnders Persson * oob byte has not yet arrived. In the latter case we have to send a 214341174437SAnders Persson * SIGURG and a mark indicator to the STREAM head. The mark indicator 214441174437SAnders Persson * is needed to guarantee correct behavior for SIOCATMARK. See block 214541174437SAnders Persson * comment in socktpi.h for more details. 214641174437SAnders Persson */ 214741174437SAnders Persson if (atmark || so->so_oobmark > 0) { 214841174437SAnders Persson mblk_t *mp; 21490f1702c5SYu Xiangning 215041174437SAnders Persson if (atmark && so->so_oobmsg != NULL) { 215141174437SAnders Persson struct T_exdata_ind *tei; 215241174437SAnders Persson 21533e95bd4aSAnders Persson mp = arg->soqa_exdata_mp; 21543e95bd4aSAnders Persson arg->soqa_exdata_mp = NULL; 215541174437SAnders Persson ASSERT(mp != NULL); 215641174437SAnders Persson mp->b_datap->db_type = M_PROTO; 215741174437SAnders Persson tei = (struct T_exdata_ind *)mp->b_rptr; 215841174437SAnders Persson tei->PRIM_type = T_EXDATA_IND; 215941174437SAnders Persson tei->MORE_flag = 0; 216041174437SAnders Persson mp->b_wptr = (uchar_t *)&tei[1]; 216141174437SAnders Persson 216241174437SAnders Persson mp->b_cont = so->so_oobmsg; 216341174437SAnders Persson so->so_oobmsg = NULL; 216441174437SAnders Persson 21653e95bd4aSAnders Persson *tailmpp = mp; 21663e95bd4aSAnders Persson tailmpp = &mp->b_next; 216741174437SAnders Persson } else { 216841174437SAnders Persson /* Send up the signal */ 21693e95bd4aSAnders Persson mp = arg->soqa_exdata_mp; 21703e95bd4aSAnders Persson arg->soqa_exdata_mp = NULL; 217141174437SAnders Persson ASSERT(mp != NULL); 217241174437SAnders Persson DB_TYPE(mp) = M_PCSIG; 217341174437SAnders Persson *mp->b_wptr++ = (uchar_t)SIGURG; 21743e95bd4aSAnders Persson *tailmpp = mp; 21753e95bd4aSAnders Persson tailmpp = &mp->b_next; 217641174437SAnders Persson 217741174437SAnders Persson /* Send up the mark indicator */ 21783e95bd4aSAnders Persson mp = arg->soqa_urgmark_mp; 21793e95bd4aSAnders Persson arg->soqa_urgmark_mp = NULL; 218041174437SAnders Persson mp->b_flag = atmark ? MSGMARKNEXT : MSGNOTMARKNEXT; 21813e95bd4aSAnders Persson *tailmpp = mp; 21823e95bd4aSAnders Persson tailmpp = &mp->b_next; 218341174437SAnders Persson 218441174437SAnders Persson so->so_oobmark = 0; 218541174437SAnders Persson } 218641174437SAnders Persson } 218741174437SAnders Persson ASSERT(so->so_oobmark == 0); 21880f1702c5SYu Xiangning ASSERT(so->so_rcv_queued == 0); 21893e95bd4aSAnders Persson 21903e95bd4aSAnders Persson return (retmp); 21910f1702c5SYu Xiangning } 21920f1702c5SYu Xiangning 219341174437SAnders Persson #ifdef DEBUG 219441174437SAnders Persson /* 219541174437SAnders Persson * Do an integrity check of the sonode. This should be done if a 219641174437SAnders Persson * fallback fails after sonode has initially been converted to use 219741174437SAnders Persson * TPI and subsequently have to be reverted. 219841174437SAnders Persson * 219941174437SAnders Persson * Failure to pass the integrity check will panic the system. 220041174437SAnders Persson */ 220141174437SAnders Persson void 220241174437SAnders Persson so_integrity_check(struct sonode *cur, struct sonode *orig) 220341174437SAnders Persson { 220441174437SAnders Persson VERIFY(cur->so_vnode == orig->so_vnode); 220541174437SAnders Persson VERIFY(cur->so_ops == orig->so_ops); 220641174437SAnders Persson /* 220741174437SAnders Persson * For so_state we can only VERIFY the state flags in CHECK_STATE. 220841174437SAnders Persson * The other state flags might be affected by a notification from the 220941174437SAnders Persson * protocol. 221041174437SAnders Persson */ 221141174437SAnders Persson #define CHECK_STATE (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_NDELAY|SS_NONBLOCK| \ 221241174437SAnders Persson SS_ASYNC|SS_ACCEPTCONN|SS_SAVEDEOR|SS_RCVATMARK|SS_OOBPEND| \ 221341174437SAnders Persson SS_HAVEOOBDATA|SS_HADOOBDATA|SS_SENTLASTREADSIG|SS_SENTLASTWRITESIG) 221441174437SAnders Persson VERIFY((cur->so_state & (orig->so_state & CHECK_STATE)) == 221541174437SAnders Persson (orig->so_state & CHECK_STATE)); 221641174437SAnders Persson VERIFY(cur->so_mode == orig->so_mode); 221741174437SAnders Persson VERIFY(cur->so_flag == orig->so_flag); 221841174437SAnders Persson VERIFY(cur->so_count == orig->so_count); 221941174437SAnders Persson /* Cannot VERIFY so_proto_connid; proto can update it */ 222041174437SAnders Persson VERIFY(cur->so_sockparams == orig->so_sockparams); 222141174437SAnders Persson /* an error might have been recorded, but it can not be lost */ 222241174437SAnders Persson VERIFY(cur->so_error != 0 || orig->so_error == 0); 222341174437SAnders Persson VERIFY(cur->so_family == orig->so_family); 222441174437SAnders Persson VERIFY(cur->so_type == orig->so_type); 222541174437SAnders Persson VERIFY(cur->so_protocol == orig->so_protocol); 222641174437SAnders Persson VERIFY(cur->so_version == orig->so_version); 222741174437SAnders Persson /* New conns might have arrived, but none should have been lost */ 222841174437SAnders Persson VERIFY(cur->so_acceptq_len >= orig->so_acceptq_len); 22293e95bd4aSAnders Persson VERIFY(list_head(&cur->so_acceptq_list) == 22303e95bd4aSAnders Persson list_head(&orig->so_acceptq_list)); 223141174437SAnders Persson VERIFY(cur->so_backlog == orig->so_backlog); 223241174437SAnders Persson /* New OOB migth have arrived, but mark should not have been lost */ 223341174437SAnders Persson VERIFY(cur->so_oobmark >= orig->so_oobmark); 223441174437SAnders Persson /* Cannot VERIFY so_oobmsg; the proto might have sent up a new one */ 223541174437SAnders Persson VERIFY(cur->so_pgrp == orig->so_pgrp); 223641174437SAnders Persson VERIFY(cur->so_peercred == orig->so_peercred); 223741174437SAnders Persson VERIFY(cur->so_cpid == orig->so_cpid); 223841174437SAnders Persson VERIFY(cur->so_zoneid == orig->so_zoneid); 223941174437SAnders Persson /* New data migth have arrived, but none should have been lost */ 224041174437SAnders Persson VERIFY(cur->so_rcv_queued >= orig->so_rcv_queued); 224141174437SAnders Persson VERIFY(cur->so_rcv_q_head == orig->so_rcv_q_head); 224241174437SAnders Persson VERIFY(cur->so_rcv_head == orig->so_rcv_head); 224341174437SAnders Persson VERIFY(cur->so_proto_handle == orig->so_proto_handle); 224441174437SAnders Persson VERIFY(cur->so_downcalls == orig->so_downcalls); 224541174437SAnders Persson /* Cannot VERIFY so_proto_props; they can be updated by proto */ 224641174437SAnders Persson } 224741174437SAnders Persson #endif 224841174437SAnders Persson 22490f1702c5SYu Xiangning /* 22500f1702c5SYu Xiangning * so_tpi_fallback() 22510f1702c5SYu Xiangning * 225241174437SAnders Persson * This is the fallback initation routine; things start here. 22530f1702c5SYu Xiangning * 22540f1702c5SYu Xiangning * Basic strategy: 22550f1702c5SYu Xiangning * o Block new socket operations from coming in 22560f1702c5SYu Xiangning * o Allocate/initate info needed by TPI 22570f1702c5SYu Xiangning * o Quiesce the connection, at which point we sync 22580f1702c5SYu Xiangning * state and move data 22590f1702c5SYu Xiangning * o Change operations (sonodeops) associated with the socket 22600f1702c5SYu Xiangning * o Unblock threads waiting for the fallback to finish 22610f1702c5SYu Xiangning */ 22620f1702c5SYu Xiangning int 22630f1702c5SYu Xiangning so_tpi_fallback(struct sonode *so, struct cred *cr) 22640f1702c5SYu Xiangning { 22650f1702c5SYu Xiangning int error; 22660f1702c5SYu Xiangning queue_t *q; 22670f1702c5SYu Xiangning struct sockparams *sp; 226841174437SAnders Persson struct sockparams *newsp = NULL; 22690f1702c5SYu Xiangning so_proto_fallback_func_t fbfunc; 22703e95bd4aSAnders Persson const char *devpath; 22710f1702c5SYu Xiangning boolean_t direct; 227241174437SAnders Persson struct sonode *nso; 22733e95bd4aSAnders Persson sock_quiesce_arg_t arg = { NULL, NULL }; 227441174437SAnders Persson #ifdef DEBUG 227541174437SAnders Persson struct sonode origso; 227641174437SAnders Persson #endif 22770f1702c5SYu Xiangning error = 0; 22780f1702c5SYu Xiangning sp = so->so_sockparams; 22790f1702c5SYu Xiangning fbfunc = sp->sp_smod_info->smod_proto_fallback_func; 22800f1702c5SYu Xiangning 22810f1702c5SYu Xiangning /* 22823e95bd4aSAnders Persson * Cannot fallback if the socket has active filters 22830f1702c5SYu Xiangning */ 22843e95bd4aSAnders Persson if (so->so_filter_active > 0) 22853e95bd4aSAnders Persson return (EINVAL); 22863e95bd4aSAnders Persson 22873e95bd4aSAnders Persson switch (so->so_family) { 22883e95bd4aSAnders Persson case AF_INET: 22893e95bd4aSAnders Persson devpath = sp->sp_smod_info->smod_fallback_devpath_v4; 22903e95bd4aSAnders Persson break; 22913e95bd4aSAnders Persson case AF_INET6: 22923e95bd4aSAnders Persson devpath = sp->sp_smod_info->smod_fallback_devpath_v6; 22933e95bd4aSAnders Persson break; 22943e95bd4aSAnders Persson default: 22953e95bd4aSAnders Persson return (EINVAL); 22963e95bd4aSAnders Persson } 22973e95bd4aSAnders Persson 22983e95bd4aSAnders Persson /* 22993e95bd4aSAnders Persson * Fallback can only happen if the socket module has a TPI device 23003e95bd4aSAnders Persson * and fallback function. 23013e95bd4aSAnders Persson */ 23023e95bd4aSAnders Persson if (devpath == NULL || fbfunc == NULL) 23030f1702c5SYu Xiangning return (EINVAL); 23040f1702c5SYu Xiangning 23050f1702c5SYu Xiangning /* 23060f1702c5SYu Xiangning * Initiate fallback; upon success we know that no new requests 23070f1702c5SYu Xiangning * will come in from the user. 23080f1702c5SYu Xiangning */ 23090f1702c5SYu Xiangning if (!so_start_fallback(so)) 23100f1702c5SYu Xiangning return (EAGAIN); 231141174437SAnders Persson #ifdef DEBUG 231241174437SAnders Persson /* 231341174437SAnders Persson * Make a copy of the sonode in case we need to make an integrity 231441174437SAnders Persson * check later on. 231541174437SAnders Persson */ 231641174437SAnders Persson bcopy(so, &origso, sizeof (*so)); 231741174437SAnders Persson #endif 23180f1702c5SYu Xiangning 23197d64f41bSAnders Persson sp->sp_stats.sps_nfallback.value.ui64++; 23207d64f41bSAnders Persson 23210f1702c5SYu Xiangning newsp = sockparams_hold_ephemeral_bydev(so->so_family, so->so_type, 23223e95bd4aSAnders Persson so->so_protocol, devpath, KM_SLEEP, &error); 23230f1702c5SYu Xiangning if (error != 0) 23240f1702c5SYu Xiangning goto out; 23250f1702c5SYu Xiangning 23260f1702c5SYu Xiangning if (so->so_direct != NULL) { 23270f1702c5SYu Xiangning sodirect_t *sodp = so->so_direct; 2328bbc000e5SAnders Persson mutex_enter(&so->so_lock); 23290f1702c5SYu Xiangning 2330bbc000e5SAnders Persson so->so_direct->sod_enabled = B_FALSE; 23310f1702c5SYu Xiangning so->so_state &= ~SS_SODIRECT; 23320f1702c5SYu Xiangning ASSERT(sodp->sod_uioafh == NULL); 2333bbc000e5SAnders Persson mutex_exit(&so->so_lock); 23340f1702c5SYu Xiangning } 23350f1702c5SYu Xiangning 23360f1702c5SYu Xiangning /* Turn sonode into a TPI socket */ 233741174437SAnders Persson error = sotpi_convert_sonode(so, newsp, &direct, &q, cr); 233841174437SAnders Persson if (error != 0) 23390f1702c5SYu Xiangning goto out; 23403e95bd4aSAnders Persson /* 23413e95bd4aSAnders Persson * When it comes to urgent data we have two cases to deal with; 23423e95bd4aSAnders Persson * (1) The oob byte has already arrived, or (2) the protocol has 23433e95bd4aSAnders Persson * notified that oob data is pending, but it has not yet arrived. 23443e95bd4aSAnders Persson * 23453e95bd4aSAnders Persson * For (1) all we need to do is send a T_EXDATA_IND to indicate were 23463e95bd4aSAnders Persson * in the byte stream the oob byte is. For (2) we have to send a 23473e95bd4aSAnders Persson * SIGURG (M_PCSIG), followed by a zero-length mblk indicating whether 23483e95bd4aSAnders Persson * the oob byte will be the next byte from the protocol. 23493e95bd4aSAnders Persson * 23503e95bd4aSAnders Persson * So in the worst case we need two mblks, one for the signal, another 23513e95bd4aSAnders Persson * for mark indication. In that case we use the exdata_mp for the sig. 23523e95bd4aSAnders Persson */ 23533e95bd4aSAnders Persson arg.soqa_exdata_mp = allocb_wait(sizeof (struct T_exdata_ind), 23543e95bd4aSAnders Persson BPRI_MED, STR_NOSIG, NULL); 23553e95bd4aSAnders Persson arg.soqa_urgmark_mp = allocb_wait(0, BPRI_MED, STR_NOSIG, NULL); 23560f1702c5SYu Xiangning 23570f1702c5SYu Xiangning /* 23580f1702c5SYu Xiangning * Now tell the protocol to start using TPI. so_quiesced_cb be 23590f1702c5SYu Xiangning * called once it's safe to synchronize state. 23600f1702c5SYu Xiangning */ 23610f1702c5SYu Xiangning DTRACE_PROBE1(proto__fallback__begin, struct sonode *, so); 23623e95bd4aSAnders Persson error = (*fbfunc)(so->so_proto_handle, q, direct, so_quiesced_cb, 23633e95bd4aSAnders Persson &arg); 23640f1702c5SYu Xiangning DTRACE_PROBE1(proto__fallback__end, struct sonode *, so); 23650f1702c5SYu Xiangning 236641174437SAnders Persson if (error != 0) { 236741174437SAnders Persson /* protocol was unable to do a fallback, revert the sonode */ 236841174437SAnders Persson sotpi_revert_sonode(so, cr); 236941174437SAnders Persson goto out; 237041174437SAnders Persson } 237141174437SAnders Persson 23720f1702c5SYu Xiangning /* 237341174437SAnders Persson * Walk the accept queue and notify the proto that they should 237441174437SAnders Persson * fall back to TPI. The protocol will send up the T_CONN_IND. 237541174437SAnders Persson */ 23763e95bd4aSAnders Persson nso = list_head(&so->so_acceptq_list); 237741174437SAnders Persson while (nso != NULL) { 237841174437SAnders Persson int rval; 23793e95bd4aSAnders Persson struct sonode *next; 23803e95bd4aSAnders Persson 23813e95bd4aSAnders Persson if (arg.soqa_exdata_mp == NULL) { 23823e95bd4aSAnders Persson arg.soqa_exdata_mp = 23833e95bd4aSAnders Persson allocb_wait(sizeof (struct T_exdata_ind), 23843e95bd4aSAnders Persson BPRI_MED, STR_NOSIG, NULL); 23853e95bd4aSAnders Persson } 23863e95bd4aSAnders Persson if (arg.soqa_urgmark_mp == NULL) { 23873e95bd4aSAnders Persson arg.soqa_urgmark_mp = allocb_wait(0, BPRI_MED, 23883e95bd4aSAnders Persson STR_NOSIG, NULL); 23893e95bd4aSAnders Persson } 239041174437SAnders Persson 239141174437SAnders Persson DTRACE_PROBE1(proto__fallback__begin, struct sonode *, nso); 23923e95bd4aSAnders Persson rval = (*fbfunc)(nso->so_proto_handle, NULL, direct, 23933e95bd4aSAnders Persson so_quiesced_cb, &arg); 239441174437SAnders Persson DTRACE_PROBE1(proto__fallback__end, struct sonode *, nso); 239541174437SAnders Persson if (rval != 0) { 23963e95bd4aSAnders Persson /* Abort the connection */ 239741174437SAnders Persson zcmn_err(getzoneid(), CE_WARN, 239841174437SAnders Persson "Failed to convert socket in accept queue to TPI. " 239941174437SAnders Persson "Pid = %d\n", curproc->p_pid); 24003e95bd4aSAnders Persson next = list_next(&so->so_acceptq_list, nso); 24013e95bd4aSAnders Persson list_remove(&so->so_acceptq_list, nso); 24023e95bd4aSAnders Persson so->so_acceptq_len--; 24033e95bd4aSAnders Persson 24043e95bd4aSAnders Persson (void) socket_close(nso, 0, CRED()); 24053e95bd4aSAnders Persson socket_destroy(nso); 24063e95bd4aSAnders Persson nso = next; 24073e95bd4aSAnders Persson } else { 24083e95bd4aSAnders Persson nso = list_next(&so->so_acceptq_list, nso); 240941174437SAnders Persson } 241041174437SAnders Persson } 241141174437SAnders Persson 241241174437SAnders Persson /* 241341174437SAnders Persson * Now flush the acceptq, this will destroy all sockets. They will 241441174437SAnders Persson * be recreated in sotpi_accept(). 24150f1702c5SYu Xiangning */ 24162320a8c1SAnders Persson so_acceptq_flush(so, B_FALSE); 24170f1702c5SYu Xiangning 24180f1702c5SYu Xiangning mutex_enter(&so->so_lock); 24190f1702c5SYu Xiangning so->so_state |= SS_FALLBACK_COMP; 24200f1702c5SYu Xiangning mutex_exit(&so->so_lock); 24210f1702c5SYu Xiangning 24220f1702c5SYu Xiangning /* 24230f1702c5SYu Xiangning * Swap the sonode ops. Socket opertations that come in once this 24240f1702c5SYu Xiangning * is done will proceed without blocking. 24250f1702c5SYu Xiangning */ 24260f1702c5SYu Xiangning so->so_ops = &sotpi_sonodeops; 24270f1702c5SYu Xiangning 24280f1702c5SYu Xiangning /* 24290f1702c5SYu Xiangning * Wake up any threads stuck in poll. This is needed since the poll 24300f1702c5SYu Xiangning * head changes when the fallback happens (moves from the sonode to 24310f1702c5SYu Xiangning * the STREAMS head). 24320f1702c5SYu Xiangning */ 24330f1702c5SYu Xiangning pollwakeup(&so->so_poll_list, POLLERR); 24343e95bd4aSAnders Persson 24353e95bd4aSAnders Persson /* 24363e95bd4aSAnders Persson * When this non-STREAM socket was created we placed an extra ref on 24373e95bd4aSAnders Persson * the associated vnode to support asynchronous close. Drop that ref 24383e95bd4aSAnders Persson * here. 24393e95bd4aSAnders Persson */ 24403e95bd4aSAnders Persson ASSERT(SOTOV(so)->v_count >= 2); 24413e95bd4aSAnders Persson VN_RELE(SOTOV(so)); 24420f1702c5SYu Xiangning out: 24430f1702c5SYu Xiangning so_end_fallback(so); 24440f1702c5SYu Xiangning 244541174437SAnders Persson if (error != 0) { 244641174437SAnders Persson #ifdef DEBUG 244741174437SAnders Persson so_integrity_check(so, &origso); 244841174437SAnders Persson #endif 244941174437SAnders Persson zcmn_err(getzoneid(), CE_WARN, 245041174437SAnders Persson "Failed to convert socket to TPI (err=%d). Pid = %d\n", 245141174437SAnders Persson error, curproc->p_pid); 245241174437SAnders Persson if (newsp != NULL) 245341174437SAnders Persson SOCKPARAMS_DEC_REF(newsp); 245441174437SAnders Persson } 24553e95bd4aSAnders Persson if (arg.soqa_exdata_mp != NULL) 24563e95bd4aSAnders Persson freemsg(arg.soqa_exdata_mp); 24573e95bd4aSAnders Persson if (arg.soqa_urgmark_mp != NULL) 24583e95bd4aSAnders Persson freemsg(arg.soqa_urgmark_mp); 245941174437SAnders Persson 24600f1702c5SYu Xiangning return (error); 24610f1702c5SYu Xiangning } 2462