10f1702c5SYu Xiangning /* 20f1702c5SYu Xiangning * CDDL HEADER START 30f1702c5SYu Xiangning * 40f1702c5SYu Xiangning * The contents of this file are subject to the terms of the 50f1702c5SYu Xiangning * Common Development and Distribution License (the "License"). 60f1702c5SYu Xiangning * You may not use this file except in compliance with the License. 70f1702c5SYu Xiangning * 80f1702c5SYu Xiangning * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90f1702c5SYu Xiangning * or http://www.opensolaris.org/os/licensing. 100f1702c5SYu Xiangning * See the License for the specific language governing permissions 110f1702c5SYu Xiangning * and limitations under the License. 120f1702c5SYu Xiangning * 130f1702c5SYu Xiangning * When distributing Covered Code, include this CDDL HEADER in each 140f1702c5SYu Xiangning * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150f1702c5SYu Xiangning * If applicable, add the following below this CDDL HEADER, with the 160f1702c5SYu Xiangning * fields enclosed by brackets "[]" replaced with your own identifying 170f1702c5SYu Xiangning * information: Portions Copyright [yyyy] [name of copyright owner] 180f1702c5SYu Xiangning * 190f1702c5SYu Xiangning * CDDL HEADER END 200f1702c5SYu Xiangning */ 210f1702c5SYu Xiangning 220f1702c5SYu Xiangning /* 233e95bd4aSAnders Persson * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 240f1702c5SYu Xiangning */ 25d690b62cSDan McDonald /* 26d690b62cSDan McDonald * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. 27d690b62cSDan McDonald */ 280f1702c5SYu Xiangning 290f1702c5SYu Xiangning #include <sys/types.h> 300f1702c5SYu Xiangning #include <sys/param.h> 310f1702c5SYu Xiangning #include <sys/signal.h> 320f1702c5SYu Xiangning #include <sys/cmn_err.h> 330f1702c5SYu Xiangning 340f1702c5SYu Xiangning #include <sys/stropts.h> 350f1702c5SYu Xiangning #include <sys/socket.h> 360f1702c5SYu Xiangning #include <sys/socketvar.h> 370f1702c5SYu Xiangning #include <sys/sockio.h> 380f1702c5SYu Xiangning #include <sys/strsubr.h> 390f1702c5SYu Xiangning #include <sys/strsun.h> 400f1702c5SYu Xiangning #include <sys/atomic.h> 4141174437SAnders Persson #include <sys/tihdr.h> 420f1702c5SYu Xiangning 430f1702c5SYu Xiangning #include <fs/sockfs/sockcommon.h> 443e95bd4aSAnders Persson #include <fs/sockfs/sockfilter_impl.h> 450f1702c5SYu Xiangning #include <fs/sockfs/socktpi.h> 46bbc000e5SAnders Persson #include <fs/sockfs/sodirect.h> 470f1702c5SYu Xiangning #include <sys/ddi.h> 480f1702c5SYu Xiangning #include <inet/ip.h> 490f1702c5SYu Xiangning #include <sys/time.h> 500f1702c5SYu Xiangning #include <sys/cmn_err.h> 510f1702c5SYu Xiangning 520f1702c5SYu Xiangning #ifdef SOCK_TEST 530f1702c5SYu Xiangning extern int do_useracc; 540f1702c5SYu Xiangning extern clock_t sock_test_timelimit; 550f1702c5SYu Xiangning #endif /* SOCK_TEST */ 560f1702c5SYu Xiangning 570f1702c5SYu Xiangning #define MBLK_PULL_LEN 64 580f1702c5SYu Xiangning uint32_t so_mblk_pull_len = MBLK_PULL_LEN; 590f1702c5SYu Xiangning 600f1702c5SYu Xiangning #ifdef DEBUG 610f1702c5SYu Xiangning boolean_t so_debug_length = B_FALSE; 620f1702c5SYu Xiangning static boolean_t so_check_length(sonode_t *so); 630f1702c5SYu Xiangning #endif 640f1702c5SYu Xiangning 650f1702c5SYu Xiangning static int 660f1702c5SYu Xiangning so_acceptq_dequeue_locked(struct sonode *so, boolean_t dontblock, 670f1702c5SYu Xiangning struct sonode **nsop) 680f1702c5SYu Xiangning { 690f1702c5SYu Xiangning struct sonode *nso = NULL; 700f1702c5SYu Xiangning 710f1702c5SYu Xiangning *nsop = NULL; 720f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_acceptq_lock)); 733e95bd4aSAnders Persson while ((nso = list_remove_head(&so->so_acceptq_list)) == NULL) { 740f1702c5SYu Xiangning /* 750f1702c5SYu Xiangning * No need to check so_error here, because it is not 760f1702c5SYu Xiangning * possible for a listening socket to be reset or otherwise 770f1702c5SYu Xiangning * disconnected. 780f1702c5SYu Xiangning * 790f1702c5SYu Xiangning * So now we just need check if it's ok to wait. 800f1702c5SYu Xiangning */ 810f1702c5SYu Xiangning if (dontblock) 820f1702c5SYu Xiangning return (EWOULDBLOCK); 830f1702c5SYu Xiangning if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING)) 840f1702c5SYu Xiangning return (EINTR); 850f1702c5SYu Xiangning 860f1702c5SYu Xiangning if (cv_wait_sig_swap(&so->so_acceptq_cv, 870f1702c5SYu Xiangning &so->so_acceptq_lock) == 0) 880f1702c5SYu Xiangning return (EINTR); 890f1702c5SYu Xiangning } 900f1702c5SYu Xiangning 910f1702c5SYu Xiangning ASSERT(nso != NULL); 920f1702c5SYu Xiangning ASSERT(so->so_acceptq_len > 0); 933e95bd4aSAnders Persson so->so_acceptq_len--; 943e95bd4aSAnders Persson nso->so_listener = NULL; 950f1702c5SYu Xiangning 960f1702c5SYu Xiangning *nsop = nso; 970f1702c5SYu Xiangning 980f1702c5SYu Xiangning return (0); 990f1702c5SYu Xiangning } 1000f1702c5SYu Xiangning 1010f1702c5SYu Xiangning /* 1020f1702c5SYu Xiangning * int so_acceptq_dequeue(struct sonode *, boolean_t, struct sonode **) 1030f1702c5SYu Xiangning * 1040f1702c5SYu Xiangning * Pulls a connection off of the accept queue. 1050f1702c5SYu Xiangning * 1060f1702c5SYu Xiangning * Arguments: 1070f1702c5SYu Xiangning * so - listening socket 1080f1702c5SYu Xiangning * dontblock - indicate whether it's ok to sleep if there are no 1090f1702c5SYu Xiangning * connections on the queue 1100f1702c5SYu Xiangning * nsop - Value-return argument 1110f1702c5SYu Xiangning * 1120f1702c5SYu Xiangning * Return values: 1130f1702c5SYu Xiangning * 0 when a connection is successfully dequeued, in which case nsop 1140f1702c5SYu Xiangning * is set to point to the new connection. Upon failure a non-zero 1150f1702c5SYu Xiangning * value is returned, and the value of nsop is set to NULL. 1160f1702c5SYu Xiangning * 1170f1702c5SYu Xiangning * Note: 1180f1702c5SYu Xiangning * so_acceptq_dequeue() may return prematurly if the socket is falling 1190f1702c5SYu Xiangning * back to TPI. 1200f1702c5SYu Xiangning */ 1210f1702c5SYu Xiangning int 1220f1702c5SYu Xiangning so_acceptq_dequeue(struct sonode *so, boolean_t dontblock, 1230f1702c5SYu Xiangning struct sonode **nsop) 1240f1702c5SYu Xiangning { 1250f1702c5SYu Xiangning int error; 1260f1702c5SYu Xiangning 1270f1702c5SYu Xiangning mutex_enter(&so->so_acceptq_lock); 1280f1702c5SYu Xiangning error = so_acceptq_dequeue_locked(so, dontblock, nsop); 1290f1702c5SYu Xiangning mutex_exit(&so->so_acceptq_lock); 1300f1702c5SYu Xiangning 1310f1702c5SYu Xiangning return (error); 1320f1702c5SYu Xiangning } 1330f1702c5SYu Xiangning 1343e95bd4aSAnders Persson static void 1353e95bd4aSAnders Persson so_acceptq_flush_impl(struct sonode *so, list_t *list, boolean_t doclose) 1363e95bd4aSAnders Persson { 1373e95bd4aSAnders Persson struct sonode *nso; 1383e95bd4aSAnders Persson 1393e95bd4aSAnders Persson while ((nso = list_remove_head(list)) != NULL) { 1403e95bd4aSAnders Persson nso->so_listener = NULL; 1413e95bd4aSAnders Persson if (doclose) { 1423e95bd4aSAnders Persson (void) socket_close(nso, 0, CRED()); 1433e95bd4aSAnders Persson } else { 1440f1702c5SYu Xiangning /* 1453e95bd4aSAnders Persson * Only used for fallback - not possible when filters 1463e95bd4aSAnders Persson * are present. 1473e95bd4aSAnders Persson */ 1483e95bd4aSAnders Persson ASSERT(so->so_filter_active == 0); 1493e95bd4aSAnders Persson /* 1503e95bd4aSAnders Persson * Since the socket is on the accept queue, there can 1513e95bd4aSAnders Persson * only be one reference. We drop the reference and 1523e95bd4aSAnders Persson * just blow off the socket. 1533e95bd4aSAnders Persson */ 1543e95bd4aSAnders Persson ASSERT(nso->so_count == 1); 1553e95bd4aSAnders Persson nso->so_count--; 1563e95bd4aSAnders Persson /* drop the proto ref */ 1573e95bd4aSAnders Persson VN_RELE(SOTOV(nso)); 1583e95bd4aSAnders Persson } 1593e95bd4aSAnders Persson socket_destroy(nso); 1603e95bd4aSAnders Persson } 1613e95bd4aSAnders Persson } 1623e95bd4aSAnders Persson /* 1633e95bd4aSAnders Persson * void so_acceptq_flush(struct sonode *so) 1640f1702c5SYu Xiangning * 1650f1702c5SYu Xiangning * Removes all pending connections from a listening socket, and 1660f1702c5SYu Xiangning * frees the associated resources. 1670f1702c5SYu Xiangning * 1680f1702c5SYu Xiangning * Arguments 1690f1702c5SYu Xiangning * so - listening socket 1702320a8c1SAnders Persson * doclose - make a close downcall for each socket on the accept queue 1710f1702c5SYu Xiangning * 1720f1702c5SYu Xiangning * Return values: 1730f1702c5SYu Xiangning * None. 1740f1702c5SYu Xiangning * 1750f1702c5SYu Xiangning * Note: 1760f1702c5SYu Xiangning * The caller has to ensure that no calls to so_acceptq_enqueue() or 1770f1702c5SYu Xiangning * so_acceptq_dequeue() occur while the accept queue is being flushed. 1780f1702c5SYu Xiangning * So either the socket needs to be in a state where no operations 1790f1702c5SYu Xiangning * would come in, or so_lock needs to be obtained. 1800f1702c5SYu Xiangning */ 1810f1702c5SYu Xiangning void 1822320a8c1SAnders Persson so_acceptq_flush(struct sonode *so, boolean_t doclose) 1830f1702c5SYu Xiangning { 1843e95bd4aSAnders Persson so_acceptq_flush_impl(so, &so->so_acceptq_list, doclose); 1853e95bd4aSAnders Persson so_acceptq_flush_impl(so, &so->so_acceptq_defer, doclose); 1860f1702c5SYu Xiangning 1870f1702c5SYu Xiangning so->so_acceptq_len = 0; 1880f1702c5SYu Xiangning } 1890f1702c5SYu Xiangning 1900f1702c5SYu Xiangning int 1910f1702c5SYu Xiangning so_wait_connected_locked(struct sonode *so, boolean_t nonblock, 1920f1702c5SYu Xiangning sock_connid_t id) 1930f1702c5SYu Xiangning { 1940f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 1950f1702c5SYu Xiangning 1960f1702c5SYu Xiangning /* 1970f1702c5SYu Xiangning * The protocol has notified us that a connection attempt is being 1980f1702c5SYu Xiangning * made, so before we wait for a notification to arrive we must 1990f1702c5SYu Xiangning * clear out any errors associated with earlier connection attempts. 2000f1702c5SYu Xiangning */ 2010f1702c5SYu Xiangning if (so->so_error != 0 && SOCK_CONNID_LT(so->so_proto_connid, id)) 2020f1702c5SYu Xiangning so->so_error = 0; 2030f1702c5SYu Xiangning 2040f1702c5SYu Xiangning while (SOCK_CONNID_LT(so->so_proto_connid, id)) { 2050f1702c5SYu Xiangning if (nonblock) 2060f1702c5SYu Xiangning return (EINPROGRESS); 2070f1702c5SYu Xiangning 2080f1702c5SYu Xiangning if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING)) 2090f1702c5SYu Xiangning return (EINTR); 2100f1702c5SYu Xiangning 2110f1702c5SYu Xiangning if (cv_wait_sig_swap(&so->so_state_cv, &so->so_lock) == 0) 2120f1702c5SYu Xiangning return (EINTR); 2130f1702c5SYu Xiangning } 2140f1702c5SYu Xiangning 2150f1702c5SYu Xiangning if (so->so_error != 0) 2160f1702c5SYu Xiangning return (sogeterr(so, B_TRUE)); 2170f1702c5SYu Xiangning /* 2180f1702c5SYu Xiangning * Under normal circumstances, so_error should contain an error 2190f1702c5SYu Xiangning * in case the connect failed. However, it is possible for another 2200f1702c5SYu Xiangning * thread to come in a consume the error, so generate a sensible 2210f1702c5SYu Xiangning * error in that case. 2220f1702c5SYu Xiangning */ 2230f1702c5SYu Xiangning if ((so->so_state & SS_ISCONNECTED) == 0) 2240f1702c5SYu Xiangning return (ECONNREFUSED); 2250f1702c5SYu Xiangning 2260f1702c5SYu Xiangning return (0); 2270f1702c5SYu Xiangning } 2280f1702c5SYu Xiangning 2290f1702c5SYu Xiangning /* 2300f1702c5SYu Xiangning * int so_wait_connected(struct sonode *so, boolean_t nonblock, 2310f1702c5SYu Xiangning * sock_connid_t id) 2320f1702c5SYu Xiangning * 2330f1702c5SYu Xiangning * Wait until the socket is connected or an error has occured. 2340f1702c5SYu Xiangning * 2350f1702c5SYu Xiangning * Arguments: 2360f1702c5SYu Xiangning * so - socket 2370f1702c5SYu Xiangning * nonblock - indicate whether it's ok to sleep if the connection has 2380f1702c5SYu Xiangning * not yet been established 2390f1702c5SYu Xiangning * gen - generation number that was returned by the protocol 2400f1702c5SYu Xiangning * when the operation was started 2410f1702c5SYu Xiangning * 2420f1702c5SYu Xiangning * Returns: 2430f1702c5SYu Xiangning * 0 if the connection attempt was successful, or an error indicating why 2440f1702c5SYu Xiangning * the connection attempt failed. 2450f1702c5SYu Xiangning */ 2460f1702c5SYu Xiangning int 2470f1702c5SYu Xiangning so_wait_connected(struct sonode *so, boolean_t nonblock, sock_connid_t id) 2480f1702c5SYu Xiangning { 2490f1702c5SYu Xiangning int error; 2500f1702c5SYu Xiangning 2510f1702c5SYu Xiangning mutex_enter(&so->so_lock); 2520f1702c5SYu Xiangning error = so_wait_connected_locked(so, nonblock, id); 2530f1702c5SYu Xiangning mutex_exit(&so->so_lock); 2540f1702c5SYu Xiangning 2550f1702c5SYu Xiangning return (error); 2560f1702c5SYu Xiangning } 2570f1702c5SYu Xiangning 2580f1702c5SYu Xiangning int 2590f1702c5SYu Xiangning so_snd_wait_qnotfull_locked(struct sonode *so, boolean_t dontblock) 2600f1702c5SYu Xiangning { 2610f1702c5SYu Xiangning int error; 2620f1702c5SYu Xiangning 2630f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 2643e95bd4aSAnders Persson while (SO_SND_FLOWCTRLD(so)) { 2650f1702c5SYu Xiangning if (so->so_state & SS_CANTSENDMORE) 2660f1702c5SYu Xiangning return (EPIPE); 2670f1702c5SYu Xiangning if (dontblock) 2680f1702c5SYu Xiangning return (EWOULDBLOCK); 2690f1702c5SYu Xiangning 2700f1702c5SYu Xiangning if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING)) 2710f1702c5SYu Xiangning return (EINTR); 2720f1702c5SYu Xiangning 2730f1702c5SYu Xiangning if (so->so_sndtimeo == 0) { 2740f1702c5SYu Xiangning /* 2750f1702c5SYu Xiangning * Zero means disable timeout. 2760f1702c5SYu Xiangning */ 2770f1702c5SYu Xiangning error = cv_wait_sig(&so->so_snd_cv, &so->so_lock); 2780f1702c5SYu Xiangning } else { 279d3d50737SRafael Vanoni error = cv_reltimedwait_sig(&so->so_snd_cv, 280d3d50737SRafael Vanoni &so->so_lock, so->so_sndtimeo, TR_CLOCK_TICK); 2810f1702c5SYu Xiangning } 2820f1702c5SYu Xiangning if (error == 0) 2830f1702c5SYu Xiangning return (EINTR); 2840f1702c5SYu Xiangning else if (error == -1) 28534dfe683Sshenjian return (EAGAIN); 2860f1702c5SYu Xiangning } 2870f1702c5SYu Xiangning return (0); 2880f1702c5SYu Xiangning } 2890f1702c5SYu Xiangning 2900f1702c5SYu Xiangning /* 2910f1702c5SYu Xiangning * int so_wait_sendbuf(struct sonode *so, boolean_t dontblock) 2920f1702c5SYu Xiangning * 2930f1702c5SYu Xiangning * Wait for the transport to notify us about send buffers becoming 2940f1702c5SYu Xiangning * available. 2950f1702c5SYu Xiangning */ 2960f1702c5SYu Xiangning int 2970f1702c5SYu Xiangning so_snd_wait_qnotfull(struct sonode *so, boolean_t dontblock) 2980f1702c5SYu Xiangning { 2990f1702c5SYu Xiangning int error = 0; 3000f1702c5SYu Xiangning 3010f1702c5SYu Xiangning mutex_enter(&so->so_lock); 3020f1702c5SYu Xiangning so->so_snd_wakeup = B_TRUE; 3030f1702c5SYu Xiangning error = so_snd_wait_qnotfull_locked(so, dontblock); 3040f1702c5SYu Xiangning so->so_snd_wakeup = B_FALSE; 3050f1702c5SYu Xiangning mutex_exit(&so->so_lock); 3060f1702c5SYu Xiangning 3070f1702c5SYu Xiangning return (error); 3080f1702c5SYu Xiangning } 3090f1702c5SYu Xiangning 3100f1702c5SYu Xiangning void 3110f1702c5SYu Xiangning so_snd_qfull(struct sonode *so) 3120f1702c5SYu Xiangning { 3130f1702c5SYu Xiangning mutex_enter(&so->so_lock); 3140f1702c5SYu Xiangning so->so_snd_qfull = B_TRUE; 3150f1702c5SYu Xiangning mutex_exit(&so->so_lock); 3160f1702c5SYu Xiangning } 3170f1702c5SYu Xiangning 3180f1702c5SYu Xiangning void 3190f1702c5SYu Xiangning so_snd_qnotfull(struct sonode *so) 3200f1702c5SYu Xiangning { 3210f1702c5SYu Xiangning mutex_enter(&so->so_lock); 3220f1702c5SYu Xiangning so->so_snd_qfull = B_FALSE; 3230f1702c5SYu Xiangning /* wake up everyone waiting for buffers */ 3240f1702c5SYu Xiangning cv_broadcast(&so->so_snd_cv); 3250f1702c5SYu Xiangning mutex_exit(&so->so_lock); 3260f1702c5SYu Xiangning } 3270f1702c5SYu Xiangning 3280f1702c5SYu Xiangning /* 3290f1702c5SYu Xiangning * Change the process/process group to which SIGIO is sent. 3300f1702c5SYu Xiangning */ 3310f1702c5SYu Xiangning int 3320f1702c5SYu Xiangning socket_chgpgrp(struct sonode *so, pid_t pid) 3330f1702c5SYu Xiangning { 3340f1702c5SYu Xiangning int error; 3350f1702c5SYu Xiangning 3360f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 3370f1702c5SYu Xiangning if (pid != 0) { 3380f1702c5SYu Xiangning /* 3390f1702c5SYu Xiangning * Permissions check by sending signal 0. 3400f1702c5SYu Xiangning * Note that when kill fails it does a 3410f1702c5SYu Xiangning * set_errno causing the system call to fail. 3420f1702c5SYu Xiangning */ 3430f1702c5SYu Xiangning error = kill(pid, 0); 3440f1702c5SYu Xiangning if (error != 0) { 3450f1702c5SYu Xiangning return (error); 3460f1702c5SYu Xiangning } 3470f1702c5SYu Xiangning } 3480f1702c5SYu Xiangning so->so_pgrp = pid; 3490f1702c5SYu Xiangning return (0); 3500f1702c5SYu Xiangning } 3510f1702c5SYu Xiangning 3520f1702c5SYu Xiangning 3530f1702c5SYu Xiangning /* 3540f1702c5SYu Xiangning * Generate a SIGIO, for 'writable' events include siginfo structure, 3550f1702c5SYu Xiangning * for read events just send the signal. 3560f1702c5SYu Xiangning */ 3570f1702c5SYu Xiangning /*ARGSUSED*/ 3580f1702c5SYu Xiangning static void 3590f1702c5SYu Xiangning socket_sigproc(proc_t *proc, int event) 3600f1702c5SYu Xiangning { 3610f1702c5SYu Xiangning k_siginfo_t info; 3620f1702c5SYu Xiangning 3630f1702c5SYu Xiangning ASSERT(event & (SOCKETSIG_WRITE | SOCKETSIG_READ | SOCKETSIG_URG)); 3640f1702c5SYu Xiangning 3650f1702c5SYu Xiangning if (event & SOCKETSIG_WRITE) { 3660f1702c5SYu Xiangning info.si_signo = SIGPOLL; 3670f1702c5SYu Xiangning info.si_code = POLL_OUT; 3680f1702c5SYu Xiangning info.si_errno = 0; 3690f1702c5SYu Xiangning info.si_fd = 0; 3700f1702c5SYu Xiangning info.si_band = 0; 3710f1702c5SYu Xiangning sigaddq(proc, NULL, &info, KM_NOSLEEP); 3720f1702c5SYu Xiangning } 3730f1702c5SYu Xiangning if (event & SOCKETSIG_READ) { 3740f1702c5SYu Xiangning sigtoproc(proc, NULL, SIGPOLL); 3750f1702c5SYu Xiangning } 3760f1702c5SYu Xiangning if (event & SOCKETSIG_URG) { 3770f1702c5SYu Xiangning sigtoproc(proc, NULL, SIGURG); 3780f1702c5SYu Xiangning } 3790f1702c5SYu Xiangning } 3800f1702c5SYu Xiangning 3810f1702c5SYu Xiangning void 3820f1702c5SYu Xiangning socket_sendsig(struct sonode *so, int event) 3830f1702c5SYu Xiangning { 3840f1702c5SYu Xiangning proc_t *proc; 3850f1702c5SYu Xiangning 3860f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 3870f1702c5SYu Xiangning 3880f1702c5SYu Xiangning if (so->so_pgrp == 0 || (!(so->so_state & SS_ASYNC) && 3890f1702c5SYu Xiangning event != SOCKETSIG_URG)) { 3900f1702c5SYu Xiangning return; 3910f1702c5SYu Xiangning } 3920f1702c5SYu Xiangning 3930f1702c5SYu Xiangning dprint(3, ("sending sig %d to %d\n", event, so->so_pgrp)); 3940f1702c5SYu Xiangning 3950f1702c5SYu Xiangning if (so->so_pgrp > 0) { 3960f1702c5SYu Xiangning /* 3970f1702c5SYu Xiangning * XXX This unfortunately still generates 3980f1702c5SYu Xiangning * a signal when a fd is closed but 3990f1702c5SYu Xiangning * the proc is active. 4000f1702c5SYu Xiangning */ 4010f1702c5SYu Xiangning mutex_enter(&pidlock); 402d690b62cSDan McDonald /* 403d690b62cSDan McDonald * Even if the thread started in another zone, we're receiving 404d690b62cSDan McDonald * on behalf of this socket's zone, so find the proc using the 405d690b62cSDan McDonald * socket's zone ID. 406d690b62cSDan McDonald */ 407d690b62cSDan McDonald proc = prfind_zone(so->so_pgrp, so->so_zoneid); 4080f1702c5SYu Xiangning if (proc == NULL) { 4090f1702c5SYu Xiangning mutex_exit(&pidlock); 4100f1702c5SYu Xiangning return; 4110f1702c5SYu Xiangning } 4120f1702c5SYu Xiangning mutex_enter(&proc->p_lock); 4130f1702c5SYu Xiangning mutex_exit(&pidlock); 4140f1702c5SYu Xiangning socket_sigproc(proc, event); 4150f1702c5SYu Xiangning mutex_exit(&proc->p_lock); 4160f1702c5SYu Xiangning } else { 4170f1702c5SYu Xiangning /* 4180f1702c5SYu Xiangning * Send to process group. Hold pidlock across 4190f1702c5SYu Xiangning * calls to socket_sigproc(). 4200f1702c5SYu Xiangning */ 4210f1702c5SYu Xiangning pid_t pgrp = -so->so_pgrp; 4220f1702c5SYu Xiangning 4230f1702c5SYu Xiangning mutex_enter(&pidlock); 424d690b62cSDan McDonald /* 425d690b62cSDan McDonald * Even if the thread started in another zone, we're receiving 426d690b62cSDan McDonald * on behalf of this socket's zone, so find the pgrp using the 427d690b62cSDan McDonald * socket's zone ID. 428d690b62cSDan McDonald */ 429d690b62cSDan McDonald proc = pgfind_zone(pgrp, so->so_zoneid); 4300f1702c5SYu Xiangning while (proc != NULL) { 4310f1702c5SYu Xiangning mutex_enter(&proc->p_lock); 4320f1702c5SYu Xiangning socket_sigproc(proc, event); 4330f1702c5SYu Xiangning mutex_exit(&proc->p_lock); 4340f1702c5SYu Xiangning proc = proc->p_pglink; 4350f1702c5SYu Xiangning } 4360f1702c5SYu Xiangning mutex_exit(&pidlock); 4370f1702c5SYu Xiangning } 4380f1702c5SYu Xiangning } 4390f1702c5SYu Xiangning 4400f1702c5SYu Xiangning #define MIN(a, b) ((a) < (b) ? (a) : (b)) 4410f1702c5SYu Xiangning /* Copy userdata into a new mblk_t */ 4420f1702c5SYu Xiangning mblk_t * 4430f1702c5SYu Xiangning socopyinuio(uio_t *uiop, ssize_t iosize, size_t wroff, ssize_t maxblk, 444bd670b35SErik Nordmark size_t tail_len, int *errorp) 4450f1702c5SYu Xiangning { 4460f1702c5SYu Xiangning mblk_t *head = NULL, **tail = &head; 4470f1702c5SYu Xiangning 4480f1702c5SYu Xiangning ASSERT(iosize == INFPSZ || iosize > 0); 4490f1702c5SYu Xiangning 4500f1702c5SYu Xiangning if (iosize == INFPSZ || iosize > uiop->uio_resid) 4510f1702c5SYu Xiangning iosize = uiop->uio_resid; 4520f1702c5SYu Xiangning 4530f1702c5SYu Xiangning if (maxblk == INFPSZ) 4540f1702c5SYu Xiangning maxblk = iosize; 4550f1702c5SYu Xiangning 4560f1702c5SYu Xiangning /* Nothing to do in these cases, so we're done */ 4570f1702c5SYu Xiangning if (iosize < 0 || maxblk < 0 || (maxblk == 0 && iosize > 0)) 4580f1702c5SYu Xiangning goto done; 4590f1702c5SYu Xiangning 4600f1702c5SYu Xiangning /* 4610f1702c5SYu Xiangning * We will enter the loop below if iosize is 0; it will allocate an 4620f1702c5SYu Xiangning * empty message block and call uiomove(9F) which will just return. 4630f1702c5SYu Xiangning * We could avoid that with an extra check but would only slow 4640f1702c5SYu Xiangning * down the much more likely case where iosize is larger than 0. 4650f1702c5SYu Xiangning */ 4660f1702c5SYu Xiangning do { 4670f1702c5SYu Xiangning ssize_t blocksize; 4680f1702c5SYu Xiangning mblk_t *mp; 4690f1702c5SYu Xiangning 4700f1702c5SYu Xiangning blocksize = MIN(iosize, maxblk); 4710f1702c5SYu Xiangning ASSERT(blocksize >= 0); 472de8c4a14SErik Nordmark mp = allocb(wroff + blocksize + tail_len, BPRI_MED); 473de8c4a14SErik Nordmark if (mp == NULL) { 4740f1702c5SYu Xiangning *errorp = ENOMEM; 4750f1702c5SYu Xiangning return (head); 4760f1702c5SYu Xiangning } 4770f1702c5SYu Xiangning mp->b_rptr += wroff; 4780f1702c5SYu Xiangning mp->b_wptr = mp->b_rptr + blocksize; 4790f1702c5SYu Xiangning 4800f1702c5SYu Xiangning *tail = mp; 4810f1702c5SYu Xiangning tail = &mp->b_cont; 4820f1702c5SYu Xiangning 4830f1702c5SYu Xiangning /* uiomove(9F) either returns 0 or EFAULT */ 4840f1702c5SYu Xiangning if ((*errorp = uiomove(mp->b_rptr, (size_t)blocksize, 4850f1702c5SYu Xiangning UIO_WRITE, uiop)) != 0) { 4860f1702c5SYu Xiangning ASSERT(*errorp != ENOMEM); 4870f1702c5SYu Xiangning freemsg(head); 4880f1702c5SYu Xiangning return (NULL); 4890f1702c5SYu Xiangning } 4900f1702c5SYu Xiangning 4910f1702c5SYu Xiangning iosize -= blocksize; 4920f1702c5SYu Xiangning } while (iosize > 0); 4930f1702c5SYu Xiangning 4940f1702c5SYu Xiangning done: 4950f1702c5SYu Xiangning *errorp = 0; 4960f1702c5SYu Xiangning return (head); 4970f1702c5SYu Xiangning } 4980f1702c5SYu Xiangning 4990f1702c5SYu Xiangning mblk_t * 5000f1702c5SYu Xiangning socopyoutuio(mblk_t *mp, struct uio *uiop, ssize_t max_read, int *errorp) 5010f1702c5SYu Xiangning { 5020f1702c5SYu Xiangning int error; 5030f1702c5SYu Xiangning ptrdiff_t n; 5040f1702c5SYu Xiangning mblk_t *nmp; 5050f1702c5SYu Xiangning 5060f1702c5SYu Xiangning ASSERT(mp->b_wptr >= mp->b_rptr); 5070f1702c5SYu Xiangning 5080f1702c5SYu Xiangning /* 5090f1702c5SYu Xiangning * max_read is the offset of the oobmark and read can not go pass 5100f1702c5SYu Xiangning * the oobmark. 5110f1702c5SYu Xiangning */ 5120f1702c5SYu Xiangning if (max_read == INFPSZ || max_read > uiop->uio_resid) 5130f1702c5SYu Xiangning max_read = uiop->uio_resid; 5140f1702c5SYu Xiangning 5150f1702c5SYu Xiangning do { 5160f1702c5SYu Xiangning if ((n = MIN(max_read, MBLKL(mp))) != 0) { 5170f1702c5SYu Xiangning ASSERT(n > 0); 5180f1702c5SYu Xiangning 5190f1702c5SYu Xiangning error = uiomove(mp->b_rptr, n, UIO_READ, uiop); 5200f1702c5SYu Xiangning if (error != 0) { 5210f1702c5SYu Xiangning freemsg(mp); 5220f1702c5SYu Xiangning *errorp = error; 5230f1702c5SYu Xiangning return (NULL); 5240f1702c5SYu Xiangning } 5250f1702c5SYu Xiangning } 5260f1702c5SYu Xiangning 5270f1702c5SYu Xiangning mp->b_rptr += n; 5280f1702c5SYu Xiangning max_read -= n; 5290f1702c5SYu Xiangning while (mp != NULL && (mp->b_rptr >= mp->b_wptr)) { 5300f1702c5SYu Xiangning /* 5310f1702c5SYu Xiangning * get rid of zero length mblks 5320f1702c5SYu Xiangning */ 5330f1702c5SYu Xiangning nmp = mp; 5340f1702c5SYu Xiangning mp = mp->b_cont; 5350f1702c5SYu Xiangning freeb(nmp); 5360f1702c5SYu Xiangning } 5370f1702c5SYu Xiangning } while (mp != NULL && max_read > 0); 5380f1702c5SYu Xiangning 5390f1702c5SYu Xiangning *errorp = 0; 5400f1702c5SYu Xiangning return (mp); 5410f1702c5SYu Xiangning } 5420f1702c5SYu Xiangning 5430f1702c5SYu Xiangning static void 5440f1702c5SYu Xiangning so_prepend_msg(struct sonode *so, mblk_t *mp, mblk_t *last_tail) 5450f1702c5SYu Xiangning { 5460f1702c5SYu Xiangning ASSERT(last_tail != NULL); 5470f1702c5SYu Xiangning mp->b_next = so->so_rcv_q_head; 5480f1702c5SYu Xiangning mp->b_prev = last_tail; 5490f1702c5SYu Xiangning ASSERT(!(DB_FLAGS(mp) & DBLK_UIOA)); 5500f1702c5SYu Xiangning 5510f1702c5SYu Xiangning if (so->so_rcv_q_head == NULL) { 5520f1702c5SYu Xiangning ASSERT(so->so_rcv_q_last_head == NULL); 5530f1702c5SYu Xiangning so->so_rcv_q_last_head = mp; 5540f1702c5SYu Xiangning #ifdef DEBUG 5550f1702c5SYu Xiangning } else { 5560f1702c5SYu Xiangning ASSERT(!(DB_FLAGS(so->so_rcv_q_head) & DBLK_UIOA)); 5570f1702c5SYu Xiangning #endif 5580f1702c5SYu Xiangning } 5590f1702c5SYu Xiangning so->so_rcv_q_head = mp; 5600f1702c5SYu Xiangning 5610f1702c5SYu Xiangning #ifdef DEBUG 5620f1702c5SYu Xiangning if (so_debug_length) { 5630f1702c5SYu Xiangning mutex_enter(&so->so_lock); 5640f1702c5SYu Xiangning ASSERT(so_check_length(so)); 5650f1702c5SYu Xiangning mutex_exit(&so->so_lock); 5660f1702c5SYu Xiangning } 5670f1702c5SYu Xiangning #endif 5680f1702c5SYu Xiangning } 5690f1702c5SYu Xiangning 570e4b767e8SAnders Persson /* 571e4b767e8SAnders Persson * Move a mblk chain (mp_head, mp_last_head) to the sonode's rcv queue so it 572e4b767e8SAnders Persson * can be processed by so_dequeue_msg(). 573e4b767e8SAnders Persson */ 574e4b767e8SAnders Persson void 575e4b767e8SAnders Persson so_process_new_message(struct sonode *so, mblk_t *mp_head, mblk_t *mp_last_head) 5760f1702c5SYu Xiangning { 5773e95bd4aSAnders Persson if (so->so_filter_active > 0 && 5783e95bd4aSAnders Persson (mp_head = sof_filter_data_in_proc(so, mp_head, 5793e95bd4aSAnders Persson &mp_last_head)) == NULL) 5803e95bd4aSAnders Persson return; 5813e95bd4aSAnders Persson 5820f1702c5SYu Xiangning ASSERT(mp_head->b_prev != NULL); 5830f1702c5SYu Xiangning if (so->so_rcv_q_head == NULL) { 5840f1702c5SYu Xiangning so->so_rcv_q_head = mp_head; 5850f1702c5SYu Xiangning so->so_rcv_q_last_head = mp_last_head; 5860f1702c5SYu Xiangning ASSERT(so->so_rcv_q_last_head->b_prev != NULL); 5870f1702c5SYu Xiangning } else { 5880f1702c5SYu Xiangning boolean_t flag_equal = ((DB_FLAGS(mp_head) & DBLK_UIOA) == 5890f1702c5SYu Xiangning (DB_FLAGS(so->so_rcv_q_last_head) & DBLK_UIOA)); 5900f1702c5SYu Xiangning 5910f1702c5SYu Xiangning if (mp_head->b_next == NULL && 5920f1702c5SYu Xiangning DB_TYPE(mp_head) == M_DATA && 5930f1702c5SYu Xiangning DB_TYPE(so->so_rcv_q_last_head) == M_DATA && flag_equal) { 5940f1702c5SYu Xiangning so->so_rcv_q_last_head->b_prev->b_cont = mp_head; 5950f1702c5SYu Xiangning so->so_rcv_q_last_head->b_prev = mp_head->b_prev; 5960f1702c5SYu Xiangning mp_head->b_prev = NULL; 5970f1702c5SYu Xiangning } else if (flag_equal && (DB_FLAGS(mp_head) & DBLK_UIOA)) { 5980f1702c5SYu Xiangning /* 5990f1702c5SYu Xiangning * Append to last_head if more than one mblks, and both 6000f1702c5SYu Xiangning * mp_head and last_head are I/OAT mblks. 6010f1702c5SYu Xiangning */ 6020f1702c5SYu Xiangning ASSERT(mp_head->b_next != NULL); 6030f1702c5SYu Xiangning so->so_rcv_q_last_head->b_prev->b_cont = mp_head; 6040f1702c5SYu Xiangning so->so_rcv_q_last_head->b_prev = mp_head->b_prev; 6050f1702c5SYu Xiangning mp_head->b_prev = NULL; 6060f1702c5SYu Xiangning 6070f1702c5SYu Xiangning so->so_rcv_q_last_head->b_next = mp_head->b_next; 6080f1702c5SYu Xiangning mp_head->b_next = NULL; 6090f1702c5SYu Xiangning so->so_rcv_q_last_head = mp_last_head; 6100f1702c5SYu Xiangning } else { 6110f1702c5SYu Xiangning #ifdef DEBUG 6120f1702c5SYu Xiangning { 6130f1702c5SYu Xiangning mblk_t *tmp_mblk; 6140f1702c5SYu Xiangning tmp_mblk = mp_head; 6150f1702c5SYu Xiangning while (tmp_mblk != NULL) { 6160f1702c5SYu Xiangning ASSERT(tmp_mblk->b_prev != NULL); 6170f1702c5SYu Xiangning tmp_mblk = tmp_mblk->b_next; 6180f1702c5SYu Xiangning } 6190f1702c5SYu Xiangning } 6200f1702c5SYu Xiangning #endif 6210f1702c5SYu Xiangning so->so_rcv_q_last_head->b_next = mp_head; 6220f1702c5SYu Xiangning so->so_rcv_q_last_head = mp_last_head; 6230f1702c5SYu Xiangning } 6240f1702c5SYu Xiangning } 6250f1702c5SYu Xiangning } 6260f1702c5SYu Xiangning 6275795faa4SRao Shoaib /* 6285795faa4SRao Shoaib * Check flow control on a given sonode. Must have so_lock held, and 629a215d4ebSKacheong Poon * this function will release the hold. Return true if flow control 630a215d4ebSKacheong Poon * is cleared. 6315795faa4SRao Shoaib */ 632a215d4ebSKacheong Poon boolean_t 6335795faa4SRao Shoaib so_check_flow_control(struct sonode *so) 6345795faa4SRao Shoaib { 6355795faa4SRao Shoaib ASSERT(MUTEX_HELD(&so->so_lock)); 6365795faa4SRao Shoaib 6373e95bd4aSAnders Persson if (so->so_flowctrld && (so->so_rcv_queued < so->so_rcvlowat && 6383e95bd4aSAnders Persson !(so->so_state & SS_FIL_RCV_FLOWCTRL))) { 6395795faa4SRao Shoaib so->so_flowctrld = B_FALSE; 6405795faa4SRao Shoaib mutex_exit(&so->so_lock); 6415795faa4SRao Shoaib /* 6425795faa4SRao Shoaib * Open up flow control. SCTP does not have any downcalls, and 6435795faa4SRao Shoaib * it will clr flow ctrl in sosctp_recvmsg(). 6445795faa4SRao Shoaib */ 6455795faa4SRao Shoaib if (so->so_downcalls != NULL && 6465795faa4SRao Shoaib so->so_downcalls->sd_clr_flowctrl != NULL) { 6475795faa4SRao Shoaib (*so->so_downcalls->sd_clr_flowctrl) 6485795faa4SRao Shoaib (so->so_proto_handle); 6495795faa4SRao Shoaib } 6503e95bd4aSAnders Persson /* filters can start injecting data */ 6513e95bd4aSAnders Persson sof_sonode_notify_filters(so, SOF_EV_INJECT_DATA_IN_OK, 0); 652a215d4ebSKacheong Poon return (B_TRUE); 6535795faa4SRao Shoaib } else { 6545795faa4SRao Shoaib mutex_exit(&so->so_lock); 655a215d4ebSKacheong Poon return (B_FALSE); 6565795faa4SRao Shoaib } 6575795faa4SRao Shoaib } 6585795faa4SRao Shoaib 6590f1702c5SYu Xiangning int 6600f1702c5SYu Xiangning so_dequeue_msg(struct sonode *so, mblk_t **mctlp, struct uio *uiop, 6610f1702c5SYu Xiangning rval_t *rvalp, int flags) 6620f1702c5SYu Xiangning { 6630f1702c5SYu Xiangning mblk_t *mp, *nmp; 6640f1702c5SYu Xiangning mblk_t *savemp, *savemptail; 6650f1702c5SYu Xiangning mblk_t *new_msg_head; 6660f1702c5SYu Xiangning mblk_t *new_msg_last_head; 6670f1702c5SYu Xiangning mblk_t *last_tail; 6680f1702c5SYu Xiangning boolean_t partial_read; 6690f1702c5SYu Xiangning boolean_t reset_atmark = B_FALSE; 6700f1702c5SYu Xiangning int more = 0; 6710f1702c5SYu Xiangning int error; 6720f1702c5SYu Xiangning ssize_t oobmark; 6730f1702c5SYu Xiangning sodirect_t *sodp = so->so_direct; 6740f1702c5SYu Xiangning 6750f1702c5SYu Xiangning partial_read = B_FALSE; 6760f1702c5SYu Xiangning *mctlp = NULL; 6770f1702c5SYu Xiangning again: 6780f1702c5SYu Xiangning mutex_enter(&so->so_lock); 6790f1702c5SYu Xiangning again1: 6800f1702c5SYu Xiangning #ifdef DEBUG 6810f1702c5SYu Xiangning if (so_debug_length) { 6820f1702c5SYu Xiangning ASSERT(so_check_length(so)); 6830f1702c5SYu Xiangning } 6840f1702c5SYu Xiangning #endif 6858591a19aSAnders Persson if (so->so_state & SS_RCVATMARK) { 6868591a19aSAnders Persson /* Check whether the caller is OK to read past the mark */ 6878591a19aSAnders Persson if (flags & MSG_NOMARK) { 6888591a19aSAnders Persson mutex_exit(&so->so_lock); 6898591a19aSAnders Persson return (EWOULDBLOCK); 6908591a19aSAnders Persson } 6918591a19aSAnders Persson reset_atmark = B_TRUE; 6928591a19aSAnders Persson } 6930f1702c5SYu Xiangning /* 6940f1702c5SYu Xiangning * First move messages from the dump area to processing area 6950f1702c5SYu Xiangning */ 6960f1702c5SYu Xiangning if (sodp != NULL) { 697bbc000e5SAnders Persson if (sodp->sod_enabled) { 6980f1702c5SYu Xiangning if (sodp->sod_uioa.uioa_state & UIOA_ALLOC) { 6990f1702c5SYu Xiangning /* nothing to uioamove */ 7000f1702c5SYu Xiangning sodp = NULL; 7010f1702c5SYu Xiangning } else if (sodp->sod_uioa.uioa_state & UIOA_INIT) { 7020f1702c5SYu Xiangning sodp->sod_uioa.uioa_state &= UIOA_CLR; 7030f1702c5SYu Xiangning sodp->sod_uioa.uioa_state |= UIOA_ENABLED; 7040f1702c5SYu Xiangning /* 7050f1702c5SYu Xiangning * try to uioamove() the data that 7060f1702c5SYu Xiangning * has already queued. 7070f1702c5SYu Xiangning */ 7080f1702c5SYu Xiangning sod_uioa_so_init(so, sodp, uiop); 7090f1702c5SYu Xiangning } 7100f1702c5SYu Xiangning } else { 7110f1702c5SYu Xiangning sodp = NULL; 7120f1702c5SYu Xiangning } 7130f1702c5SYu Xiangning } 7140f1702c5SYu Xiangning new_msg_head = so->so_rcv_head; 7150f1702c5SYu Xiangning new_msg_last_head = so->so_rcv_last_head; 7160f1702c5SYu Xiangning so->so_rcv_head = NULL; 7170f1702c5SYu Xiangning so->so_rcv_last_head = NULL; 7180f1702c5SYu Xiangning oobmark = so->so_oobmark; 7190f1702c5SYu Xiangning /* 7200f1702c5SYu Xiangning * We can release the lock as there can only be one reader 7210f1702c5SYu Xiangning */ 7220f1702c5SYu Xiangning mutex_exit(&so->so_lock); 7230f1702c5SYu Xiangning 7240f1702c5SYu Xiangning if (new_msg_head != NULL) { 725e4b767e8SAnders Persson so_process_new_message(so, new_msg_head, new_msg_last_head); 7260f1702c5SYu Xiangning } 7270f1702c5SYu Xiangning savemp = savemptail = NULL; 728a215d4ebSKacheong Poon rvalp->r_vals = 0; 7290f1702c5SYu Xiangning error = 0; 7300f1702c5SYu Xiangning mp = so->so_rcv_q_head; 7310f1702c5SYu Xiangning 7320f1702c5SYu Xiangning if (mp != NULL && 7330f1702c5SYu Xiangning (so->so_rcv_timer_tid == 0 || 7340f1702c5SYu Xiangning so->so_rcv_queued >= so->so_rcv_thresh)) { 7350f1702c5SYu Xiangning partial_read = B_FALSE; 7360f1702c5SYu Xiangning 7370f1702c5SYu Xiangning if (flags & MSG_PEEK) { 7380f1702c5SYu Xiangning if ((nmp = dupmsg(mp)) == NULL && 7390f1702c5SYu Xiangning (nmp = copymsg(mp)) == NULL) { 7400f1702c5SYu Xiangning size_t size = msgsize(mp); 7410f1702c5SYu Xiangning 7420f1702c5SYu Xiangning error = strwaitbuf(size, BPRI_HI); 7430f1702c5SYu Xiangning if (error) { 7440f1702c5SYu Xiangning return (error); 7450f1702c5SYu Xiangning } 7460f1702c5SYu Xiangning goto again; 7470f1702c5SYu Xiangning } 7480f1702c5SYu Xiangning mp = nmp; 7490f1702c5SYu Xiangning } else { 7500f1702c5SYu Xiangning ASSERT(mp->b_prev != NULL); 7510f1702c5SYu Xiangning last_tail = mp->b_prev; 7520f1702c5SYu Xiangning mp->b_prev = NULL; 7530f1702c5SYu Xiangning so->so_rcv_q_head = mp->b_next; 7540f1702c5SYu Xiangning if (so->so_rcv_q_head == NULL) { 7550f1702c5SYu Xiangning so->so_rcv_q_last_head = NULL; 7560f1702c5SYu Xiangning } 7570f1702c5SYu Xiangning mp->b_next = NULL; 7580f1702c5SYu Xiangning } 7590f1702c5SYu Xiangning 7600f1702c5SYu Xiangning ASSERT(mctlp != NULL); 7610f1702c5SYu Xiangning /* 7620f1702c5SYu Xiangning * First process PROTO or PCPROTO blocks, if any. 7630f1702c5SYu Xiangning */ 7640f1702c5SYu Xiangning if (DB_TYPE(mp) != M_DATA) { 7650f1702c5SYu Xiangning *mctlp = mp; 7660f1702c5SYu Xiangning savemp = mp; 7670f1702c5SYu Xiangning savemptail = mp; 7680f1702c5SYu Xiangning ASSERT(DB_TYPE(mp) == M_PROTO || 7690f1702c5SYu Xiangning DB_TYPE(mp) == M_PCPROTO); 7700f1702c5SYu Xiangning while (mp->b_cont != NULL && 7710f1702c5SYu Xiangning DB_TYPE(mp->b_cont) != M_DATA) { 7720f1702c5SYu Xiangning ASSERT(DB_TYPE(mp->b_cont) == M_PROTO || 7730f1702c5SYu Xiangning DB_TYPE(mp->b_cont) == M_PCPROTO); 7740f1702c5SYu Xiangning mp = mp->b_cont; 7750f1702c5SYu Xiangning savemptail = mp; 7760f1702c5SYu Xiangning } 7770f1702c5SYu Xiangning mp = savemptail->b_cont; 7780f1702c5SYu Xiangning savemptail->b_cont = NULL; 7790f1702c5SYu Xiangning } 7800f1702c5SYu Xiangning 7810f1702c5SYu Xiangning ASSERT(DB_TYPE(mp) == M_DATA); 7820f1702c5SYu Xiangning /* 7830f1702c5SYu Xiangning * Now process DATA blocks, if any. Note that for sodirect 7840f1702c5SYu Xiangning * enabled socket, uio_resid can be 0. 7850f1702c5SYu Xiangning */ 7860f1702c5SYu Xiangning if (uiop->uio_resid >= 0) { 7870f1702c5SYu Xiangning ssize_t copied = 0; 7880f1702c5SYu Xiangning 7890f1702c5SYu Xiangning if (sodp != NULL && (DB_FLAGS(mp) & DBLK_UIOA)) { 790bbc000e5SAnders Persson mutex_enter(&so->so_lock); 7910f1702c5SYu Xiangning ASSERT(uiop == (uio_t *)&sodp->sod_uioa); 7920f1702c5SYu Xiangning copied = sod_uioa_mblk(so, mp); 7930f1702c5SYu Xiangning if (copied > 0) 7940f1702c5SYu Xiangning partial_read = B_TRUE; 795bbc000e5SAnders Persson mutex_exit(&so->so_lock); 7960f1702c5SYu Xiangning /* mark this mblk as processed */ 7970f1702c5SYu Xiangning mp = NULL; 7980f1702c5SYu Xiangning } else { 7990f1702c5SYu Xiangning ssize_t oldresid = uiop->uio_resid; 8000f1702c5SYu Xiangning 8010f1702c5SYu Xiangning if (MBLKL(mp) < so_mblk_pull_len) { 8020f1702c5SYu Xiangning if (pullupmsg(mp, -1) == 1) { 8030f1702c5SYu Xiangning last_tail = mp; 8040f1702c5SYu Xiangning } 8050f1702c5SYu Xiangning } 8060f1702c5SYu Xiangning /* 8070f1702c5SYu Xiangning * Can not read beyond the oobmark 8080f1702c5SYu Xiangning */ 8090f1702c5SYu Xiangning mp = socopyoutuio(mp, uiop, 8100f1702c5SYu Xiangning oobmark == 0 ? INFPSZ : oobmark, &error); 8110f1702c5SYu Xiangning if (error != 0) { 8120f1702c5SYu Xiangning freemsg(*mctlp); 8130f1702c5SYu Xiangning *mctlp = NULL; 8140f1702c5SYu Xiangning more = 0; 8150f1702c5SYu Xiangning goto done; 8160f1702c5SYu Xiangning } 8170f1702c5SYu Xiangning ASSERT(oldresid >= uiop->uio_resid); 8180f1702c5SYu Xiangning copied = oldresid - uiop->uio_resid; 8190f1702c5SYu Xiangning if (oldresid > uiop->uio_resid) 8200f1702c5SYu Xiangning partial_read = B_TRUE; 8210f1702c5SYu Xiangning } 8220f1702c5SYu Xiangning ASSERT(copied >= 0); 8230f1702c5SYu Xiangning if (copied > 0 && !(flags & MSG_PEEK)) { 8240f1702c5SYu Xiangning mutex_enter(&so->so_lock); 8250f1702c5SYu Xiangning so->so_rcv_queued -= copied; 8260f1702c5SYu Xiangning ASSERT(so->so_oobmark >= 0); 8270f1702c5SYu Xiangning if (so->so_oobmark > 0) { 8280f1702c5SYu Xiangning so->so_oobmark -= copied; 8290f1702c5SYu Xiangning ASSERT(so->so_oobmark >= 0); 8300f1702c5SYu Xiangning if (so->so_oobmark == 0) { 8310f1702c5SYu Xiangning ASSERT(so->so_state & 8320f1702c5SYu Xiangning SS_OOBPEND); 8330f1702c5SYu Xiangning so->so_oobmark = 0; 8340f1702c5SYu Xiangning so->so_state |= SS_RCVATMARK; 8350f1702c5SYu Xiangning } 8360f1702c5SYu Xiangning } 8370f1702c5SYu Xiangning /* 8385795faa4SRao Shoaib * so_check_flow_control() will drop 8395795faa4SRao Shoaib * so->so_lock. 8400f1702c5SYu Xiangning */ 841a215d4ebSKacheong Poon rvalp->r_val2 = so_check_flow_control(so); 8420f1702c5SYu Xiangning } 8430f1702c5SYu Xiangning } 8440f1702c5SYu Xiangning if (mp != NULL) { /* more data blocks in msg */ 8450f1702c5SYu Xiangning more |= MOREDATA; 8460f1702c5SYu Xiangning if ((flags & (MSG_PEEK|MSG_TRUNC))) { 8475795faa4SRao Shoaib if (flags & MSG_PEEK) { 8480f1702c5SYu Xiangning freemsg(mp); 8495795faa4SRao Shoaib } else { 8505795faa4SRao Shoaib unsigned int msize = msgdsize(mp); 8515795faa4SRao Shoaib 8525795faa4SRao Shoaib freemsg(mp); 8535795faa4SRao Shoaib mutex_enter(&so->so_lock); 8545795faa4SRao Shoaib so->so_rcv_queued -= msize; 8555795faa4SRao Shoaib /* 8565795faa4SRao Shoaib * so_check_flow_control() will drop 8575795faa4SRao Shoaib * so->so_lock. 8585795faa4SRao Shoaib */ 859a215d4ebSKacheong Poon rvalp->r_val2 = 8605795faa4SRao Shoaib so_check_flow_control(so); 8615795faa4SRao Shoaib } 8620f1702c5SYu Xiangning } else if (partial_read && !somsghasdata(mp)) { 8630f1702c5SYu Xiangning /* 8640f1702c5SYu Xiangning * Avoid queuing a zero-length tail part of 8650f1702c5SYu Xiangning * a message. partial_read == 1 indicates that 8660f1702c5SYu Xiangning * we read some of the message. 8670f1702c5SYu Xiangning */ 8680f1702c5SYu Xiangning freemsg(mp); 8690f1702c5SYu Xiangning more &= ~MOREDATA; 8700f1702c5SYu Xiangning } else { 8710f1702c5SYu Xiangning if (savemp != NULL && 8720f1702c5SYu Xiangning (flags & MSG_DUPCTRL)) { 8730f1702c5SYu Xiangning mblk_t *nmp; 8740f1702c5SYu Xiangning /* 8750f1702c5SYu Xiangning * There should only be non data mblks 8760f1702c5SYu Xiangning */ 8770f1702c5SYu Xiangning ASSERT(DB_TYPE(savemp) != M_DATA && 8780f1702c5SYu Xiangning DB_TYPE(savemptail) != M_DATA); 8790f1702c5SYu Xiangning try_again: 8800f1702c5SYu Xiangning if ((nmp = dupmsg(savemp)) == NULL && 8810f1702c5SYu Xiangning (nmp = copymsg(savemp)) == NULL) { 8820f1702c5SYu Xiangning 8830f1702c5SYu Xiangning size_t size = msgsize(savemp); 8840f1702c5SYu Xiangning 8850f1702c5SYu Xiangning error = strwaitbuf(size, 8860f1702c5SYu Xiangning BPRI_HI); 8870f1702c5SYu Xiangning if (error != 0) { 8880f1702c5SYu Xiangning /* 8890f1702c5SYu Xiangning * In case we 8900f1702c5SYu Xiangning * cannot copy 8910f1702c5SYu Xiangning * control data 8920f1702c5SYu Xiangning * free the remaining 8930f1702c5SYu Xiangning * data. 8940f1702c5SYu Xiangning */ 8950f1702c5SYu Xiangning freemsg(mp); 8960f1702c5SYu Xiangning goto done; 8970f1702c5SYu Xiangning } 8980f1702c5SYu Xiangning goto try_again; 8990f1702c5SYu Xiangning } 9000f1702c5SYu Xiangning 9010f1702c5SYu Xiangning ASSERT(nmp != NULL); 9020f1702c5SYu Xiangning ASSERT(DB_TYPE(nmp) != M_DATA); 9030f1702c5SYu Xiangning savemptail->b_cont = mp; 9040f1702c5SYu Xiangning *mctlp = nmp; 9050f1702c5SYu Xiangning mp = savemp; 9060f1702c5SYu Xiangning } 9070f1702c5SYu Xiangning /* 9080f1702c5SYu Xiangning * putback mp 9090f1702c5SYu Xiangning */ 9100f1702c5SYu Xiangning so_prepend_msg(so, mp, last_tail); 9110f1702c5SYu Xiangning } 9120f1702c5SYu Xiangning } 9130f1702c5SYu Xiangning 9140f1702c5SYu Xiangning /* fast check so_rcv_head if there is more data */ 9150f1702c5SYu Xiangning if (partial_read && !(so->so_state & SS_RCVATMARK) && 9160f1702c5SYu Xiangning *mctlp == NULL && uiop->uio_resid > 0 && 9170f1702c5SYu Xiangning !(flags & MSG_PEEK) && so->so_rcv_head != NULL) { 9180f1702c5SYu Xiangning goto again; 9190f1702c5SYu Xiangning } 9200f1702c5SYu Xiangning } else if (!partial_read) { 9210f1702c5SYu Xiangning mutex_enter(&so->so_lock); 9220f1702c5SYu Xiangning if (so->so_error != 0) { 9230f1702c5SYu Xiangning error = sogeterr(so, !(flags & MSG_PEEK)); 9240f1702c5SYu Xiangning mutex_exit(&so->so_lock); 9250f1702c5SYu Xiangning return (error); 9260f1702c5SYu Xiangning } 9270f1702c5SYu Xiangning /* 9280f1702c5SYu Xiangning * No pending data. Return right away for nonblocking 9290f1702c5SYu Xiangning * socket, otherwise sleep waiting for data. 9300f1702c5SYu Xiangning */ 9312caa659dSMike Cheng if (!(so->so_state & SS_CANTRCVMORE) && uiop->uio_resid > 0) { 9320f1702c5SYu Xiangning if ((uiop->uio_fmode & (FNDELAY|FNONBLOCK)) || 9330f1702c5SYu Xiangning (flags & MSG_DONTWAIT)) { 9340f1702c5SYu Xiangning error = EWOULDBLOCK; 9350f1702c5SYu Xiangning } else { 9360f1702c5SYu Xiangning if (so->so_state & (SS_CLOSING | 9370f1702c5SYu Xiangning SS_FALLBACK_PENDING)) { 9380f1702c5SYu Xiangning mutex_exit(&so->so_lock); 9390f1702c5SYu Xiangning error = EINTR; 9400f1702c5SYu Xiangning goto done; 9410f1702c5SYu Xiangning } 9420f1702c5SYu Xiangning 9430f1702c5SYu Xiangning if (so->so_rcv_head != NULL) { 9440f1702c5SYu Xiangning goto again1; 9450f1702c5SYu Xiangning } 9460f1702c5SYu Xiangning so->so_rcv_wakeup = B_TRUE; 9470f1702c5SYu Xiangning so->so_rcv_wanted = uiop->uio_resid; 9480f1702c5SYu Xiangning if (so->so_rcvtimeo == 0) { 9490f1702c5SYu Xiangning /* 9500f1702c5SYu Xiangning * Zero means disable timeout. 9510f1702c5SYu Xiangning */ 9520f1702c5SYu Xiangning error = cv_wait_sig(&so->so_rcv_cv, 9530f1702c5SYu Xiangning &so->so_lock); 9540f1702c5SYu Xiangning } else { 955d3d50737SRafael Vanoni error = cv_reltimedwait_sig( 956d3d50737SRafael Vanoni &so->so_rcv_cv, &so->so_lock, 957d3d50737SRafael Vanoni so->so_rcvtimeo, TR_CLOCK_TICK); 9580f1702c5SYu Xiangning } 9590f1702c5SYu Xiangning so->so_rcv_wakeup = B_FALSE; 9600f1702c5SYu Xiangning so->so_rcv_wanted = 0; 9610f1702c5SYu Xiangning 9620f1702c5SYu Xiangning if (error == 0) { 9630f1702c5SYu Xiangning error = EINTR; 9640f1702c5SYu Xiangning } else if (error == -1) { 96534dfe683Sshenjian error = EAGAIN; 9660f1702c5SYu Xiangning } else { 9670f1702c5SYu Xiangning goto again1; 9680f1702c5SYu Xiangning } 9690f1702c5SYu Xiangning } 9700f1702c5SYu Xiangning } 9710f1702c5SYu Xiangning mutex_exit(&so->so_lock); 9720f1702c5SYu Xiangning } 9730f1702c5SYu Xiangning if (reset_atmark && partial_read && !(flags & MSG_PEEK)) { 9740f1702c5SYu Xiangning /* 9750f1702c5SYu Xiangning * We are passed the mark, update state 9760f1702c5SYu Xiangning * 4.3BSD and 4.4BSD clears the mark when peeking across it. 9770f1702c5SYu Xiangning * The draft Posix socket spec states that the mark should 9780f1702c5SYu Xiangning * not be cleared when peeking. We follow the latter. 9790f1702c5SYu Xiangning */ 9800f1702c5SYu Xiangning mutex_enter(&so->so_lock); 9810f1702c5SYu Xiangning ASSERT(so_verify_oobstate(so)); 9820f1702c5SYu Xiangning so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 9830f1702c5SYu Xiangning freemsg(so->so_oobmsg); 9840f1702c5SYu Xiangning so->so_oobmsg = NULL; 9850f1702c5SYu Xiangning ASSERT(so_verify_oobstate(so)); 9860f1702c5SYu Xiangning mutex_exit(&so->so_lock); 9870f1702c5SYu Xiangning } 9880f1702c5SYu Xiangning ASSERT(so->so_rcv_wakeup == B_FALSE); 9890f1702c5SYu Xiangning done: 9900f1702c5SYu Xiangning if (sodp != NULL) { 991bbc000e5SAnders Persson mutex_enter(&so->so_lock); 992bbc000e5SAnders Persson if (sodp->sod_enabled && 9930f1702c5SYu Xiangning (sodp->sod_uioa.uioa_state & UIOA_ENABLED)) { 9940f1702c5SYu Xiangning SOD_UIOAFINI(sodp); 9950f1702c5SYu Xiangning if (sodp->sod_uioa.uioa_mbytes > 0) { 9960f1702c5SYu Xiangning ASSERT(so->so_rcv_q_head != NULL || 9970f1702c5SYu Xiangning so->so_rcv_head != NULL); 9980f1702c5SYu Xiangning so->so_rcv_queued -= sod_uioa_mblk(so, NULL); 9990f1702c5SYu Xiangning if (error == EWOULDBLOCK) 10000f1702c5SYu Xiangning error = 0; 10010f1702c5SYu Xiangning } 10020f1702c5SYu Xiangning } 1003bbc000e5SAnders Persson mutex_exit(&so->so_lock); 10040f1702c5SYu Xiangning } 10050f1702c5SYu Xiangning #ifdef DEBUG 10060f1702c5SYu Xiangning if (so_debug_length) { 10070f1702c5SYu Xiangning mutex_enter(&so->so_lock); 10080f1702c5SYu Xiangning ASSERT(so_check_length(so)); 10090f1702c5SYu Xiangning mutex_exit(&so->so_lock); 10100f1702c5SYu Xiangning } 10110f1702c5SYu Xiangning #endif 10120f1702c5SYu Xiangning rvalp->r_val1 = more; 10135795faa4SRao Shoaib ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 10140f1702c5SYu Xiangning return (error); 10150f1702c5SYu Xiangning } 10160f1702c5SYu Xiangning 1017e4b767e8SAnders Persson /* 1018e4b767e8SAnders Persson * Enqueue data from the protocol on the socket's rcv queue. 1019e4b767e8SAnders Persson * 1020e4b767e8SAnders Persson * We try to hook new M_DATA mblks onto an existing chain, however, 1021e4b767e8SAnders Persson * that cannot be done if the existing chain has already been 1022e4b767e8SAnders Persson * processed by I/OAT. Non-M_DATA mblks are just linked together via 1023e4b767e8SAnders Persson * b_next. In all cases the b_prev of the enqueued mblk is set to 1024e4b767e8SAnders Persson * point to the last mblk in its b_cont chain. 1025e4b767e8SAnders Persson */ 10260f1702c5SYu Xiangning void 10270f1702c5SYu Xiangning so_enqueue_msg(struct sonode *so, mblk_t *mp, size_t msg_size) 10280f1702c5SYu Xiangning { 10290f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 10300f1702c5SYu Xiangning 10310f1702c5SYu Xiangning #ifdef DEBUG 10320f1702c5SYu Xiangning if (so_debug_length) { 10330f1702c5SYu Xiangning ASSERT(so_check_length(so)); 10340f1702c5SYu Xiangning } 10350f1702c5SYu Xiangning #endif 10360f1702c5SYu Xiangning so->so_rcv_queued += msg_size; 10370f1702c5SYu Xiangning 10380f1702c5SYu Xiangning if (so->so_rcv_head == NULL) { 10390f1702c5SYu Xiangning ASSERT(so->so_rcv_last_head == NULL); 10400f1702c5SYu Xiangning so->so_rcv_head = mp; 10410f1702c5SYu Xiangning so->so_rcv_last_head = mp; 10420f1702c5SYu Xiangning } else if ((DB_TYPE(mp) == M_DATA && 10430f1702c5SYu Xiangning DB_TYPE(so->so_rcv_last_head) == M_DATA) && 10440f1702c5SYu Xiangning ((DB_FLAGS(mp) & DBLK_UIOA) == 10450f1702c5SYu Xiangning (DB_FLAGS(so->so_rcv_last_head) & DBLK_UIOA))) { 10460f1702c5SYu Xiangning /* Added to the end */ 10470f1702c5SYu Xiangning ASSERT(so->so_rcv_last_head != NULL); 10480f1702c5SYu Xiangning ASSERT(so->so_rcv_last_head->b_prev != NULL); 10490f1702c5SYu Xiangning so->so_rcv_last_head->b_prev->b_cont = mp; 10500f1702c5SYu Xiangning } else { 10510f1702c5SYu Xiangning /* Start a new end */ 10520f1702c5SYu Xiangning so->so_rcv_last_head->b_next = mp; 10530f1702c5SYu Xiangning so->so_rcv_last_head = mp; 10540f1702c5SYu Xiangning } 10550f1702c5SYu Xiangning while (mp->b_cont != NULL) 10560f1702c5SYu Xiangning mp = mp->b_cont; 10570f1702c5SYu Xiangning 10580f1702c5SYu Xiangning so->so_rcv_last_head->b_prev = mp; 10590f1702c5SYu Xiangning #ifdef DEBUG 10600f1702c5SYu Xiangning if (so_debug_length) { 10610f1702c5SYu Xiangning ASSERT(so_check_length(so)); 10620f1702c5SYu Xiangning } 10630f1702c5SYu Xiangning #endif 10640f1702c5SYu Xiangning } 10650f1702c5SYu Xiangning 10660f1702c5SYu Xiangning /* 10670f1702c5SYu Xiangning * Return B_TRUE if there is data in the message, B_FALSE otherwise. 10680f1702c5SYu Xiangning */ 10690f1702c5SYu Xiangning boolean_t 10700f1702c5SYu Xiangning somsghasdata(mblk_t *mp) 10710f1702c5SYu Xiangning { 10720f1702c5SYu Xiangning for (; mp; mp = mp->b_cont) 10730f1702c5SYu Xiangning if (mp->b_datap->db_type == M_DATA) { 10740f1702c5SYu Xiangning ASSERT(mp->b_wptr >= mp->b_rptr); 10750f1702c5SYu Xiangning if (mp->b_wptr > mp->b_rptr) 10760f1702c5SYu Xiangning return (B_TRUE); 10770f1702c5SYu Xiangning } 10780f1702c5SYu Xiangning return (B_FALSE); 10790f1702c5SYu Xiangning } 10800f1702c5SYu Xiangning 10810f1702c5SYu Xiangning /* 10820f1702c5SYu Xiangning * Flush the read side of sockfs. 10830f1702c5SYu Xiangning * 10840f1702c5SYu Xiangning * The caller must be sure that a reader is not already active when the 10850f1702c5SYu Xiangning * buffer is being flushed. 10860f1702c5SYu Xiangning */ 10870f1702c5SYu Xiangning void 10880f1702c5SYu Xiangning so_rcv_flush(struct sonode *so) 10890f1702c5SYu Xiangning { 10900f1702c5SYu Xiangning mblk_t *mp; 10910f1702c5SYu Xiangning 10920f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 10930f1702c5SYu Xiangning 10940f1702c5SYu Xiangning if (so->so_oobmsg != NULL) { 10950f1702c5SYu Xiangning freemsg(so->so_oobmsg); 10960f1702c5SYu Xiangning so->so_oobmsg = NULL; 10970f1702c5SYu Xiangning so->so_oobmark = 0; 10980f1702c5SYu Xiangning so->so_state &= 10990f1702c5SYu Xiangning ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|SS_RCVATMARK); 11000f1702c5SYu Xiangning } 11010f1702c5SYu Xiangning 11020f1702c5SYu Xiangning /* 11033e95bd4aSAnders Persson * Free messages sitting in the recv queues 11040f1702c5SYu Xiangning */ 11050f1702c5SYu Xiangning while (so->so_rcv_q_head != NULL) { 11060f1702c5SYu Xiangning mp = so->so_rcv_q_head; 11070f1702c5SYu Xiangning so->so_rcv_q_head = mp->b_next; 11080f1702c5SYu Xiangning mp->b_next = mp->b_prev = NULL; 11090f1702c5SYu Xiangning freemsg(mp); 11100f1702c5SYu Xiangning } 11110f1702c5SYu Xiangning while (so->so_rcv_head != NULL) { 11120f1702c5SYu Xiangning mp = so->so_rcv_head; 11130f1702c5SYu Xiangning so->so_rcv_head = mp->b_next; 11140f1702c5SYu Xiangning mp->b_next = mp->b_prev = NULL; 11150f1702c5SYu Xiangning freemsg(mp); 11160f1702c5SYu Xiangning } 11170f1702c5SYu Xiangning so->so_rcv_queued = 0; 11180f1702c5SYu Xiangning so->so_rcv_q_head = NULL; 11190f1702c5SYu Xiangning so->so_rcv_q_last_head = NULL; 11200f1702c5SYu Xiangning so->so_rcv_head = NULL; 11210f1702c5SYu Xiangning so->so_rcv_last_head = NULL; 11220f1702c5SYu Xiangning } 11230f1702c5SYu Xiangning 11240f1702c5SYu Xiangning /* 11250f1702c5SYu Xiangning * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 11260f1702c5SYu Xiangning */ 11270f1702c5SYu Xiangning int 11280f1702c5SYu Xiangning sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags, 11290f1702c5SYu Xiangning boolean_t oob_inline) 11300f1702c5SYu Xiangning { 11310f1702c5SYu Xiangning mblk_t *mp, *nmp; 11320f1702c5SYu Xiangning int error; 11330f1702c5SYu Xiangning 11340f1702c5SYu Xiangning dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", (void *)so, (void *)msg, 11350f1702c5SYu Xiangning flags)); 11360f1702c5SYu Xiangning 11370f1702c5SYu Xiangning if (msg != NULL) { 11380f1702c5SYu Xiangning /* 11390f1702c5SYu Xiangning * There is never any oob data with addresses or control since 11400f1702c5SYu Xiangning * the T_EXDATA_IND does not carry any options. 11410f1702c5SYu Xiangning */ 11420f1702c5SYu Xiangning msg->msg_controllen = 0; 11430f1702c5SYu Xiangning msg->msg_namelen = 0; 11440f1702c5SYu Xiangning msg->msg_flags = 0; 11450f1702c5SYu Xiangning } 11460f1702c5SYu Xiangning 11470f1702c5SYu Xiangning mutex_enter(&so->so_lock); 11480f1702c5SYu Xiangning ASSERT(so_verify_oobstate(so)); 11490f1702c5SYu Xiangning if (oob_inline || 11500f1702c5SYu Xiangning (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 11510f1702c5SYu Xiangning dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 11520f1702c5SYu Xiangning mutex_exit(&so->so_lock); 11530f1702c5SYu Xiangning return (EINVAL); 11540f1702c5SYu Xiangning } 11550f1702c5SYu Xiangning if (!(so->so_state & SS_HAVEOOBDATA)) { 11560f1702c5SYu Xiangning dprintso(so, 1, ("sorecvoob: no data yet\n")); 11570f1702c5SYu Xiangning mutex_exit(&so->so_lock); 11580f1702c5SYu Xiangning return (EWOULDBLOCK); 11590f1702c5SYu Xiangning } 11600f1702c5SYu Xiangning ASSERT(so->so_oobmsg != NULL); 11610f1702c5SYu Xiangning mp = so->so_oobmsg; 11620f1702c5SYu Xiangning if (flags & MSG_PEEK) { 11630f1702c5SYu Xiangning /* 11640f1702c5SYu Xiangning * Since recv* can not return ENOBUFS we can not use dupmsg. 11650f1702c5SYu Xiangning * Instead we revert to the consolidation private 11660f1702c5SYu Xiangning * allocb_wait plus bcopy. 11670f1702c5SYu Xiangning */ 11680f1702c5SYu Xiangning mblk_t *mp1; 11690f1702c5SYu Xiangning 11700f1702c5SYu Xiangning mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 11710f1702c5SYu Xiangning ASSERT(mp1); 11720f1702c5SYu Xiangning 11730f1702c5SYu Xiangning while (mp != NULL) { 11740f1702c5SYu Xiangning ssize_t size; 11750f1702c5SYu Xiangning 11760f1702c5SYu Xiangning size = MBLKL(mp); 11770f1702c5SYu Xiangning bcopy(mp->b_rptr, mp1->b_wptr, size); 11780f1702c5SYu Xiangning mp1->b_wptr += size; 11790f1702c5SYu Xiangning ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 11800f1702c5SYu Xiangning mp = mp->b_cont; 11810f1702c5SYu Xiangning } 11820f1702c5SYu Xiangning mp = mp1; 11830f1702c5SYu Xiangning } else { 11840f1702c5SYu Xiangning /* 11850f1702c5SYu Xiangning * Update the state indicating that the data has been consumed. 11860f1702c5SYu Xiangning * Keep SS_OOBPEND set until data is consumed past the mark. 11870f1702c5SYu Xiangning */ 11880f1702c5SYu Xiangning so->so_oobmsg = NULL; 11890f1702c5SYu Xiangning so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 11900f1702c5SYu Xiangning } 11910f1702c5SYu Xiangning ASSERT(so_verify_oobstate(so)); 11920f1702c5SYu Xiangning mutex_exit(&so->so_lock); 11930f1702c5SYu Xiangning 11940f1702c5SYu Xiangning error = 0; 11950f1702c5SYu Xiangning nmp = mp; 11960f1702c5SYu Xiangning while (nmp != NULL && uiop->uio_resid > 0) { 11970f1702c5SYu Xiangning ssize_t n = MBLKL(nmp); 11980f1702c5SYu Xiangning 11990f1702c5SYu Xiangning n = MIN(n, uiop->uio_resid); 12000f1702c5SYu Xiangning if (n > 0) 12010f1702c5SYu Xiangning error = uiomove(nmp->b_rptr, n, 12020f1702c5SYu Xiangning UIO_READ, uiop); 12030f1702c5SYu Xiangning if (error) 12040f1702c5SYu Xiangning break; 12050f1702c5SYu Xiangning nmp = nmp->b_cont; 12060f1702c5SYu Xiangning } 12070f1702c5SYu Xiangning ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 12080f1702c5SYu Xiangning freemsg(mp); 12090f1702c5SYu Xiangning return (error); 12100f1702c5SYu Xiangning } 12110f1702c5SYu Xiangning 12120f1702c5SYu Xiangning /* 12130f1702c5SYu Xiangning * Allocate and initializ sonode 12140f1702c5SYu Xiangning */ 12150f1702c5SYu Xiangning /* ARGSUSED */ 12160f1702c5SYu Xiangning struct sonode * 12170f1702c5SYu Xiangning socket_sonode_create(struct sockparams *sp, int family, int type, 12180f1702c5SYu Xiangning int protocol, int version, int sflags, int *errorp, struct cred *cr) 12190f1702c5SYu Xiangning { 12200f1702c5SYu Xiangning sonode_t *so; 12210f1702c5SYu Xiangning int kmflags; 12220f1702c5SYu Xiangning 12230f1702c5SYu Xiangning /* 12240f1702c5SYu Xiangning * Choose the right set of sonodeops based on the upcall and 12250f1702c5SYu Xiangning * down call version that the protocol has provided 12260f1702c5SYu Xiangning */ 12270f1702c5SYu Xiangning if (SOCK_UC_VERSION != sp->sp_smod_info->smod_uc_version || 12280f1702c5SYu Xiangning SOCK_DC_VERSION != sp->sp_smod_info->smod_dc_version) { 12290f1702c5SYu Xiangning /* 12300f1702c5SYu Xiangning * mismatch 12310f1702c5SYu Xiangning */ 12320f1702c5SYu Xiangning #ifdef DEBUG 12330f1702c5SYu Xiangning cmn_err(CE_CONT, "protocol and socket module version mismatch"); 12340f1702c5SYu Xiangning #endif 12350f1702c5SYu Xiangning *errorp = EINVAL; 12360f1702c5SYu Xiangning return (NULL); 12370f1702c5SYu Xiangning } 12380f1702c5SYu Xiangning 12390f1702c5SYu Xiangning kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP; 12400f1702c5SYu Xiangning 12410f1702c5SYu Xiangning so = kmem_cache_alloc(socket_cache, kmflags); 12420f1702c5SYu Xiangning if (so == NULL) { 12430f1702c5SYu Xiangning *errorp = ENOMEM; 12440f1702c5SYu Xiangning return (NULL); 12450f1702c5SYu Xiangning } 12460f1702c5SYu Xiangning 12470f1702c5SYu Xiangning sonode_init(so, sp, family, type, protocol, &so_sonodeops); 12480f1702c5SYu Xiangning 12490f1702c5SYu Xiangning if (version == SOV_DEFAULT) 12500f1702c5SYu Xiangning version = so_default_version; 12510f1702c5SYu Xiangning 12520f1702c5SYu Xiangning so->so_version = (short)version; 12530f1702c5SYu Xiangning 12540f1702c5SYu Xiangning /* 12550f1702c5SYu Xiangning * set the default values to be INFPSZ 12560f1702c5SYu Xiangning * if a protocol desires it can change the value later 12570f1702c5SYu Xiangning */ 12580f1702c5SYu Xiangning so->so_proto_props.sopp_rxhiwat = SOCKET_RECVHIWATER; 12590f1702c5SYu Xiangning so->so_proto_props.sopp_rxlowat = SOCKET_RECVLOWATER; 12600f1702c5SYu Xiangning so->so_proto_props.sopp_maxpsz = INFPSZ; 12610f1702c5SYu Xiangning so->so_proto_props.sopp_maxblk = INFPSZ; 12620f1702c5SYu Xiangning 12630f1702c5SYu Xiangning return (so); 12640f1702c5SYu Xiangning } 12650f1702c5SYu Xiangning 12660f1702c5SYu Xiangning int 12670f1702c5SYu Xiangning socket_init_common(struct sonode *so, struct sonode *pso, int flags, cred_t *cr) 12680f1702c5SYu Xiangning { 12690f1702c5SYu Xiangning int error = 0; 12700f1702c5SYu Xiangning 12710f1702c5SYu Xiangning if (pso != NULL) { 12720f1702c5SYu Xiangning /* 12730f1702c5SYu Xiangning * We have a passive open, so inherit basic state from 12740f1702c5SYu Xiangning * the parent (listener). 12750f1702c5SYu Xiangning * 12760f1702c5SYu Xiangning * No need to grab the new sonode's lock, since there is no 12770f1702c5SYu Xiangning * one that can have a reference to it. 12780f1702c5SYu Xiangning */ 12790f1702c5SYu Xiangning mutex_enter(&pso->so_lock); 12800f1702c5SYu Xiangning 12810f1702c5SYu Xiangning so->so_state |= SS_ISCONNECTED | (pso->so_state & SS_ASYNC); 12820f1702c5SYu Xiangning so->so_pgrp = pso->so_pgrp; 12830f1702c5SYu Xiangning so->so_rcvtimeo = pso->so_rcvtimeo; 12840f1702c5SYu Xiangning so->so_sndtimeo = pso->so_sndtimeo; 1285a5adac4dSYu Xiangning so->so_xpg_rcvbuf = pso->so_xpg_rcvbuf; 12860f1702c5SYu Xiangning /* 12870f1702c5SYu Xiangning * Make note of the socket level options. TCP and IP level 12880f1702c5SYu Xiangning * options are already inherited. We could do all this after 12890f1702c5SYu Xiangning * accept is successful but doing it here simplifies code and 12900f1702c5SYu Xiangning * no harm done for error case. 12910f1702c5SYu Xiangning */ 12920f1702c5SYu Xiangning so->so_options = pso->so_options & (SO_DEBUG|SO_REUSEADDR| 12930f1702c5SYu Xiangning SO_KEEPALIVE|SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 12940f1702c5SYu Xiangning SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 12950f1702c5SYu Xiangning so->so_proto_props = pso->so_proto_props; 12960f1702c5SYu Xiangning so->so_mode = pso->so_mode; 1297f0267584Sanders so->so_pollev = pso->so_pollev & SO_POLLEV_ALWAYS; 12980f1702c5SYu Xiangning 12990f1702c5SYu Xiangning mutex_exit(&pso->so_lock); 13003e95bd4aSAnders Persson 13013e95bd4aSAnders Persson /* 13023e95bd4aSAnders Persson * If the parent has any filters, try to inherit them. 13033e95bd4aSAnders Persson */ 13043e95bd4aSAnders Persson if (pso->so_filter_active > 0 && 13053e95bd4aSAnders Persson (error = sof_sonode_inherit_filters(so, pso)) != 0) 13063e95bd4aSAnders Persson return (error); 13073e95bd4aSAnders Persson 13080f1702c5SYu Xiangning } else { 13090f1702c5SYu Xiangning struct sockparams *sp = so->so_sockparams; 13100f1702c5SYu Xiangning sock_upcalls_t *upcalls_to_use; 13110f1702c5SYu Xiangning 13120f1702c5SYu Xiangning /* 13133e95bd4aSAnders Persson * Attach automatic filters, if there are any. 13143e95bd4aSAnders Persson */ 13153e95bd4aSAnders Persson if (!list_is_empty(&sp->sp_auto_filters) && 13163e95bd4aSAnders Persson (error = sof_sonode_autoattach_filters(so, cr)) != 0) 13173e95bd4aSAnders Persson return (error); 13183e95bd4aSAnders Persson 13193e95bd4aSAnders Persson /* OK to attach filters */ 13203e95bd4aSAnders Persson so->so_state |= SS_FILOP_OK; 13213e95bd4aSAnders Persson 13223e95bd4aSAnders Persson /* 13230f1702c5SYu Xiangning * Based on the version number select the right upcalls to 13240f1702c5SYu Xiangning * pass down. Currently we only have one version so choose 13250f1702c5SYu Xiangning * default 13260f1702c5SYu Xiangning */ 13270f1702c5SYu Xiangning upcalls_to_use = &so_upcalls; 13280f1702c5SYu Xiangning 13290f1702c5SYu Xiangning /* active open, so create a lower handle */ 13300f1702c5SYu Xiangning so->so_proto_handle = 13310f1702c5SYu Xiangning sp->sp_smod_info->smod_proto_create_func(so->so_family, 13320f1702c5SYu Xiangning so->so_type, so->so_protocol, &so->so_downcalls, 13330f1702c5SYu Xiangning &so->so_mode, &error, flags, cr); 13340f1702c5SYu Xiangning 13350f1702c5SYu Xiangning if (so->so_proto_handle == NULL) { 13360f1702c5SYu Xiangning ASSERT(error != 0); 13370f1702c5SYu Xiangning /* 13380f1702c5SYu Xiangning * To be safe; if a lower handle cannot be created, and 13390f1702c5SYu Xiangning * the proto does not give a reason why, assume there 13400f1702c5SYu Xiangning * was a lack of memory. 13410f1702c5SYu Xiangning */ 13420f1702c5SYu Xiangning return ((error == 0) ? ENOMEM : error); 13430f1702c5SYu Xiangning } 13440f1702c5SYu Xiangning ASSERT(so->so_downcalls != NULL); 13450f1702c5SYu Xiangning ASSERT(so->so_downcalls->sd_send != NULL || 13460f1702c5SYu Xiangning so->so_downcalls->sd_send_uio != NULL); 13470f1702c5SYu Xiangning if (so->so_downcalls->sd_recv_uio != NULL) { 13480f1702c5SYu Xiangning ASSERT(so->so_downcalls->sd_poll != NULL); 13490f1702c5SYu Xiangning so->so_pollev |= SO_POLLEV_ALWAYS; 13500f1702c5SYu Xiangning } 13510f1702c5SYu Xiangning 13520f1702c5SYu Xiangning (*so->so_downcalls->sd_activate)(so->so_proto_handle, 13530f1702c5SYu Xiangning (sock_upper_handle_t)so, upcalls_to_use, 0, cr); 13540f1702c5SYu Xiangning 13550f1702c5SYu Xiangning /* Wildcard */ 13560f1702c5SYu Xiangning 13570f1702c5SYu Xiangning /* 13580f1702c5SYu Xiangning * FIXME No need for this, the protocol can deal with it in 13590f1702c5SYu Xiangning * sd_create(). Should update ICMP. 13600f1702c5SYu Xiangning */ 13610f1702c5SYu Xiangning if (so->so_protocol != so->so_sockparams->sp_protocol) { 13620f1702c5SYu Xiangning int protocol = so->so_protocol; 13630f1702c5SYu Xiangning int error; 13640f1702c5SYu Xiangning /* 13650f1702c5SYu Xiangning * Issue SO_PROTOTYPE setsockopt. 13660f1702c5SYu Xiangning */ 13670f1702c5SYu Xiangning error = socket_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 13680f1702c5SYu Xiangning &protocol, (t_uscalar_t)sizeof (protocol), cr); 13690f1702c5SYu Xiangning if (error) { 13700f1702c5SYu Xiangning (void) (*so->so_downcalls->sd_close) 13710f1702c5SYu Xiangning (so->so_proto_handle, 0, cr); 13720f1702c5SYu Xiangning 13730f1702c5SYu Xiangning mutex_enter(&so->so_lock); 13740f1702c5SYu Xiangning so_rcv_flush(so); 13750f1702c5SYu Xiangning mutex_exit(&so->so_lock); 13760f1702c5SYu Xiangning /* 13770f1702c5SYu Xiangning * Setsockopt often fails with ENOPROTOOPT but 13780f1702c5SYu Xiangning * socket() should fail with 13790f1702c5SYu Xiangning * EPROTONOSUPPORT/EPROTOTYPE. 13800f1702c5SYu Xiangning */ 13810f1702c5SYu Xiangning return (EPROTONOSUPPORT); 13820f1702c5SYu Xiangning } 13830f1702c5SYu Xiangning } 13840f1702c5SYu Xiangning } 1385bbc000e5SAnders Persson 1386bbc000e5SAnders Persson if (uioasync.enabled) 1387bbc000e5SAnders Persson sod_sock_init(so); 1388bbc000e5SAnders Persson 13893e95bd4aSAnders Persson /* put an extra reference on the socket for the protocol */ 13903e95bd4aSAnders Persson VN_HOLD(SOTOV(so)); 13913e95bd4aSAnders Persson 1392bbc000e5SAnders Persson return (0); 13930f1702c5SYu Xiangning } 13940f1702c5SYu Xiangning 13950f1702c5SYu Xiangning /* 13960f1702c5SYu Xiangning * int socket_ioctl_common(struct sonode *so, int cmd, intptr_t arg, int mode, 13970f1702c5SYu Xiangning * struct cred *cr, int32_t *rvalp) 13980f1702c5SYu Xiangning * 13990f1702c5SYu Xiangning * Handle ioctls that manipulate basic socket state; non-blocking, 14000f1702c5SYu Xiangning * async, etc. 14010f1702c5SYu Xiangning * 14020f1702c5SYu Xiangning * Returns: 14030f1702c5SYu Xiangning * < 0 - ioctl was not handle 14040f1702c5SYu Xiangning * >= 0 - ioctl was handled, if > 0, then it is an errno 14050f1702c5SYu Xiangning * 14060f1702c5SYu Xiangning * Notes: 14070f1702c5SYu Xiangning * Assumes the standard receive buffer is used to obtain info for 14080f1702c5SYu Xiangning * NREAD. 14090f1702c5SYu Xiangning */ 14100f1702c5SYu Xiangning /* ARGSUSED */ 14110f1702c5SYu Xiangning int 14120f1702c5SYu Xiangning socket_ioctl_common(struct sonode *so, int cmd, intptr_t arg, int mode, 14130f1702c5SYu Xiangning struct cred *cr, int32_t *rvalp) 14140f1702c5SYu Xiangning { 14150f1702c5SYu Xiangning switch (cmd) { 1416bfcb55b8SRao Shoaib case SIOCSQPTR: 1417bfcb55b8SRao Shoaib /* 1418bfcb55b8SRao Shoaib * SIOCSQPTR is valid only when helper stream is created 1419bfcb55b8SRao Shoaib * by the protocol. 1420bfcb55b8SRao Shoaib */ 1421bfcb55b8SRao Shoaib 1422bfcb55b8SRao Shoaib return (EOPNOTSUPP); 14230f1702c5SYu Xiangning case FIONBIO: { 14240f1702c5SYu Xiangning int32_t value; 14250f1702c5SYu Xiangning 14260f1702c5SYu Xiangning if (so_copyin((void *)arg, &value, sizeof (int32_t), 14270f1702c5SYu Xiangning (mode & (int)FKIOCTL))) 14280f1702c5SYu Xiangning return (EFAULT); 14290f1702c5SYu Xiangning 14300f1702c5SYu Xiangning mutex_enter(&so->so_lock); 14310f1702c5SYu Xiangning if (value) { 14320f1702c5SYu Xiangning so->so_state |= SS_NDELAY; 14330f1702c5SYu Xiangning } else { 14340f1702c5SYu Xiangning so->so_state &= ~SS_NDELAY; 14350f1702c5SYu Xiangning } 14360f1702c5SYu Xiangning mutex_exit(&so->so_lock); 14370f1702c5SYu Xiangning return (0); 14380f1702c5SYu Xiangning } 14390f1702c5SYu Xiangning case FIOASYNC: { 14400f1702c5SYu Xiangning int32_t value; 14410f1702c5SYu Xiangning 14420f1702c5SYu Xiangning if (so_copyin((void *)arg, &value, sizeof (int32_t), 14430f1702c5SYu Xiangning (mode & (int)FKIOCTL))) 14440f1702c5SYu Xiangning return (EFAULT); 14450f1702c5SYu Xiangning 14460f1702c5SYu Xiangning mutex_enter(&so->so_lock); 14470f1702c5SYu Xiangning 14480f1702c5SYu Xiangning if (value) { 14490f1702c5SYu Xiangning /* Turn on SIGIO */ 14500f1702c5SYu Xiangning so->so_state |= SS_ASYNC; 14510f1702c5SYu Xiangning } else { 14520f1702c5SYu Xiangning /* Turn off SIGIO */ 14530f1702c5SYu Xiangning so->so_state &= ~SS_ASYNC; 14540f1702c5SYu Xiangning } 14550f1702c5SYu Xiangning mutex_exit(&so->so_lock); 14560f1702c5SYu Xiangning 14570f1702c5SYu Xiangning return (0); 14580f1702c5SYu Xiangning } 14590f1702c5SYu Xiangning 14600f1702c5SYu Xiangning case SIOCSPGRP: 14610f1702c5SYu Xiangning case FIOSETOWN: { 14620f1702c5SYu Xiangning int error; 14630f1702c5SYu Xiangning pid_t pid; 14640f1702c5SYu Xiangning 14650f1702c5SYu Xiangning if (so_copyin((void *)arg, &pid, sizeof (pid_t), 14660f1702c5SYu Xiangning (mode & (int)FKIOCTL))) 14670f1702c5SYu Xiangning return (EFAULT); 14680f1702c5SYu Xiangning 14690f1702c5SYu Xiangning mutex_enter(&so->so_lock); 14700f1702c5SYu Xiangning error = (pid != so->so_pgrp) ? socket_chgpgrp(so, pid) : 0; 14710f1702c5SYu Xiangning mutex_exit(&so->so_lock); 14720f1702c5SYu Xiangning return (error); 14730f1702c5SYu Xiangning } 14740f1702c5SYu Xiangning case SIOCGPGRP: 14750f1702c5SYu Xiangning case FIOGETOWN: 14760f1702c5SYu Xiangning if (so_copyout(&so->so_pgrp, (void *)arg, 14770f1702c5SYu Xiangning sizeof (pid_t), (mode & (int)FKIOCTL))) 14780f1702c5SYu Xiangning return (EFAULT); 14790f1702c5SYu Xiangning 14800f1702c5SYu Xiangning return (0); 14810f1702c5SYu Xiangning case SIOCATMARK: { 14820f1702c5SYu Xiangning int retval; 14830f1702c5SYu Xiangning 14840f1702c5SYu Xiangning /* 14850f1702c5SYu Xiangning * Only protocols that support urgent data can handle ATMARK. 14860f1702c5SYu Xiangning */ 14870f1702c5SYu Xiangning if ((so->so_mode & SM_EXDATA) == 0) 14880f1702c5SYu Xiangning return (EINVAL); 14890f1702c5SYu Xiangning 14900f1702c5SYu Xiangning /* 14910f1702c5SYu Xiangning * If the protocol is maintaining its own buffer, then the 14920f1702c5SYu Xiangning * request must be passed down. 14930f1702c5SYu Xiangning */ 14940f1702c5SYu Xiangning if (so->so_downcalls->sd_recv_uio != NULL) 14950f1702c5SYu Xiangning return (-1); 14960f1702c5SYu Xiangning 14970f1702c5SYu Xiangning retval = (so->so_state & SS_RCVATMARK) != 0; 14980f1702c5SYu Xiangning 14990f1702c5SYu Xiangning if (so_copyout(&retval, (void *)arg, sizeof (int), 15000f1702c5SYu Xiangning (mode & (int)FKIOCTL))) { 15010f1702c5SYu Xiangning return (EFAULT); 15020f1702c5SYu Xiangning } 15030f1702c5SYu Xiangning return (0); 15040f1702c5SYu Xiangning } 15050f1702c5SYu Xiangning 15060f1702c5SYu Xiangning case FIONREAD: { 15070f1702c5SYu Xiangning int retval; 15080f1702c5SYu Xiangning 15090f1702c5SYu Xiangning /* 15100f1702c5SYu Xiangning * If the protocol is maintaining its own buffer, then the 15110f1702c5SYu Xiangning * request must be passed down. 15120f1702c5SYu Xiangning */ 15130f1702c5SYu Xiangning if (so->so_downcalls->sd_recv_uio != NULL) 15140f1702c5SYu Xiangning return (-1); 15150f1702c5SYu Xiangning 15160f1702c5SYu Xiangning retval = MIN(so->so_rcv_queued, INT_MAX); 15170f1702c5SYu Xiangning 15180f1702c5SYu Xiangning if (so_copyout(&retval, (void *)arg, 15190f1702c5SYu Xiangning sizeof (retval), (mode & (int)FKIOCTL))) { 15200f1702c5SYu Xiangning return (EFAULT); 15210f1702c5SYu Xiangning } 15220f1702c5SYu Xiangning return (0); 15230f1702c5SYu Xiangning } 15240f1702c5SYu Xiangning 15250f1702c5SYu Xiangning case _I_GETPEERCRED: { 15260f1702c5SYu Xiangning int error = 0; 15270f1702c5SYu Xiangning 15280f1702c5SYu Xiangning if ((mode & FKIOCTL) == 0) 15290f1702c5SYu Xiangning return (EINVAL); 15300f1702c5SYu Xiangning 15310f1702c5SYu Xiangning mutex_enter(&so->so_lock); 15320f1702c5SYu Xiangning if ((so->so_mode & SM_CONNREQUIRED) == 0) { 15330f1702c5SYu Xiangning error = ENOTSUP; 15340f1702c5SYu Xiangning } else if ((so->so_state & SS_ISCONNECTED) == 0) { 15350f1702c5SYu Xiangning error = ENOTCONN; 15360f1702c5SYu Xiangning } else if (so->so_peercred != NULL) { 15370f1702c5SYu Xiangning k_peercred_t *kp = (k_peercred_t *)arg; 15380f1702c5SYu Xiangning kp->pc_cr = so->so_peercred; 15390f1702c5SYu Xiangning kp->pc_cpid = so->so_cpid; 15400f1702c5SYu Xiangning crhold(so->so_peercred); 15410f1702c5SYu Xiangning } else { 15420f1702c5SYu Xiangning error = EINVAL; 15430f1702c5SYu Xiangning } 15440f1702c5SYu Xiangning mutex_exit(&so->so_lock); 15450f1702c5SYu Xiangning return (error); 15460f1702c5SYu Xiangning } 15470f1702c5SYu Xiangning default: 15480f1702c5SYu Xiangning return (-1); 15490f1702c5SYu Xiangning } 15500f1702c5SYu Xiangning } 15510f1702c5SYu Xiangning 15520f1702c5SYu Xiangning /* 155341174437SAnders Persson * Handle the I_NREAD STREAM ioctl. 155441174437SAnders Persson */ 155541174437SAnders Persson static int 155641174437SAnders Persson so_strioc_nread(struct sonode *so, intptr_t arg, int mode, int32_t *rvalp) 155741174437SAnders Persson { 155841174437SAnders Persson size_t size = 0; 155941174437SAnders Persson int retval; 156041174437SAnders Persson int count = 0; 156141174437SAnders Persson mblk_t *mp; 1562d3d50737SRafael Vanoni clock_t wakeup = drv_usectohz(10); 156341174437SAnders Persson 156441174437SAnders Persson if (so->so_downcalls == NULL || 156541174437SAnders Persson so->so_downcalls->sd_recv_uio != NULL) 156641174437SAnders Persson return (EINVAL); 156741174437SAnders Persson 156841174437SAnders Persson mutex_enter(&so->so_lock); 156941174437SAnders Persson /* Wait for reader to get out of the way. */ 157041174437SAnders Persson while (so->so_flag & SOREADLOCKED) { 157141174437SAnders Persson /* 157241174437SAnders Persson * If reader is waiting for data, then there should be nothing 157341174437SAnders Persson * on the rcv queue. 157441174437SAnders Persson */ 157541174437SAnders Persson if (so->so_rcv_wakeup) 157641174437SAnders Persson goto out; 157741174437SAnders Persson 157841174437SAnders Persson /* Do a timed sleep, in case the reader goes to sleep. */ 1579decd6ccdSAnders Persson (void) cv_reltimedwait(&so->so_read_cv, &so->so_lock, wakeup, 1580d3d50737SRafael Vanoni TR_CLOCK_TICK); 158141174437SAnders Persson } 158241174437SAnders Persson 158341174437SAnders Persson /* 158441174437SAnders Persson * Since we are holding so_lock no new reader will come in, and the 158541174437SAnders Persson * protocol will not be able to enqueue data. So it's safe to walk 158641174437SAnders Persson * both rcv queues. 158741174437SAnders Persson */ 158841174437SAnders Persson mp = so->so_rcv_q_head; 158941174437SAnders Persson if (mp != NULL) { 159041174437SAnders Persson size = msgdsize(so->so_rcv_q_head); 159141174437SAnders Persson for (; mp != NULL; mp = mp->b_next) 159241174437SAnders Persson count++; 159341174437SAnders Persson } else { 159441174437SAnders Persson /* 159541174437SAnders Persson * In case the processing list was empty, get the size of the 159641174437SAnders Persson * next msg in line. 159741174437SAnders Persson */ 159841174437SAnders Persson size = msgdsize(so->so_rcv_head); 159941174437SAnders Persson } 160041174437SAnders Persson 160141174437SAnders Persson for (mp = so->so_rcv_head; mp != NULL; mp = mp->b_next) 160241174437SAnders Persson count++; 160341174437SAnders Persson out: 160441174437SAnders Persson mutex_exit(&so->so_lock); 160541174437SAnders Persson 160641174437SAnders Persson /* 160741174437SAnders Persson * Drop down from size_t to the "int" required by the 160841174437SAnders Persson * interface. Cap at INT_MAX. 160941174437SAnders Persson */ 161041174437SAnders Persson retval = MIN(size, INT_MAX); 161141174437SAnders Persson if (so_copyout(&retval, (void *)arg, sizeof (retval), 161241174437SAnders Persson (mode & (int)FKIOCTL))) { 161341174437SAnders Persson return (EFAULT); 161441174437SAnders Persson } else { 161541174437SAnders Persson *rvalp = count; 161641174437SAnders Persson return (0); 161741174437SAnders Persson } 161841174437SAnders Persson } 161941174437SAnders Persson 162041174437SAnders Persson /* 162141174437SAnders Persson * Process STREAM ioctls. 16220f1702c5SYu Xiangning * 16230f1702c5SYu Xiangning * Returns: 16240f1702c5SYu Xiangning * < 0 - ioctl was not handle 16250f1702c5SYu Xiangning * >= 0 - ioctl was handled, if > 0, then it is an errno 16260f1702c5SYu Xiangning */ 16270f1702c5SYu Xiangning int 16280f1702c5SYu Xiangning socket_strioc_common(struct sonode *so, int cmd, intptr_t arg, int mode, 16290f1702c5SYu Xiangning struct cred *cr, int32_t *rvalp) 16300f1702c5SYu Xiangning { 16310f1702c5SYu Xiangning int retval; 16320f1702c5SYu Xiangning 163341174437SAnders Persson /* Only STREAM iotcls are handled here */ 163441174437SAnders Persson if ((cmd & 0xffffff00U) != STR) 163541174437SAnders Persson return (-1); 163641174437SAnders Persson 163741174437SAnders Persson switch (cmd) { 163841174437SAnders Persson case I_CANPUT: 163941174437SAnders Persson /* 164041174437SAnders Persson * We return an error for I_CANPUT so that isastream(3C) will 164141174437SAnders Persson * not report the socket as being a STREAM. 164241174437SAnders Persson */ 164341174437SAnders Persson return (EOPNOTSUPP); 164441174437SAnders Persson case I_NREAD: 164541174437SAnders Persson /* Avoid doing a fallback for I_NREAD. */ 164641174437SAnders Persson return (so_strioc_nread(so, arg, mode, rvalp)); 16470f1702c5SYu Xiangning case I_LOOK: 164841174437SAnders Persson /* Avoid doing a fallback for I_LOOK. */ 16490f1702c5SYu Xiangning if (so_copyout("sockmod", (void *)arg, strlen("sockmod") + 1, 16500f1702c5SYu Xiangning (mode & (int)FKIOCTL))) { 16510f1702c5SYu Xiangning return (EFAULT); 16520f1702c5SYu Xiangning } 16530f1702c5SYu Xiangning return (0); 16540f1702c5SYu Xiangning default: 165541174437SAnders Persson break; 165641174437SAnders Persson } 165741174437SAnders Persson 165841174437SAnders Persson /* 165941174437SAnders Persson * Try to fall back to TPI, and if successful, reissue the ioctl. 166041174437SAnders Persson */ 166141174437SAnders Persson if ((retval = so_tpi_fallback(so, cr)) == 0) { 166241174437SAnders Persson /* Reissue the ioctl */ 166341174437SAnders Persson ASSERT(so->so_rcv_q_head == NULL); 166441174437SAnders Persson return (SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 166541174437SAnders Persson } else { 166641174437SAnders Persson return (retval); 16670f1702c5SYu Xiangning } 16680f1702c5SYu Xiangning } 16690f1702c5SYu Xiangning 16702c632ad5SAnders Persson /* 16712c632ad5SAnders Persson * This is called for all socket types to verify that the buffer size is large 16722c632ad5SAnders Persson * enough for the option, and if we can, handle the request as well. Most 16732c632ad5SAnders Persson * options will be forwarded to the protocol. 16742c632ad5SAnders Persson */ 16750f1702c5SYu Xiangning int 16760f1702c5SYu Xiangning socket_getopt_common(struct sonode *so, int level, int option_name, 1677a5adac4dSYu Xiangning void *optval, socklen_t *optlenp, int flags) 16780f1702c5SYu Xiangning { 16790f1702c5SYu Xiangning if (level != SOL_SOCKET) 16800f1702c5SYu Xiangning return (-1); 16810f1702c5SYu Xiangning 16820f1702c5SYu Xiangning switch (option_name) { 16830f1702c5SYu Xiangning case SO_ERROR: 16840f1702c5SYu Xiangning case SO_DOMAIN: 16850f1702c5SYu Xiangning case SO_TYPE: 16860f1702c5SYu Xiangning case SO_ACCEPTCONN: { 16870f1702c5SYu Xiangning int32_t value; 16880f1702c5SYu Xiangning socklen_t optlen = *optlenp; 16890f1702c5SYu Xiangning 16900f1702c5SYu Xiangning if (optlen < (t_uscalar_t)sizeof (int32_t)) { 16910f1702c5SYu Xiangning return (EINVAL); 16920f1702c5SYu Xiangning } 16930f1702c5SYu Xiangning 16940f1702c5SYu Xiangning switch (option_name) { 16950f1702c5SYu Xiangning case SO_ERROR: 16960f1702c5SYu Xiangning mutex_enter(&so->so_lock); 16970f1702c5SYu Xiangning value = sogeterr(so, B_TRUE); 16980f1702c5SYu Xiangning mutex_exit(&so->so_lock); 16990f1702c5SYu Xiangning break; 17000f1702c5SYu Xiangning case SO_DOMAIN: 17010f1702c5SYu Xiangning value = so->so_family; 17020f1702c5SYu Xiangning break; 17030f1702c5SYu Xiangning case SO_TYPE: 17040f1702c5SYu Xiangning value = so->so_type; 17050f1702c5SYu Xiangning break; 17060f1702c5SYu Xiangning case SO_ACCEPTCONN: 17070f1702c5SYu Xiangning if (so->so_state & SS_ACCEPTCONN) 17080f1702c5SYu Xiangning value = SO_ACCEPTCONN; 17090f1702c5SYu Xiangning else 17100f1702c5SYu Xiangning value = 0; 17110f1702c5SYu Xiangning break; 17120f1702c5SYu Xiangning } 17130f1702c5SYu Xiangning 17140f1702c5SYu Xiangning bcopy(&value, optval, sizeof (value)); 17150f1702c5SYu Xiangning *optlenp = sizeof (value); 17160f1702c5SYu Xiangning 17170f1702c5SYu Xiangning return (0); 17180f1702c5SYu Xiangning } 17190f1702c5SYu Xiangning case SO_SNDTIMEO: 17200f1702c5SYu Xiangning case SO_RCVTIMEO: { 17210f1702c5SYu Xiangning clock_t value; 17220f1702c5SYu Xiangning socklen_t optlen = *optlenp; 1723e5083e81Sshenjian 1724e5083e81Sshenjian if (get_udatamodel() == DATAMODEL_NONE || 1725e5083e81Sshenjian get_udatamodel() == DATAMODEL_NATIVE) { 172622238f73Sshenjian if (optlen < sizeof (struct timeval)) 172722238f73Sshenjian return (EINVAL); 172822238f73Sshenjian } else { 172922238f73Sshenjian if (optlen < sizeof (struct timeval32)) 17300f1702c5SYu Xiangning return (EINVAL); 17310f1702c5SYu Xiangning } 17320f1702c5SYu Xiangning if (option_name == SO_RCVTIMEO) 17330f1702c5SYu Xiangning value = drv_hztousec(so->so_rcvtimeo); 17340f1702c5SYu Xiangning else 17350f1702c5SYu Xiangning value = drv_hztousec(so->so_sndtimeo); 173622238f73Sshenjian 1737e5083e81Sshenjian if (get_udatamodel() == DATAMODEL_NONE || 1738e5083e81Sshenjian get_udatamodel() == DATAMODEL_NATIVE) { 173922238f73Sshenjian ((struct timeval *)(optval))->tv_sec = 174022238f73Sshenjian value / (1000 * 1000); 174122238f73Sshenjian ((struct timeval *)(optval))->tv_usec = 174222238f73Sshenjian value % (1000 * 1000); 17430f1702c5SYu Xiangning *optlenp = sizeof (struct timeval); 174422238f73Sshenjian } else { 174522238f73Sshenjian ((struct timeval32 *)(optval))->tv_sec = 174622238f73Sshenjian value / (1000 * 1000); 174722238f73Sshenjian ((struct timeval32 *)(optval))->tv_usec = 174822238f73Sshenjian value % (1000 * 1000); 174922238f73Sshenjian *optlenp = sizeof (struct timeval32); 175022238f73Sshenjian } 17510f1702c5SYu Xiangning return (0); 17520f1702c5SYu Xiangning } 17530f1702c5SYu Xiangning case SO_DEBUG: 17540f1702c5SYu Xiangning case SO_REUSEADDR: 1755*78918900SArne Jansen case SO_REUSEPORT: 17560f1702c5SYu Xiangning case SO_KEEPALIVE: 17570f1702c5SYu Xiangning case SO_DONTROUTE: 17580f1702c5SYu Xiangning case SO_BROADCAST: 17590f1702c5SYu Xiangning case SO_USELOOPBACK: 17600f1702c5SYu Xiangning case SO_OOBINLINE: 17610f1702c5SYu Xiangning case SO_SNDBUF: 17620f1702c5SYu Xiangning #ifdef notyet 17630f1702c5SYu Xiangning case SO_SNDLOWAT: 17640f1702c5SYu Xiangning case SO_RCVLOWAT: 17650f1702c5SYu Xiangning #endif /* notyet */ 17660f1702c5SYu Xiangning case SO_DGRAM_ERRIND: { 17670f1702c5SYu Xiangning socklen_t optlen = *optlenp; 17680f1702c5SYu Xiangning 17690f1702c5SYu Xiangning if (optlen < (t_uscalar_t)sizeof (int32_t)) 17700f1702c5SYu Xiangning return (EINVAL); 17710f1702c5SYu Xiangning break; 17720f1702c5SYu Xiangning } 1773a5adac4dSYu Xiangning case SO_RCVBUF: { 1774a5adac4dSYu Xiangning socklen_t optlen = *optlenp; 1775a5adac4dSYu Xiangning 1776a5adac4dSYu Xiangning if (optlen < (t_uscalar_t)sizeof (int32_t)) 1777a5adac4dSYu Xiangning return (EINVAL); 1778a5adac4dSYu Xiangning 1779a5adac4dSYu Xiangning if ((flags & _SOGETSOCKOPT_XPG4_2) && so->so_xpg_rcvbuf != 0) { 1780a5adac4dSYu Xiangning /* 1781a5adac4dSYu Xiangning * XXX If SO_RCVBUF has been set and this is an 1782a5adac4dSYu Xiangning * XPG 4.2 application then do not ask the transport 1783a5adac4dSYu Xiangning * since the transport might adjust the value and not 1784a5adac4dSYu Xiangning * return exactly what was set by the application. 1785a5adac4dSYu Xiangning * For non-XPG 4.2 application we return the value 1786a5adac4dSYu Xiangning * that the transport is actually using. 1787a5adac4dSYu Xiangning */ 1788a5adac4dSYu Xiangning *(int32_t *)optval = so->so_xpg_rcvbuf; 1789a5adac4dSYu Xiangning *optlenp = sizeof (so->so_xpg_rcvbuf); 1790a5adac4dSYu Xiangning return (0); 1791a5adac4dSYu Xiangning } 1792a5adac4dSYu Xiangning /* 1793a5adac4dSYu Xiangning * If the option has not been set then get a default 1794a5adac4dSYu Xiangning * value from the transport. 1795a5adac4dSYu Xiangning */ 1796a5adac4dSYu Xiangning break; 1797a5adac4dSYu Xiangning } 17980f1702c5SYu Xiangning case SO_LINGER: { 17990f1702c5SYu Xiangning socklen_t optlen = *optlenp; 18000f1702c5SYu Xiangning 18010f1702c5SYu Xiangning if (optlen < (t_uscalar_t)sizeof (struct linger)) 18020f1702c5SYu Xiangning return (EINVAL); 18030f1702c5SYu Xiangning break; 18040f1702c5SYu Xiangning } 18050f1702c5SYu Xiangning case SO_SND_BUFINFO: { 18060f1702c5SYu Xiangning socklen_t optlen = *optlenp; 18070f1702c5SYu Xiangning 18080f1702c5SYu Xiangning if (optlen < (t_uscalar_t)sizeof (struct so_snd_bufinfo)) 18090f1702c5SYu Xiangning return (EINVAL); 18100f1702c5SYu Xiangning ((struct so_snd_bufinfo *)(optval))->sbi_wroff = 18110f1702c5SYu Xiangning (so->so_proto_props).sopp_wroff; 18120f1702c5SYu Xiangning ((struct so_snd_bufinfo *)(optval))->sbi_maxblk = 18130f1702c5SYu Xiangning (so->so_proto_props).sopp_maxblk; 18140f1702c5SYu Xiangning ((struct so_snd_bufinfo *)(optval))->sbi_maxpsz = 18150f1702c5SYu Xiangning (so->so_proto_props).sopp_maxpsz; 18160f1702c5SYu Xiangning ((struct so_snd_bufinfo *)(optval))->sbi_tail = 18170f1702c5SYu Xiangning (so->so_proto_props).sopp_tail; 18180f1702c5SYu Xiangning *optlenp = sizeof (struct so_snd_bufinfo); 18190f1702c5SYu Xiangning return (0); 18200f1702c5SYu Xiangning } 18213e95bd4aSAnders Persson case SO_SND_COPYAVOID: { 18223e95bd4aSAnders Persson sof_instance_t *inst; 18233e95bd4aSAnders Persson 18243e95bd4aSAnders Persson /* 18253e95bd4aSAnders Persson * Avoid zero-copy if there is a filter with a data_out 18263e95bd4aSAnders Persson * callback. We could let the operation succeed, but then 18273e95bd4aSAnders Persson * the filter would have to copy the data anyway. 18283e95bd4aSAnders Persson */ 18293e95bd4aSAnders Persson for (inst = so->so_filter_top; inst != NULL; 18303e95bd4aSAnders Persson inst = inst->sofi_next) { 18313e95bd4aSAnders Persson if (SOF_INTERESTED(inst, data_out)) 18323e95bd4aSAnders Persson return (EOPNOTSUPP); 18333e95bd4aSAnders Persson } 18343e95bd4aSAnders Persson break; 18353e95bd4aSAnders Persson } 18363e95bd4aSAnders Persson 18370f1702c5SYu Xiangning default: 18380f1702c5SYu Xiangning break; 18390f1702c5SYu Xiangning } 18400f1702c5SYu Xiangning 18410f1702c5SYu Xiangning /* Unknown Option */ 18420f1702c5SYu Xiangning return (-1); 18430f1702c5SYu Xiangning } 18440f1702c5SYu Xiangning 18450f1702c5SYu Xiangning void 18460f1702c5SYu Xiangning socket_sonode_destroy(struct sonode *so) 18470f1702c5SYu Xiangning { 18480f1702c5SYu Xiangning sonode_fini(so); 18490f1702c5SYu Xiangning kmem_cache_free(socket_cache, so); 18500f1702c5SYu Xiangning } 18510f1702c5SYu Xiangning 18520f1702c5SYu Xiangning int 18530f1702c5SYu Xiangning so_zcopy_wait(struct sonode *so) 18540f1702c5SYu Xiangning { 18550f1702c5SYu Xiangning int error = 0; 18560f1702c5SYu Xiangning 18570f1702c5SYu Xiangning mutex_enter(&so->so_lock); 18580f1702c5SYu Xiangning while (!(so->so_copyflag & STZCNOTIFY)) { 18590f1702c5SYu Xiangning if (so->so_state & SS_CLOSING) { 18600f1702c5SYu Xiangning mutex_exit(&so->so_lock); 18610f1702c5SYu Xiangning return (EINTR); 18620f1702c5SYu Xiangning } 18630f1702c5SYu Xiangning if (cv_wait_sig(&so->so_copy_cv, &so->so_lock) == 0) { 18640f1702c5SYu Xiangning error = EINTR; 18650f1702c5SYu Xiangning break; 18660f1702c5SYu Xiangning } 18670f1702c5SYu Xiangning } 18680f1702c5SYu Xiangning so->so_copyflag &= ~STZCNOTIFY; 18690f1702c5SYu Xiangning mutex_exit(&so->so_lock); 18700f1702c5SYu Xiangning return (error); 18710f1702c5SYu Xiangning } 18720f1702c5SYu Xiangning 18730f1702c5SYu Xiangning void 18740f1702c5SYu Xiangning so_timer_callback(void *arg) 18750f1702c5SYu Xiangning { 18760f1702c5SYu Xiangning struct sonode *so = (struct sonode *)arg; 18770f1702c5SYu Xiangning 18780f1702c5SYu Xiangning mutex_enter(&so->so_lock); 18790f1702c5SYu Xiangning 18800f1702c5SYu Xiangning so->so_rcv_timer_tid = 0; 18810f1702c5SYu Xiangning if (so->so_rcv_queued > 0) { 18820f1702c5SYu Xiangning so_notify_data(so, so->so_rcv_queued); 18830f1702c5SYu Xiangning } else { 18840f1702c5SYu Xiangning mutex_exit(&so->so_lock); 18850f1702c5SYu Xiangning } 18860f1702c5SYu Xiangning } 18870f1702c5SYu Xiangning 18880f1702c5SYu Xiangning #ifdef DEBUG 18890f1702c5SYu Xiangning /* 18900f1702c5SYu Xiangning * Verify that the length stored in so_rcv_queued and the length of data blocks 18910f1702c5SYu Xiangning * queued is same. 18920f1702c5SYu Xiangning */ 18930f1702c5SYu Xiangning static boolean_t 18940f1702c5SYu Xiangning so_check_length(sonode_t *so) 18950f1702c5SYu Xiangning { 18960f1702c5SYu Xiangning mblk_t *mp = so->so_rcv_q_head; 18970f1702c5SYu Xiangning int len = 0; 18980f1702c5SYu Xiangning 18990f1702c5SYu Xiangning ASSERT(MUTEX_HELD(&so->so_lock)); 19000f1702c5SYu Xiangning 19010f1702c5SYu Xiangning if (mp != NULL) { 19020f1702c5SYu Xiangning len = msgdsize(mp); 19030f1702c5SYu Xiangning while ((mp = mp->b_next) != NULL) 19040f1702c5SYu Xiangning len += msgdsize(mp); 19050f1702c5SYu Xiangning } 19060f1702c5SYu Xiangning mp = so->so_rcv_head; 19070f1702c5SYu Xiangning if (mp != NULL) { 19080f1702c5SYu Xiangning len += msgdsize(mp); 19090f1702c5SYu Xiangning while ((mp = mp->b_next) != NULL) 19100f1702c5SYu Xiangning len += msgdsize(mp); 19110f1702c5SYu Xiangning } 19120f1702c5SYu Xiangning return ((len == so->so_rcv_queued) ? B_TRUE : B_FALSE); 19130f1702c5SYu Xiangning } 19140f1702c5SYu Xiangning #endif 19150f1702c5SYu Xiangning 19160f1702c5SYu Xiangning int 19170f1702c5SYu Xiangning so_get_mod_version(struct sockparams *sp) 19180f1702c5SYu Xiangning { 19190f1702c5SYu Xiangning ASSERT(sp != NULL && sp->sp_smod_info != NULL); 19200f1702c5SYu Xiangning return (sp->sp_smod_info->smod_version); 19210f1702c5SYu Xiangning } 19220f1702c5SYu Xiangning 19230f1702c5SYu Xiangning /* 19240f1702c5SYu Xiangning * so_start_fallback() 19250f1702c5SYu Xiangning * 19260f1702c5SYu Xiangning * Block new socket operations from coming in, and wait for active operations 19270f1702c5SYu Xiangning * to complete. Threads that are sleeping will be woken up so they can get 19280f1702c5SYu Xiangning * out of the way. 19290f1702c5SYu Xiangning * 19300f1702c5SYu Xiangning * The caller must be a reader on so_fallback_rwlock. 19310f1702c5SYu Xiangning */ 19320f1702c5SYu Xiangning static boolean_t 19330f1702c5SYu Xiangning so_start_fallback(struct sonode *so) 19340f1702c5SYu Xiangning { 19350f1702c5SYu Xiangning ASSERT(RW_READ_HELD(&so->so_fallback_rwlock)); 19360f1702c5SYu Xiangning 19370f1702c5SYu Xiangning mutex_enter(&so->so_lock); 19380f1702c5SYu Xiangning if (so->so_state & SS_FALLBACK_PENDING) { 19390f1702c5SYu Xiangning mutex_exit(&so->so_lock); 19400f1702c5SYu Xiangning return (B_FALSE); 19410f1702c5SYu Xiangning } 19420f1702c5SYu Xiangning so->so_state |= SS_FALLBACK_PENDING; 19430f1702c5SYu Xiangning /* 19440f1702c5SYu Xiangning * Poke all threads that might be sleeping. Any operation that comes 19450f1702c5SYu Xiangning * in after the cv_broadcast will observe the fallback pending flag 19460f1702c5SYu Xiangning * which cause the call to return where it would normally sleep. 19470f1702c5SYu Xiangning */ 19480f1702c5SYu Xiangning cv_broadcast(&so->so_state_cv); /* threads in connect() */ 19490f1702c5SYu Xiangning cv_broadcast(&so->so_rcv_cv); /* threads in recvmsg() */ 19500f1702c5SYu Xiangning cv_broadcast(&so->so_snd_cv); /* threads in sendmsg() */ 19510f1702c5SYu Xiangning mutex_enter(&so->so_acceptq_lock); 19520f1702c5SYu Xiangning cv_broadcast(&so->so_acceptq_cv); /* threads in accept() */ 19530f1702c5SYu Xiangning mutex_exit(&so->so_acceptq_lock); 19540f1702c5SYu Xiangning mutex_exit(&so->so_lock); 19550f1702c5SYu Xiangning 19560f1702c5SYu Xiangning /* 19570f1702c5SYu Xiangning * The main reason for the rw_tryupgrade call is to provide 19580f1702c5SYu Xiangning * observability during the fallback process. We want to 19590f1702c5SYu Xiangning * be able to see if there are pending operations. 19600f1702c5SYu Xiangning */ 19610f1702c5SYu Xiangning if (rw_tryupgrade(&so->so_fallback_rwlock) == 0) { 19620f1702c5SYu Xiangning /* 19630f1702c5SYu Xiangning * It is safe to drop and reaquire the fallback lock, because 19640f1702c5SYu Xiangning * we are guaranteed that another fallback cannot take place. 19650f1702c5SYu Xiangning */ 19660f1702c5SYu Xiangning rw_exit(&so->so_fallback_rwlock); 19670f1702c5SYu Xiangning DTRACE_PROBE1(pending__ops__wait, (struct sonode *), so); 19680f1702c5SYu Xiangning rw_enter(&so->so_fallback_rwlock, RW_WRITER); 19690f1702c5SYu Xiangning DTRACE_PROBE1(pending__ops__complete, (struct sonode *), so); 19700f1702c5SYu Xiangning } 19710f1702c5SYu Xiangning 19720f1702c5SYu Xiangning return (B_TRUE); 19730f1702c5SYu Xiangning } 19740f1702c5SYu Xiangning 19750f1702c5SYu Xiangning /* 19760f1702c5SYu Xiangning * so_end_fallback() 19770f1702c5SYu Xiangning * 19780f1702c5SYu Xiangning * Allow socket opertions back in. 19790f1702c5SYu Xiangning * 19800f1702c5SYu Xiangning * The caller must be a writer on so_fallback_rwlock. 19810f1702c5SYu Xiangning */ 19820f1702c5SYu Xiangning static void 19830f1702c5SYu Xiangning so_end_fallback(struct sonode *so) 19840f1702c5SYu Xiangning { 19850f1702c5SYu Xiangning ASSERT(RW_ISWRITER(&so->so_fallback_rwlock)); 19860f1702c5SYu Xiangning 19870f1702c5SYu Xiangning mutex_enter(&so->so_lock); 198841174437SAnders Persson so->so_state &= ~(SS_FALLBACK_PENDING|SS_FALLBACK_DRAIN); 19890f1702c5SYu Xiangning mutex_exit(&so->so_lock); 19900f1702c5SYu Xiangning 19910f1702c5SYu Xiangning rw_downgrade(&so->so_fallback_rwlock); 19920f1702c5SYu Xiangning } 19930f1702c5SYu Xiangning 19940f1702c5SYu Xiangning /* 19950f1702c5SYu Xiangning * so_quiesced_cb() 19960f1702c5SYu Xiangning * 19970f1702c5SYu Xiangning * Callback passed to the protocol during fallback. It is called once 19980f1702c5SYu Xiangning * the endpoint is quiescent. 19990f1702c5SYu Xiangning * 20000f1702c5SYu Xiangning * No requests from the user, no notifications from the protocol, so it 20010f1702c5SYu Xiangning * is safe to synchronize the state. Data can also be moved without 20020f1702c5SYu Xiangning * risk for reordering. 20030f1702c5SYu Xiangning * 20040f1702c5SYu Xiangning * We do not need to hold so_lock, since there can be only one thread 20050f1702c5SYu Xiangning * operating on the sonode. 20060f1702c5SYu Xiangning */ 20073e95bd4aSAnders Persson static mblk_t * 20083e95bd4aSAnders Persson so_quiesced_cb(sock_upper_handle_t sock_handle, sock_quiesce_arg_t *arg, 20093e95bd4aSAnders Persson struct T_capability_ack *tcap, 20103e95bd4aSAnders Persson struct sockaddr *laddr, socklen_t laddrlen, 20110f1702c5SYu Xiangning struct sockaddr *faddr, socklen_t faddrlen, short opts) 20120f1702c5SYu Xiangning { 20130f1702c5SYu Xiangning struct sonode *so = (struct sonode *)sock_handle; 201441174437SAnders Persson boolean_t atmark; 20153e95bd4aSAnders Persson mblk_t *retmp = NULL, **tailmpp = &retmp; 20160f1702c5SYu Xiangning 20173e95bd4aSAnders Persson if (tcap != NULL) 20183e95bd4aSAnders Persson sotpi_update_state(so, tcap, laddr, laddrlen, faddr, faddrlen, 20193e95bd4aSAnders Persson opts); 20200f1702c5SYu Xiangning 202141174437SAnders Persson /* 202241174437SAnders Persson * Some protocols do not quiece the data path during fallback. Once 202341174437SAnders Persson * we set the SS_FALLBACK_DRAIN flag any attempt to queue data will 202441174437SAnders Persson * fail and the protocol is responsible for saving the data for later 202541174437SAnders Persson * delivery (i.e., once the fallback has completed). 202641174437SAnders Persson */ 20270f1702c5SYu Xiangning mutex_enter(&so->so_lock); 202841174437SAnders Persson so->so_state |= SS_FALLBACK_DRAIN; 20290f1702c5SYu Xiangning SOCKET_TIMER_CANCEL(so); 20300f1702c5SYu Xiangning mutex_exit(&so->so_lock); 203141174437SAnders Persson 20320f1702c5SYu Xiangning if (so->so_rcv_head != NULL) { 20330f1702c5SYu Xiangning if (so->so_rcv_q_last_head == NULL) 20340f1702c5SYu Xiangning so->so_rcv_q_head = so->so_rcv_head; 20350f1702c5SYu Xiangning else 20360f1702c5SYu Xiangning so->so_rcv_q_last_head->b_next = so->so_rcv_head; 20370f1702c5SYu Xiangning so->so_rcv_q_last_head = so->so_rcv_last_head; 20380f1702c5SYu Xiangning } 20390f1702c5SYu Xiangning 204041174437SAnders Persson atmark = (so->so_state & SS_RCVATMARK) != 0; 204141174437SAnders Persson /* 204241174437SAnders Persson * Clear any OOB state having to do with pending data. The TPI 204341174437SAnders Persson * code path will set the appropriate oob state when we move the 204441174437SAnders Persson * oob data to the STREAM head. We leave SS_HADOOBDATA since the oob 204541174437SAnders Persson * data has already been consumed. 204641174437SAnders Persson */ 204741174437SAnders Persson so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA); 204841174437SAnders Persson 204941174437SAnders Persson ASSERT(so->so_oobmsg != NULL || so->so_oobmark <= so->so_rcv_queued); 205041174437SAnders Persson 205141174437SAnders Persson /* 205241174437SAnders Persson * Move data to the STREAM head. 205341174437SAnders Persson */ 20540f1702c5SYu Xiangning while (so->so_rcv_q_head != NULL) { 20550f1702c5SYu Xiangning mblk_t *mp = so->so_rcv_q_head; 20560f1702c5SYu Xiangning size_t mlen = msgdsize(mp); 20570f1702c5SYu Xiangning 20580f1702c5SYu Xiangning so->so_rcv_q_head = mp->b_next; 20590f1702c5SYu Xiangning mp->b_next = NULL; 20600f1702c5SYu Xiangning mp->b_prev = NULL; 206141174437SAnders Persson 206241174437SAnders Persson /* 206341174437SAnders Persson * Send T_EXDATA_IND if we are at the oob mark. 206441174437SAnders Persson */ 206541174437SAnders Persson if (atmark) { 206641174437SAnders Persson struct T_exdata_ind *tei; 20673e95bd4aSAnders Persson mblk_t *mp1 = arg->soqa_exdata_mp; 206841174437SAnders Persson 20693e95bd4aSAnders Persson arg->soqa_exdata_mp = NULL; 207041174437SAnders Persson ASSERT(mp1 != NULL); 207141174437SAnders Persson mp1->b_datap->db_type = M_PROTO; 207241174437SAnders Persson tei = (struct T_exdata_ind *)mp1->b_rptr; 207341174437SAnders Persson tei->PRIM_type = T_EXDATA_IND; 207441174437SAnders Persson tei->MORE_flag = 0; 207541174437SAnders Persson mp1->b_wptr = (uchar_t *)&tei[1]; 207641174437SAnders Persson 207741174437SAnders Persson if (IS_SO_OOB_INLINE(so)) { 207841174437SAnders Persson mp1->b_cont = mp; 207941174437SAnders Persson } else { 208041174437SAnders Persson ASSERT(so->so_oobmsg != NULL); 208141174437SAnders Persson mp1->b_cont = so->so_oobmsg; 208241174437SAnders Persson so->so_oobmsg = NULL; 208341174437SAnders Persson 208441174437SAnders Persson /* process current mp next time around */ 208541174437SAnders Persson mp->b_next = so->so_rcv_q_head; 208641174437SAnders Persson so->so_rcv_q_head = mp; 208741174437SAnders Persson mlen = 0; 208841174437SAnders Persson } 208941174437SAnders Persson mp = mp1; 209041174437SAnders Persson 209141174437SAnders Persson /* we have consumed the oob mark */ 209241174437SAnders Persson atmark = B_FALSE; 209341174437SAnders Persson } else if (so->so_oobmark > 0) { 209441174437SAnders Persson /* 209541174437SAnders Persson * Check if the OOB mark is within the current 209641174437SAnders Persson * mblk chain. In that case we have to split it up. 209741174437SAnders Persson */ 209841174437SAnders Persson if (so->so_oobmark < mlen) { 209941174437SAnders Persson mblk_t *urg_mp = mp; 210041174437SAnders Persson 210141174437SAnders Persson atmark = B_TRUE; 210241174437SAnders Persson mp = NULL; 210341174437SAnders Persson mlen = so->so_oobmark; 210441174437SAnders Persson 210541174437SAnders Persson /* 210641174437SAnders Persson * It is assumed that the OOB mark does 210741174437SAnders Persson * not land within a mblk. 210841174437SAnders Persson */ 210941174437SAnders Persson do { 211041174437SAnders Persson so->so_oobmark -= MBLKL(urg_mp); 211141174437SAnders Persson mp = urg_mp; 211241174437SAnders Persson urg_mp = urg_mp->b_cont; 211341174437SAnders Persson } while (so->so_oobmark > 0); 211441174437SAnders Persson mp->b_cont = NULL; 211541174437SAnders Persson if (urg_mp != NULL) { 211641174437SAnders Persson urg_mp->b_next = so->so_rcv_q_head; 211741174437SAnders Persson so->so_rcv_q_head = urg_mp; 211841174437SAnders Persson } 211941174437SAnders Persson } else { 212041174437SAnders Persson so->so_oobmark -= mlen; 212141174437SAnders Persson if (so->so_oobmark == 0) 212241174437SAnders Persson atmark = B_TRUE; 212341174437SAnders Persson } 212441174437SAnders Persson } 212541174437SAnders Persson 212641174437SAnders Persson /* 212741174437SAnders Persson * Queue data on the STREAM head. 212841174437SAnders Persson */ 21290f1702c5SYu Xiangning so->so_rcv_queued -= mlen; 21303e95bd4aSAnders Persson *tailmpp = mp; 21313e95bd4aSAnders Persson tailmpp = &mp->b_next; 21320f1702c5SYu Xiangning } 21330f1702c5SYu Xiangning so->so_rcv_head = NULL; 21340f1702c5SYu Xiangning so->so_rcv_last_head = NULL; 21350f1702c5SYu Xiangning so->so_rcv_q_head = NULL; 21360f1702c5SYu Xiangning so->so_rcv_q_last_head = NULL; 21370f1702c5SYu Xiangning 213841174437SAnders Persson /* 213941174437SAnders Persson * Check if the oob byte is at the end of the data stream, or if the 214041174437SAnders Persson * oob byte has not yet arrived. In the latter case we have to send a 214141174437SAnders Persson * SIGURG and a mark indicator to the STREAM head. The mark indicator 214241174437SAnders Persson * is needed to guarantee correct behavior for SIOCATMARK. See block 214341174437SAnders Persson * comment in socktpi.h for more details. 214441174437SAnders Persson */ 214541174437SAnders Persson if (atmark || so->so_oobmark > 0) { 214641174437SAnders Persson mblk_t *mp; 21470f1702c5SYu Xiangning 214841174437SAnders Persson if (atmark && so->so_oobmsg != NULL) { 214941174437SAnders Persson struct T_exdata_ind *tei; 215041174437SAnders Persson 21513e95bd4aSAnders Persson mp = arg->soqa_exdata_mp; 21523e95bd4aSAnders Persson arg->soqa_exdata_mp = NULL; 215341174437SAnders Persson ASSERT(mp != NULL); 215441174437SAnders Persson mp->b_datap->db_type = M_PROTO; 215541174437SAnders Persson tei = (struct T_exdata_ind *)mp->b_rptr; 215641174437SAnders Persson tei->PRIM_type = T_EXDATA_IND; 215741174437SAnders Persson tei->MORE_flag = 0; 215841174437SAnders Persson mp->b_wptr = (uchar_t *)&tei[1]; 215941174437SAnders Persson 216041174437SAnders Persson mp->b_cont = so->so_oobmsg; 216141174437SAnders Persson so->so_oobmsg = NULL; 216241174437SAnders Persson 21633e95bd4aSAnders Persson *tailmpp = mp; 21643e95bd4aSAnders Persson tailmpp = &mp->b_next; 216541174437SAnders Persson } else { 216641174437SAnders Persson /* Send up the signal */ 21673e95bd4aSAnders Persson mp = arg->soqa_exdata_mp; 21683e95bd4aSAnders Persson arg->soqa_exdata_mp = NULL; 216941174437SAnders Persson ASSERT(mp != NULL); 217041174437SAnders Persson DB_TYPE(mp) = M_PCSIG; 217141174437SAnders Persson *mp->b_wptr++ = (uchar_t)SIGURG; 21723e95bd4aSAnders Persson *tailmpp = mp; 21733e95bd4aSAnders Persson tailmpp = &mp->b_next; 217441174437SAnders Persson 217541174437SAnders Persson /* Send up the mark indicator */ 21763e95bd4aSAnders Persson mp = arg->soqa_urgmark_mp; 21773e95bd4aSAnders Persson arg->soqa_urgmark_mp = NULL; 217841174437SAnders Persson mp->b_flag = atmark ? MSGMARKNEXT : MSGNOTMARKNEXT; 21793e95bd4aSAnders Persson *tailmpp = mp; 21803e95bd4aSAnders Persson tailmpp = &mp->b_next; 218141174437SAnders Persson 218241174437SAnders Persson so->so_oobmark = 0; 218341174437SAnders Persson } 218441174437SAnders Persson } 218541174437SAnders Persson ASSERT(so->so_oobmark == 0); 21860f1702c5SYu Xiangning ASSERT(so->so_rcv_queued == 0); 21873e95bd4aSAnders Persson 21883e95bd4aSAnders Persson return (retmp); 21890f1702c5SYu Xiangning } 21900f1702c5SYu Xiangning 219141174437SAnders Persson #ifdef DEBUG 219241174437SAnders Persson /* 219341174437SAnders Persson * Do an integrity check of the sonode. This should be done if a 219441174437SAnders Persson * fallback fails after sonode has initially been converted to use 219541174437SAnders Persson * TPI and subsequently have to be reverted. 219641174437SAnders Persson * 219741174437SAnders Persson * Failure to pass the integrity check will panic the system. 219841174437SAnders Persson */ 219941174437SAnders Persson void 220041174437SAnders Persson so_integrity_check(struct sonode *cur, struct sonode *orig) 220141174437SAnders Persson { 220241174437SAnders Persson VERIFY(cur->so_vnode == orig->so_vnode); 220341174437SAnders Persson VERIFY(cur->so_ops == orig->so_ops); 220441174437SAnders Persson /* 220541174437SAnders Persson * For so_state we can only VERIFY the state flags in CHECK_STATE. 220641174437SAnders Persson * The other state flags might be affected by a notification from the 220741174437SAnders Persson * protocol. 220841174437SAnders Persson */ 220941174437SAnders Persson #define CHECK_STATE (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_NDELAY|SS_NONBLOCK| \ 221041174437SAnders Persson SS_ASYNC|SS_ACCEPTCONN|SS_SAVEDEOR|SS_RCVATMARK|SS_OOBPEND| \ 221141174437SAnders Persson SS_HAVEOOBDATA|SS_HADOOBDATA|SS_SENTLASTREADSIG|SS_SENTLASTWRITESIG) 221241174437SAnders Persson VERIFY((cur->so_state & (orig->so_state & CHECK_STATE)) == 221341174437SAnders Persson (orig->so_state & CHECK_STATE)); 221441174437SAnders Persson VERIFY(cur->so_mode == orig->so_mode); 221541174437SAnders Persson VERIFY(cur->so_flag == orig->so_flag); 221641174437SAnders Persson VERIFY(cur->so_count == orig->so_count); 221741174437SAnders Persson /* Cannot VERIFY so_proto_connid; proto can update it */ 221841174437SAnders Persson VERIFY(cur->so_sockparams == orig->so_sockparams); 221941174437SAnders Persson /* an error might have been recorded, but it can not be lost */ 222041174437SAnders Persson VERIFY(cur->so_error != 0 || orig->so_error == 0); 222141174437SAnders Persson VERIFY(cur->so_family == orig->so_family); 222241174437SAnders Persson VERIFY(cur->so_type == orig->so_type); 222341174437SAnders Persson VERIFY(cur->so_protocol == orig->so_protocol); 222441174437SAnders Persson VERIFY(cur->so_version == orig->so_version); 222541174437SAnders Persson /* New conns might have arrived, but none should have been lost */ 222641174437SAnders Persson VERIFY(cur->so_acceptq_len >= orig->so_acceptq_len); 22273e95bd4aSAnders Persson VERIFY(list_head(&cur->so_acceptq_list) == 22283e95bd4aSAnders Persson list_head(&orig->so_acceptq_list)); 222941174437SAnders Persson VERIFY(cur->so_backlog == orig->so_backlog); 223041174437SAnders Persson /* New OOB migth have arrived, but mark should not have been lost */ 223141174437SAnders Persson VERIFY(cur->so_oobmark >= orig->so_oobmark); 223241174437SAnders Persson /* Cannot VERIFY so_oobmsg; the proto might have sent up a new one */ 223341174437SAnders Persson VERIFY(cur->so_pgrp == orig->so_pgrp); 223441174437SAnders Persson VERIFY(cur->so_peercred == orig->so_peercred); 223541174437SAnders Persson VERIFY(cur->so_cpid == orig->so_cpid); 223641174437SAnders Persson VERIFY(cur->so_zoneid == orig->so_zoneid); 223741174437SAnders Persson /* New data migth have arrived, but none should have been lost */ 223841174437SAnders Persson VERIFY(cur->so_rcv_queued >= orig->so_rcv_queued); 223941174437SAnders Persson VERIFY(cur->so_rcv_q_head == orig->so_rcv_q_head); 224041174437SAnders Persson VERIFY(cur->so_rcv_head == orig->so_rcv_head); 224141174437SAnders Persson VERIFY(cur->so_proto_handle == orig->so_proto_handle); 224241174437SAnders Persson VERIFY(cur->so_downcalls == orig->so_downcalls); 224341174437SAnders Persson /* Cannot VERIFY so_proto_props; they can be updated by proto */ 224441174437SAnders Persson } 224541174437SAnders Persson #endif 224641174437SAnders Persson 22470f1702c5SYu Xiangning /* 22480f1702c5SYu Xiangning * so_tpi_fallback() 22490f1702c5SYu Xiangning * 225041174437SAnders Persson * This is the fallback initation routine; things start here. 22510f1702c5SYu Xiangning * 22520f1702c5SYu Xiangning * Basic strategy: 22530f1702c5SYu Xiangning * o Block new socket operations from coming in 22540f1702c5SYu Xiangning * o Allocate/initate info needed by TPI 22550f1702c5SYu Xiangning * o Quiesce the connection, at which point we sync 22560f1702c5SYu Xiangning * state and move data 22570f1702c5SYu Xiangning * o Change operations (sonodeops) associated with the socket 22580f1702c5SYu Xiangning * o Unblock threads waiting for the fallback to finish 22590f1702c5SYu Xiangning */ 22600f1702c5SYu Xiangning int 22610f1702c5SYu Xiangning so_tpi_fallback(struct sonode *so, struct cred *cr) 22620f1702c5SYu Xiangning { 22630f1702c5SYu Xiangning int error; 22640f1702c5SYu Xiangning queue_t *q; 22650f1702c5SYu Xiangning struct sockparams *sp; 226641174437SAnders Persson struct sockparams *newsp = NULL; 22670f1702c5SYu Xiangning so_proto_fallback_func_t fbfunc; 22683e95bd4aSAnders Persson const char *devpath; 22690f1702c5SYu Xiangning boolean_t direct; 227041174437SAnders Persson struct sonode *nso; 22713e95bd4aSAnders Persson sock_quiesce_arg_t arg = { NULL, NULL }; 227241174437SAnders Persson #ifdef DEBUG 227341174437SAnders Persson struct sonode origso; 227441174437SAnders Persson #endif 22750f1702c5SYu Xiangning error = 0; 22760f1702c5SYu Xiangning sp = so->so_sockparams; 22770f1702c5SYu Xiangning fbfunc = sp->sp_smod_info->smod_proto_fallback_func; 22780f1702c5SYu Xiangning 22790f1702c5SYu Xiangning /* 22803e95bd4aSAnders Persson * Cannot fallback if the socket has active filters 22810f1702c5SYu Xiangning */ 22823e95bd4aSAnders Persson if (so->so_filter_active > 0) 22833e95bd4aSAnders Persson return (EINVAL); 22843e95bd4aSAnders Persson 22853e95bd4aSAnders Persson switch (so->so_family) { 22863e95bd4aSAnders Persson case AF_INET: 22873e95bd4aSAnders Persson devpath = sp->sp_smod_info->smod_fallback_devpath_v4; 22883e95bd4aSAnders Persson break; 22893e95bd4aSAnders Persson case AF_INET6: 22903e95bd4aSAnders Persson devpath = sp->sp_smod_info->smod_fallback_devpath_v6; 22913e95bd4aSAnders Persson break; 22923e95bd4aSAnders Persson default: 22933e95bd4aSAnders Persson return (EINVAL); 22943e95bd4aSAnders Persson } 22953e95bd4aSAnders Persson 22963e95bd4aSAnders Persson /* 22973e95bd4aSAnders Persson * Fallback can only happen if the socket module has a TPI device 22983e95bd4aSAnders Persson * and fallback function. 22993e95bd4aSAnders Persson */ 23003e95bd4aSAnders Persson if (devpath == NULL || fbfunc == NULL) 23010f1702c5SYu Xiangning return (EINVAL); 23020f1702c5SYu Xiangning 23030f1702c5SYu Xiangning /* 23040f1702c5SYu Xiangning * Initiate fallback; upon success we know that no new requests 23050f1702c5SYu Xiangning * will come in from the user. 23060f1702c5SYu Xiangning */ 23070f1702c5SYu Xiangning if (!so_start_fallback(so)) 23080f1702c5SYu Xiangning return (EAGAIN); 230941174437SAnders Persson #ifdef DEBUG 231041174437SAnders Persson /* 231141174437SAnders Persson * Make a copy of the sonode in case we need to make an integrity 231241174437SAnders Persson * check later on. 231341174437SAnders Persson */ 231441174437SAnders Persson bcopy(so, &origso, sizeof (*so)); 231541174437SAnders Persson #endif 23160f1702c5SYu Xiangning 23177d64f41bSAnders Persson sp->sp_stats.sps_nfallback.value.ui64++; 23187d64f41bSAnders Persson 23190f1702c5SYu Xiangning newsp = sockparams_hold_ephemeral_bydev(so->so_family, so->so_type, 23203e95bd4aSAnders Persson so->so_protocol, devpath, KM_SLEEP, &error); 23210f1702c5SYu Xiangning if (error != 0) 23220f1702c5SYu Xiangning goto out; 23230f1702c5SYu Xiangning 23240f1702c5SYu Xiangning if (so->so_direct != NULL) { 23250f1702c5SYu Xiangning sodirect_t *sodp = so->so_direct; 2326bbc000e5SAnders Persson mutex_enter(&so->so_lock); 23270f1702c5SYu Xiangning 2328bbc000e5SAnders Persson so->so_direct->sod_enabled = B_FALSE; 23290f1702c5SYu Xiangning so->so_state &= ~SS_SODIRECT; 23300f1702c5SYu Xiangning ASSERT(sodp->sod_uioafh == NULL); 2331bbc000e5SAnders Persson mutex_exit(&so->so_lock); 23320f1702c5SYu Xiangning } 23330f1702c5SYu Xiangning 23340f1702c5SYu Xiangning /* Turn sonode into a TPI socket */ 233541174437SAnders Persson error = sotpi_convert_sonode(so, newsp, &direct, &q, cr); 233641174437SAnders Persson if (error != 0) 23370f1702c5SYu Xiangning goto out; 23383e95bd4aSAnders Persson /* 23393e95bd4aSAnders Persson * When it comes to urgent data we have two cases to deal with; 23403e95bd4aSAnders Persson * (1) The oob byte has already arrived, or (2) the protocol has 23413e95bd4aSAnders Persson * notified that oob data is pending, but it has not yet arrived. 23423e95bd4aSAnders Persson * 23433e95bd4aSAnders Persson * For (1) all we need to do is send a T_EXDATA_IND to indicate were 23443e95bd4aSAnders Persson * in the byte stream the oob byte is. For (2) we have to send a 23453e95bd4aSAnders Persson * SIGURG (M_PCSIG), followed by a zero-length mblk indicating whether 23463e95bd4aSAnders Persson * the oob byte will be the next byte from the protocol. 23473e95bd4aSAnders Persson * 23483e95bd4aSAnders Persson * So in the worst case we need two mblks, one for the signal, another 23493e95bd4aSAnders Persson * for mark indication. In that case we use the exdata_mp for the sig. 23503e95bd4aSAnders Persson */ 23513e95bd4aSAnders Persson arg.soqa_exdata_mp = allocb_wait(sizeof (struct T_exdata_ind), 23523e95bd4aSAnders Persson BPRI_MED, STR_NOSIG, NULL); 23533e95bd4aSAnders Persson arg.soqa_urgmark_mp = allocb_wait(0, BPRI_MED, STR_NOSIG, NULL); 23540f1702c5SYu Xiangning 23550f1702c5SYu Xiangning /* 23560f1702c5SYu Xiangning * Now tell the protocol to start using TPI. so_quiesced_cb be 23570f1702c5SYu Xiangning * called once it's safe to synchronize state. 23580f1702c5SYu Xiangning */ 23590f1702c5SYu Xiangning DTRACE_PROBE1(proto__fallback__begin, struct sonode *, so); 23603e95bd4aSAnders Persson error = (*fbfunc)(so->so_proto_handle, q, direct, so_quiesced_cb, 23613e95bd4aSAnders Persson &arg); 23620f1702c5SYu Xiangning DTRACE_PROBE1(proto__fallback__end, struct sonode *, so); 23630f1702c5SYu Xiangning 236441174437SAnders Persson if (error != 0) { 236541174437SAnders Persson /* protocol was unable to do a fallback, revert the sonode */ 236641174437SAnders Persson sotpi_revert_sonode(so, cr); 236741174437SAnders Persson goto out; 236841174437SAnders Persson } 236941174437SAnders Persson 23700f1702c5SYu Xiangning /* 237141174437SAnders Persson * Walk the accept queue and notify the proto that they should 237241174437SAnders Persson * fall back to TPI. The protocol will send up the T_CONN_IND. 237341174437SAnders Persson */ 23743e95bd4aSAnders Persson nso = list_head(&so->so_acceptq_list); 237541174437SAnders Persson while (nso != NULL) { 237641174437SAnders Persson int rval; 23773e95bd4aSAnders Persson struct sonode *next; 23783e95bd4aSAnders Persson 23793e95bd4aSAnders Persson if (arg.soqa_exdata_mp == NULL) { 23803e95bd4aSAnders Persson arg.soqa_exdata_mp = 23813e95bd4aSAnders Persson allocb_wait(sizeof (struct T_exdata_ind), 23823e95bd4aSAnders Persson BPRI_MED, STR_NOSIG, NULL); 23833e95bd4aSAnders Persson } 23843e95bd4aSAnders Persson if (arg.soqa_urgmark_mp == NULL) { 23853e95bd4aSAnders Persson arg.soqa_urgmark_mp = allocb_wait(0, BPRI_MED, 23863e95bd4aSAnders Persson STR_NOSIG, NULL); 23873e95bd4aSAnders Persson } 238841174437SAnders Persson 238941174437SAnders Persson DTRACE_PROBE1(proto__fallback__begin, struct sonode *, nso); 23903e95bd4aSAnders Persson rval = (*fbfunc)(nso->so_proto_handle, NULL, direct, 23913e95bd4aSAnders Persson so_quiesced_cb, &arg); 239241174437SAnders Persson DTRACE_PROBE1(proto__fallback__end, struct sonode *, nso); 239341174437SAnders Persson if (rval != 0) { 23943e95bd4aSAnders Persson /* Abort the connection */ 239541174437SAnders Persson zcmn_err(getzoneid(), CE_WARN, 239641174437SAnders Persson "Failed to convert socket in accept queue to TPI. " 239741174437SAnders Persson "Pid = %d\n", curproc->p_pid); 23983e95bd4aSAnders Persson next = list_next(&so->so_acceptq_list, nso); 23993e95bd4aSAnders Persson list_remove(&so->so_acceptq_list, nso); 24003e95bd4aSAnders Persson so->so_acceptq_len--; 24013e95bd4aSAnders Persson 24023e95bd4aSAnders Persson (void) socket_close(nso, 0, CRED()); 24033e95bd4aSAnders Persson socket_destroy(nso); 24043e95bd4aSAnders Persson nso = next; 24053e95bd4aSAnders Persson } else { 24063e95bd4aSAnders Persson nso = list_next(&so->so_acceptq_list, nso); 240741174437SAnders Persson } 240841174437SAnders Persson } 240941174437SAnders Persson 241041174437SAnders Persson /* 241141174437SAnders Persson * Now flush the acceptq, this will destroy all sockets. They will 241241174437SAnders Persson * be recreated in sotpi_accept(). 24130f1702c5SYu Xiangning */ 24142320a8c1SAnders Persson so_acceptq_flush(so, B_FALSE); 24150f1702c5SYu Xiangning 24160f1702c5SYu Xiangning mutex_enter(&so->so_lock); 24170f1702c5SYu Xiangning so->so_state |= SS_FALLBACK_COMP; 24180f1702c5SYu Xiangning mutex_exit(&so->so_lock); 24190f1702c5SYu Xiangning 24200f1702c5SYu Xiangning /* 24210f1702c5SYu Xiangning * Swap the sonode ops. Socket opertations that come in once this 24220f1702c5SYu Xiangning * is done will proceed without blocking. 24230f1702c5SYu Xiangning */ 24240f1702c5SYu Xiangning so->so_ops = &sotpi_sonodeops; 24250f1702c5SYu Xiangning 24260f1702c5SYu Xiangning /* 24270f1702c5SYu Xiangning * Wake up any threads stuck in poll. This is needed since the poll 24280f1702c5SYu Xiangning * head changes when the fallback happens (moves from the sonode to 24290f1702c5SYu Xiangning * the STREAMS head). 24300f1702c5SYu Xiangning */ 24310f1702c5SYu Xiangning pollwakeup(&so->so_poll_list, POLLERR); 24323e95bd4aSAnders Persson 24333e95bd4aSAnders Persson /* 24343e95bd4aSAnders Persson * When this non-STREAM socket was created we placed an extra ref on 24353e95bd4aSAnders Persson * the associated vnode to support asynchronous close. Drop that ref 24363e95bd4aSAnders Persson * here. 24373e95bd4aSAnders Persson */ 24383e95bd4aSAnders Persson ASSERT(SOTOV(so)->v_count >= 2); 24393e95bd4aSAnders Persson VN_RELE(SOTOV(so)); 24400f1702c5SYu Xiangning out: 24410f1702c5SYu Xiangning so_end_fallback(so); 24420f1702c5SYu Xiangning 244341174437SAnders Persson if (error != 0) { 244441174437SAnders Persson #ifdef DEBUG 244541174437SAnders Persson so_integrity_check(so, &origso); 244641174437SAnders Persson #endif 244741174437SAnders Persson zcmn_err(getzoneid(), CE_WARN, 244841174437SAnders Persson "Failed to convert socket to TPI (err=%d). Pid = %d\n", 244941174437SAnders Persson error, curproc->p_pid); 245041174437SAnders Persson if (newsp != NULL) 245141174437SAnders Persson SOCKPARAMS_DEC_REF(newsp); 245241174437SAnders Persson } 24533e95bd4aSAnders Persson if (arg.soqa_exdata_mp != NULL) 24543e95bd4aSAnders Persson freemsg(arg.soqa_exdata_mp); 24553e95bd4aSAnders Persson if (arg.soqa_urgmark_mp != NULL) 24563e95bd4aSAnders Persson freemsg(arg.soqa_urgmark_mp); 245741174437SAnders Persson 24580f1702c5SYu Xiangning return (error); 24590f1702c5SYu Xiangning } 2460