1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/inttypes.h>
28 #include <sys/t_lock.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/buf.h>
32 #include <sys/conf.h>
33 #include <sys/cred.h>
34 #include <sys/kmem.h>
35 #include <sys/sysmacros.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/debug.h>
39 #include <sys/errno.h>
40 #include <sys/time.h>
41 #include <sys/file.h>
42 #include <sys/user.h>
43 #include <sys/stream.h>
44 #include <sys/strsubr.h>
45 #include <sys/esunddi.h>
46 #include <sys/flock.h>
47 #include <sys/modctl.h>
48 #include <sys/vtrace.h>
49 #include <sys/strsun.h>
50 #include <sys/cmn_err.h>
51 #include <sys/proc.h>
52 #include <sys/ddi.h>
53
54 #include <sys/suntpi.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/socketvar.h>
58 #include <netinet/in.h>
59 #include <inet/common.h>
60 #include <inet/proto_set.h>
61
62 #include <sys/tiuser.h>
63 #define _SUN_TPI_VERSION 2
64 #include <sys/tihdr.h>
65
66 #include <c2/audit.h>
67
68 #include <fs/sockfs/socktpi.h>
69 #include <fs/sockfs/socktpi_impl.h>
70
/*
 * Tunable default for so_version.
 * NOTE(review): presumably applied to newly created sockets by code
 * outside this part of the file - confirm.
 */
int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 * so_basic_strinit() copies this value into so_options.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 * sowaitack() uses this as its time limit when the caller passed a
 * wait of zero.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 * When non-zero, do_tinfo() and do_tcapability() skip the info request
 * and set sti_addr_size to 0 instead.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
 * do_tcapability() multiplies this by hz to get the wait in ticks.
 */
clock_t sock_capability_timeout = 2;	/* seconds */

/* Forward declarations of static routines defined later in this file. */
static int do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void so_removehooks(struct sonode *so);

/* Stream head read-side hooks installed by so_installhooks(). */
static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
    strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
    strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
    strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
    strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
117
118 /*
119 * Convert a socket to a stream. Invoked when the illusory sockmod
120 * is popped from the stream.
121 * Change the stream head back to default operation without losing
122 * any messages (T_conn_ind's are moved to the stream head queue).
123 */
124 int
so_sock2stream(struct sonode * so)125 so_sock2stream(struct sonode *so)
126 {
127 struct vnode *vp = SOTOV(so);
128 queue_t *rq;
129 mblk_t *mp;
130 int error = 0;
131 sotpi_info_t *sti = SOTOTPI(so);
132
133 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
134
135 mutex_enter(&so->so_lock);
136 so_lock_single(so);
137
138 ASSERT(so->so_version != SOV_STREAM);
139
140 if (sti->sti_direct) {
141 mblk_t **mpp;
142 int rval;
143
144 /*
145 * Tell the transport below that sockmod is being popped
146 */
147 mutex_exit(&so->so_lock);
148 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
149 &rval);
150 mutex_enter(&so->so_lock);
151 if (error != 0) {
152 dprintso(so, 0, ("so_sock2stream(%p): "
153 "_SIOCSOCKFALLBACK failed\n", (void *)so));
154 goto exit;
155 }
156 sti->sti_direct = 0;
157
158 for (mpp = &sti->sti_conn_ind_head; (mp = *mpp) != NULL;
159 mpp = &mp->b_next) {
160 struct T_conn_ind *conn_ind;
161
162 /*
163 * strsock_proto() has already verified the length of
164 * this message block.
165 */
166 ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));
167
168 conn_ind = (struct T_conn_ind *)mp->b_rptr;
169 if (conn_ind->OPT_length == 0 &&
170 conn_ind->OPT_offset == 0)
171 continue;
172
173 if (DB_REF(mp) > 1) {
174 mblk_t *newmp;
175 size_t length;
176 cred_t *cr;
177 pid_t cpid;
178 int error; /* Dummy - error not returned */
179
180 /*
181 * Copy the message block because it is used
182 * elsewhere, too.
183 * Can't use copyb since we want to wait
184 * yet allow for EINTR.
185 */
186 /* Round up size for reuse */
187 length = MAX(MBLKL(mp), 64);
188 cr = msg_getcred(mp, &cpid);
189 if (cr != NULL) {
190 newmp = allocb_cred_wait(length, 0,
191 &error, cr, cpid);
192 } else {
193 newmp = allocb_wait(length, 0, 0,
194 &error);
195 }
196 if (newmp == NULL) {
197 error = EINTR;
198 goto exit;
199 }
200 bcopy(mp->b_rptr, newmp->b_wptr, length);
201 newmp->b_wptr += length;
202 newmp->b_next = mp->b_next;
203
204 /*
205 * Link the new message block into the queue
206 * and free the old one.
207 */
208 *mpp = newmp;
209 mp->b_next = NULL;
210 freemsg(mp);
211
212 mp = newmp;
213 conn_ind = (struct T_conn_ind *)mp->b_rptr;
214 }
215
216 /*
217 * Remove options added by TCP for accept fast-path.
218 */
219 conn_ind->OPT_length = 0;
220 conn_ind->OPT_offset = 0;
221 }
222 }
223
224 so->so_version = SOV_STREAM;
225 so->so_proto_handle = NULL;
226
227 /*
228 * Remove the hooks in the stream head to avoid queuing more
229 * packets in sockfs.
230 */
231 mutex_exit(&so->so_lock);
232 so_removehooks(so);
233 mutex_enter(&so->so_lock);
234
235 /*
236 * Clear any state related to urgent data. Leave any T_EXDATA_IND
237 * on the queue - the behavior of urgent data after a switch is
238 * left undefined.
239 */
240 so->so_error = sti->sti_delayed_error = 0;
241 freemsg(so->so_oobmsg);
242 so->so_oobmsg = NULL;
243 sti->sti_oobsigcnt = sti->sti_oobcnt = 0;
244
245 so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
246 SS_SAVEDEOR);
247 ASSERT(so_verify_oobstate(so));
248
249 freemsg(sti->sti_ack_mp);
250 sti->sti_ack_mp = NULL;
251
252 /*
253 * Flush the T_DISCON_IND on sti_discon_ind_mp.
254 */
255 so_flush_discon_ind(so);
256
257 /*
258 * Move any queued T_CONN_IND messages to stream head queue.
259 */
260 rq = RD(strvp2wq(vp));
261 while ((mp = sti->sti_conn_ind_head) != NULL) {
262 sti->sti_conn_ind_head = mp->b_next;
263 mp->b_next = NULL;
264 if (sti->sti_conn_ind_head == NULL) {
265 ASSERT(sti->sti_conn_ind_tail == mp);
266 sti->sti_conn_ind_tail = NULL;
267 }
268 dprintso(so, 0,
269 ("so_sock2stream(%p): moving T_CONN_IND\n", (void *)so));
270
271 /* Drop lock across put() */
272 mutex_exit(&so->so_lock);
273 put(rq, mp);
274 mutex_enter(&so->so_lock);
275 }
276
277 exit:
278 ASSERT(MUTEX_HELD(&so->so_lock));
279 so_unlock_single(so, SOLOCKED);
280 mutex_exit(&so->so_lock);
281 return (error);
282 }
283
284 /*
285 * Covert a stream back to a socket. This is invoked when the illusory
286 * sockmod is pushed on a stream (where the stream was "created" by
287 * popping the illusory sockmod).
288 * This routine can not recreate the socket state (certain aspects of
289 * it like urgent data state and the bound/connected addresses for AF_UNIX
290 * sockets can not be recreated by asking the transport for information).
291 * Thus this routine implicitly assumes that the socket is in an initial
292 * state (as if it was just created). It flushes any messages queued on the
293 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
294 */
295 void
so_stream2sock(struct sonode * so)296 so_stream2sock(struct sonode *so)
297 {
298 struct vnode *vp = SOTOV(so);
299 sotpi_info_t *sti = SOTOTPI(so);
300
301 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
302
303 mutex_enter(&so->so_lock);
304 so_lock_single(so);
305 ASSERT(so->so_version == SOV_STREAM);
306 so->so_version = SOV_SOCKSTREAM;
307 sti->sti_pushcnt = 0;
308 mutex_exit(&so->so_lock);
309
310 /*
311 * Set a permenent error to force any thread in sorecvmsg to
312 * return (and drop SOREADLOCKED). Clear the error once
313 * we have SOREADLOCKED.
314 * This makes a read sleeping during the I_PUSH of sockmod return
315 * EIO.
316 */
317 strsetrerror(SOTOV(so), EIO, 1, NULL);
318
319 /*
320 * Get the read lock before flushing data to avoid
321 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
322 */
323 mutex_enter(&so->so_lock);
324 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */
325 mutex_exit(&so->so_lock);
326
327 strsetrerror(SOTOV(so), 0, 0, NULL);
328 so_installhooks(so);
329
330 /*
331 * Flush everything on the read queue.
332 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
333 * remain; those types of messages would confuse sockfs.
334 */
335 strflushrq(vp, FLUSHALL);
336 mutex_enter(&so->so_lock);
337
338 /*
339 * Flush the T_DISCON_IND on sti_discon_ind_mp.
340 */
341 so_flush_discon_ind(so);
342 so_unlock_read(so); /* Clear SOREADLOCKED */
343
344 so_unlock_single(so, SOLOCKED);
345 mutex_exit(&so->so_lock);
346 }
347
348 /*
349 * Install the hooks in the stream head.
350 */
351 void
so_installhooks(struct sonode * so)352 so_installhooks(struct sonode *so)
353 {
354 struct vnode *vp = SOTOV(so);
355
356 strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
357 strsock_proto, strsock_misc);
358 strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
359 }
360
361 /*
362 * Remove the hooks in the stream head.
363 */
364 static void
so_removehooks(struct sonode * so)365 so_removehooks(struct sonode *so)
366 {
367 struct vnode *vp = SOTOV(so);
368
369 strsetrputhooks(vp, 0, NULL, NULL);
370 strsetwputhooks(vp, 0, STRTIMOUT);
371 /*
372 * Leave read behavior as it would have been for a normal
373 * stream i.e. a read of an M_PROTO will fail.
374 */
375 }
376
377 void
so_basic_strinit(struct sonode * so)378 so_basic_strinit(struct sonode *so)
379 {
380 struct vnode *vp = SOTOV(so);
381 struct stdata *stp;
382 mblk_t *mp;
383 sotpi_info_t *sti = SOTOTPI(so);
384
385 /* Preallocate an unbind_req message */
386 mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, CRED());
387 mutex_enter(&so->so_lock);
388 sti->sti_unbind_mp = mp;
389 #ifdef DEBUG
390 so->so_options = so_default_options;
391 #endif /* DEBUG */
392 mutex_exit(&so->so_lock);
393
394 so_installhooks(so);
395
396 stp = vp->v_stream;
397 /*
398 * Have to keep minpsz at zero in order to allow write/send of zero
399 * bytes.
400 */
401 mutex_enter(&stp->sd_lock);
402 if (stp->sd_qn_minpsz == 1)
403 stp->sd_qn_minpsz = 0;
404 mutex_exit(&stp->sd_lock);
405 }
406
407 /*
408 * Initialize the streams side of a socket including
409 * T_info_req/ack processing. If tso is not NULL its values are used thereby
410 * avoiding the T_INFO_REQ.
411 */
412 int
so_strinit(struct sonode * so,struct sonode * tso)413 so_strinit(struct sonode *so, struct sonode *tso)
414 {
415 sotpi_info_t *sti = SOTOTPI(so);
416 sotpi_info_t *tsti;
417 int error;
418
419 so_basic_strinit(so);
420
421 /*
422 * The T_CAPABILITY_REQ should be the first message sent down because
423 * at least TCP has a fast-path for this which avoids timeouts while
424 * waiting for the T_CAPABILITY_ACK under high system load.
425 */
426 if (tso == NULL) {
427 error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
428 if (error)
429 return (error);
430 } else {
431 tsti = SOTOTPI(tso);
432
433 mutex_enter(&so->so_lock);
434 sti->sti_tsdu_size = tsti->sti_tsdu_size;
435 sti->sti_etsdu_size = tsti->sti_etsdu_size;
436 sti->sti_addr_size = tsti->sti_addr_size;
437 sti->sti_opt_size = tsti->sti_opt_size;
438 sti->sti_tidu_size = tsti->sti_tidu_size;
439 sti->sti_serv_type = tsti->sti_serv_type;
440 so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
441 mutex_exit(&so->so_lock);
442
443 /* the following do_tcapability may update so->so_mode */
444 if ((tsti->sti_serv_type != T_CLTS) &&
445 (sti->sti_direct == 0)) {
446 error = do_tcapability(so, TC1_ACCEPTOR_ID);
447 if (error)
448 return (error);
449 }
450 }
451 /*
452 * If the addr_size is 0 we treat it as already bound
453 * and connected. This is used by the routing socket.
454 * We set the addr_size to something to allocate a the address
455 * structures.
456 */
457 if (sti->sti_addr_size == 0) {
458 so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
459 /* Address size can vary with address families. */
460 if (so->so_family == AF_INET6)
461 sti->sti_addr_size =
462 (t_scalar_t)sizeof (struct sockaddr_in6);
463 else
464 sti->sti_addr_size =
465 (t_scalar_t)sizeof (struct sockaddr_in);
466 ASSERT(sti->sti_unbind_mp);
467 }
468
469 so_alloc_addr(so, sti->sti_addr_size);
470
471 return (0);
472 }
473
474 static void
copy_tinfo(struct sonode * so,struct T_info_ack * tia)475 copy_tinfo(struct sonode *so, struct T_info_ack *tia)
476 {
477 sotpi_info_t *sti = SOTOTPI(so);
478
479 sti->sti_tsdu_size = tia->TSDU_size;
480 sti->sti_etsdu_size = tia->ETSDU_size;
481 sti->sti_addr_size = tia->ADDR_size;
482 sti->sti_opt_size = tia->OPT_size;
483 sti->sti_tidu_size = tia->TIDU_size;
484 sti->sti_serv_type = tia->SERV_type;
485 switch (tia->CURRENT_state) {
486 case TS_UNBND:
487 break;
488 case TS_IDLE:
489 so->so_state |= SS_ISBOUND;
490 sti->sti_laddr_len = 0;
491 sti->sti_laddr_valid = 0;
492 break;
493 case TS_DATA_XFER:
494 so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
495 sti->sti_laddr_len = 0;
496 sti->sti_faddr_len = 0;
497 sti->sti_laddr_valid = 0;
498 sti->sti_faddr_valid = 0;
499 break;
500 }
501
502 /*
503 * Heuristics for determining the socket mode flags
504 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
505 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
506 * from the info ack.
507 */
508 if (sti->sti_serv_type == T_CLTS) {
509 so->so_mode |= SM_ATOMIC | SM_ADDR;
510 } else {
511 so->so_mode |= SM_CONNREQUIRED;
512 if (sti->sti_etsdu_size != 0 && sti->sti_etsdu_size != -2)
513 so->so_mode |= SM_EXDATA;
514 }
515 if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
516 /* Semantics are to discard tail end of messages */
517 so->so_mode |= SM_ATOMIC;
518 }
519 if (so->so_family == AF_UNIX) {
520 so->so_mode |= SM_FDPASSING | SM_OPTDATA;
521 if (sti->sti_addr_size == -1) {
522 /* MAXPATHLEN + soun_family + nul termination */
523 sti->sti_addr_size = (t_scalar_t)(MAXPATHLEN +
524 sizeof (short) + 1);
525 }
526 if (so->so_type == SOCK_STREAM) {
527 /*
528 * Make it into a byte-stream transport.
529 * SOCK_SEQPACKET sockets are unchanged.
530 */
531 sti->sti_tsdu_size = 0;
532 }
533 } else if (sti->sti_addr_size == -1) {
534 /*
535 * Logic extracted from sockmod - have to pick some max address
536 * length in order to preallocate the addresses.
537 */
538 sti->sti_addr_size = SOA_DEFSIZE;
539 }
540 if (sti->sti_tsdu_size == 0)
541 so->so_mode |= SM_BYTESTREAM;
542 }
543
544 static int
check_tinfo(struct sonode * so)545 check_tinfo(struct sonode *so)
546 {
547 sotpi_info_t *sti = SOTOTPI(so);
548
549 /* Consistency checks */
550 if (so->so_type == SOCK_DGRAM && sti->sti_serv_type != T_CLTS) {
551 eprintso(so, ("service type and socket type mismatch\n"));
552 eprintsoline(so, EPROTO);
553 return (EPROTO);
554 }
555 if (so->so_type == SOCK_STREAM && sti->sti_serv_type == T_CLTS) {
556 eprintso(so, ("service type and socket type mismatch\n"));
557 eprintsoline(so, EPROTO);
558 return (EPROTO);
559 }
560 if (so->so_type == SOCK_SEQPACKET && sti->sti_serv_type == T_CLTS) {
561 eprintso(so, ("service type and socket type mismatch\n"));
562 eprintsoline(so, EPROTO);
563 return (EPROTO);
564 }
565 if (so->so_family == AF_INET &&
566 sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
567 eprintso(so,
568 ("AF_INET must have sockaddr_in address length. Got %d\n",
569 sti->sti_addr_size));
570 eprintsoline(so, EMSGSIZE);
571 return (EMSGSIZE);
572 }
573 if (so->so_family == AF_INET6 &&
574 sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
575 eprintso(so,
576 ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
577 sti->sti_addr_size));
578 eprintsoline(so, EMSGSIZE);
579 return (EMSGSIZE);
580 }
581
582 dprintso(so, 1, (
583 "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
584 sti->sti_serv_type, sti->sti_tsdu_size, sti->sti_etsdu_size,
585 sti->sti_addr_size, sti->sti_opt_size,
586 sti->sti_tidu_size));
587 dprintso(so, 1, ("tinfo: so_state %s\n",
588 pr_state(so->so_state, so->so_mode)));
589 return (0);
590 }
591
592 /*
593 * Send down T_info_req and wait for the ack.
594 * Record interesting T_info_ack values in the sonode.
595 */
596 static int
do_tinfo(struct sonode * so)597 do_tinfo(struct sonode *so)
598 {
599 struct T_info_req tir;
600 mblk_t *mp;
601 int error;
602
603 ASSERT(MUTEX_NOT_HELD(&so->so_lock));
604
605 if (so_no_tinfo) {
606 SOTOTPI(so)->sti_addr_size = 0;
607 return (0);
608 }
609
610 dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so));
611
612 /* Send T_INFO_REQ */
613 tir.PRIM_type = T_INFO_REQ;
614 mp = soallocproto1(&tir, sizeof (tir),
615 sizeof (struct T_info_req) + sizeof (struct T_info_ack),
616 _ALLOC_INTR, CRED());
617 if (mp == NULL) {
618 eprintsoline(so, ENOBUFS);
619 return (ENOBUFS);
620 }
621 /* T_INFO_REQ has to be M_PCPROTO */
622 DB_TYPE(mp) = M_PCPROTO;
623
624 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
625 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
626 if (error) {
627 eprintsoline(so, error);
628 return (error);
629 }
630 mutex_enter(&so->so_lock);
631 /* Wait for T_INFO_ACK */
632 if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
633 (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
634 mutex_exit(&so->so_lock);
635 eprintsoline(so, error);
636 return (error);
637 }
638
639 ASSERT(mp);
640 copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
641 mutex_exit(&so->so_lock);
642 freemsg(mp);
643 return (check_tinfo(so));
644 }
645
646 /*
647 * Send down T_capability_req and wait for the ack.
648 * Record interesting T_capability_ack values in the sonode.
649 */
650 static int
do_tcapability(struct sonode * so,t_uscalar_t cap_bits1)651 do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
652 {
653 struct T_capability_req tcr;
654 struct T_capability_ack *tca;
655 mblk_t *mp;
656 int error;
657 sotpi_info_t *sti = SOTOTPI(so);
658
659 ASSERT(cap_bits1 != 0);
660 ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
661 ASSERT(MUTEX_NOT_HELD(&so->so_lock));
662
663 if (sti->sti_provinfo->tpi_capability == PI_NO)
664 return (do_tinfo(so));
665
666 if (so_no_tinfo) {
667 sti->sti_addr_size = 0;
668 if ((cap_bits1 &= ~TC1_INFO) == 0)
669 return (0);
670 }
671
672 dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so));
673
674 /* Send T_CAPABILITY_REQ */
675 tcr.PRIM_type = T_CAPABILITY_REQ;
676 tcr.CAP_bits1 = cap_bits1;
677 mp = soallocproto1(&tcr, sizeof (tcr),
678 sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
679 _ALLOC_INTR, CRED());
680 if (mp == NULL) {
681 eprintsoline(so, ENOBUFS);
682 return (ENOBUFS);
683 }
684 /* T_CAPABILITY_REQ should be M_PCPROTO here */
685 DB_TYPE(mp) = M_PCPROTO;
686
687 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
688 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
689 if (error) {
690 eprintsoline(so, error);
691 return (error);
692 }
693 mutex_enter(&so->so_lock);
694 /* Wait for T_CAPABILITY_ACK */
695 if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
696 (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
697 mutex_exit(&so->so_lock);
698 PI_PROVLOCK(sti->sti_provinfo);
699 if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW)
700 sti->sti_provinfo->tpi_capability = PI_NO;
701 PI_PROVUNLOCK(sti->sti_provinfo);
702 ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
703 if (cap_bits1 & TC1_INFO) {
704 /*
705 * If the T_CAPABILITY_REQ timed out and then a
706 * T_INFO_REQ gets a protocol error, most likely
707 * the capability was slow (vs. unsupported). Return
708 * ENOSR for this case as a best guess.
709 */
710 if (error == ETIME) {
711 return ((error = do_tinfo(so)) == EPROTO ?
712 ENOSR : error);
713 }
714 return (do_tinfo(so));
715 }
716 return (0);
717 }
718
719 ASSERT(mp);
720 tca = (struct T_capability_ack *)mp->b_rptr;
721
722 ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
723 so_proc_tcapability_ack(so, tca);
724
725 cap_bits1 = tca->CAP_bits1;
726
727 mutex_exit(&so->so_lock);
728 freemsg(mp);
729
730 if (cap_bits1 & TC1_INFO)
731 return (check_tinfo(so));
732
733 return (0);
734 }
735
736 /*
737 * Process a T_CAPABILITY_ACK
738 */
739 void
so_proc_tcapability_ack(struct sonode * so,struct T_capability_ack * tca)740 so_proc_tcapability_ack(struct sonode *so, struct T_capability_ack *tca)
741 {
742 sotpi_info_t *sti = SOTOTPI(so);
743
744 if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW) {
745 PI_PROVLOCK(sti->sti_provinfo);
746 sti->sti_provinfo->tpi_capability = PI_YES;
747 PI_PROVUNLOCK(sti->sti_provinfo);
748 }
749
750 if (tca->CAP_bits1 & TC1_ACCEPTOR_ID) {
751 sti->sti_acceptor_id = tca->ACCEPTOR_id;
752 so->so_mode |= SM_ACCEPTOR_ID;
753 }
754
755 if (tca->CAP_bits1 & TC1_INFO)
756 copy_tinfo(so, &tca->INFO_ack);
757 }
758
759 /*
760 * Retrieve socket error, clear error if not peek.
761 */
762 int
sogeterr(struct sonode * so,boolean_t clear_err)763 sogeterr(struct sonode *so, boolean_t clear_err)
764 {
765 int error;
766
767 ASSERT(MUTEX_HELD(&so->so_lock));
768
769 error = so->so_error;
770 if (clear_err)
771 so->so_error = 0;
772
773 return (error);
774 }
775
776 /*
777 * This routine is registered with the stream head to retrieve read
778 * side errors.
779 * It does not clear the socket error for a peeking read side operation.
780 * It the error is to be cleared it sets *clearerr.
781 */
782 int
sogetrderr(vnode_t * vp,int ispeek,int * clearerr)783 sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
784 {
785 struct sonode *so = VTOSO(vp);
786 int error;
787
788 mutex_enter(&so->so_lock);
789 if (ispeek) {
790 error = so->so_error;
791 *clearerr = 0;
792 } else {
793 error = so->so_error;
794 so->so_error = 0;
795 *clearerr = 1;
796 }
797 mutex_exit(&so->so_lock);
798 return (error);
799 }
800
801 /*
802 * This routine is registered with the stream head to retrieve write
803 * side errors.
804 * It does not clear the socket error for a peeking read side operation.
805 * It the error is to be cleared it sets *clearerr.
806 */
807 int
sogetwrerr(vnode_t * vp,int ispeek,int * clearerr)808 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
809 {
810 struct sonode *so = VTOSO(vp);
811 int error;
812
813 mutex_enter(&so->so_lock);
814 if (so->so_state & SS_CANTSENDMORE) {
815 error = EPIPE;
816 *clearerr = 0;
817 } else {
818 error = so->so_error;
819 if (ispeek) {
820 *clearerr = 0;
821 } else {
822 so->so_error = 0;
823 *clearerr = 1;
824 }
825 }
826 mutex_exit(&so->so_lock);
827 return (error);
828 }
829
830 /*
831 * Set a nonpersistent read and write error on the socket.
832 * Used when there is a T_uderror_ind for a connected socket.
833 * The caller also needs to call strsetrerror and strsetwerror
834 * after dropping the lock.
835 */
836 void
soseterror(struct sonode * so,int error)837 soseterror(struct sonode *so, int error)
838 {
839 ASSERT(error != 0);
840
841 ASSERT(MUTEX_HELD(&so->so_lock));
842 so->so_error = (ushort_t)error;
843 }
844
845 void
soisconnecting(struct sonode * so)846 soisconnecting(struct sonode *so)
847 {
848 ASSERT(MUTEX_HELD(&so->so_lock));
849 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
850 so->so_state |= SS_ISCONNECTING;
851 cv_broadcast(&so->so_state_cv);
852 }
853
854 void
soisconnected(struct sonode * so)855 soisconnected(struct sonode *so)
856 {
857 ASSERT(MUTEX_HELD(&so->so_lock));
858 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
859 so->so_state |= SS_ISCONNECTED;
860 cv_broadcast(&so->so_state_cv);
861 }
862
863 /*
864 * The caller also needs to call strsetrerror, strsetwerror and strseteof.
865 */
866 void
soisdisconnected(struct sonode * so,int error)867 soisdisconnected(struct sonode *so, int error)
868 {
869 ASSERT(MUTEX_HELD(&so->so_lock));
870 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
871 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
872 so->so_error = (ushort_t)error;
873 if (so->so_peercred != NULL) {
874 crfree(so->so_peercred);
875 so->so_peercred = NULL;
876 }
877 cv_broadcast(&so->so_state_cv);
878 }
879
880 /*
881 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
882 * Does not affect write side.
883 * The caller also has to call strsetrerror.
884 */
885 static void
sobreakconn(struct sonode * so,int error)886 sobreakconn(struct sonode *so, int error)
887 {
888 ASSERT(MUTEX_HELD(&so->so_lock));
889 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
890 so->so_error = (ushort_t)error;
891 cv_broadcast(&so->so_state_cv);
892 }
893
894 /*
895 * Can no longer send.
896 * Caller must also call strsetwerror.
897 *
898 * We mark the peer address as no longer valid for getpeername, but
899 * leave it around for so_unix_close to notify the peer (that
900 * transport has no addressing held at that layer).
901 */
902 void
socantsendmore(struct sonode * so)903 socantsendmore(struct sonode *so)
904 {
905 ASSERT(MUTEX_HELD(&so->so_lock));
906 so->so_state |= SS_CANTSENDMORE;
907 cv_broadcast(&so->so_state_cv);
908 }
909
910 /*
911 * The caller must call strseteof(,1) as well as this routine
912 * to change the socket state.
913 */
914 void
socantrcvmore(struct sonode * so)915 socantrcvmore(struct sonode *so)
916 {
917 ASSERT(MUTEX_HELD(&so->so_lock));
918 so->so_state |= SS_CANTRCVMORE;
919 cv_broadcast(&so->so_state_cv);
920 }
921
922 /*
923 * The caller has sent down a "request_prim" primitive and wants to wait for
924 * an ack ("ack_prim") or an T_ERROR_ACK for it.
925 * The specified "ack_prim" can be a T_OK_ACK.
926 *
927 * Assumes that all the TPI acks are M_PCPROTO messages.
928 *
929 * Note that the socket is single-threaded (using so_lock_single)
930 * for all operations that generate TPI ack messages. Since
931 * only TPI ack messages are M_PCPROTO we should never receive
932 * anything except either the ack we are expecting or a T_ERROR_ACK
933 * for the same primitive.
934 */
935 int
sowaitprim(struct sonode * so,t_scalar_t request_prim,t_scalar_t ack_prim,t_uscalar_t min_size,mblk_t ** mpp,clock_t wait)936 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
937 t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
938 {
939 mblk_t *mp;
940 union T_primitives *tpr;
941 int error;
942
943 dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
944 (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait));
945
946 ASSERT(MUTEX_HELD(&so->so_lock));
947
948 error = sowaitack(so, &mp, wait);
949 if (error)
950 return (error);
951
952 dprintso(so, 1, ("got msg %p\n", (void *)mp));
953 if (DB_TYPE(mp) != M_PCPROTO ||
954 MBLKL(mp) < sizeof (tpr->type)) {
955 freemsg(mp);
956 eprintsoline(so, EPROTO);
957 return (EPROTO);
958 }
959 tpr = (union T_primitives *)mp->b_rptr;
960 /*
961 * Did we get the primitive that we were asking for?
962 * For T_OK_ACK we also check that it matches the request primitive.
963 */
964 if (tpr->type == ack_prim &&
965 (ack_prim != T_OK_ACK ||
966 tpr->ok_ack.CORRECT_prim == request_prim)) {
967 if (MBLKL(mp) >= (ssize_t)min_size) {
968 /* Found what we are looking for */
969 *mpp = mp;
970 return (0);
971 }
972 /* Too short */
973 freemsg(mp);
974 eprintsoline(so, EPROTO);
975 return (EPROTO);
976 }
977
978 if (tpr->type == T_ERROR_ACK &&
979 tpr->error_ack.ERROR_prim == request_prim) {
980 /* Error to the primitive we were looking for */
981 if (tpr->error_ack.TLI_error == TSYSERR) {
982 error = tpr->error_ack.UNIX_error;
983 } else {
984 error = proto_tlitosyserr(tpr->error_ack.TLI_error);
985 }
986 dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
987 tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
988 tpr->error_ack.UNIX_error, error));
989 freemsg(mp);
990 return (error);
991 }
992 /*
993 * Wrong primitive or T_ERROR_ACK for the wrong primitive
994 */
995 #ifdef DEBUG
996 if (tpr->type == T_ERROR_ACK) {
997 dprintso(so, 0, ("error_ack for %d: %d/%d\n",
998 tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
999 tpr->error_ack.UNIX_error));
1000 } else if (tpr->type == T_OK_ACK) {
1001 dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
1002 tpr->ok_ack.CORRECT_prim, ack_prim, request_prim));
1003 } else {
1004 dprintso(so, 0,
1005 ("unexpected primitive %d, expected %d for %d\n",
1006 tpr->type, ack_prim, request_prim));
1007 }
1008 #endif /* DEBUG */
1009
1010 freemsg(mp);
1011 eprintsoline(so, EPROTO);
1012 return (EPROTO);
1013 }
1014
1015 /*
1016 * Wait for a T_OK_ACK for the specified primitive.
1017 */
1018 int
sowaitokack(struct sonode * so,t_scalar_t request_prim)1019 sowaitokack(struct sonode *so, t_scalar_t request_prim)
1020 {
1021 mblk_t *mp;
1022 int error;
1023
1024 error = sowaitprim(so, request_prim, T_OK_ACK,
1025 (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
1026 if (error)
1027 return (error);
1028 freemsg(mp);
1029 return (0);
1030 }
1031
1032 /*
1033 * Queue a received TPI ack message on sti_ack_mp.
1034 */
1035 void
soqueueack(struct sonode * so,mblk_t * mp)1036 soqueueack(struct sonode *so, mblk_t *mp)
1037 {
1038 sotpi_info_t *sti = SOTOTPI(so);
1039
1040 if (DB_TYPE(mp) != M_PCPROTO) {
1041 zcmn_err(getzoneid(), CE_WARN,
1042 "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
1043 *(t_scalar_t *)mp->b_rptr);
1044 freemsg(mp);
1045 return;
1046 }
1047
1048 mutex_enter(&so->so_lock);
1049 if (sti->sti_ack_mp != NULL) {
1050 dprintso(so, 1, ("sti_ack_mp already set\n"));
1051 freemsg(sti->sti_ack_mp);
1052 sti->sti_ack_mp = NULL;
1053 }
1054 sti->sti_ack_mp = mp;
1055 cv_broadcast(&sti->sti_ack_cv);
1056 mutex_exit(&so->so_lock);
1057 }
1058
1059 /*
1060 * Wait for a TPI ack ignoring signals and errors.
1061 */
1062 int
sowaitack(struct sonode * so,mblk_t ** mpp,clock_t wait)1063 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
1064 {
1065 sotpi_info_t *sti = SOTOTPI(so);
1066
1067 ASSERT(MUTEX_HELD(&so->so_lock));
1068
1069 while (sti->sti_ack_mp == NULL) {
1070 #ifdef SOCK_TEST
1071 if (wait == 0 && sock_test_timelimit != 0)
1072 wait = sock_test_timelimit;
1073 #endif
1074 if (wait != 0) {
1075 /*
1076 * Only wait for the time limit.
1077 */
1078 if (cv_reltimedwait(&sti->sti_ack_cv, &so->so_lock,
1079 wait, TR_CLOCK_TICK) == -1) {
1080 eprintsoline(so, ETIME);
1081 return (ETIME);
1082 }
1083 }
1084 else
1085 cv_wait(&sti->sti_ack_cv, &so->so_lock);
1086 }
1087 *mpp = sti->sti_ack_mp;
1088 #ifdef DEBUG
1089 {
1090 union T_primitives *tpr;
1091 mblk_t *mp = *mpp;
1092
1093 tpr = (union T_primitives *)mp->b_rptr;
1094 ASSERT(DB_TYPE(mp) == M_PCPROTO);
1095 ASSERT(tpr->type == T_OK_ACK ||
1096 tpr->type == T_ERROR_ACK ||
1097 tpr->type == T_BIND_ACK ||
1098 tpr->type == T_CAPABILITY_ACK ||
1099 tpr->type == T_INFO_ACK ||
1100 tpr->type == T_OPTMGMT_ACK);
1101 }
1102 #endif /* DEBUG */
1103 sti->sti_ack_mp = NULL;
1104 return (0);
1105 }
1106
1107 /*
1108 * Queue a received T_CONN_IND message on sti_conn_ind_head/tail.
1109 */
1110 void
soqueueconnind(struct sonode * so,mblk_t * mp)1111 soqueueconnind(struct sonode *so, mblk_t *mp)
1112 {
1113 sotpi_info_t *sti = SOTOTPI(so);
1114
1115 if (DB_TYPE(mp) != M_PROTO) {
1116 zcmn_err(getzoneid(), CE_WARN,
1117 "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
1118 freemsg(mp);
1119 return;
1120 }
1121
1122 mutex_enter(&so->so_lock);
1123 ASSERT(mp->b_next == NULL);
1124 if (sti->sti_conn_ind_head == NULL) {
1125 sti->sti_conn_ind_head = mp;
1126 } else {
1127 ASSERT(sti->sti_conn_ind_tail->b_next == NULL);
1128 sti->sti_conn_ind_tail->b_next = mp;
1129 }
1130 sti->sti_conn_ind_tail = mp;
1131 /* Wakeup a single consumer of the T_CONN_IND */
1132 cv_signal(&so->so_acceptq_cv);
1133 mutex_exit(&so->so_lock);
1134 }
1135
1136 /*
1137 * Wait for a T_CONN_IND.
1138 * Don't wait if nonblocking.
1139 * Accept signals and socket errors.
1140 */
1141 int
sowaitconnind(struct sonode * so,int fmode,mblk_t ** mpp)1142 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
1143 {
1144 mblk_t *mp;
1145 sotpi_info_t *sti = SOTOTPI(so);
1146 int error = 0;
1147
1148 ASSERT(MUTEX_NOT_HELD(&so->so_lock));
1149 mutex_enter(&so->so_lock);
1150 check_error:
1151 if (so->so_error) {
1152 error = sogeterr(so, B_TRUE);
1153 if (error) {
1154 mutex_exit(&so->so_lock);
1155 return (error);
1156 }
1157 }
1158
1159 if (sti->sti_conn_ind_head == NULL) {
1160 if (fmode & (FNDELAY|FNONBLOCK)) {
1161 error = EWOULDBLOCK;
1162 goto done;
1163 }
1164
1165 if (so->so_state & SS_CLOSING) {
1166 error = EINTR;
1167 goto done;
1168 }
1169
1170 if (!cv_wait_sig_swap(&so->so_acceptq_cv, &so->so_lock)) {
1171 error = EINTR;
1172 goto done;
1173 }
1174 goto check_error;
1175 }
1176 mp = sti->sti_conn_ind_head;
1177 sti->sti_conn_ind_head = mp->b_next;
1178 mp->b_next = NULL;
1179 if (sti->sti_conn_ind_head == NULL) {
1180 ASSERT(sti->sti_conn_ind_tail == mp);
1181 sti->sti_conn_ind_tail = NULL;
1182 }
1183 *mpp = mp;
1184 done:
1185 mutex_exit(&so->so_lock);
1186 return (error);
1187 }
1188
1189 /*
1190 * Flush a T_CONN_IND matching the sequence number from the list.
1191 * Return zero if found; non-zero otherwise.
1192 * This is called very infrequently thus it is ok to do a linear search.
1193 */
1194 int
soflushconnind(struct sonode * so,t_scalar_t seqno)1195 soflushconnind(struct sonode *so, t_scalar_t seqno)
1196 {
1197 mblk_t *prevmp, *mp;
1198 struct T_conn_ind *tci;
1199 sotpi_info_t *sti = SOTOTPI(so);
1200
1201 mutex_enter(&so->so_lock);
1202 for (prevmp = NULL, mp = sti->sti_conn_ind_head; mp != NULL;
1203 prevmp = mp, mp = mp->b_next) {
1204 tci = (struct T_conn_ind *)mp->b_rptr;
1205 if (tci->SEQ_number == seqno) {
1206 dprintso(so, 1,
1207 ("t_discon_ind: found T_CONN_IND %d\n", seqno));
1208 /* Deleting last? */
1209 if (sti->sti_conn_ind_tail == mp) {
1210 sti->sti_conn_ind_tail = prevmp;
1211 }
1212 if (prevmp == NULL) {
1213 /* Deleting first */
1214 sti->sti_conn_ind_head = mp->b_next;
1215 } else {
1216 prevmp->b_next = mp->b_next;
1217 }
1218 mp->b_next = NULL;
1219
1220 ASSERT((sti->sti_conn_ind_head == NULL &&
1221 sti->sti_conn_ind_tail == NULL) ||
1222 (sti->sti_conn_ind_head != NULL &&
1223 sti->sti_conn_ind_tail != NULL));
1224
1225 so->so_error = ECONNABORTED;
1226 mutex_exit(&so->so_lock);
1227
1228 freemsg(mp);
1229 return (0);
1230 }
1231 }
1232 mutex_exit(&so->so_lock);
1233 dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
1234 return (-1);
1235 }
1236
/*
 * Wait for an in-progress connect to finish, either successfully or
 * with an error.  The caller must hold so_lock.
 *
 * fmode - file mode flags; FNDELAY or FNONBLOCK prevents sleeping.
 * nosig - non-zero if the sleep must not be interruptible by signals.
 *
 * Returns:
 *	0		- the socket is SS_ISCONNECTED.
 *	EINPROGRESS	- still connecting and the caller is nonblocking.
 *	EINTR		- still connecting and either the socket is
 *			  SS_CLOSING or an interruptible sleep was cut
 *			  short.
 *	ECONNREFUSED	- no longer connecting, not connected, and no
 *			  socket error is recorded.
 *	other		- the socket error returned by sogeterr().
 */
int
sowaitconnected(struct sonode *so, int fmode, int nosig)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	for (;;) {
		/* Done once we leave the "connecting only" state ... */
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
		    SS_ISCONNECTING)
			break;
		/* ... or as soon as an error has been recorded. */
		if (so->so_error != 0)
			break;

		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n",
		    (void *)so));

		if (fmode & (FNDELAY|FNONBLOCK))
			return (EINPROGRESS);

		if (so->so_state & SS_CLOSING)
			return (EINTR);

		if (nosig) {
			cv_wait(&so->so_state_cv, &so->so_lock);
		} else if (!cv_wait_sig_swap(&so->so_state_cv,
		    &so->so_lock)) {
			/*
			 * Interrupted.  The application can fall back on
			 * nonblocking techniques to find out when the
			 * connection has been established.
			 */
			return (EINTR);
		}
		dprintso(so, 1, ("awoken on %p\n", (void *)so));
	}

	if (so->so_error != 0) {
		error = sogeterr(so, B_TRUE);
		ASSERT(error != 0);
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}

	if (so->so_state & SS_ISCONNECTED)
		return (0);

	/*
	 * Not connected and no error to report.  This happens after a
	 * T_ORDREL_IND or a T_DISCON_IND carrying a zero errno, or when
	 * another thread (e.g. one calling read) already consumed so_error.
	 */
	error = ECONNREFUSED;
	dprintso(so, 1, ("sowaitconnected: error %d\n", error));
	return (error);
}
1291
1292
1293 /*
1294 * Handle the signal generation aspect of urgent data.
1295 */
1296 static void
so_oob_sig(struct sonode * so,int extrasig,strsigset_t * signals,strpollset_t * pollwakeups)1297 so_oob_sig(struct sonode *so, int extrasig,
1298 strsigset_t *signals, strpollset_t *pollwakeups)
1299 {
1300 sotpi_info_t *sti = SOTOTPI(so);
1301
1302 ASSERT(MUTEX_HELD(&so->so_lock));
1303
1304 ASSERT(so_verify_oobstate(so));
1305 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
1306 if (sti->sti_oobsigcnt > sti->sti_oobcnt) {
1307 /*
1308 * Signal has already been generated once for this
1309 * urgent "event". However, since TCP can receive updated
1310 * urgent pointers we still generate a signal.
1311 */
1312 ASSERT(so->so_state & SS_OOBPEND);
1313 if (extrasig) {
1314 *signals |= S_RDBAND;
1315 *pollwakeups |= POLLRDBAND;
1316 }
1317 return;
1318 }
1319
1320 sti->sti_oobsigcnt++;
1321 ASSERT(sti->sti_oobsigcnt > 0); /* Wraparound */
1322 ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);
1323
1324 /*
1325 * Record (for select/poll) that urgent data is pending.
1326 */
1327 so->so_state |= SS_OOBPEND;
1328 /*
1329 * New urgent data on the way so forget about any old
1330 * urgent data.
1331 */
1332 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1333 if (so->so_oobmsg != NULL) {
1334 dprintso(so, 1, ("sock: discarding old oob\n"));
1335 freemsg(so->so_oobmsg);
1336 so->so_oobmsg = NULL;
1337 }
1338 *signals |= S_RDBAND;
1339 *pollwakeups |= POLLRDBAND;
1340 ASSERT(so_verify_oobstate(so));
1341 }
1342
1343 /*
1344 * Handle the processing of the T_EXDATA_IND with urgent data.
1345 * Returns the T_EXDATA_IND if it should be queued on the read queue.
1346 */
1347 /* ARGSUSED2 */
1348 static mblk_t *
so_oob_exdata(struct sonode * so,mblk_t * mp,strsigset_t * signals,strpollset_t * pollwakeups)1349 so_oob_exdata(struct sonode *so, mblk_t *mp,
1350 strsigset_t *signals, strpollset_t *pollwakeups)
1351 {
1352 sotpi_info_t *sti = SOTOTPI(so);
1353
1354 ASSERT(MUTEX_HELD(&so->so_lock));
1355
1356 ASSERT(so_verify_oobstate(so));
1357
1358 ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);
1359
1360 sti->sti_oobcnt++;
1361 ASSERT(sti->sti_oobcnt > 0); /* wraparound? */
1362 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
1363
1364 /*
1365 * Set MSGMARK for SIOCATMARK.
1366 */
1367 mp->b_flag |= MSGMARK;
1368
1369 ASSERT(so_verify_oobstate(so));
1370 return (mp);
1371 }
1372
1373 /*
1374 * Handle the processing of the actual urgent data.
1375 * Returns the data mblk if it should be queued on the read queue.
1376 */
1377 static mblk_t *
so_oob_data(struct sonode * so,mblk_t * mp,strsigset_t * signals,strpollset_t * pollwakeups)1378 so_oob_data(struct sonode *so, mblk_t *mp,
1379 strsigset_t *signals, strpollset_t *pollwakeups)
1380 {
1381 sotpi_info_t *sti = SOTOTPI(so);
1382
1383 ASSERT(MUTEX_HELD(&so->so_lock));
1384
1385 ASSERT(so_verify_oobstate(so));
1386
1387 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
1388 ASSERT(mp != NULL);
1389 /*
1390 * For OOBINLINE we keep the data in the T_EXDATA_IND.
1391 * Otherwise we store it in so_oobmsg.
1392 */
1393 ASSERT(so->so_oobmsg == NULL);
1394 if (so->so_options & SO_OOBINLINE) {
1395 *pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
1396 *signals |= S_INPUT | S_RDNORM;
1397 } else {
1398 *pollwakeups |= POLLRDBAND;
1399 so->so_state |= SS_HAVEOOBDATA;
1400 so->so_oobmsg = mp;
1401 mp = NULL;
1402 }
1403 ASSERT(so_verify_oobstate(so));
1404 return (mp);
1405 }
1406
1407 /*
1408 * Caller must hold the mutex.
1409 * For delayed processing, save the T_DISCON_IND received
1410 * from below on sti_discon_ind_mp.
1411 * When the message is processed the framework will call:
1412 * (*func)(so, mp);
1413 */
1414 static void
so_save_discon_ind(struct sonode * so,mblk_t * mp,void (* func)(struct sonode * so,mblk_t *))1415 so_save_discon_ind(struct sonode *so,
1416 mblk_t *mp,
1417 void (*func)(struct sonode *so, mblk_t *))
1418 {
1419 sotpi_info_t *sti = SOTOTPI(so);
1420
1421 ASSERT(MUTEX_HELD(&so->so_lock));
1422
1423 /*
1424 * Discard new T_DISCON_IND if we have already received another.
1425 * Currently the earlier message can either be on sti_discon_ind_mp
1426 * or being processed.
1427 */
1428 if (sti->sti_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
1429 zcmn_err(getzoneid(), CE_WARN,
1430 "sockfs: received unexpected additional T_DISCON_IND\n");
1431 freemsg(mp);
1432 return;
1433 }
1434 mp->b_prev = (mblk_t *)func;
1435 mp->b_next = NULL;
1436 sti->sti_discon_ind_mp = mp;
1437 }
1438
1439 /*
1440 * Caller must hold the mutex and make sure that either SOLOCKED
1441 * or SOASYNC_UNBIND is set. Called from so_unlock_single().
1442 * Perform delayed processing of T_DISCON_IND message on sti_discon_ind_mp.
1443 * Need to ensure that strsock_proto() will not end up sleeping for
1444 * SOASYNC_UNBIND, while executing this function.
1445 */
1446 void
so_drain_discon_ind(struct sonode * so)1447 so_drain_discon_ind(struct sonode *so)
1448 {
1449 mblk_t *bp;
1450 void (*func)(struct sonode *so, mblk_t *);
1451 sotpi_info_t *sti = SOTOTPI(so);
1452
1453 ASSERT(MUTEX_HELD(&so->so_lock));
1454 ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));
1455
1456 /* Process T_DISCON_IND on sti_discon_ind_mp */
1457 if ((bp = sti->sti_discon_ind_mp) != NULL) {
1458 sti->sti_discon_ind_mp = NULL;
1459 func = (void (*)())bp->b_prev;
1460 bp->b_prev = NULL;
1461
1462 /*
1463 * This (*func) is supposed to generate a message downstream
1464 * and we need to have a flag set until the corresponding
1465 * upstream message reaches stream head.
1466 * When processing T_DISCON_IND in strsock_discon_ind
1467 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and
1468 * drop the flag after we get the ACK in strsock_proto.
1469 */
1470 (void) (*func)(so, bp);
1471 }
1472 }
1473
1474 /*
1475 * Caller must hold the mutex.
1476 * Remove the T_DISCON_IND on sti_discon_ind_mp.
1477 */
1478 void
so_flush_discon_ind(struct sonode * so)1479 so_flush_discon_ind(struct sonode *so)
1480 {
1481 mblk_t *bp;
1482 sotpi_info_t *sti = SOTOTPI(so);
1483
1484 ASSERT(MUTEX_HELD(&so->so_lock));
1485
1486 /*
1487 * Remove T_DISCON_IND mblk at sti_discon_ind_mp.
1488 */
1489 if ((bp = sti->sti_discon_ind_mp) != NULL) {
1490 sti->sti_discon_ind_mp = NULL;
1491 bp->b_prev = NULL;
1492 freemsg(bp);
1493 }
1494 }
1495
1496 /*
1497 * Caller must hold the mutex.
1498 *
1499 * This function is used to process the T_DISCON_IND message. It does
1500 * immediate processing when called from strsock_proto and delayed
1501 * processing of discon_ind saved on sti_discon_ind_mp when called from
1502 * so_drain_discon_ind. When a T_DISCON_IND message is saved in
1503 * sti_discon_ind_mp for delayed processing, this function is registered
1504 * as the callback function to process the message.
1505 *
1506 * SOASYNC_UNBIND should be held in this function, during the non-blocking
1507 * unbind operation, and should be released only after we receive the ACK
1508 * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set,
1509 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
1510 * sent from either this function or tcp_unbind(), flushing away any TPI
1511 * message that is being sent down and stays in a lower module's queue.
1512 *
1513 * This function drops so_lock and grabs it again.
1514 */
1515 static void
strsock_discon_ind(struct sonode * so,mblk_t * discon_mp)1516 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
1517 {
1518 struct vnode *vp;
1519 struct stdata *stp;
1520 union T_primitives *tpr;
1521 struct T_unbind_req *ubr;
1522 mblk_t *mp;
1523 int error;
1524 sotpi_info_t *sti = SOTOTPI(so);
1525
1526 ASSERT(MUTEX_HELD(&so->so_lock));
1527 ASSERT(discon_mp);
1528 ASSERT(discon_mp->b_rptr);
1529
1530 tpr = (union T_primitives *)discon_mp->b_rptr;
1531 ASSERT(tpr->type == T_DISCON_IND);
1532
1533 vp = SOTOV(so);
1534 stp = vp->v_stream;
1535 ASSERT(stp);
1536
1537 /*
1538 * Not a listener
1539 */
1540 ASSERT((so->so_state & SS_ACCEPTCONN) == 0);
1541
1542 /*
1543 * This assumes that the name space for DISCON_reason
1544 * is the errno name space.
1545 */
1546 soisdisconnected(so, tpr->discon_ind.DISCON_reason);
1547 sti->sti_laddr_valid = 0;
1548 sti->sti_faddr_valid = 0;
1549
1550 /*
1551 * Unbind with the transport without blocking.
1552 * If we've already received a T_DISCON_IND do not unbind.
1553 *
1554 * If there is no preallocated unbind message, we have already
1555 * unbound with the transport
1556 *
1557 * If the socket is not bound, no need to unbind.
1558 */
1559 mp = sti->sti_unbind_mp;
1560 if (mp == NULL) {
1561 ASSERT(!(so->so_state & SS_ISBOUND));
1562 mutex_exit(&so->so_lock);
1563 } else if (!(so->so_state & SS_ISBOUND)) {
1564 mutex_exit(&so->so_lock);
1565 } else {
1566 sti->sti_unbind_mp = NULL;
1567
1568 /*
1569 * Is another T_DISCON_IND being processed.
1570 */
1571 ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);
1572
1573 /*
1574 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
1575 * this unbind. Set SOASYNC_UNBIND. This should be cleared
1576 * only after we receive the ACK in strsock_proto.
1577 */
1578 so->so_flag |= SOASYNC_UNBIND;
1579 ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
1580 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
1581 sti->sti_laddr_valid = 0;
1582 mutex_exit(&so->so_lock);
1583
1584 /*
1585 * Send down T_UNBIND_REQ ignoring flow control.
1586 * XXX Assumes that MSG_IGNFLOW implies that this thread
1587 * does not run service procedures.
1588 */
1589 ASSERT(DB_TYPE(mp) == M_PROTO);
1590 ubr = (struct T_unbind_req *)mp->b_rptr;
1591 mp->b_wptr += sizeof (*ubr);
1592 ubr->PRIM_type = T_UNBIND_REQ;
1593
1594 /*
1595 * Flush the read and write side (except stream head read queue)
1596 * and send down T_UNBIND_REQ.
1597 */
1598 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
1599 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1600 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
1601 /* LINTED - warning: statement has no consequent: if */
1602 if (error) {
1603 eprintsoline(so, error);
1604 }
1605 }
1606
1607 if (tpr->discon_ind.DISCON_reason != 0)
1608 strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1609 strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
1610 strseteof(SOTOV(so), 1);
1611 /*
1612 * strseteof takes care of read side wakeups,
1613 * pollwakeups, and signals.
1614 */
1615 dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
1616 freemsg(discon_mp);
1617
1618
1619 pollwakeup(&stp->sd_pollist, POLLOUT);
1620 mutex_enter(&stp->sd_lock);
1621
1622 /*
1623 * Wake sleeping write
1624 */
1625 if (stp->sd_flag & WSLEEP) {
1626 stp->sd_flag &= ~WSLEEP;
1627 cv_broadcast(&stp->sd_wrq->q_wait);
1628 }
1629
1630 /*
1631 * strsendsig can handle multiple signals with a
1632 * single call. Send SIGPOLL for S_OUTPUT event.
1633 */
1634 if (stp->sd_sigflags & S_OUTPUT)
1635 strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);
1636
1637 mutex_exit(&stp->sd_lock);
1638 mutex_enter(&so->so_lock);
1639 }
1640
1641 /*
1642 * This routine is registered with the stream head to receive M_PROTO
1643 * and M_PCPROTO messages.
1644 *
1645 * Returns NULL if the message was consumed.
1646 * Returns an mblk to make that mblk be processed (and queued) by the stream
1647 * head.
1648 *
1649 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
1650 * *pollwakeups) for the stream head to take action on. Note that since
1651 * sockets always deliver SIGIO for every new piece of data this routine
1652 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
1653 *
1654 * This routine handles all data related TPI messages independent of
1655 * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message
1656 * arrive on a SOCK_STREAM.
1657 */
1658 static mblk_t *
strsock_proto(vnode_t * vp,mblk_t * mp,strwakeup_t * wakeups,strsigset_t * firstmsgsigs,strsigset_t * allmsgsigs,strpollset_t * pollwakeups)1659 strsock_proto(vnode_t *vp, mblk_t *mp,
1660 strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1661 strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1662 {
1663 union T_primitives *tpr;
1664 struct sonode *so;
1665 sotpi_info_t *sti;
1666 uint32_t auditing = AU_AUDITING();
1667
1668 so = VTOSO(vp);
1669 sti = SOTOTPI(so);
1670
1671 dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp));
1672
1673 /* Set default return values */
1674 *firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;
1675
1676 ASSERT(DB_TYPE(mp) == M_PROTO ||
1677 DB_TYPE(mp) == M_PCPROTO);
1678
1679 if (MBLKL(mp) < sizeof (tpr->type)) {
1680 /* The message is too short to even contain the primitive */
1681 zcmn_err(getzoneid(), CE_WARN,
1682 "sockfs: Too short TPI message received. Len = %ld\n",
1683 (ptrdiff_t)(MBLKL(mp)));
1684 freemsg(mp);
1685 return (NULL);
1686 }
1687 if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1688 /* The read pointer is not aligned correctly for TPI */
1689 zcmn_err(getzoneid(), CE_WARN,
1690 "sockfs: Unaligned TPI message received. rptr = %p\n",
1691 (void *)mp->b_rptr);
1692 freemsg(mp);
1693 return (NULL);
1694 }
1695 tpr = (union T_primitives *)mp->b_rptr;
1696 dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));
1697
1698 switch (tpr->type) {
1699
1700 case T_DATA_IND:
1701 if (MBLKL(mp) < sizeof (struct T_data_ind)) {
1702 zcmn_err(getzoneid(), CE_WARN,
1703 "sockfs: Too short T_DATA_IND. Len = %ld\n",
1704 (ptrdiff_t)(MBLKL(mp)));
1705 freemsg(mp);
1706 return (NULL);
1707 }
1708 /*
1709 * Ignore zero-length T_DATA_IND messages. These might be
1710 * generated by some transports.
1711 * This is needed to prevent read (which skips the M_PROTO
1712 * part) to unexpectedly return 0 (or return EWOULDBLOCK
1713 * on a non-blocking socket after select/poll has indicated
1714 * that data is available).
1715 */
1716 if (msgdsize(mp->b_cont) == 0) {
1717 dprintso(so, 0,
1718 ("strsock_proto: zero length T_DATA_IND\n"));
1719 freemsg(mp);
1720 return (NULL);
1721 }
1722 *allmsgsigs = S_INPUT | S_RDNORM;
1723 *pollwakeups = POLLIN | POLLRDNORM;
1724 *wakeups = RSLEEP;
1725 return (mp);
1726
1727 case T_UNITDATA_IND: {
1728 struct T_unitdata_ind *tudi = &tpr->unitdata_ind;
1729 void *addr;
1730 t_uscalar_t addrlen;
1731
1732 if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
1733 zcmn_err(getzoneid(), CE_WARN,
1734 "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
1735 (ptrdiff_t)(MBLKL(mp)));
1736 freemsg(mp);
1737 return (NULL);
1738 }
1739
1740 /* Is this not a connected datagram socket? */
1741 if ((so->so_mode & SM_CONNREQUIRED) ||
1742 !(so->so_state & SS_ISCONNECTED)) {
1743 /*
1744 * Not a connected datagram socket. Look for
1745 * the SO_UNIX_CLOSE option. If such an option is found
1746 * discard the message (since it has no meaning
1747 * unless connected).
1748 */
1749 if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
1750 tudi->OPT_length != 0) {
1751 void *opt;
1752 t_uscalar_t optlen = tudi->OPT_length;
1753
1754 opt = sogetoff(mp, tudi->OPT_offset,
1755 optlen, __TPI_ALIGN_SIZE);
1756 if (opt == NULL) {
1757 /* The len/off falls outside mp */
1758 freemsg(mp);
1759 mutex_enter(&so->so_lock);
1760 soseterror(so, EPROTO);
1761 mutex_exit(&so->so_lock);
1762 zcmn_err(getzoneid(), CE_WARN,
1763 "sockfs: T_unidata_ind with "
1764 "invalid optlen/offset %u/%d\n",
1765 optlen, tudi->OPT_offset);
1766 return (NULL);
1767 }
1768 if (so_getopt_unix_close(opt, optlen)) {
1769 freemsg(mp);
1770 return (NULL);
1771 }
1772 }
1773 *allmsgsigs = S_INPUT | S_RDNORM;
1774 *pollwakeups = POLLIN | POLLRDNORM;
1775 *wakeups = RSLEEP;
1776 if (auditing)
1777 audit_sock(T_UNITDATA_IND, strvp2wq(vp),
1778 mp, 0);
1779 return (mp);
1780 }
1781
1782 /*
1783 * A connect datagram socket. For AF_INET{,6} we verify that
1784 * the source address matches the "connected to" address.
1785 * The semantics of AF_UNIX sockets is to not verify
1786 * the source address.
1787 * Note that this source address verification is transport
1788 * specific. Thus the real fix would be to extend TPI
1789 * to allow T_CONN_REQ messages to be sent to connectionless
1790 * transport providers and always let the transport provider
1791 * do whatever filtering is needed.
1792 *
1793 * The verification/filtering semantics for transports
1794 * other than AF_INET and AF_UNIX are unknown. The choice
1795 * would be to either filter using bcmp or let all messages
1796 * get through. This code does not filter other address
1797 * families since this at least allows the application to
1798 * work around any missing filtering.
1799 *
1800 * XXX Should we move filtering to UDP/ICMP???
1801 * That would require passing e.g. a T_DISCON_REQ to UDP
1802 * when the socket becomes unconnected.
1803 */
1804 addrlen = tudi->SRC_length;
1805 /*
1806 * The alignment restriction is really too strict but
1807 * we want enough alignment to inspect the fields of
1808 * a sockaddr_in.
1809 */
1810 addr = sogetoff(mp, tudi->SRC_offset, addrlen,
1811 __TPI_ALIGN_SIZE);
1812 if (addr == NULL) {
1813 freemsg(mp);
1814 mutex_enter(&so->so_lock);
1815 soseterror(so, EPROTO);
1816 mutex_exit(&so->so_lock);
1817 zcmn_err(getzoneid(), CE_WARN,
1818 "sockfs: T_unidata_ind with invalid "
1819 "addrlen/offset %u/%d\n",
1820 addrlen, tudi->SRC_offset);
1821 return (NULL);
1822 }
1823
1824 if (so->so_family == AF_INET) {
1825 /*
1826 * For AF_INET we allow wildcarding both sin_addr
1827 * and sin_port.
1828 */
1829 struct sockaddr_in *faddr, *sin;
1830
1831 /* Prevent sti_faddr_sa from changing while accessed */
1832 mutex_enter(&so->so_lock);
1833 ASSERT(sti->sti_faddr_len ==
1834 (socklen_t)sizeof (struct sockaddr_in));
1835 faddr = (struct sockaddr_in *)sti->sti_faddr_sa;
1836 sin = (struct sockaddr_in *)addr;
1837 if (addrlen !=
1838 (t_uscalar_t)sizeof (struct sockaddr_in) ||
1839 (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
1840 faddr->sin_addr.s_addr != INADDR_ANY) ||
1841 (so->so_type != SOCK_RAW &&
1842 sin->sin_port != faddr->sin_port &&
1843 faddr->sin_port != 0)) {
1844 #ifdef DEBUG
1845 dprintso(so, 0,
1846 ("sockfs: T_UNITDATA_IND mismatch: %s",
1847 pr_addr(so->so_family,
1848 (struct sockaddr *)addr, addrlen)));
1849 dprintso(so, 0, (" - %s\n",
1850 pr_addr(so->so_family, sti->sti_faddr_sa,
1851 (t_uscalar_t)sti->sti_faddr_len)));
1852 #endif /* DEBUG */
1853 mutex_exit(&so->so_lock);
1854 freemsg(mp);
1855 return (NULL);
1856 }
1857 mutex_exit(&so->so_lock);
1858 } else if (so->so_family == AF_INET6) {
1859 /*
1860 * For AF_INET6 we allow wildcarding both sin6_addr
1861 * and sin6_port.
1862 */
1863 struct sockaddr_in6 *faddr6, *sin6;
1864 static struct in6_addr zeroes; /* inits to all zeros */
1865
1866 /* Prevent sti_faddr_sa from changing while accessed */
1867 mutex_enter(&so->so_lock);
1868 ASSERT(sti->sti_faddr_len ==
1869 (socklen_t)sizeof (struct sockaddr_in6));
1870 faddr6 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
1871 sin6 = (struct sockaddr_in6 *)addr;
1872 /* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
1873 if (addrlen !=
1874 (t_uscalar_t)sizeof (struct sockaddr_in6) ||
1875 (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1876 &faddr6->sin6_addr) &&
1877 !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
1878 (so->so_type != SOCK_RAW &&
1879 sin6->sin6_port != faddr6->sin6_port &&
1880 faddr6->sin6_port != 0)) {
1881 #ifdef DEBUG
1882 dprintso(so, 0,
1883 ("sockfs: T_UNITDATA_IND mismatch: %s",
1884 pr_addr(so->so_family,
1885 (struct sockaddr *)addr, addrlen)));
1886 dprintso(so, 0, (" - %s\n",
1887 pr_addr(so->so_family, sti->sti_faddr_sa,
1888 (t_uscalar_t)sti->sti_faddr_len)));
1889 #endif /* DEBUG */
1890 mutex_exit(&so->so_lock);
1891 freemsg(mp);
1892 return (NULL);
1893 }
1894 mutex_exit(&so->so_lock);
1895 } else if (so->so_family == AF_UNIX &&
1896 msgdsize(mp->b_cont) == 0 &&
1897 tudi->OPT_length != 0) {
1898 /*
1899 * Attempt to extract AF_UNIX
1900 * SO_UNIX_CLOSE indication from options.
1901 */
1902 void *opt;
1903 t_uscalar_t optlen = tudi->OPT_length;
1904
1905 opt = sogetoff(mp, tudi->OPT_offset,
1906 optlen, __TPI_ALIGN_SIZE);
1907 if (opt == NULL) {
1908 /* The len/off falls outside mp */
1909 freemsg(mp);
1910 mutex_enter(&so->so_lock);
1911 soseterror(so, EPROTO);
1912 mutex_exit(&so->so_lock);
1913 zcmn_err(getzoneid(), CE_WARN,
1914 "sockfs: T_unidata_ind with invalid "
1915 "optlen/offset %u/%d\n",
1916 optlen, tudi->OPT_offset);
1917 return (NULL);
1918 }
1919 /*
1920 * If we received a unix close indication mark the
1921 * socket and discard this message.
1922 */
1923 if (so_getopt_unix_close(opt, optlen)) {
1924 mutex_enter(&so->so_lock);
1925 sobreakconn(so, ECONNRESET);
1926 mutex_exit(&so->so_lock);
1927 strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1928 freemsg(mp);
1929 *pollwakeups = POLLIN | POLLRDNORM;
1930 *allmsgsigs = S_INPUT | S_RDNORM;
1931 *wakeups = RSLEEP;
1932 return (NULL);
1933 }
1934 }
1935 *allmsgsigs = S_INPUT | S_RDNORM;
1936 *pollwakeups = POLLIN | POLLRDNORM;
1937 *wakeups = RSLEEP;
1938 return (mp);
1939 }
1940
1941 case T_OPTDATA_IND: {
1942 struct T_optdata_ind *tdi = &tpr->optdata_ind;
1943
1944 if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
1945 zcmn_err(getzoneid(), CE_WARN,
1946 "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
1947 (ptrdiff_t)(MBLKL(mp)));
1948 freemsg(mp);
1949 return (NULL);
1950 }
1951 /*
1952 * Allow zero-length messages carrying options.
1953 * This is used when carrying the SO_UNIX_CLOSE option.
1954 */
1955 if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
1956 tdi->OPT_length != 0) {
1957 /*
1958 * Attempt to extract AF_UNIX close indication
1959 * from the options. Ignore any other options -
1960 * those are handled once the message is removed
1961 * from the queue.
1962 * The close indication message should not carry data.
1963 */
1964 void *opt;
1965 t_uscalar_t optlen = tdi->OPT_length;
1966
1967 opt = sogetoff(mp, tdi->OPT_offset,
1968 optlen, __TPI_ALIGN_SIZE);
1969 if (opt == NULL) {
1970 /* The len/off falls outside mp */
1971 freemsg(mp);
1972 mutex_enter(&so->so_lock);
1973 soseterror(so, EPROTO);
1974 mutex_exit(&so->so_lock);
1975 zcmn_err(getzoneid(), CE_WARN,
1976 "sockfs: T_optdata_ind with invalid "
1977 "optlen/offset %u/%d\n",
1978 optlen, tdi->OPT_offset);
1979 return (NULL);
1980 }
1981 /*
1982 * If we received a close indication mark the
1983 * socket and discard this message.
1984 */
1985 if (so_getopt_unix_close(opt, optlen)) {
1986 mutex_enter(&so->so_lock);
1987 socantsendmore(so);
1988 sti->sti_faddr_valid = 0;
1989 mutex_exit(&so->so_lock);
1990 strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
1991 freemsg(mp);
1992 return (NULL);
1993 }
1994 }
1995 *allmsgsigs = S_INPUT | S_RDNORM;
1996 *pollwakeups = POLLIN | POLLRDNORM;
1997 *wakeups = RSLEEP;
1998 return (mp);
1999 }
2000
2001 case T_EXDATA_IND: {
2002 mblk_t *mctl, *mdata;
2003 mblk_t *lbp;
2004 union T_primitives *tprp;
2005 struct stdata *stp;
2006 queue_t *qp;
2007
2008 if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
2009 zcmn_err(getzoneid(), CE_WARN,
2010 "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
2011 (ptrdiff_t)(MBLKL(mp)));
2012 freemsg(mp);
2013 return (NULL);
2014 }
2015 /*
2016 * Ignore zero-length T_EXDATA_IND messages. These might be
2017 * generated by some transports.
2018 *
2019 * This is needed to prevent read (which skips the M_PROTO
2020 * part) to unexpectedly return 0 (or return EWOULDBLOCK
2021 * on a non-blocking socket after select/poll has indicated
2022 * that data is available).
2023 */
2024 dprintso(so, 1,
2025 ("T_EXDATA_IND(%p): counts %d/%d state %s\n",
2026 (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
2027 pr_state(so->so_state, so->so_mode)));
2028
2029 if (msgdsize(mp->b_cont) == 0) {
2030 dprintso(so, 0,
2031 ("strsock_proto: zero length T_EXDATA_IND\n"));
2032 freemsg(mp);
2033 return (NULL);
2034 }
2035
2036 /*
2037 * Split into the T_EXDATA_IND and the M_DATA part.
2038 * We process these three pieces separately:
2039 * signal generation
2040 * handling T_EXDATA_IND
2041 * handling M_DATA component
2042 */
2043 mctl = mp;
2044 mdata = mctl->b_cont;
2045 mctl->b_cont = NULL;
2046 mutex_enter(&so->so_lock);
2047 so_oob_sig(so, 0, allmsgsigs, pollwakeups);
2048 mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
2049 mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);
2050
2051 stp = vp->v_stream;
2052 ASSERT(stp != NULL);
2053 qp = _RD(stp->sd_wrq);
2054
2055 mutex_enter(QLOCK(qp));
2056 lbp = qp->q_last;
2057
2058 /*
2059 * We want to avoid queueing up a string of T_EXDATA_IND
2060 * messages with no intervening data messages at the stream
2061 * head. These messages contribute to the total message
2062 * count. Eventually this can lead to STREAMS flow control
2063 * and also cause TCP to advertise a zero window condition
2064 * to the peer. This can happen in the degenerate case where
2065 * the sender and receiver exchange only OOB data. The sender
2066 * only sends messages with MSG_OOB flag and the receiver
2067 * receives only MSG_OOB messages and does not use SO_OOBINLINE.
2068 * An example of this scenario has been reported in applications
2069 * that use OOB data to exchange heart beats. Flow control
2070 * relief will never happen if the application only reads OOB
2071 * data which is done directly by sorecvoob() and the
2072 * T_EXDATA_IND messages at the streamhead won't be consumed.
2073 * Note that there is no correctness issue in compressing the
2074 * string of T_EXDATA_IND messages into a single T_EXDATA_IND
2075 * message. A single read that does not specify MSG_OOB will
2076 * read across all the marks in a loop in sotpi_recvmsg().
2077 * Each mark is individually distinguishable only if the
2078 * T_EXDATA_IND messages are separated by data messages.
2079 */
2080 if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) {
2081 tprp = (union T_primitives *)lbp->b_rptr;
2082 if ((tprp->type == T_EXDATA_IND) &&
2083 !(so->so_options & SO_OOBINLINE)) {
2084
2085 /*
2086 * free the new M_PROTO message
2087 */
2088 freemsg(mctl);
2089
2090 /*
2091 * adjust the OOB count and OOB signal count
2092 * just incremented for the new OOB data.
2093 */
2094 sti->sti_oobcnt--;
2095 sti->sti_oobsigcnt--;
2096 mutex_exit(QLOCK(qp));
2097 mutex_exit(&so->so_lock);
2098 return (NULL);
2099 }
2100 }
2101 mutex_exit(QLOCK(qp));
2102
2103 /*
2104 * Pass the T_EXDATA_IND and the M_DATA back separately
2105 * by using b_next linkage. (The stream head will queue any
2106 * b_next linked messages separately.) This is needed
2107 * since MSGMARK applies to the last byte of the message
2108 * hence we can not have any M_DATA component attached
2109 * to the marked T_EXDATA_IND. Note that the stream head
2110 * will not consolidate M_DATA messages onto an MSGMARK'ed
2111 * message in order to preserve the constraint that
2112 * the T_EXDATA_IND always is a separate message.
2113 */
2114 ASSERT(mctl != NULL);
2115 mctl->b_next = mdata;
2116 mp = mctl;
2117 #ifdef DEBUG
2118 if (mdata == NULL) {
2119 dprintso(so, 1,
2120 ("after outofline T_EXDATA_IND(%p): "
2121 "counts %d/%d poll 0x%x sig 0x%x state %s\n",
2122 (void *)vp, sti->sti_oobsigcnt,
2123 sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
2124 pr_state(so->so_state, so->so_mode)));
2125 } else {
2126 dprintso(so, 1,
2127 ("after inline T_EXDATA_IND(%p): "
2128 "counts %d/%d poll 0x%x sig 0x%x state %s\n",
2129 (void *)vp, sti->sti_oobsigcnt,
2130 sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
2131 pr_state(so->so_state, so->so_mode)));
2132 }
2133 #endif /* DEBUG */
2134 mutex_exit(&so->so_lock);
2135 *wakeups = RSLEEP;
2136 return (mp);
2137 }
2138
2139 case T_CONN_CON: {
2140 struct T_conn_con *conn_con;
2141 void *addr;
2142 t_uscalar_t addrlen;
2143
2144 /*
2145 * Verify the state, update the state to ISCONNECTED,
2146 * record the potentially new address in the message,
2147 * and drop the message.
2148 */
2149 if (MBLKL(mp) < sizeof (struct T_conn_con)) {
2150 zcmn_err(getzoneid(), CE_WARN,
2151 "sockfs: Too short T_CONN_CON. Len = %ld\n",
2152 (ptrdiff_t)(MBLKL(mp)));
2153 freemsg(mp);
2154 return (NULL);
2155 }
2156
2157 mutex_enter(&so->so_lock);
2158 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
2159 SS_ISCONNECTING) {
2160 mutex_exit(&so->so_lock);
2161 dprintso(so, 1,
2162 ("T_CONN_CON: state %x\n", so->so_state));
2163 freemsg(mp);
2164 return (NULL);
2165 }
2166
2167 conn_con = &tpr->conn_con;
2168 addrlen = conn_con->RES_length;
2169 /*
2170 * Allow the address to be of different size than sent down
2171 * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
2172 * For AF_UNIX require the identical length.
2173 */
2174 if (so->so_family == AF_UNIX ?
2175 addrlen != (t_uscalar_t)sizeof (sti->sti_ux_laddr) :
2176 addrlen > (t_uscalar_t)sti->sti_faddr_maxlen) {
2177 zcmn_err(getzoneid(), CE_WARN,
2178 "sockfs: T_conn_con with different "
2179 "length %u/%d\n",
2180 addrlen, conn_con->RES_length);
2181 soisdisconnected(so, EPROTO);
2182 sti->sti_laddr_valid = 0;
2183 sti->sti_faddr_valid = 0;
2184 mutex_exit(&so->so_lock);
2185 strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2186 strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2187 strseteof(SOTOV(so), 1);
2188 freemsg(mp);
2189 /*
2190 * strseteof takes care of read side wakeups,
2191 * pollwakeups, and signals.
2192 */
2193 *wakeups = WSLEEP;
2194 *allmsgsigs = S_OUTPUT;
2195 *pollwakeups = POLLOUT;
2196 return (NULL);
2197 }
2198 addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
2199 if (addr == NULL) {
2200 zcmn_err(getzoneid(), CE_WARN,
2201 "sockfs: T_conn_con with invalid "
2202 "addrlen/offset %u/%d\n",
2203 addrlen, conn_con->RES_offset);
2204 mutex_exit(&so->so_lock);
2205 strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2206 strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2207 strseteof(SOTOV(so), 1);
2208 freemsg(mp);
2209 /*
2210 * strseteof takes care of read side wakeups,
2211 * pollwakeups, and signals.
2212 */
2213 *wakeups = WSLEEP;
2214 *allmsgsigs = S_OUTPUT;
2215 *pollwakeups = POLLOUT;
2216 return (NULL);
2217 }
2218
2219 /*
2220 * Save for getpeername.
2221 */
2222 if (so->so_family != AF_UNIX) {
2223 sti->sti_faddr_len = (socklen_t)addrlen;
2224 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
2225 bcopy(addr, sti->sti_faddr_sa, addrlen);
2226 sti->sti_faddr_valid = 1;
2227 }
2228
2229 if (so->so_peercred != NULL)
2230 crfree(so->so_peercred);
2231 so->so_peercred = msg_getcred(mp, &so->so_cpid);
2232 if (so->so_peercred != NULL)
2233 crhold(so->so_peercred);
2234
2235 /* Wakeup anybody sleeping in sowaitconnected */
2236 soisconnected(so);
2237 mutex_exit(&so->so_lock);
2238
2239 /*
2240 * The socket is now available for sending data.
2241 */
2242 *wakeups = WSLEEP;
2243 *allmsgsigs = S_OUTPUT;
2244 *pollwakeups = POLLOUT;
2245 freemsg(mp);
2246 return (NULL);
2247 }
2248
2249 case T_CONN_IND:
2250 /*
2251 * Verify the min size and queue the message on
2252 * the sti_conn_ind_head/tail list.
2253 */
2254 if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
2255 zcmn_err(getzoneid(), CE_WARN,
2256 "sockfs: Too short T_CONN_IND. Len = %ld\n",
2257 (ptrdiff_t)(MBLKL(mp)));
2258 freemsg(mp);
2259 return (NULL);
2260 }
2261
2262 if (auditing)
2263 audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
2264 if (!(so->so_state & SS_ACCEPTCONN)) {
2265 zcmn_err(getzoneid(), CE_WARN,
2266 "sockfs: T_conn_ind on non-listening socket\n");
2267 freemsg(mp);
2268 return (NULL);
2269 }
2270
2271 soqueueconnind(so, mp);
2272 *allmsgsigs = S_INPUT | S_RDNORM;
2273 *pollwakeups = POLLIN | POLLRDNORM;
2274 *wakeups = RSLEEP;
2275 return (NULL);
2276
2277 case T_ORDREL_IND:
2278 if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
2279 zcmn_err(getzoneid(), CE_WARN,
2280 "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
2281 (ptrdiff_t)(MBLKL(mp)));
2282 freemsg(mp);
2283 return (NULL);
2284 }
2285
2286 /*
2287 * Some providers send this when not fully connected.
2288 * SunLink X.25 needs to retrieve disconnect reason after
2289 * disconnect for compatibility. It uses T_ORDREL_IND
2290 * instead of T_DISCON_IND so that it may use the
2291 * endpoint after a connect failure to retrieve the
2292 * reason using an ioctl. Thus we explicitly clear
2293 * SS_ISCONNECTING here for SunLink X.25.
2294 * This is a needed TPI violation.
2295 */
2296 mutex_enter(&so->so_lock);
2297 so->so_state &= ~SS_ISCONNECTING;
2298 socantrcvmore(so);
2299 mutex_exit(&so->so_lock);
2300 strseteof(SOTOV(so), 1);
2301 /*
2302 * strseteof takes care of read side wakeups,
2303 * pollwakeups, and signals.
2304 */
2305 freemsg(mp);
2306 return (NULL);
2307
2308 case T_DISCON_IND:
2309 if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
2310 zcmn_err(getzoneid(), CE_WARN,
2311 "sockfs: Too short T_DISCON_IND. Len = %ld\n",
2312 (ptrdiff_t)(MBLKL(mp)));
2313 freemsg(mp);
2314 return (NULL);
2315 }
2316 if (so->so_state & SS_ACCEPTCONN) {
2317 /*
2318 * This is a listener. Look for a queued T_CONN_IND
2319 * with a matching sequence number and remove it
2320 * from the list.
2321 * It is normal to not find the sequence number since
2322 * the soaccept might have already dequeued it
2323 * (in which case the T_CONN_RES will fail with
2324 * TBADSEQ).
2325 */
2326 (void) soflushconnind(so, tpr->discon_ind.SEQ_number);
2327 freemsg(mp);
2328 return (0);
2329 }
2330
2331 /*
2332 * Not a listener
2333 *
2334 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
2335 * Such a discon_ind appears when the peer has first done
2336 * a shutdown() followed by a close() in which case we just
2337 * want to record socantsendmore.
2338 * In this case sockfs first receives a T_ORDREL_IND followed
2339 * by a T_DISCON_IND.
2340 * Note that for other transports (e.g. TCP) we need to handle
2341 * the discon_ind in this case since it signals an error.
2342 */
2343 mutex_enter(&so->so_lock);
2344 if ((so->so_state & SS_CANTRCVMORE) &&
2345 (so->so_family == AF_UNIX)) {
2346 socantsendmore(so);
2347 sti->sti_faddr_valid = 0;
2348 mutex_exit(&so->so_lock);
2349 strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2350 dprintso(so, 1,
2351 ("T_DISCON_IND: error %d\n", so->so_error));
2352 freemsg(mp);
2353 /*
2354 * Set these variables for caller to process them.
2355 * For the else part where T_DISCON_IND is processed,
2356 * this will be done in the function being called
2357 * (strsock_discon_ind())
2358 */
2359 *wakeups = WSLEEP;
2360 *allmsgsigs = S_OUTPUT;
2361 *pollwakeups = POLLOUT;
2362 } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
2363 /*
2364 * Deferred processing of T_DISCON_IND
2365 */
2366 so_save_discon_ind(so, mp, strsock_discon_ind);
2367 mutex_exit(&so->so_lock);
2368 } else {
2369 /*
2370 * Process T_DISCON_IND now
2371 */
2372 (void) strsock_discon_ind(so, mp);
2373 mutex_exit(&so->so_lock);
2374 }
2375 return (NULL);
2376
2377 case T_UDERROR_IND: {
2378 struct T_uderror_ind *tudi = &tpr->uderror_ind;
2379 void *addr;
2380 t_uscalar_t addrlen;
2381 int error;
2382
2383 dprintso(so, 0,
2384 ("T_UDERROR_IND: error %d\n", tudi->ERROR_type));
2385
2386 if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
2387 zcmn_err(getzoneid(), CE_WARN,
2388 "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
2389 (ptrdiff_t)(MBLKL(mp)));
2390 freemsg(mp);
2391 return (NULL);
2392 }
2393 /* Ignore on connection-oriented transports */
2394 if (so->so_mode & SM_CONNREQUIRED) {
2395 freemsg(mp);
2396 eprintsoline(so, 0);
2397 zcmn_err(getzoneid(), CE_WARN,
2398 "sockfs: T_uderror_ind on connection-oriented "
2399 "transport\n");
2400 return (NULL);
2401 }
2402 addrlen = tudi->DEST_length;
2403 addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
2404 if (addr == NULL) {
2405 zcmn_err(getzoneid(), CE_WARN,
2406 "sockfs: T_uderror_ind with invalid "
2407 "addrlen/offset %u/%d\n",
2408 addrlen, tudi->DEST_offset);
2409 freemsg(mp);
2410 return (NULL);
2411 }
2412
2413 /* Verify source address for connected socket. */
2414 mutex_enter(&so->so_lock);
2415 if (so->so_state & SS_ISCONNECTED) {
2416 void *faddr;
2417 t_uscalar_t faddr_len;
2418 boolean_t match = B_FALSE;
2419
2420 switch (so->so_family) {
2421 case AF_INET: {
2422 /* Compare just IP address and port */
2423 struct sockaddr_in *sin1, *sin2;
2424
2425 sin1 = (struct sockaddr_in *)sti->sti_faddr_sa;
2426 sin2 = (struct sockaddr_in *)addr;
2427 if (addrlen == sizeof (struct sockaddr_in) &&
2428 sin1->sin_port == sin2->sin_port &&
2429 sin1->sin_addr.s_addr ==
2430 sin2->sin_addr.s_addr)
2431 match = B_TRUE;
2432 break;
2433 }
2434 case AF_INET6: {
2435 /* Compare just IP address and port. Not flow */
2436 struct sockaddr_in6 *sin1, *sin2;
2437
2438 sin1 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
2439 sin2 = (struct sockaddr_in6 *)addr;
2440 if (addrlen == sizeof (struct sockaddr_in6) &&
2441 sin1->sin6_port == sin2->sin6_port &&
2442 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
2443 &sin2->sin6_addr))
2444 match = B_TRUE;
2445 break;
2446 }
2447 case AF_UNIX:
2448 faddr = &sti->sti_ux_faddr;
2449 faddr_len =
2450 (t_uscalar_t)sizeof (sti->sti_ux_faddr);
2451 if (faddr_len == addrlen &&
2452 bcmp(addr, faddr, addrlen) == 0)
2453 match = B_TRUE;
2454 break;
2455 default:
2456 faddr = sti->sti_faddr_sa;
2457 faddr_len = (t_uscalar_t)sti->sti_faddr_len;
2458 if (faddr_len == addrlen &&
2459 bcmp(addr, faddr, addrlen) == 0)
2460 match = B_TRUE;
2461 break;
2462 }
2463
2464 if (!match) {
2465 #ifdef DEBUG
2466 dprintso(so, 0,
2467 ("sockfs: T_UDERR_IND mismatch: %s - ",
2468 pr_addr(so->so_family,
2469 (struct sockaddr *)addr, addrlen)));
2470 dprintso(so, 0, ("%s\n",
2471 pr_addr(so->so_family, sti->sti_faddr_sa,
2472 sti->sti_faddr_len)));
2473 #endif /* DEBUG */
2474 mutex_exit(&so->so_lock);
2475 freemsg(mp);
2476 return (NULL);
2477 }
2478 /*
2479 * Make the write error nonpersistent. If the error
2480 * is zero we use ECONNRESET.
2481 * This assumes that the name space for ERROR_type
2482 * is the errno name space.
2483 */
2484 if (tudi->ERROR_type != 0)
2485 error = tudi->ERROR_type;
2486 else
2487 error = ECONNRESET;
2488
2489 soseterror(so, error);
2490 mutex_exit(&so->so_lock);
2491 strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2492 strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2493 *wakeups = RSLEEP | WSLEEP;
2494 *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
2495 *pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
2496 freemsg(mp);
2497 return (NULL);
2498 }
2499 /*
2500 * If the application asked for delayed errors
2501 * record the T_UDERROR_IND sti_eaddr_mp and the reason in
2502 * sti_delayed_error for delayed error posting. If the reason
2503 * is zero use ECONNRESET.
2504 * Note that delayed error indications do not make sense for
2505 * AF_UNIX sockets since sendto checks that the destination
2506 * address is valid at the time of the sendto.
2507 */
2508 if (!(so->so_options & SO_DGRAM_ERRIND)) {
2509 mutex_exit(&so->so_lock);
2510 freemsg(mp);
2511 return (NULL);
2512 }
2513 if (sti->sti_eaddr_mp != NULL)
2514 freemsg(sti->sti_eaddr_mp);
2515
2516 sti->sti_eaddr_mp = mp;
2517 if (tudi->ERROR_type != 0)
2518 error = tudi->ERROR_type;
2519 else
2520 error = ECONNRESET;
2521 sti->sti_delayed_error = (ushort_t)error;
2522 mutex_exit(&so->so_lock);
2523 return (NULL);
2524 }
2525
2526 case T_ERROR_ACK:
2527 dprintso(so, 0,
2528 ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
2529 tpr->error_ack.ERROR_prim,
2530 tpr->error_ack.TLI_error,
2531 tpr->error_ack.UNIX_error));
2532
2533 if (MBLKL(mp) < sizeof (struct T_error_ack)) {
2534 zcmn_err(getzoneid(), CE_WARN,
2535 "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
2536 (ptrdiff_t)(MBLKL(mp)));
2537 freemsg(mp);
2538 return (NULL);
2539 }
2540 /*
2541 * Check if we were waiting for the async message
2542 */
2543 mutex_enter(&so->so_lock);
2544 if ((so->so_flag & SOASYNC_UNBIND) &&
2545 tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
2546 so_unlock_single(so, SOASYNC_UNBIND);
2547 mutex_exit(&so->so_lock);
2548 freemsg(mp);
2549 return (NULL);
2550 }
2551 mutex_exit(&so->so_lock);
2552 soqueueack(so, mp);
2553 return (NULL);
2554
2555 case T_OK_ACK:
2556 if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
2557 zcmn_err(getzoneid(), CE_WARN,
2558 "sockfs: Too short T_OK_ACK. Len = %ld\n",
2559 (ptrdiff_t)(MBLKL(mp)));
2560 freemsg(mp);
2561 return (NULL);
2562 }
2563 /*
2564 * Check if we were waiting for the async message
2565 */
2566 mutex_enter(&so->so_lock);
2567 if ((so->so_flag & SOASYNC_UNBIND) &&
2568 tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
2569 dprintso(so, 1,
2570 ("strsock_proto: T_OK_ACK async unbind\n"));
2571 so_unlock_single(so, SOASYNC_UNBIND);
2572 mutex_exit(&so->so_lock);
2573 freemsg(mp);
2574 return (NULL);
2575 }
2576 mutex_exit(&so->so_lock);
2577 soqueueack(so, mp);
2578 return (NULL);
2579
2580 case T_INFO_ACK:
2581 if (MBLKL(mp) < sizeof (struct T_info_ack)) {
2582 zcmn_err(getzoneid(), CE_WARN,
2583 "sockfs: Too short T_INFO_ACK. Len = %ld\n",
2584 (ptrdiff_t)(MBLKL(mp)));
2585 freemsg(mp);
2586 return (NULL);
2587 }
2588 soqueueack(so, mp);
2589 return (NULL);
2590
2591 case T_CAPABILITY_ACK:
2592 /*
2593 * A T_capability_ack need only be large enough to hold
2594 * the PRIM_type and CAP_bits1 fields; checking for anything
2595 * larger might reject a correct response from an older
2596 * provider.
2597 */
2598 if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
2599 zcmn_err(getzoneid(), CE_WARN,
2600 "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
2601 (ptrdiff_t)(MBLKL(mp)));
2602 freemsg(mp);
2603 return (NULL);
2604 }
2605 soqueueack(so, mp);
2606 return (NULL);
2607
2608 case T_BIND_ACK:
2609 if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
2610 zcmn_err(getzoneid(), CE_WARN,
2611 "sockfs: Too short T_BIND_ACK. Len = %ld\n",
2612 (ptrdiff_t)(MBLKL(mp)));
2613 freemsg(mp);
2614 return (NULL);
2615 }
2616 soqueueack(so, mp);
2617 return (NULL);
2618
2619 case T_OPTMGMT_ACK:
2620 if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
2621 zcmn_err(getzoneid(), CE_WARN,
2622 "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
2623 (ptrdiff_t)(MBLKL(mp)));
2624 freemsg(mp);
2625 return (NULL);
2626 }
2627 soqueueack(so, mp);
2628 return (NULL);
2629 default:
2630 #ifdef DEBUG
2631 zcmn_err(getzoneid(), CE_WARN,
2632 "sockfs: unknown TPI primitive %d received\n",
2633 tpr->type);
2634 #endif /* DEBUG */
2635 freemsg(mp);
2636 return (NULL);
2637 }
2638 }
2639
2640 /*
2641 * This routine is registered with the stream head to receive other
2642 * (non-data, and non-proto) messages.
2643 *
2644 * Returns NULL if the message was consumed.
2645 * Returns an mblk to make that mblk be processed by the stream head.
2646 *
2647 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
2648 * *pollwakeups) for the stream head to take action on.
2649 */
2650 static mblk_t *
strsock_misc(vnode_t * vp,mblk_t * mp,strwakeup_t * wakeups,strsigset_t * firstmsgsigs,strsigset_t * allmsgsigs,strpollset_t * pollwakeups)2651 strsock_misc(vnode_t *vp, mblk_t *mp,
2652 strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
2653 strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
2654 {
2655 struct sonode *so;
2656 sotpi_info_t *sti;
2657
2658 so = VTOSO(vp);
2659 sti = SOTOTPI(so);
2660
2661 dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
2662 (void *)vp, (void *)mp, DB_TYPE(mp)));
2663
2664 /* Set default return values */
2665 *wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;
2666
2667 switch (DB_TYPE(mp)) {
2668 case M_PCSIG:
2669 /*
2670 * This assumes that an M_PCSIG for the urgent data arrives
2671 * before the corresponding T_EXDATA_IND.
2672 *
2673 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
2674 * awoken before the urgent data shows up.
2675 * For OOBINLINE this can result in select returning
2676 * only exceptions as opposed to except|read.
2677 */
2678 if (*mp->b_rptr == SIGURG) {
2679 mutex_enter(&so->so_lock);
2680 dprintso(so, 1,
2681 ("SIGURG(%p): counts %d/%d state %s\n",
2682 (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
2683 pr_state(so->so_state, so->so_mode)));
2684 so_oob_sig(so, 1, allmsgsigs, pollwakeups);
2685 dprintso(so, 1,
2686 ("after SIGURG(%p): counts %d/%d "
2687 " poll 0x%x sig 0x%x state %s\n",
2688 (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
2689 *pollwakeups, *allmsgsigs,
2690 pr_state(so->so_state, so->so_mode)));
2691 mutex_exit(&so->so_lock);
2692 }
2693 freemsg(mp);
2694 return (NULL);
2695
2696 case M_SIG:
2697 case M_HANGUP:
2698 case M_UNHANGUP:
2699 case M_ERROR:
2700 /* M_ERRORs etc are ignored */
2701 freemsg(mp);
2702 return (NULL);
2703
2704 case M_FLUSH:
2705 /*
2706 * Do not flush read queue. If the M_FLUSH
2707 * arrives because of an impending T_discon_ind
2708 * we still have to keep any queued data - this is part of
2709 * socket semantics.
2710 */
2711 if (*mp->b_rptr & FLUSHW) {
2712 *mp->b_rptr &= ~FLUSHR;
2713 return (mp);
2714 }
2715 freemsg(mp);
2716 return (NULL);
2717
2718 default:
2719 return (mp);
2720 }
2721 }
2722
2723
2724 /* Register to receive signals for certain events */
2725 int
so_set_asyncsigs(vnode_t * vp,pid_t pgrp,int events,int mode,cred_t * cr)2726 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
2727 {
2728 struct strsigset ss;
2729 int32_t rval;
2730
2731 /*
2732 * Note that SOLOCKED will be set except for the call from soaccept().
2733 */
2734 ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
2735 ss.ss_pid = pgrp;
2736 ss.ss_events = events;
2737 return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
2738 &rval));
2739 }
2740
2741
2742 /* Register for events matching the SS_ASYNC flag */
2743 int
so_set_events(struct sonode * so,vnode_t * vp,cred_t * cr)2744 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
2745 {
2746 int events = so->so_state & SS_ASYNC ?
2747 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2748 S_RDBAND | S_BANDURG;
2749
2750 return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
2751 }
2752
2753
2754 /* Change the SS_ASYNC flag, and update signal delivery if needed */
2755 int
so_flip_async(struct sonode * so,vnode_t * vp,int mode,cred_t * cr)2756 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
2757 {
2758 ASSERT(mutex_owned(&so->so_lock));
2759 if (so->so_pgrp != 0) {
2760 int error;
2761 int events = so->so_state & SS_ASYNC ? /* Old flag */
2762 S_RDBAND | S_BANDURG : /* New sigs */
2763 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;
2764
2765 so_lock_single(so);
2766 mutex_exit(&so->so_lock);
2767
2768 error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);
2769
2770 mutex_enter(&so->so_lock);
2771 so_unlock_single(so, SOLOCKED);
2772 if (error)
2773 return (error);
2774 }
2775 so->so_state ^= SS_ASYNC;
2776 return (0);
2777 }
2778
2779 /*
2780 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
2781 * any existing one. If passed zero, just clear the existing one.
2782 */
2783 int
so_set_siggrp(struct sonode * so,vnode_t * vp,pid_t pgrp,int mode,cred_t * cr)2784 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
2785 {
2786 int events = so->so_state & SS_ASYNC ?
2787 S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2788 S_RDBAND | S_BANDURG;
2789 int error;
2790
2791 ASSERT(mutex_owned(&so->so_lock));
2792
2793 /*
2794 * Change socket process (group).
2795 *
2796 * strioctl (via so_set_asyncsigs) will perform permission check and
2797 * also keep a PID_HOLD to prevent the pid from being reused.
2798 */
2799 so_lock_single(so);
2800 mutex_exit(&so->so_lock);
2801
2802 if (pgrp != 0) {
2803 dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
2804 pgrp, events));
2805 error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
2806 if (error != 0) {
2807 eprintsoline(so, error);
2808 goto bad;
2809 }
2810 }
2811 /* Remove the previously registered process/group */
2812 if (so->so_pgrp != 0) {
2813 dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
2814 error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
2815 if (error != 0) {
2816 eprintsoline(so, error);
2817 error = 0;
2818 }
2819 }
2820 mutex_enter(&so->so_lock);
2821 so_unlock_single(so, SOLOCKED);
2822 so->so_pgrp = pgrp;
2823 return (0);
2824 bad:
2825 mutex_enter(&so->so_lock);
2826 so_unlock_single(so, SOLOCKED);
2827 return (error);
2828 }
2829
2830 /*
2831 * Wrapper for getmsg. If the socket has been converted to a stream
2832 * pass the request to the stream head.
2833 */
2834 int
sock_getmsg(struct vnode * vp,struct strbuf * mctl,struct strbuf * mdata,uchar_t * prip,int * flagsp,int fmode,rval_t * rvp)2835 sock_getmsg(
2836 struct vnode *vp,
2837 struct strbuf *mctl,
2838 struct strbuf *mdata,
2839 uchar_t *prip,
2840 int *flagsp,
2841 int fmode,
2842 rval_t *rvp
2843 )
2844 {
2845 struct sonode *so;
2846
2847 ASSERT(vp->v_type == VSOCK);
2848 /*
2849 * Use the stream head to find the real socket vnode.
2850 * This is needed when namefs sits above sockfs. Some
2851 * sockets (like SCTP) are not streams.
2852 */
2853 if (!vp->v_stream) {
2854 return (ENOSTR);
2855 }
2856 ASSERT(vp->v_stream->sd_vnode);
2857 vp = vp->v_stream->sd_vnode;
2858 ASSERT(vn_matchops(vp, socket_vnodeops));
2859 so = VTOSO(vp);
2860
2861 dprintso(so, 1, ("sock_getmsg(%p) %s\n",
2862 (void *)so, pr_state(so->so_state, so->so_mode)));
2863
2864 if (so->so_version == SOV_STREAM) {
2865 /* The imaginary "sockmod" has been popped - act as a stream */
2866 return (strgetmsg(vp, mctl, mdata, prip, flagsp, fmode, rvp));
2867 }
2868 eprintsoline(so, ENOSTR);
2869 return (ENOSTR);
2870 }
2871
2872 /*
2873 * Wrapper for putmsg. If the socket has been converted to a stream
2874 * pass the request to the stream head.
2875 *
2876 * Note that a while a regular socket (SOV_SOCKSTREAM) does support the
2877 * streams ioctl set it does not support putmsg and getmsg.
2878 * Allowing putmsg would prevent sockfs from tracking the state of
2879 * the socket/transport and would also invalidate the locking in sockfs.
2880 */
2881 int
sock_putmsg(struct vnode * vp,struct strbuf * mctl,struct strbuf * mdata,uchar_t pri,int flag,int fmode)2882 sock_putmsg(
2883 struct vnode *vp,
2884 struct strbuf *mctl,
2885 struct strbuf *mdata,
2886 uchar_t pri,
2887 int flag,
2888 int fmode
2889 )
2890 {
2891 struct sonode *so;
2892
2893 ASSERT(vp->v_type == VSOCK);
2894 /*
2895 * Use the stream head to find the real socket vnode.
2896 * This is needed when namefs sits above sockfs.
2897 */
2898 if (!vp->v_stream) {
2899 return (ENOSTR);
2900 }
2901 ASSERT(vp->v_stream->sd_vnode);
2902 vp = vp->v_stream->sd_vnode;
2903 ASSERT(vn_matchops(vp, socket_vnodeops));
2904 so = VTOSO(vp);
2905
2906 dprintso(so, 1, ("sock_putmsg(%p) %s\n",
2907 (void *)so, pr_state(so->so_state, so->so_mode)));
2908
2909 if (so->so_version == SOV_STREAM) {
2910 /* The imaginary "sockmod" has been popped - act as a stream */
2911 return (strputmsg(vp, mctl, mdata, pri, flag, fmode));
2912 }
2913 eprintsoline(so, ENOSTR);
2914 return (ENOSTR);
2915 }
2916
2917 /*
2918 * Special function called only from f_getfl().
2919 * Returns FASYNC if the SS_ASYNC flag is set on a socket, else 0.
2920 * No locks are acquired here, so it is safe to use while uf_lock is held.
2921 * This exists solely for BSD fcntl() FASYNC compatibility.
2922 */
2923 int
sock_getfasync(vnode_t * vp)2924 sock_getfasync(vnode_t *vp)
2925 {
2926 struct sonode *so;
2927
2928 ASSERT(vp->v_type == VSOCK);
2929 /*
2930 * For stream model, v_stream is used; For non-stream, v_stream always
2931 * equals NULL
2932 */
2933 if (vp->v_stream != NULL)
2934 so = VTOSO(vp->v_stream->sd_vnode);
2935 else
2936 so = VTOSO(vp);
2937
2938 if (so->so_version == SOV_STREAM || !(so->so_state & SS_ASYNC))
2939 return (0);
2940
2941 return (FASYNC);
2942 }
2943