1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2015, Joyent, Inc. All rights reserved.
26 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
27 * Copyright 2022 Garrett D'Amore
28 * Copyright 2024 Oxide Computer Company
29 */
30
31 #include <sys/types.h>
32 #include <sys/t_lock.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/buf.h>
36 #include <sys/conf.h>
37 #include <sys/cred.h>
38 #include <sys/kmem.h>
39 #include <sys/sysmacros.h>
40 #include <sys/vfs.h>
41 #include <sys/vfs_opreg.h>
42 #include <sys/vnode.h>
43 #include <sys/debug.h>
44 #include <sys/errno.h>
45 #include <sys/time.h>
46 #include <sys/file.h>
47 #include <sys/open.h>
48 #include <sys/user.h>
49 #include <sys/termios.h>
50 #include <sys/stream.h>
51 #include <sys/strsubr.h>
52 #include <sys/strsun.h>
53 #include <sys/esunddi.h>
54 #include <sys/flock.h>
55 #include <sys/modctl.h>
56 #include <sys/cmn_err.h>
57 #include <sys/mkdev.h>
58 #include <sys/pathname.h>
59 #include <sys/ddi.h>
60 #include <sys/stat.h>
61 #include <sys/fs/snode.h>
62 #include <sys/fs/dv_node.h>
63 #include <fs/fs_subr.h>
64 #include <sys/zone.h>
65
66 #include <sys/socket.h>
67 #include <sys/socketvar.h>
68 #include <netinet/in.h>
69 #include <sys/un.h>
70 #include <sys/ucred.h>
71
72 #include <sys/tiuser.h>
73 #define _SUN_TPI_VERSION 2
74 #include <sys/tihdr.h>
75
76 #include <c2/audit.h>
77
78 #include <fs/sockfs/sockcommon.h>
79 #include <fs/sockfs/sockfilter_impl.h>
80 #include <fs/sockfs/socktpi.h>
81 #include <fs/sockfs/socktpi_impl.h>
82 #include <fs/sockfs/sodirect.h>
83
/*
 * Macros that operate on struct cmsghdr.
 *
 * CMSG_CONTENT yields a pointer to the payload that immediately follows the
 * header; CMSG_CONTENTLEN is the number of payload bytes implied by cmsg_len.
 *
 * CMSG_VALID checks that `cmsg' is an aligned header lying inside
 * [start, end) whose cmsg_len covers at least the header and does not run
 * past `end'.  It does not assume that the last option buffer is padded.
 *
 * The room left in the buffer is checked before cmsg_len is read, so a
 * trailing fragment shorter than a header is never dereferenced, and cmsg_len
 * is compared against the remaining room instead of being added to the
 * pointer, so a huge cmsg_len cannot wrap the address arithmetic.
 */
#define	CMSG_CONTENT(cmsg)	(&((cmsg)[1]))
#define	CMSG_CONTENTLEN(cmsg)	((cmsg)->cmsg_len - sizeof (struct cmsghdr))
#define	CMSG_VALID(cmsg, start, end)					\
	(ISALIGNED_cmsghdr(cmsg) &&					\
	((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&			\
	((uintptr_t)(cmsg) < (uintptr_t)(end)) &&			\
	((uintptr_t)(end) - (uintptr_t)(cmsg) >=			\
	    sizeof (struct cmsghdr)) &&					\
	((cmsg)->cmsg_len >= sizeof (struct cmsghdr)) &&		\
	((cmsg)->cmsg_len <= (uintptr_t)(end) - (uintptr_t)(cmsg)))
#define	SO_LOCK_WAKEUP_TIME	3000	/* Wakeup time in milliseconds */
97
98 dev_t sockdev; /* For fsid in getattr */
99
100 struct socklist socklist;
101
102 struct kmem_cache *socket_cache;
103
104 /*
105 * This is a global vfs_t that we have to maintain as the solitary vfs_t that is
106 * used across all sockfs vnodes. This ensures that we have a reasonable vfs_t
107 * present that points to our ops vectors.
108 */
109 vfs_t *sock_vfsp;
110 static struct vfsops *sockfs_vfsops;
111
112 /*
113 * sockconf_lock protects the socket configuration (socket types and
114 * socket filters) which is changed via the sockconfig system call.
115 */
116 krwlock_t sockconf_lock;
117
118 static int sockfs_update(kstat_t *, int);
119 static int sockfs_snapshot(kstat_t *, void *, int);
120 extern smod_info_t *sotpi_smod_create(void);
121
122 extern void sendfile_init();
123
124 extern int modrootloaded;
125
126 /*
127 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode.
128 * Returns with the vnode held.
129 */
130 int
sogetvp(char * devpath,vnode_t ** vpp,int uioflag)131 sogetvp(char *devpath, vnode_t **vpp, int uioflag)
132 {
133 struct snode *csp;
134 vnode_t *vp, *dvp;
135 major_t maj;
136 int error;
137
138 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE);
139
140 /*
141 * Lookup the underlying filesystem vnode.
142 */
143 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp);
144 if (error)
145 return (error);
146
147 /* Check that it is the correct vnode */
148 if (vp->v_type != VCHR) {
149 VN_RELE(vp);
150 return (ENOTSOCK);
151 }
152
153 /*
154 * If devpath went through devfs, the device should already
155 * be configured. If devpath is a mknod file, however, we
156 * need to make sure the device is properly configured.
157 * To do this, we do something similar to spec_open()
158 * except that we resolve to the minor/leaf level since
159 * we need to return a vnode.
160 */
161 csp = VTOS(VTOS(vp)->s_commonvp);
162 if (!(csp->s_flag & SDIPSET)) {
163 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
164 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname);
165 if (error == 0)
166 error = devfs_lookupname(pathname, NULLVPP, &dvp);
167 VN_RELE(vp);
168 kmem_free(pathname, MAXPATHLEN);
169 if (error != 0)
170 return (ENXIO);
171 vp = dvp; /* use the devfs vp */
172 }
173
174 /* device is configured at this point */
175 maj = getmajor(vp->v_rdev);
176 if (!STREAMSTAB(maj)) {
177 VN_RELE(vp);
178 return (ENOSTR);
179 }
180
181 *vpp = vp;
182 return (0);
183 }
184
185 /*
186 * Update the accessed, updated, or changed times in an sonode
187 * with the current time.
188 *
189 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable
190 * attributes in a fstat call. (They return the current time and 0 for
191 * all timestamps, respectively.) We maintain the current timestamps
192 * here primarily so that should sockmod be popped the resulting
193 * file descriptor will behave like a stream w.r.t. the timestamps.
194 */
195 void
so_update_attrs(struct sonode * so,int flag)196 so_update_attrs(struct sonode *so, int flag)
197 {
198 time_t now = gethrestime_sec();
199
200 if (SOCK_IS_NONSTR(so))
201 return;
202
203 mutex_enter(&so->so_lock);
204 so->so_flag |= flag;
205 if (flag & SOACC)
206 SOTOTPI(so)->sti_atime = now;
207 if (flag & SOMOD)
208 SOTOTPI(so)->sti_mtime = now;
209 mutex_exit(&so->so_lock);
210 }
211
212 extern so_create_func_t sock_comm_create_function;
213 extern so_destroy_func_t sock_comm_destroy_function;
214
215 /*
216 * Init function called when sockfs is loaded.
217 */
218 int
sockinit(int fstype,char * name)219 sockinit(int fstype, char *name)
220 {
221 static const fs_operation_def_t sock_vfsops_template[] = {
222 { VFSNAME_STATVFS, { .vfs_statvfs = sockfs_statvfs } },
223 { NULL, NULL }
224 };
225 int error;
226 major_t dev;
227 char *err_str;
228
229 error = vfs_setfsops(fstype, sock_vfsops_template, &sockfs_vfsops);
230 if (error != 0) {
231 zcmn_err(GLOBAL_ZONEID, CE_WARN,
232 "sockinit: bad vfs ops template");
233 return (error);
234 }
235
236 error = vn_make_ops(name, socket_vnodeops_template,
237 &socket_vnodeops);
238 if (error != 0) {
239 err_str = "sockinit: bad socket vnode ops template";
240 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */
241 socket_vnodeops = NULL;
242 goto failure;
243 }
244
245 socket_cache = kmem_cache_create("socket_cache",
246 sizeof (struct sonode), 0, sonode_constructor,
247 sonode_destructor, NULL, NULL, NULL, 0);
248
249 rw_init(&sockconf_lock, NULL, RW_DEFAULT, NULL);
250
251 error = socktpi_init();
252 if (error != 0) {
253 err_str = NULL;
254 goto failure;
255 }
256
257 error = sod_init();
258 if (error != 0) {
259 err_str = NULL;
260 goto failure;
261 }
262
263 /*
264 * Set up the default create and destroy functions
265 */
266 sock_comm_create_function = socket_sonode_create;
267 sock_comm_destroy_function = socket_sonode_destroy;
268
269 /*
270 * Build initial list mapping socket parameters to vnode.
271 */
272 smod_init();
273 smod_add(sotpi_smod_create());
274
275 sockparams_init();
276
277 /*
278 * If sockets are needed before init runs /sbin/soconfig
279 * it is possible to preload the sockparams list here using
280 * calls like:
281 * sockconfig(1,2,3, "/dev/tcp", 0);
282 */
283
284 /*
285 * Create a unique dev_t for use in so_fsid.
286 */
287
288 if ((dev = getudev()) == (major_t)-1)
289 dev = 0;
290 sockdev = makedevice(dev, 0);
291
292 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL);
293 sendfile_init();
294
295 /* Initialize socket filters */
296 sof_init();
297
298 sock_vfsp = fs_vfsp_global(sockfs_vfsops, sockdev, fstype,
299 PAGESIZE);
300
301 return (0);
302
303 failure:
304 (void) vfs_freevfsops_by_type(fstype);
305 if (socket_vnodeops != NULL)
306 vn_freevnodeops(socket_vnodeops);
307 if (err_str != NULL)
308 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str);
309 return (error);
310 }
311
312 /*
313 * Caller must hold the mutex. Used to set SOLOCKED.
314 */
315 void
so_lock_single(struct sonode * so)316 so_lock_single(struct sonode *so)
317 {
318 ASSERT(MUTEX_HELD(&so->so_lock));
319
320 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) {
321 cv_wait_stop(&so->so_single_cv, &so->so_lock,
322 SO_LOCK_WAKEUP_TIME);
323 }
324 so->so_flag |= SOLOCKED;
325 }
326
327 /*
328 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND.
329 * Used to clear SOLOCKED or SOASYNC_UNBIND.
330 */
331 void
so_unlock_single(struct sonode * so,int flag)332 so_unlock_single(struct sonode *so, int flag)
333 {
334 ASSERT(MUTEX_HELD(&so->so_lock));
335 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND));
336 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0);
337 ASSERT(so->so_flag & flag);
338 /*
339 * Process the T_DISCON_IND on sti_discon_ind_mp.
340 *
341 * Call to so_drain_discon_ind will result in so_lock
342 * being dropped and re-acquired later.
343 */
344 if (!SOCK_IS_NONSTR(so)) {
345 sotpi_info_t *sti = SOTOTPI(so);
346
347 if (sti->sti_discon_ind_mp != NULL)
348 so_drain_discon_ind(so);
349 }
350
351 cv_signal(&so->so_single_cv);
352 so->so_flag &= ~flag;
353 }
354
355 /*
356 * Caller must hold the mutex. Used to set SOREADLOCKED.
357 * If the caller wants nonblocking behavior it should set fmode.
358 */
359 int
so_lock_read(struct sonode * so,int fmode)360 so_lock_read(struct sonode *so, int fmode)
361 {
362 ASSERT(MUTEX_HELD(&so->so_lock));
363
364 while (so->so_flag & SOREADLOCKED) {
365 if (fmode & (FNDELAY|FNONBLOCK))
366 return (EWOULDBLOCK);
367 cv_wait_stop(&so->so_read_cv, &so->so_lock,
368 SO_LOCK_WAKEUP_TIME);
369 }
370 so->so_flag |= SOREADLOCKED;
371 return (0);
372 }
373
374 /*
375 * Like so_lock_read above but allows signals.
376 */
377 int
so_lock_read_intr(struct sonode * so,int fmode)378 so_lock_read_intr(struct sonode *so, int fmode)
379 {
380 ASSERT(MUTEX_HELD(&so->so_lock));
381
382 while (so->so_flag & SOREADLOCKED) {
383 if (fmode & (FNDELAY|FNONBLOCK))
384 return (EWOULDBLOCK);
385 if (!cv_wait_sig(&so->so_read_cv, &so->so_lock))
386 return (EINTR);
387 }
388 so->so_flag |= SOREADLOCKED;
389 return (0);
390 }
391
392 /*
393 * Caller must hold the mutex. Used to clear SOREADLOCKED,
394 * set in so_lock_read() or so_lock_read_intr().
395 */
396 void
so_unlock_read(struct sonode * so)397 so_unlock_read(struct sonode *so)
398 {
399 ASSERT(MUTEX_HELD(&so->so_lock));
400 ASSERT(so->so_flag & SOREADLOCKED);
401
402 cv_signal(&so->so_read_cv);
403 so->so_flag &= ~SOREADLOCKED;
404 }
405
406 /*
407 * Verify that the specified offset falls within the mblk and
408 * that the resulting pointer is aligned.
409 * Returns NULL if not.
410 */
411 void *
sogetoff(mblk_t * mp,t_uscalar_t offset,t_uscalar_t length,uint_t align_size)412 sogetoff(mblk_t *mp, t_uscalar_t offset,
413 t_uscalar_t length, uint_t align_size)
414 {
415 uintptr_t ptr1, ptr2;
416
417 ASSERT(mp && mp->b_wptr >= mp->b_rptr);
418 ptr1 = (uintptr_t)mp->b_rptr + offset;
419 ptr2 = (uintptr_t)ptr1 + length;
420 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) {
421 eprintline(0);
422 return (NULL);
423 }
424 if ((ptr1 & (align_size - 1)) != 0) {
425 eprintline(0);
426 return (NULL);
427 }
428 return ((void *)ptr1);
429 }
430
431 /*
432 * Return the AF_UNIX underlying filesystem vnode matching a given name.
433 * Makes sure the sending and the destination sonodes are compatible.
434 * The vnode is returned held.
435 *
436 * The underlying filesystem VSOCK vnode has a v_stream pointer that
437 * references the actual stream head (hence indirectly the actual sonode).
438 */
439 static int
so_ux_lookup(struct sonode * so,struct sockaddr_un * soun,int checkaccess,vnode_t ** vpp)440 so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess,
441 vnode_t **vpp)
442 {
443 vnode_t *vp; /* Underlying filesystem vnode */
444 vnode_t *rvp; /* real vnode */
445 vnode_t *svp; /* sockfs vnode */
446 struct sonode *so2;
447 int error;
448
449 dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so,
450 soun->sun_path));
451
452 error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
453 if (error) {
454 eprintsoline(so, error);
455 return (error);
456 }
457
458 /*
459 * Traverse lofs mounts get the real vnode
460 */
461 if (VOP_REALVP(vp, &rvp, NULL) == 0) {
462 VN_HOLD(rvp); /* hold the real vnode */
463 VN_RELE(vp); /* release hold from lookup */
464 vp = rvp;
465 }
466
467 if (vp->v_type != VSOCK) {
468 error = ENOTSOCK;
469 eprintsoline(so, error);
470 goto done2;
471 }
472
473 if (checkaccess) {
474 /*
475 * Check that we have permissions to access the destination
476 * vnode. This check is not done in BSD but it is required
477 * by X/Open.
478 */
479 error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL);
480 if (error != 0) {
481 eprintsoline(so, error);
482 goto done2;
483 }
484 }
485
486 /*
487 * Check if the remote socket has been closed.
488 *
489 * Synchronize with vn_rele_stream by holding v_lock while traversing
490 * v_stream->sd_vnode.
491 */
492 mutex_enter(&vp->v_lock);
493 if (vp->v_stream == NULL) {
494 mutex_exit(&vp->v_lock);
495 if (so->so_type == SOCK_DGRAM)
496 error = EDESTADDRREQ;
497 else
498 error = ECONNREFUSED;
499
500 eprintsoline(so, error);
501 goto done2;
502 }
503 ASSERT(vp->v_stream->sd_vnode);
504 svp = vp->v_stream->sd_vnode;
505 /*
506 * holding v_lock on underlying filesystem vnode and acquiring
507 * it on sockfs vnode. Assumes that no code ever attempts to
508 * acquire these locks in the reverse order.
509 */
510 VN_HOLD(svp);
511 mutex_exit(&vp->v_lock);
512
513 if (svp->v_type != VSOCK) {
514 error = ENOTSOCK;
515 eprintsoline(so, error);
516 goto done;
517 }
518
519 so2 = VTOSO(svp);
520
521 if (so->so_type != so2->so_type) {
522 error = EPROTOTYPE;
523 eprintsoline(so, error);
524 goto done;
525 }
526
527 VN_RELE(svp);
528 *vpp = vp;
529 return (0);
530
531 done:
532 VN_RELE(svp);
533 done2:
534 VN_RELE(vp);
535 return (error);
536 }
537
538 /*
539 * Verify peer address for connect and sendto/sendmsg.
540 * Since sendto/sendmsg would not get synchronous errors from the transport
541 * provider we have to do these ugly checks in the socket layer to
542 * preserve compatibility with SunOS 4.X.
543 */
544 int
so_addr_verify(struct sonode * so,const struct sockaddr * name,socklen_t namelen)545 so_addr_verify(struct sonode *so, const struct sockaddr *name,
546 socklen_t namelen)
547 {
548 int family;
549
550 dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n",
551 (void *)so, (void *)name, namelen));
552
553 ASSERT(name != NULL);
554
555 family = so->so_family;
556 switch (family) {
557 case AF_INET:
558 if (name->sa_family != family) {
559 eprintsoline(so, EAFNOSUPPORT);
560 return (EAFNOSUPPORT);
561 }
562 if (namelen != (socklen_t)sizeof (struct sockaddr_in)) {
563 eprintsoline(so, EINVAL);
564 return (EINVAL);
565 }
566 break;
567 case AF_INET6: {
568 #ifdef DEBUG
569 struct sockaddr_in6 *sin6;
570 #endif /* DEBUG */
571
572 if (name->sa_family != family) {
573 eprintsoline(so, EAFNOSUPPORT);
574 return (EAFNOSUPPORT);
575 }
576 if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) {
577 eprintsoline(so, EINVAL);
578 return (EINVAL);
579 }
580 #ifdef DEBUG
581 /* Verify that apps don't forget to clear sin6_scope_id etc */
582 sin6 = (struct sockaddr_in6 *)name;
583 if (sin6->sin6_scope_id != 0 &&
584 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
585 zcmn_err(getzoneid(), CE_WARN,
586 "connect/send* with uninitialized sin6_scope_id "
587 "(%d) on socket. Pid = %d\n",
588 (int)sin6->sin6_scope_id, (int)curproc->p_pid);
589 }
590 #endif /* DEBUG */
591 break;
592 }
593 case AF_UNIX:
594 if (SOTOTPI(so)->sti_faddr_noxlate) {
595 return (0);
596 }
597 if (namelen < (socklen_t)sizeof (short)) {
598 eprintsoline(so, ENOENT);
599 return (ENOENT);
600 }
601 if (name->sa_family != family) {
602 eprintsoline(so, EAFNOSUPPORT);
603 return (EAFNOSUPPORT);
604 }
605 /* MAXPATHLEN + soun_family + nul termination */
606 if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
607 eprintsoline(so, ENAMETOOLONG);
608 return (ENAMETOOLONG);
609 }
610
611 break;
612
613 default:
614 /*
615 * Default is don't do any length or sa_family check
616 * to allow non-sockaddr style addresses.
617 */
618 break;
619 }
620
621 return (0);
622 }
623
624
625 /*
626 * Translate an AF_UNIX sockaddr_un to the transport internal name.
627 * Assumes caller has called so_addr_verify first. The translated
628 * (internal form) address is stored in sti->sti_ux_taddr.
629 */
630 /*ARGSUSED*/
631 int
so_ux_addr_xlate(struct sonode * so,struct sockaddr * name,socklen_t namelen,int checkaccess,void ** addrp,socklen_t * addrlenp)632 so_ux_addr_xlate(struct sonode *so, struct sockaddr *name,
633 socklen_t namelen, int checkaccess,
634 void **addrp, socklen_t *addrlenp)
635 {
636 int error;
637 struct sockaddr_un *soun;
638 vnode_t *vp;
639 void *addr;
640 socklen_t addrlen;
641 sotpi_info_t *sti = SOTOTPI(so);
642
643 dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n",
644 (void *)so, (void *)name, namelen, checkaccess));
645
646 ASSERT(name != NULL);
647 ASSERT(so->so_family == AF_UNIX);
648 ASSERT(!sti->sti_faddr_noxlate);
649 ASSERT(namelen >= (socklen_t)sizeof (short));
650 ASSERT(name->sa_family == AF_UNIX);
651 soun = (struct sockaddr_un *)name;
652 /*
653 * Lookup vnode for the specified path name and verify that
654 * it is a socket.
655 */
656 error = so_ux_lookup(so, soun, checkaccess, &vp);
657 if (error) {
658 eprintsoline(so, error);
659 return (error);
660 }
661 /*
662 * Use the address of the peer vnode as the address to send
663 * to. We release the peer vnode here. In case it has been
664 * closed by the time the T_CONN_REQ or T_UNITDATA_REQ reaches the
665 * transport the message will get an error or be dropped.
666 * Note that that soua_vp is never dereferenced; it's just a
667 * convenient value by which we can identify the peer.
668 */
669 sti->sti_ux_taddr.soua_vp = vp;
670 sti->sti_ux_taddr.soua_magic = SOU_MAGIC_EXPLICIT;
671 addr = &sti->sti_ux_taddr;
672 addrlen = (socklen_t)sizeof (sti->sti_ux_taddr);
673 dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n",
674 addrlen, (void *)vp));
675 VN_RELE(vp);
676 *addrp = addr;
677 *addrlenp = (socklen_t)addrlen;
678 return (0);
679 }
680
681 /*
682 * Esballoc free function for messages that contain SO_FILEP option.
683 * Decrement the reference count on the file pointers using closef.
684 */
685 void
fdbuf_free(struct fdbuf * fdbuf)686 fdbuf_free(struct fdbuf *fdbuf)
687 {
688 int i;
689 struct file *fp;
690
691 dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd));
692 for (i = 0; i < fdbuf->fd_numfd; i++) {
693 /*
694 * We need pointer size alignment for fd_fds. On a LP64
695 * kernel, the required alignment is 8 bytes while
696 * the option headers and values are only 4 bytes
697 * aligned. So its safer to do a bcopy compared to
698 * assigning fdbuf->fd_fds[i] to fp.
699 */
700 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
701 dprint(1, ("fdbuf_free: [%d] = %p\n", i, (void *)fp));
702 (void) closef(fp);
703 }
704 if (fdbuf->fd_ebuf != NULL)
705 kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen);
706 kmem_free(fdbuf, fdbuf->fd_size);
707 }
708
709 /*
710 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing.
711 * Waits if memory is not available.
712 */
713 mblk_t *
fdbuf_allocmsg(int size,struct fdbuf * fdbuf)714 fdbuf_allocmsg(int size, struct fdbuf *fdbuf)
715 {
716 uchar_t *buf;
717 mblk_t *mp;
718
719 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd));
720 buf = kmem_alloc(size, KM_SLEEP);
721 fdbuf->fd_ebuf = (caddr_t)buf;
722 fdbuf->fd_ebuflen = size;
723 fdbuf->fd_frtn.free_func = fdbuf_free;
724 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf;
725
726 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn);
727 mp->b_datap->db_type = M_PROTO;
728 return (mp);
729 }
730
731 /*
732 * Extract file descriptors from a fdbuf.
733 * Return list in rights/rightslen.
734 */
735 /*ARGSUSED*/
736 static int
fdbuf_extract(struct fdbuf * fdbuf,void * rights,int rightslen,int msg_flags)737 fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen, int msg_flags)
738 {
739 int i, fd;
740 int *rp;
741 struct file *fp;
742 int numfd;
743
744 dprint(1, ("fdbuf_extract: %d fds, len %d\n",
745 fdbuf->fd_numfd, rightslen));
746
747 numfd = fdbuf->fd_numfd;
748 ASSERT(rightslen == numfd * (int)sizeof (int));
749
750 /*
751 * Allocate a file descriptor and increment the f_count.
752 * The latter is needed since we always call fdbuf_free
753 * which performs a closef.
754 */
755 rp = (int *)rights;
756 for (i = 0; i < numfd; i++) {
757 if ((fd = ufalloc(0)) == -1)
758 goto cleanup;
759 /*
760 * We need pointer size alignment for fd_fds. On a LP64
761 * kernel, the required alignment is 8 bytes while
762 * the option headers and values are only 4 bytes
763 * aligned. So its safer to do a bcopy compared to
764 * assigning fdbuf->fd_fds[i] to fp.
765 */
766 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
767 mutex_enter(&fp->f_tlock);
768 fp->f_count++;
769 mutex_exit(&fp->f_tlock);
770 setf(fd, fp);
771 if ((msg_flags & MSG_CMSG_CLOEXEC) != 0) {
772 f_setfd_or(fd, FD_CLOEXEC);
773 }
774 if ((msg_flags & MSG_CMSG_CLOFORK) != 0) {
775 f_setfd_or(fd, FD_CLOFORK);
776 }
777 *rp++ = fd;
778 if (AU_AUDITING())
779 audit_fdrecv(fd, fp);
780 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n",
781 i, fd, (void *)fp, fp->f_count));
782 }
783 return (0);
784
785 cleanup:
786 /*
787 * Undo whatever partial work the loop above has done.
788 */
789 {
790 int j;
791
792 rp = (int *)rights;
793 for (j = 0; j < i; j++) {
794 dprint(0,
795 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp));
796 (void) closeandsetf(*rp++, NULL);
797 }
798 }
799
800 return (EMFILE);
801 }
802
803 /*
804 * Insert file descriptors into an fdbuf.
805 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed
806 * by calling fdbuf_free().
807 */
808 int
fdbuf_create(void * rights,int rightslen,struct fdbuf ** fdbufp)809 fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp)
810 {
811 int numfd, i;
812 int *fds;
813 struct file *fp;
814 struct fdbuf *fdbuf;
815 int fdbufsize;
816
817 dprint(1, ("fdbuf_create: len %d\n", rightslen));
818
819 numfd = rightslen / (int)sizeof (int);
820
821 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *));
822 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP);
823 fdbuf->fd_size = fdbufsize;
824 fdbuf->fd_numfd = 0;
825 fdbuf->fd_ebuf = NULL;
826 fdbuf->fd_ebuflen = 0;
827 fds = (int *)rights;
828 for (i = 0; i < numfd; i++) {
829 if ((fp = getf(fds[i])) == NULL) {
830 fdbuf_free(fdbuf);
831 return (EBADF);
832 }
833 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n",
834 i, fds[i], (void *)fp, fp->f_count));
835 mutex_enter(&fp->f_tlock);
836 fp->f_count++;
837 mutex_exit(&fp->f_tlock);
838 /*
839 * The maximum alignment for fdbuf (or any option header
840 * and its value) it 4 bytes. On a LP64 kernel, the alignment
841 * is not sufficient for pointers (fd_fds in this case). Since
842 * we just did a kmem_alloc (we get a double word alignment),
843 * we don't need to do anything on the send side (we loose
844 * the double word alignment because fdbuf goes after an
845 * option header (eg T_unitdata_req) which is only 4 byte
846 * aligned). We take care of this when we extract the file
847 * descriptor in fdbuf_extract or fdbuf_free.
848 */
849 fdbuf->fd_fds[i] = fp;
850 fdbuf->fd_numfd++;
851 releasef(fds[i]);
852 if (AU_AUDITING())
853 audit_fdsend(fds[i], fp, 0);
854 }
855 *fdbufp = fdbuf;
856 return (0);
857 }
858
859 static int
fdbuf_optlen(int rightslen)860 fdbuf_optlen(int rightslen)
861 {
862 int numfd;
863
864 numfd = rightslen / (int)sizeof (int);
865
866 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)));
867 }
868
869 static t_uscalar_t
fdbuf_cmsglen(int fdbuflen)870 fdbuf_cmsglen(int fdbuflen)
871 {
872 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) /
873 (int)sizeof (struct file *) * (int)sizeof (int));
874 }
875
876
877 /*
878 * Return non-zero if the mblk and fdbuf are consistent.
879 */
880 static int
fdbuf_verify(mblk_t * mp,struct fdbuf * fdbuf,int fdbuflen)881 fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen)
882 {
883 if (fdbuflen >= FDBUF_HDRSIZE &&
884 fdbuflen == fdbuf->fd_size) {
885 frtn_t *frp = mp->b_datap->db_frtnp;
886 /*
887 * Check that the SO_FILEP portion of the
888 * message has not been modified by
889 * the loopback transport. The sending sockfs generates
890 * a message that is esballoc'ed with the free function
891 * being fdbuf_free() and where free_arg contains the
892 * identical information as the SO_FILEP content.
893 *
894 * If any of these constraints are not satisfied we
895 * silently ignore the option.
896 */
897 ASSERT(mp);
898 if (frp != NULL &&
899 frp->free_func == fdbuf_free &&
900 frp->free_arg != NULL &&
901 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) {
902 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n",
903 (void *)fdbuf, fdbuflen));
904 return (1);
905 } else {
906 zcmn_err(getzoneid(), CE_WARN,
907 "sockfs: mismatched fdbuf content (%p)",
908 (void *)mp);
909 return (0);
910 }
911 } else {
912 zcmn_err(getzoneid(), CE_WARN,
913 "sockfs: mismatched fdbuf len %d, %d\n",
914 fdbuflen, fdbuf->fd_size);
915 return (0);
916 }
917 }
918
919 /*
920 * When the file descriptors returned by sorecvmsg can not be passed
921 * to the application this routine will cleanup the references on
922 * the files. Start at startoff bytes into the buffer.
923 */
924 static void
close_fds(void * fdbuf,int fdbuflen,int startoff)925 close_fds(void *fdbuf, int fdbuflen, int startoff)
926 {
927 int *fds = (int *)fdbuf;
928 int numfd = fdbuflen / (int)sizeof (int);
929 int i;
930
931 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff));
932
933 for (i = 0; i < numfd; i++) {
934 if (startoff < 0)
935 startoff = 0;
936 if (startoff < (int)sizeof (int)) {
937 /*
938 * This file descriptor is partially or fully after
939 * the offset
940 */
941 dprint(0,
942 ("close_fds: cleanup[%d] = %d\n", i, fds[i]));
943 (void) closeandsetf(fds[i], NULL);
944 }
945 startoff -= (int)sizeof (int);
946 }
947 }
948
949 /*
950 * Close all file descriptors contained in the control part starting at
951 * the startoffset.
952 */
953 void
so_closefds(void * control,t_uscalar_t controllen,int oldflg,int startoff)954 so_closefds(void *control, t_uscalar_t controllen, int oldflg,
955 int startoff)
956 {
957 struct cmsghdr *cmsg;
958
959 if (control == NULL)
960 return;
961
962 if (oldflg) {
963 close_fds(control, controllen, startoff);
964 return;
965 }
966 /* Scan control part for file descriptors. */
967 for (cmsg = (struct cmsghdr *)control;
968 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
969 cmsg = CMSG_NEXT(cmsg)) {
970 if (cmsg->cmsg_level == SOL_SOCKET &&
971 cmsg->cmsg_type == SCM_RIGHTS) {
972 close_fds(CMSG_CONTENT(cmsg),
973 (int)CMSG_CONTENTLEN(cmsg),
974 startoff - (int)sizeof (struct cmsghdr));
975 }
976 startoff -= ROUNDUP_cmsglen(cmsg->cmsg_len);
977 }
978 }
979
980 /*
981 * Handle truncation of a cmsg when the receive buffer is not big enough.
982 * Adjust the cmsg_len header field in the last cmsg that will be included in
983 * the buffer to reflect the number of bytes included.
984 */
985 void
so_truncatecmsg(void * control,t_uscalar_t controllen,uint_t maxlen)986 so_truncatecmsg(void *control, t_uscalar_t controllen, uint_t maxlen)
987 {
988 struct cmsghdr *cmsg;
989 uint_t len = 0;
990
991 if (control == NULL)
992 return;
993
994 for (cmsg = control;
995 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
996 cmsg = CMSG_NEXT(cmsg)) {
997
998 len += ROUNDUP_cmsglen(cmsg->cmsg_len);
999
1000 if (len > maxlen) {
1001 /*
1002 * This cmsg is the last one that will be included in
1003 * the truncated buffer.
1004 */
1005 socklen_t diff = len - maxlen;
1006
1007 if (diff < CMSG_CONTENTLEN(cmsg)) {
1008 dprint(1, ("so_truncatecmsg: %d -> %d\n",
1009 cmsg->cmsg_len, cmsg->cmsg_len - diff));
1010 cmsg->cmsg_len -= diff;
1011 } else {
1012 cmsg->cmsg_len = sizeof (struct cmsghdr);
1013 }
1014 break;
1015 }
1016 }
1017 }
1018
1019 /*
1020 * Returns a pointer/length for the file descriptors contained
1021 * in the control buffer. Returns with *fdlenp == -1 if there are no
1022 * file descriptor options present. This is different than there being
1023 * a zero-length file descriptor option.
1024 * Fail if there are multiple SCM_RIGHT cmsgs.
1025 */
1026 int
so_getfdopt(void * control,t_uscalar_t controllen,int oldflg,void ** fdsp,int * fdlenp)1027 so_getfdopt(void *control, t_uscalar_t controllen, int oldflg,
1028 void **fdsp, int *fdlenp)
1029 {
1030 struct cmsghdr *cmsg;
1031 void *fds;
1032 int fdlen;
1033
1034 if (control == NULL) {
1035 *fdsp = NULL;
1036 *fdlenp = -1;
1037 return (0);
1038 }
1039
1040 if (oldflg) {
1041 *fdsp = control;
1042 if (controllen == 0)
1043 *fdlenp = -1;
1044 else
1045 *fdlenp = controllen;
1046 dprint(1, ("so_getfdopt: old %d\n", *fdlenp));
1047 return (0);
1048 }
1049
1050 fds = NULL;
1051 fdlen = 0;
1052
1053 for (cmsg = (struct cmsghdr *)control;
1054 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1055 cmsg = CMSG_NEXT(cmsg)) {
1056 if (cmsg->cmsg_level == SOL_SOCKET &&
1057 cmsg->cmsg_type == SCM_RIGHTS) {
1058 if (fds != NULL)
1059 return (EINVAL);
1060 fds = CMSG_CONTENT(cmsg);
1061 fdlen = (int)CMSG_CONTENTLEN(cmsg);
1062 dprint(1, ("so_getfdopt: new %lu\n",
1063 (size_t)CMSG_CONTENTLEN(cmsg)));
1064 }
1065 }
1066 if (fds == NULL) {
1067 dprint(1, ("so_getfdopt: NONE\n"));
1068 *fdlenp = -1;
1069 } else
1070 *fdlenp = fdlen;
1071 *fdsp = fds;
1072 return (0);
1073 }
1074
1075 /*
1076 * Return the length of the options including any file descriptor options.
1077 */
1078 t_uscalar_t
so_optlen(void * control,t_uscalar_t controllen,int oldflg)1079 so_optlen(void *control, t_uscalar_t controllen, int oldflg)
1080 {
1081 struct cmsghdr *cmsg;
1082 t_uscalar_t optlen = 0;
1083 t_uscalar_t len;
1084
1085 if (control == NULL)
1086 return (0);
1087
1088 if (oldflg)
1089 return ((t_uscalar_t)(sizeof (struct T_opthdr) +
1090 fdbuf_optlen(controllen)));
1091
1092 for (cmsg = (struct cmsghdr *)control;
1093 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1094 cmsg = CMSG_NEXT(cmsg)) {
1095 if (cmsg->cmsg_level == SOL_SOCKET &&
1096 cmsg->cmsg_type == SCM_RIGHTS) {
1097 len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg));
1098 } else {
1099 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
1100 }
1101 optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) +
1102 sizeof (struct T_opthdr));
1103 }
1104 dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n",
1105 controllen, oldflg, optlen));
1106 return (optlen);
1107 }
1108
1109 /*
1110 * Copy options from control to the mblk. Skip any file descriptor options.
1111 */
1112 void
so_cmsg2opt(void * control,t_uscalar_t controllen,int oldflg,mblk_t * mp)1113 so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp)
1114 {
1115 struct T_opthdr toh;
1116 struct cmsghdr *cmsg;
1117
1118 if (control == NULL)
1119 return;
1120
1121 if (oldflg) {
1122 /* No real options - caller has handled file descriptors */
1123 return;
1124 }
1125 for (cmsg = (struct cmsghdr *)control;
1126 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1127 cmsg = CMSG_NEXT(cmsg)) {
1128 /*
1129 * Note: The caller handles file descriptors prior
1130 * to calling this function.
1131 */
1132 t_uscalar_t len;
1133
1134 if (cmsg->cmsg_level == SOL_SOCKET &&
1135 cmsg->cmsg_type == SCM_RIGHTS)
1136 continue;
1137
1138 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
1139 toh.level = cmsg->cmsg_level;
1140 toh.name = cmsg->cmsg_type;
1141 toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr);
1142 toh.status = 0;
1143
1144 soappendmsg(mp, &toh, sizeof (toh));
1145 soappendmsg(mp, CMSG_CONTENT(cmsg), len);
1146 mp->b_wptr += _TPI_ALIGN_TOPT(len) - len;
1147 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1148 }
1149 }
1150
1151 /*
1152 * Return the length of the control message derived from the options.
1153 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP.
1154 * When oldflg is set only include SO_FILEP.
1155 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1156 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1157 * also be checked for any possible impacts.
1158 */
1159 t_uscalar_t
so_cmsglen(mblk_t * mp,void * opt,t_uscalar_t optlen,int oldflg)1160 so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg)
1161 {
1162 t_uscalar_t cmsglen = 0;
1163 struct T_opthdr *tohp;
1164 t_uscalar_t len;
1165 t_uscalar_t last_roundup = 0;
1166
1167 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1168
1169 for (tohp = (struct T_opthdr *)opt;
1170 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1171 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1172 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n",
1173 tohp->level, tohp->name, tohp->len));
1174 if (tohp->level == SOL_SOCKET &&
1175 (tohp->name == SO_SRCADDR ||
1176 tohp->name == SO_UNIX_CLOSE)) {
1177 continue;
1178 }
1179 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
1180 struct fdbuf *fdbuf;
1181 int fdbuflen;
1182
1183 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
1184 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);
1185
1186 if (!fdbuf_verify(mp, fdbuf, fdbuflen))
1187 continue;
1188 if (oldflg) {
1189 cmsglen += fdbuf_cmsglen(fdbuflen);
1190 continue;
1191 }
1192 len = fdbuf_cmsglen(fdbuflen);
1193 } else if (tohp->level == SOL_SOCKET &&
1194 tohp->name == SCM_TIMESTAMP) {
1195 if (oldflg)
1196 continue;
1197
1198 if (get_udatamodel() == DATAMODEL_NATIVE) {
1199 len = sizeof (struct timeval);
1200 } else {
1201 len = sizeof (struct timeval32);
1202 }
1203 } else {
1204 if (oldflg)
1205 continue;
1206 len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
1207 }
1208 /*
1209 * Exclude roundup for last option to not set
1210 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit.
1211 */
1212 last_roundup = (t_uscalar_t)
1213 (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) -
1214 (len + (int)sizeof (struct cmsghdr)));
1215 cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) +
1216 last_roundup;
1217 }
1218 cmsglen -= last_roundup;
1219 dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n",
1220 optlen, oldflg, cmsglen));
1221 return (cmsglen);
1222 }
1223
/*
 * Copy options from options to the control. Convert SO_FILEP to
 * file descriptors.
 * Returns errno or zero.
 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
 * allocates the space that so_opt2cmsg fills. If one changes, the other should
 * also be checked for any possible impacts.
 *
 * Arguments:
 *	mp		message handed to fdbuf_verify() together with each
 *			SO_FILEP fdbuf
 *	opt, optlen	T_opthdr-formatted option buffer to walk
 *	msg_flags	when MSG_XPG4_2 is clear the "old" format is used:
 *			only SO_FILEP is converted, and its descriptors are
 *			extracted straight into control with no cmsghdr
 *	control,
 *	controllen	destination buffer; this function does not grow it
 *
 * Returns EPROTO when an SO_FILEP fdbuf fails fdbuf_verify(), or whatever
 * error fdbuf_extract() reports.
 */
int
so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int msg_flags,
    void *control, t_uscalar_t controllen)
{
	struct T_opthdr *tohp;
	struct cmsghdr *cmsg;
	struct fdbuf *fdbuf;
	int fdbuflen;
	int error;
	int oldflg = (msg_flags & MSG_XPG4_2) == 0;
#if defined(DEBUG) || defined(__lint)
	/* End of the (rounded up) control buffer; only used for checking */
	struct cmsghdr *cend = (struct cmsghdr *)
	    (((uint8_t *)control) + ROUNDUP_cmsglen(controllen));
#endif
	cmsg = (struct cmsghdr *)control;

	ASSERT(__TPI_TOPT_ISALIGNED(opt));

	for (tohp = (struct T_opthdr *)opt;
	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
		dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
		    tohp->level, tohp->name, tohp->len));

		/* These two options are never turned into control messages */
		if (tohp->level == SOL_SOCKET &&
		    (tohp->name == SO_SRCADDR ||
		    tohp->name == SO_UNIX_CLOSE)) {
			continue;
		}
		ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen);
		if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
			fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
			fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);

			if (!fdbuf_verify(mp, fdbuf, fdbuflen))
				return (EPROTO);
			if (oldflg) {
				/*
				 * Old format: extract directly into the
				 * control buffer; cmsg is not advanced.
				 */
				error = fdbuf_extract(fdbuf, control,
				    (int)controllen, msg_flags);
				if (error != 0)
					return (error);
				continue;
			} else {
				int fdlen;

				fdlen = (int)fdbuf_cmsglen(
				    (int)_TPI_TOPT_DATALEN(tohp));

				/* SO_FILEP is presented as SCM_RIGHTS */
				cmsg->cmsg_level = tohp->level;
				cmsg->cmsg_type = SCM_RIGHTS;
				cmsg->cmsg_len = (socklen_t)(fdlen +
				    sizeof (struct cmsghdr));

				error = fdbuf_extract(fdbuf,
				    CMSG_CONTENT(cmsg), fdlen, msg_flags);
				if (error != 0)
					return (error);
			}
		} else if (tohp->level == SOL_SOCKET &&
		    tohp->name == SCM_TIMESTAMP) {
			timestruc_t *timestamp;

			if (oldflg)
				continue;

			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;

			/*
			 * The timestruc_t is read from the first
			 * pointer-size-aligned address after the option
			 * header.
			 */
			timestamp =
			    (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1],
			    sizeof (intptr_t));

			/*
			 * Emit a timeval sized for the caller's data model,
			 * converting nanoseconds to microseconds.
			 */
			if (get_udatamodel() == DATAMODEL_NATIVE) {
				struct timeval tv;

				cmsg->cmsg_len = sizeof (struct timeval) +
				    sizeof (struct cmsghdr);
				tv.tv_sec = timestamp->tv_sec;
				tv.tv_usec = timestamp->tv_nsec /
				    (NANOSEC / MICROSEC);
				/*
				 * on LP64 systems, the struct timeval in
				 * the destination will not be 8-byte aligned,
				 * so use bcopy to avoid alignment trouble
				 */
				bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv));
			} else {
				struct timeval32 *time32;

				cmsg->cmsg_len = sizeof (struct timeval32) +
				    sizeof (struct cmsghdr);
				time32 = (struct timeval32 *)CMSG_CONTENT(cmsg);
				time32->tv_sec = (time32_t)timestamp->tv_sec;
				time32->tv_usec =
				    (int32_t)(timestamp->tv_nsec /
				    (NANOSEC / MICROSEC));
			}

		} else {
			/* All other options are dropped in the old format */
			if (oldflg)
				continue;

			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;
			cmsg->cmsg_len = (socklen_t)sizeof (struct cmsghdr);
			if (tohp->level == IPPROTO_IP &&
			    (tohp->name == IP_RECVTOS ||
			    tohp->name == IP_RECVTTL)) {
				/*
				 * The data for these is a uint8_t but, in
				 * order to maintain alignment for any
				 * following TPI primitives in the message,
				 * there will be some trailing padding bytes
				 * which are included in the TPI_TOPT_DATALEN.
				 * For these types, we set the cmsg_len
				 * explicitly to the correct value.
				 */
				cmsg->cmsg_len += (socklen_t)sizeof (uint8_t);
			} else {
				cmsg->cmsg_len +=
				    (socklen_t)(_TPI_TOPT_DATALEN(tohp));
			}

			/* copy content to control data part */
			bcopy(&tohp[1], CMSG_CONTENT(cmsg),
			    CMSG_CONTENTLEN(cmsg));
		}
		/* move to next CMSG structure! */
		cmsg = CMSG_NEXT(cmsg);
	}
	dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
	    control, controllen, (void *)cend, (void *)cmsg));
	/* The write cursor must not have passed the rounded-up buffer end */
	ASSERT(cmsg <= cend);
	return (0);
}
1367
1368 /*
1369 * Extract the SO_SRCADDR option value if present.
1370 */
1371 void
so_getopt_srcaddr(void * opt,t_uscalar_t optlen,void ** srcp,t_uscalar_t * srclenp)1372 so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp,
1373 t_uscalar_t *srclenp)
1374 {
1375 struct T_opthdr *tohp;
1376
1377 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1378
1379 ASSERT(srcp != NULL && srclenp != NULL);
1380 *srcp = NULL;
1381 *srclenp = 0;
1382
1383 for (tohp = (struct T_opthdr *)opt;
1384 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1385 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1386 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n",
1387 tohp->level, tohp->name, tohp->len));
1388 if (tohp->level == SOL_SOCKET &&
1389 tohp->name == SO_SRCADDR) {
1390 *srcp = _TPI_TOPT_DATA(tohp);
1391 *srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
1392 }
1393 }
1394 }
1395
1396 /*
1397 * Verify if the SO_UNIX_CLOSE option is present.
1398 */
1399 int
so_getopt_unix_close(void * opt,t_uscalar_t optlen)1400 so_getopt_unix_close(void *opt, t_uscalar_t optlen)
1401 {
1402 struct T_opthdr *tohp;
1403
1404 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1405
1406 for (tohp = (struct T_opthdr *)opt;
1407 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1408 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1409 dprint(1,
1410 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n",
1411 tohp->level, tohp->name, tohp->len));
1412 if (tohp->level == SOL_SOCKET &&
1413 tohp->name == SO_UNIX_CLOSE)
1414 return (1);
1415 }
1416 return (0);
1417 }
1418
1419 /*
1420 * Allocate an M_PROTO message.
1421 *
1422 * If allocation fails the behavior depends on sleepflg:
1423 * _ALLOC_NOSLEEP fail immediately
1424 * _ALLOC_INTR sleep for memory until a signal is caught
1425 * _ALLOC_SLEEP sleep forever. Don't return NULL.
1426 */
1427 mblk_t *
soallocproto(size_t size,int sleepflg,cred_t * cr)1428 soallocproto(size_t size, int sleepflg, cred_t *cr)
1429 {
1430 mblk_t *mp;
1431
1432 /* Round up size for reuse */
1433 size = MAX(size, 64);
1434 if (cr != NULL)
1435 mp = allocb_cred(size, cr, curproc->p_pid);
1436 else
1437 mp = allocb(size, BPRI_MED);
1438
1439 if (mp == NULL) {
1440 int error; /* Dummy - error not returned to caller */
1441
1442 switch (sleepflg) {
1443 case _ALLOC_SLEEP:
1444 if (cr != NULL) {
1445 mp = allocb_cred_wait(size, STR_NOSIG, &error,
1446 cr, curproc->p_pid);
1447 } else {
1448 mp = allocb_wait(size, BPRI_MED, STR_NOSIG,
1449 &error);
1450 }
1451 ASSERT(mp);
1452 break;
1453 case _ALLOC_INTR:
1454 if (cr != NULL) {
1455 mp = allocb_cred_wait(size, 0, &error, cr,
1456 curproc->p_pid);
1457 } else {
1458 mp = allocb_wait(size, BPRI_MED, 0, &error);
1459 }
1460 if (mp == NULL) {
1461 /* Caught signal while sleeping for memory */
1462 eprintline(ENOBUFS);
1463 return (NULL);
1464 }
1465 break;
1466 case _ALLOC_NOSLEEP:
1467 default:
1468 eprintline(ENOBUFS);
1469 return (NULL);
1470 }
1471 }
1472 DB_TYPE(mp) = M_PROTO;
1473 return (mp);
1474 }
1475
1476 /*
1477 * Allocate an M_PROTO message with a single component.
1478 * len is the length of buf. size is the amount to allocate.
1479 *
1480 * buf can be NULL with a non-zero len.
1481 * This results in a bzero'ed chunk being placed the message.
1482 */
1483 mblk_t *
soallocproto1(const void * buf,ssize_t len,ssize_t size,int sleepflg,cred_t * cr)1484 soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg,
1485 cred_t *cr)
1486 {
1487 mblk_t *mp;
1488
1489 if (size == 0)
1490 size = len;
1491
1492 ASSERT(size >= len);
1493 /* Round up size for reuse */
1494 size = MAX(size, 64);
1495 mp = soallocproto(size, sleepflg, cr);
1496 if (mp == NULL)
1497 return (NULL);
1498 mp->b_datap->db_type = M_PROTO;
1499 if (len != 0) {
1500 if (buf != NULL)
1501 bcopy(buf, mp->b_wptr, len);
1502 else
1503 bzero(mp->b_wptr, len);
1504 mp->b_wptr += len;
1505 }
1506 return (mp);
1507 }
1508
1509 /*
1510 * Append buf/len to mp.
1511 * The caller has to ensure that there is enough room in the mblk.
1512 *
1513 * buf can be NULL with a non-zero len.
1514 * This results in a bzero'ed chunk being placed the message.
1515 */
1516 void
soappendmsg(mblk_t * mp,const void * buf,ssize_t len)1517 soappendmsg(mblk_t *mp, const void *buf, ssize_t len)
1518 {
1519 ASSERT(mp);
1520
1521 if (len != 0) {
1522 /* Assert for room left */
1523 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len);
1524 if (buf != NULL)
1525 bcopy(buf, mp->b_wptr, len);
1526 else
1527 bzero(mp->b_wptr, len);
1528 }
1529 mp->b_wptr += len;
1530 }
1531
1532 /*
1533 * Create a message using two kernel buffers.
1534 * If size is set that will determine the allocation size (e.g. for future
1535 * soappendmsg calls). If size is zero it is derived from the buffer
1536 * lengths.
1537 */
1538 mblk_t *
soallocproto2(const void * buf1,ssize_t len1,const void * buf2,ssize_t len2,ssize_t size,int sleepflg,cred_t * cr)1539 soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
1540 ssize_t size, int sleepflg, cred_t *cr)
1541 {
1542 mblk_t *mp;
1543
1544 if (size == 0)
1545 size = len1 + len2;
1546 ASSERT(size >= len1 + len2);
1547
1548 mp = soallocproto1(buf1, len1, size, sleepflg, cr);
1549 if (mp)
1550 soappendmsg(mp, buf2, len2);
1551 return (mp);
1552 }
1553
1554 /*
1555 * Create a message using three kernel buffers.
1556 * If size is set that will determine the allocation size (for future
1557 * soappendmsg calls). If size is zero it is derived from the buffer
1558 * lengths.
1559 */
1560 mblk_t *
soallocproto3(const void * buf1,ssize_t len1,const void * buf2,ssize_t len2,const void * buf3,ssize_t len3,ssize_t size,int sleepflg,cred_t * cr)1561 soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
1562 const void *buf3, ssize_t len3, ssize_t size, int sleepflg, cred_t *cr)
1563 {
1564 mblk_t *mp;
1565
1566 if (size == 0)
1567 size = len1 + len2 +len3;
1568 ASSERT(size >= len1 + len2 + len3);
1569
1570 mp = soallocproto1(buf1, len1, size, sleepflg, cr);
1571 if (mp != NULL) {
1572 soappendmsg(mp, buf2, len2);
1573 soappendmsg(mp, buf3, len3);
1574 }
1575 return (mp);
1576 }
1577
1578 #ifdef DEBUG
1579 char *
pr_state(uint_t state,uint_t mode)1580 pr_state(uint_t state, uint_t mode)
1581 {
1582 static char buf[1024];
1583
1584 buf[0] = 0;
1585 if (state & SS_ISCONNECTED)
1586 (void) strcat(buf, "ISCONNECTED ");
1587 if (state & SS_ISCONNECTING)
1588 (void) strcat(buf, "ISCONNECTING ");
1589 if (state & SS_ISDISCONNECTING)
1590 (void) strcat(buf, "ISDISCONNECTING ");
1591 if (state & SS_CANTSENDMORE)
1592 (void) strcat(buf, "CANTSENDMORE ");
1593
1594 if (state & SS_CANTRCVMORE)
1595 (void) strcat(buf, "CANTRCVMORE ");
1596 if (state & SS_ISBOUND)
1597 (void) strcat(buf, "ISBOUND ");
1598 if (state & SS_NDELAY)
1599 (void) strcat(buf, "NDELAY ");
1600 if (state & SS_NONBLOCK)
1601 (void) strcat(buf, "NONBLOCK ");
1602
1603 if (state & SS_ASYNC)
1604 (void) strcat(buf, "ASYNC ");
1605 if (state & SS_ACCEPTCONN)
1606 (void) strcat(buf, "ACCEPTCONN ");
1607 if (state & SS_SAVEDEOR)
1608 (void) strcat(buf, "SAVEDEOR ");
1609
1610 if (state & SS_RCVATMARK)
1611 (void) strcat(buf, "RCVATMARK ");
1612 if (state & SS_OOBPEND)
1613 (void) strcat(buf, "OOBPEND ");
1614 if (state & SS_HAVEOOBDATA)
1615 (void) strcat(buf, "HAVEOOBDATA ");
1616 if (state & SS_HADOOBDATA)
1617 (void) strcat(buf, "HADOOBDATA ");
1618
1619 if (mode & SM_PRIV)
1620 (void) strcat(buf, "PRIV ");
1621 if (mode & SM_ATOMIC)
1622 (void) strcat(buf, "ATOMIC ");
1623 if (mode & SM_ADDR)
1624 (void) strcat(buf, "ADDR ");
1625 if (mode & SM_CONNREQUIRED)
1626 (void) strcat(buf, "CONNREQUIRED ");
1627
1628 if (mode & SM_FDPASSING)
1629 (void) strcat(buf, "FDPASSING ");
1630 if (mode & SM_EXDATA)
1631 (void) strcat(buf, "EXDATA ");
1632 if (mode & SM_OPTDATA)
1633 (void) strcat(buf, "OPTDATA ");
1634 if (mode & SM_BYTESTREAM)
1635 (void) strcat(buf, "BYTESTREAM ");
1636 return (buf);
1637 }
1638
1639 char *
pr_addr(int family,struct sockaddr * addr,t_uscalar_t addrlen)1640 pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen)
1641 {
1642 static char buf[1024];
1643
1644 if (addr == NULL || addrlen == 0) {
1645 (void) sprintf(buf, "(len %d) %p", addrlen, (void *)addr);
1646 return (buf);
1647 }
1648 switch (family) {
1649 case AF_INET: {
1650 struct sockaddr_in sin;
1651
1652 bcopy(addr, &sin, sizeof (sin));
1653
1654 (void) sprintf(buf, "(len %d) %x/%d",
1655 addrlen, ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
1656 break;
1657 }
1658 case AF_INET6: {
1659 struct sockaddr_in6 sin6;
1660 uint16_t *piece = (uint16_t *)&sin6.sin6_addr;
1661
1662 bcopy((char *)addr, (char *)&sin6, sizeof (sin6));
1663 (void) sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d",
1664 addrlen,
1665 ntohs(piece[0]), ntohs(piece[1]),
1666 ntohs(piece[2]), ntohs(piece[3]),
1667 ntohs(piece[4]), ntohs(piece[5]),
1668 ntohs(piece[6]), ntohs(piece[7]),
1669 ntohs(sin6.sin6_port));
1670 break;
1671 }
1672 case AF_UNIX: {
1673 struct sockaddr_un *soun = (struct sockaddr_un *)addr;
1674
1675 (void) sprintf(buf, "(len %d) %s", addrlen,
1676 (soun == NULL) ? "(none)" : soun->sun_path);
1677 break;
1678 }
1679 default:
1680 (void) sprintf(buf, "(unknown af %d)", family);
1681 break;
1682 }
1683 return (buf);
1684 }
1685
1686 /* The logical equivalence operator (a if-and-only-if b) */
1687 #define EQUIVALENT(a, b) (((a) && (b)) || (!(a) && (!(b))))
1688
1689 /*
1690 * Verify limitations and invariants on oob state.
1691 * Return 1 if OK, otherwise 0 so that it can be used as
1692 * ASSERT(verify_oobstate(so));
1693 */
1694 int
so_verify_oobstate(struct sonode * so)1695 so_verify_oobstate(struct sonode *so)
1696 {
1697 boolean_t havemark;
1698
1699 ASSERT(MUTEX_HELD(&so->so_lock));
1700
1701 /*
1702 * The possible state combinations are:
1703 * 0
1704 * SS_OOBPEND
1705 * SS_OOBPEND|SS_HAVEOOBDATA
1706 * SS_OOBPEND|SS_HADOOBDATA
1707 * SS_HADOOBDATA
1708 */
1709 switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) {
1710 case 0:
1711 case SS_OOBPEND:
1712 case SS_OOBPEND|SS_HAVEOOBDATA:
1713 case SS_OOBPEND|SS_HADOOBDATA:
1714 case SS_HADOOBDATA:
1715 break;
1716 default:
1717 printf("Bad oob state 1 (%p): state %s\n",
1718 (void *)so, pr_state(so->so_state, so->so_mode));
1719 return (0);
1720 }
1721
1722 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */
1723 if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) {
1724 printf("Bad oob state 2 (%p): state %s\n",
1725 (void *)so, pr_state(so->so_state, so->so_mode));
1726 return (0);
1727 }
1728
1729 /*
1730 * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND
1731 * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt.
1732 */
1733 havemark = (SOCK_IS_NONSTR(so)) ? so->so_oobmark > 0 :
1734 SOTOTPI(so)->sti_oobsigcnt > 0;
1735
1736 if (!EQUIVALENT(havemark || (so->so_state & SS_RCVATMARK),
1737 so->so_state & SS_OOBPEND)) {
1738 printf("Bad oob state 3 (%p): state %s\n",
1739 (void *)so, pr_state(so->so_state, so->so_mode));
1740 return (0);
1741 }
1742
1743 /*
1744 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA
1745 */
1746 if (!(so->so_options & SO_OOBINLINE) &&
1747 !EQUIVALENT(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) {
1748 printf("Bad oob state 4 (%p): state %s\n",
1749 (void *)so, pr_state(so->so_state, so->so_mode));
1750 return (0);
1751 }
1752
1753 if (!SOCK_IS_NONSTR(so) &&
1754 SOTOTPI(so)->sti_oobsigcnt < SOTOTPI(so)->sti_oobcnt) {
1755 printf("Bad oob state 5 (%p): counts %d/%d state %s\n",
1756 (void *)so, SOTOTPI(so)->sti_oobsigcnt,
1757 SOTOTPI(so)->sti_oobcnt,
1758 pr_state(so->so_state, so->so_mode));
1759 return (0);
1760 }
1761
1762 return (1);
1763 }
1764 #undef EQUIVALENT
1765 #endif /* DEBUG */
1766
1767 /* initialize sockfs zone specific kstat related items */
1768 void *
sock_kstat_init(zoneid_t zoneid)1769 sock_kstat_init(zoneid_t zoneid)
1770 {
1771 kstat_t *ksp;
1772
1773 ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc",
1774 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid);
1775
1776 if (ksp != NULL) {
1777 ksp->ks_update = sockfs_update;
1778 ksp->ks_snapshot = sockfs_snapshot;
1779 ksp->ks_lock = &socklist.sl_lock;
1780 ksp->ks_private = (void *)(uintptr_t)zoneid;
1781 kstat_install(ksp);
1782 }
1783
1784 return (ksp);
1785 }
1786
1787 /* tear down sockfs zone specific kstat related items */
1788 /*ARGSUSED*/
1789 void
sock_kstat_fini(zoneid_t zoneid,void * arg)1790 sock_kstat_fini(zoneid_t zoneid, void *arg)
1791 {
1792 kstat_t *ksp = (kstat_t *)arg;
1793
1794 if (ksp != NULL) {
1795 ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private);
1796 kstat_delete(ksp);
1797 }
1798 }
1799
1800 /*
1801 * Zones:
1802 * Note that nactive is going to be different for each zone.
1803 * This means we require kstat to call sockfs_update and then sockfs_snapshot
1804 * for the same zone, or sockfs_snapshot will be taken into the wrong size
1805 * buffer. This is safe, but if the buffer is too small, user will not be
1806 * given details of all sockets. However, as this kstat has a ks_lock, kstat
1807 * driver will keep it locked between the update and the snapshot, so no
1808 * other process (zone) can currently get inbetween resulting in a wrong size
1809 * buffer allocation.
1810 */
1811 static int
sockfs_update(kstat_t * ksp,int rw)1812 sockfs_update(kstat_t *ksp, int rw)
1813 {
1814 uint_t nactive = 0; /* # of active AF_UNIX sockets */
1815 struct sonode *so; /* current sonode on socklist */
1816 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
1817
1818 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
1819
1820 if (rw == KSTAT_WRITE) { /* bounce all writes */
1821 return (EACCES);
1822 }
1823
1824 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
1825 if (so->so_count != 0 && so->so_zoneid == myzoneid) {
1826 nactive++;
1827 }
1828 }
1829 ksp->ks_ndata = nactive;
1830 ksp->ks_data_size = nactive * sizeof (struct sockinfo);
1831
1832 return (0);
1833 }
1834
1835 static int
sockfs_snapshot(kstat_t * ksp,void * buf,int rw)1836 sockfs_snapshot(kstat_t *ksp, void *buf, int rw)
1837 {
1838 int ns; /* # of sonodes we've copied */
1839 struct sonode *so; /* current sonode on socklist */
1840 struct sockinfo *psi; /* where we put sockinfo data */
1841 t_uscalar_t sn_len; /* soa_len */
1842 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
1843 sotpi_info_t *sti;
1844
1845 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
1846
1847 ksp->ks_snaptime = gethrtime();
1848
1849 if (rw == KSTAT_WRITE) { /* bounce all writes */
1850 return (EACCES);
1851 }
1852
1853 /*
1854 * For each sonode on the socklist, we massage the important
1855 * info into buf, in sockinfo format.
1856 */
1857 psi = (struct sockinfo *)buf;
1858 ns = 0;
1859 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
1860 vattr_t attr;
1861
1862 /* only stuff active sonodes and the same zone: */
1863 if (so->so_count == 0 || so->so_zoneid != myzoneid) {
1864 continue;
1865 }
1866
1867 /*
1868 * If the sonode was activated between the update and the
1869 * snapshot, we're done - as this is only a snapshot.
1870 */
1871 if ((caddr_t)(psi) >= (caddr_t)buf + ksp->ks_data_size) {
1872 break;
1873 }
1874
1875 sti = SOTOTPI(so);
1876 /* copy important info into buf: */
1877 psi->si_size = sizeof (struct sockinfo);
1878 psi->si_family = so->so_family;
1879 psi->si_type = so->so_type;
1880 psi->si_flag = so->so_flag;
1881 psi->si_state = so->so_state;
1882 psi->si_serv_type = sti->sti_serv_type;
1883 psi->si_ux_laddr_sou_magic = sti->sti_ux_laddr.soua_magic;
1884 psi->si_ux_faddr_sou_magic = sti->sti_ux_faddr.soua_magic;
1885 psi->si_laddr_soa_len = sti->sti_laddr.soa_len;
1886 psi->si_faddr_soa_len = sti->sti_faddr.soa_len;
1887 psi->si_szoneid = so->so_zoneid;
1888 psi->si_faddr_noxlate = sti->sti_faddr_noxlate;
1889
1890 /*
1891 * Grab the inode, if possible.
1892 * This must be done before entering so_lock as VOP_GETATTR
1893 * will acquire it.
1894 */
1895 if (so->so_vnode == NULL ||
1896 VOP_GETATTR(so->so_vnode, &attr, 0, CRED(), NULL) != 0)
1897 attr.va_nodeid = 0;
1898
1899 psi->si_inode = attr.va_nodeid;
1900
1901 mutex_enter(&so->so_lock);
1902
1903 if (sti->sti_laddr_sa != NULL) {
1904 ASSERT(sti->sti_laddr_sa->sa_data != NULL);
1905 sn_len = sti->sti_laddr_len;
1906 ASSERT(sn_len <= sizeof (short) +
1907 sizeof (psi->si_laddr_sun_path));
1908
1909 psi->si_laddr_family =
1910 sti->sti_laddr_sa->sa_family;
1911 if (sn_len != 0) {
1912 /* AF_UNIX socket names are NULL terminated */
1913 (void) strncpy(psi->si_laddr_sun_path,
1914 sti->sti_laddr_sa->sa_data,
1915 sizeof (psi->si_laddr_sun_path));
1916 sn_len = strlen(psi->si_laddr_sun_path);
1917 }
1918 psi->si_laddr_sun_path[sn_len] = 0;
1919 }
1920
1921 if (sti->sti_faddr_sa != NULL) {
1922 ASSERT(sti->sti_faddr_sa->sa_data != NULL);
1923 sn_len = sti->sti_faddr_len;
1924 ASSERT(sn_len <= sizeof (short) +
1925 sizeof (psi->si_faddr_sun_path));
1926
1927 psi->si_faddr_family =
1928 sti->sti_faddr_sa->sa_family;
1929 if (sn_len != 0) {
1930 (void) strncpy(psi->si_faddr_sun_path,
1931 sti->sti_faddr_sa->sa_data,
1932 sizeof (psi->si_faddr_sun_path));
1933 sn_len = strlen(psi->si_faddr_sun_path);
1934 }
1935 psi->si_faddr_sun_path[sn_len] = 0;
1936 }
1937
1938 mutex_exit(&so->so_lock);
1939
1940 (void) snprintf(psi->si_son_straddr,
1941 sizeof (psi->si_son_straddr), "%p", (void *)so);
1942 (void) snprintf(psi->si_lvn_straddr,
1943 sizeof (psi->si_lvn_straddr), "%p",
1944 (void *)sti->sti_ux_laddr.soua_vp);
1945 (void) snprintf(psi->si_fvn_straddr,
1946 sizeof (psi->si_fvn_straddr), "%p",
1947 (void *)sti->sti_ux_faddr.soua_vp);
1948
1949 ns++;
1950 psi++;
1951 }
1952
1953 ksp->ks_ndata = ns;
1954 return (0);
1955 }
1956
1957 ssize_t
soreadfile(file_t * fp,uchar_t * buf,u_offset_t fileoff,int * err,size_t size)1958 soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size)
1959 {
1960 struct uio auio;
1961 struct iovec aiov[1];
1962 register vnode_t *vp;
1963 int ioflag, rwflag;
1964 ssize_t cnt;
1965 int error = 0;
1966 int iovcnt = 0;
1967 short fflag;
1968
1969 vp = fp->f_vnode;
1970 fflag = fp->f_flag;
1971
1972 rwflag = 0;
1973 aiov[0].iov_base = (caddr_t)buf;
1974 aiov[0].iov_len = size;
1975 iovcnt = 1;
1976 cnt = (ssize_t)size;
1977 (void) VOP_RWLOCK(vp, rwflag, NULL);
1978
1979 auio.uio_loffset = fileoff;
1980 auio.uio_iov = aiov;
1981 auio.uio_iovcnt = iovcnt;
1982 auio.uio_resid = cnt;
1983 auio.uio_segflg = UIO_SYSSPACE;
1984 auio.uio_llimit = MAXOFFSET_T;
1985 auio.uio_fmode = fflag;
1986 auio.uio_extflg = UIO_COPY_CACHED;
1987
1988 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1989
1990 /* If read sync is not asked for, filter sync flags */
1991 if ((ioflag & FRSYNC) == 0)
1992 ioflag &= ~(FSYNC|FDSYNC);
1993 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1994 cnt -= auio.uio_resid;
1995
1996 VOP_RWUNLOCK(vp, rwflag, NULL);
1997
1998 if (error == EINTR && cnt != 0)
1999 error = 0;
2000
2001 if (error != 0) {
2002 *err = error;
2003 return (0);
2004 } else {
2005 *err = 0;
2006 return (cnt);
2007 }
2008 }
2009
2010 int
so_copyin(const void * from,void * to,size_t size,int fromkernel)2011 so_copyin(const void *from, void *to, size_t size, int fromkernel)
2012 {
2013 if (fromkernel) {
2014 bcopy(from, to, size);
2015 return (0);
2016 }
2017 return (xcopyin(from, to, size));
2018 }
2019
2020 int
so_copyout(const void * from,void * to,size_t size,int tokernel)2021 so_copyout(const void *from, void *to, size_t size, int tokernel)
2022 {
2023 if (tokernel) {
2024 bcopy(from, to, size);
2025 return (0);
2026 }
2027 return (xcopyout(from, to, size));
2028 }
2029