1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
24 * Copyright (c) 2013 by Delphix. All rights reserved.
25 * Copyright (c) 2017 Joyent Inc
26 * Copyright 2019 Nexenta by DDN, Inc.
27 * Copyright 2021 Racktop Systems, Inc.
28 */
29
30 /*
31 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
32 * All rights reserved.
33 * Use is subject to license terms.
34 */
35
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/systm.h>
39 #include <sys/cred.h>
40 #include <sys/proc.h>
41 #include <sys/user.h>
42 #include <sys/buf.h>
43 #include <sys/vfs.h>
44 #include <sys/vnode.h>
45 #include <sys/pathname.h>
46 #include <sys/uio.h>
47 #include <sys/file.h>
48 #include <sys/stat.h>
49 #include <sys/errno.h>
50 #include <sys/socket.h>
51 #include <sys/sysmacros.h>
52 #include <sys/siginfo.h>
53 #include <sys/tiuser.h>
54 #include <sys/statvfs.h>
55 #include <sys/stream.h>
56 #include <sys/strsun.h>
57 #include <sys/strsubr.h>
58 #include <sys/stropts.h>
59 #include <sys/timod.h>
60 #include <sys/t_kuser.h>
61 #include <sys/kmem.h>
62 #include <sys/kstat.h>
63 #include <sys/dirent.h>
64 #include <sys/cmn_err.h>
65 #include <sys/debug.h>
66 #include <sys/unistd.h>
67 #include <sys/vtrace.h>
68 #include <sys/mode.h>
69 #include <sys/acl.h>
70 #include <sys/sdt.h>
71 #include <sys/debug.h>
72
73 #include <rpc/types.h>
74 #include <rpc/auth.h>
75 #include <rpc/auth_unix.h>
76 #include <rpc/auth_des.h>
77 #include <rpc/svc.h>
78 #include <rpc/xdr.h>
79 #include <rpc/rpc_rdma.h>
80
81 #include <nfs/nfs.h>
82 #include <nfs/export.h>
83 #include <nfs/nfssys.h>
84 #include <nfs/nfs_clnt.h>
85 #include <nfs/nfs_acl.h>
86 #include <nfs/nfs_log.h>
87 #include <nfs/lm.h>
88 #include <nfs/nfs_dispatch.h>
89 #include <nfs/nfs4_drc.h>
90
91 #include <sys/modctl.h>
92 #include <sys/cladm.h>
93 #include <sys/clconf.h>
94
95 #include <sys/tsol/label.h>
96
97 #define MAXHOST 32
98 const char *kinet_ntop6(uchar_t *, char *, size_t);
99
100 /*
101 * Module linkage information.
102 */
103
104 static struct modlmisc modlmisc = {
105 &mod_miscops, "NFS server module"
106 };
107
108 static struct modlinkage modlinkage = {
109 MODREV_1, (void *)&modlmisc, NULL
110 };
111
112 zone_key_t nfssrv_zone_key;
113 list_t nfssrv_globals_list;
114 krwlock_t nfssrv_globals_rwl;
115
116 kmem_cache_t *nfs_xuio_cache;
117 int nfs_loaned_buffers = 0;
118
119 /* array of paths passed-in from nfsd command-line; stored in nvlist */
120 char **rfs4_dss_newpaths;
121 uint_t rfs4_dss_numnewpaths;
122
123 /* nvlists of all DSS paths: current, and before last warmstart */
124 nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
125
126 int
_init(void)127 _init(void)
128 {
129 int status;
130
131 nfs_srvinit();
132
133 status = mod_install((struct modlinkage *)&modlinkage);
134 if (status != 0) {
135 /*
136 * Could not load module, cleanup previous
137 * initialization work.
138 */
139 nfs_srvfini();
140
141 return (status);
142 }
143
144 /*
145 * Initialise some placeholders for nfssys() calls. These have
146 * to be declared by the nfs module, since that handles nfssys()
147 * calls - also used by NFS clients - but are provided by this
148 * nfssrv module. These also then serve as confirmation to the
149 * relevant code in nfs that nfssrv has been loaded, as they're
150 * initially NULL.
151 */
152 nfs_srv_quiesce_func = nfs_srv_quiesce_all;
153 nfs_srv_dss_func = rfs4_dss_setpaths;
154
155 /* setup DSS paths here; must be done before initial server startup */
156 rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
157
158 /* initialize the copy reduction caches */
159
160 nfs_xuio_cache = kmem_cache_create("nfs_xuio_cache",
161 sizeof (nfs_xuio_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
162
163 return (status);
164 }
165
/*
 * Module unload entry point.
 *
 * Always refuses with EBUSY, so the module is never unloaded through
 * this path.
 */
int
_fini(void)
{
	return (EBUSY);
}
171
172 int
_info(struct modinfo * modinfop)173 _info(struct modinfo *modinfop)
174 {
175 return (mod_info(&modlinkage, modinfop));
176 }
177
/*
 * PUBLICFH_CHECK() checks if the dispatch routine supports
 * RPC_PUBLICFH_OK, if the filesystem is exported public, and if the
 * incoming request is using the public filehandle. The check duplicates
 * the exportmatch() call done in checkexport(), and we should consider
 * modifying those routines to avoid the duplication. For now, we optimize
 * by calling exportmatch() only after checking that the dispatch routine
 * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
 * public (i.e., not the placeholder).
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments (e.g. "table + i" or "&local") bind as the caller intends.
 * Note that ne, disp and exi may still be evaluated more than once.
 */
#define	PUBLICFH_CHECK(ne, disp, exi, fsid, xfid)			\
	(((disp)->dis_flags & RPC_PUBLICFH_OK) &&			\
	(((exi)->exi_export.ex_flags & EX_PUBLIC) ||			\
	((exi) == (ne)->exi_public &&					\
	exportmatch((ne)->exi_root, (fsid), (xfid)))))
193
194 static void nfs_srv_shutdown_all(int);
195 static void rfs4_server_start(nfs_globals_t *, int);
196 static void nullfree(void);
197 static void rfs_dispatch(struct svc_req *, SVCXPRT *);
198 static void acl_dispatch(struct svc_req *, SVCXPRT *);
199 static int checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
200 bool_t, bool_t *);
201 static char *client_name(struct svc_req *req);
202 static char *client_addr(struct svc_req *req, char *buf);
203 extern bool_t sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
204 static void *nfs_server_zone_init(zoneid_t);
205 static void nfs_server_zone_fini(zoneid_t, void *);
206 static void nfs_server_zone_shutdown(zoneid_t, void *);
207
/*
 * Duplicate the transport's xp_rtaddr netbuf into *nb.  The new buffer is
 * allocated with KM_SLEEP and sized by the current address length (len),
 * not maxlen.  The exi argument is accepted but not used by the expansion.
 */
#define	NFSLOG_COPY_NETBUF(exi, xprt, nb)	{			\
	(nb)->len = (xprt)->xp_rtaddr.len;				\
	(nb)->maxlen = (xprt)->xp_rtaddr.maxlen;			\
	(nb)->buf = kmem_alloc((nb)->len, KM_SLEEP);			\
	bcopy((xprt)->xp_rtaddr.buf, (nb)->buf, (nb)->len);		\
}
214
/*
 * Public Filehandle common nfs routines
 * (both are private to this file and defined further down).
 */
static int	MCLpath(char **);
static void	URLparse(char *);
220
221 /*
222 * NFS callout table.
223 * This table is used by svc_getreq() to dispatch a request with
224 * a given prog/vers pair to an appropriate service provider
225 * dispatch routine.
226 *
227 * NOTE: ordering is relied upon below when resetting the version min/max
228 * for NFS_PROGRAM. Careful, if this is ever changed.
229 */
230 static SVC_CALLOUT __nfs_sc_clts[] = {
231 { NFS_PROGRAM, NFS_VERSMIN, NFS_VERSMAX, rfs_dispatch },
232 { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX, acl_dispatch }
233 };
234
235 static SVC_CALLOUT_TABLE nfs_sct_clts = {
236 sizeof (__nfs_sc_clts) / sizeof (__nfs_sc_clts[0]), FALSE,
237 __nfs_sc_clts
238 };
239
240 static SVC_CALLOUT __nfs_sc_cots[] = {
241 { NFS_PROGRAM, NFS_VERSMIN, NFS_VERSMAX, rfs_dispatch },
242 { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX, acl_dispatch }
243 };
244
245 static SVC_CALLOUT_TABLE nfs_sct_cots = {
246 sizeof (__nfs_sc_cots) / sizeof (__nfs_sc_cots[0]), FALSE, __nfs_sc_cots
247 };
248
249 static SVC_CALLOUT __nfs_sc_rdma[] = {
250 { NFS_PROGRAM, NFS_VERSMIN, NFS_VERSMAX, rfs_dispatch },
251 { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX, acl_dispatch }
252 };
253
254 static SVC_CALLOUT_TABLE nfs_sct_rdma = {
255 sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
256 };
257
258 /*
259 * DSS: distributed stable storage
260 * lists of all DSS paths: current, and before last warmstart
261 */
262 nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
263
264 bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);
265
266 /*
267 * Stash NFS zone globals in TSD to avoid some lock contention
268 * from frequent zone_getspecific calls.
269 */
270 static uint_t nfs_server_tsd_key;
271
272 nfs_globals_t *
nfs_srv_getzg(void)273 nfs_srv_getzg(void)
274 {
275 nfs_globals_t *ng;
276
277 ng = tsd_get(nfs_server_tsd_key);
278 if (ng == NULL) {
279 ng = zone_getspecific(nfssrv_zone_key, curzone);
280 (void) tsd_set(nfs_server_tsd_key, ng);
281 }
282
283 return (ng);
284 }
285
286 /*
287 * Will be called at the point the server pool is being unregistered
288 * from the pool list. From that point onwards, the pool is waiting
289 * to be drained and as such the server state is stale and pertains
290 * to the old instantiation of the NFS server pool.
291 */
292 void
nfs_srv_offline(void)293 nfs_srv_offline(void)
294 {
295 nfs_globals_t *ng;
296
297 ng = nfs_srv_getzg();
298
299 mutex_enter(&ng->nfs_server_upordown_lock);
300 if (ng->nfs_server_upordown == NFS_SERVER_RUNNING) {
301 ng->nfs_server_upordown = NFS_SERVER_OFFLINE;
302 }
303 mutex_exit(&ng->nfs_server_upordown_lock);
304 }
305
/*
 * Will be called at the point the server pool is being destroyed so
 * all transports have been closed and no service threads are in
 * existence.
 *
 * If we quiesce the server, we're shutting it down without destroying the
 * server state. This allows it to warm start subsequently.
 *
 * This entry point requests the full (non-quiescing) shutdown.
 */
void
nfs_srv_stop_all(void)
{
	nfs_srv_shutdown_all(0);
}
320
/*
 * This alternative shutdown routine can be requested via nfssys().
 * It asks nfs_srv_shutdown_all() for the quiescing variant.
 */
void
nfs_srv_quiesce_all(void)
{
	nfs_srv_shutdown_all(1);
}
330
331 static void
nfs_srv_shutdown_all(int quiesce)332 nfs_srv_shutdown_all(int quiesce)
333 {
334 nfs_globals_t *ng = nfs_srv_getzg();
335
336 mutex_enter(&ng->nfs_server_upordown_lock);
337 if (quiesce) {
338 if (ng->nfs_server_upordown == NFS_SERVER_RUNNING ||
339 ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
340 ng->nfs_server_upordown = NFS_SERVER_QUIESCED;
341 cv_signal(&ng->nfs_server_upordown_cv);
342
343 /* reset DSS state */
344 rfs4_dss_numnewpaths = 0;
345 rfs4_dss_newpaths = NULL;
346
347 cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
348 "NFSv4 state has been preserved");
349 }
350 } else {
351 if (ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
352 ng->nfs_server_upordown = NFS_SERVER_STOPPING;
353 mutex_exit(&ng->nfs_server_upordown_lock);
354 rfs4_state_zone_fini();
355 rfs4_fini_drc();
356 mutex_enter(&ng->nfs_server_upordown_lock);
357 ng->nfs_server_upordown = NFS_SERVER_STOPPED;
358
359 /* reset DSS state */
360 rfs4_dss_numnewpaths = 0;
361 rfs4_dss_newpaths = NULL;
362
363 cv_signal(&ng->nfs_server_upordown_cv);
364 }
365 }
366 mutex_exit(&ng->nfs_server_upordown_lock);
367 }
368
369 static int
nfs_srv_set_sc_versions(struct file * fp,SVC_CALLOUT_TABLE ** sctpp,rpcvers_t versmin,rpcvers_t versmax)370 nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
371 rpcvers_t versmin, rpcvers_t versmax)
372 {
373 struct strioctl strioc;
374 struct T_info_ack tinfo;
375 int error, retval;
376
377 /*
378 * Find out what type of transport this is.
379 */
380 strioc.ic_cmd = TI_GETINFO;
381 strioc.ic_timout = -1;
382 strioc.ic_len = sizeof (tinfo);
383 strioc.ic_dp = (char *)&tinfo;
384 tinfo.PRIM_type = T_INFO_REQ;
385
386 error = strioctl(fp->f_vnode, I_STR, (intptr_t)&strioc, 0, K_TO_K,
387 CRED(), &retval);
388 if (error || retval)
389 return (error);
390
391 /*
392 * Based on our query of the transport type...
393 *
394 * Reset the min/max versions based on the caller's request
395 * NOTE: This assumes that NFS_PROGRAM is first in the array!!
396 * And the second entry is the NFS_ACL_PROGRAM.
397 */
398 switch (tinfo.SERV_type) {
399 case T_CLTS:
400 if (versmax == NFS_V4)
401 return (EINVAL);
402 __nfs_sc_clts[0].sc_versmin = versmin;
403 __nfs_sc_clts[0].sc_versmax = versmax;
404 __nfs_sc_clts[1].sc_versmin = versmin;
405 __nfs_sc_clts[1].sc_versmax = versmax;
406 *sctpp = &nfs_sct_clts;
407 break;
408 case T_COTS:
409 case T_COTS_ORD:
410 __nfs_sc_cots[0].sc_versmin = versmin;
411 __nfs_sc_cots[0].sc_versmax = versmax;
412 /* For the NFS_ACL program, check the max version */
413 if (versmax > NFS_ACL_VERSMAX)
414 versmax = NFS_ACL_VERSMAX;
415 __nfs_sc_cots[1].sc_versmin = versmin;
416 __nfs_sc_cots[1].sc_versmax = versmax;
417 *sctpp = &nfs_sct_cots;
418 break;
419 default:
420 error = EINVAL;
421 }
422
423 return (error);
424 }
425
426 /*
427 * NFS Server system call.
428 * Does all of the work of running a NFS server.
429 * uap->fd is the fd of an open transport provider
430 */
431 int
nfs_svc(struct nfs_svc_args * arg,model_t model)432 nfs_svc(struct nfs_svc_args *arg, model_t model)
433 {
434 nfs_globals_t *ng;
435 file_t *fp;
436 SVCMASTERXPRT *xprt;
437 int error;
438 int readsize;
439 char buf[KNC_STRSIZE];
440 size_t len;
441 STRUCT_HANDLE(nfs_svc_args, uap);
442 struct netbuf addrmask;
443 SVC_CALLOUT_TABLE *sctp = NULL;
444
445 ng = nfs_srv_getzg();
446 STRUCT_SET_HANDLE(uap, model, arg);
447
448 /* Check privileges in nfssys() */
449
450 if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
451 return (EBADF);
452
453 /* Setup global file handle in nfs_export */
454 if ((error = nfs_export_get_rootfh(ng)) != 0)
455 return (error);
456
457 /*
458 * Set read buffer size to rsize
459 * and add room for RPC headers.
460 */
461 readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
462 if (readsize < RPC_MAXDATASIZE)
463 readsize = RPC_MAXDATASIZE;
464
465 error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
466 KNC_STRSIZE, &len);
467 if (error) {
468 releasef(STRUCT_FGET(uap, fd));
469 return (error);
470 }
471
472 addrmask.len = STRUCT_FGET(uap, addrmask.len);
473 addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
474 addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
475 error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
476 addrmask.len);
477 if (error) {
478 releasef(STRUCT_FGET(uap, fd));
479 kmem_free(addrmask.buf, addrmask.maxlen);
480 return (error);
481 }
482
483 ng->nfs_versmin = STRUCT_FGET(uap, nfs_versmin);
484 ng->nfs_versmax = STRUCT_FGET(uap, nfs_versmax);
485
486 /* Double check the vers min/max ranges */
487 if ((ng->nfs_versmin > ng->nfs_versmax) ||
488 (ng->nfs_versmin < NFS_SRV_VERS_MIN) ||
489 (ng->nfs_versmax > NFS_SRV_VERS_MAX)) {
490 cmn_err(CE_NOTE, "%s: bad min (%u) or max (%u) version number",
491 "NFS", ng->nfs_versmin, ng->nfs_versmax);
492 ng->nfs_versmin = NFS_SRV_VERSMIN_DEFAULT;
493 ng->nfs_versmax = NFS_SRV_VERSMAX_DEFAULT;
494 }
495
496 error = nfs_srv_set_sc_versions(fp, &sctp,
497 NFS_PROT_VERSION(ng->nfs_versmin),
498 NFS_PROT_VERSION(ng->nfs_versmax));
499 if (error != 0) {
500 releasef(STRUCT_FGET(uap, fd));
501 kmem_free(addrmask.buf, addrmask.maxlen);
502 return (error);
503 }
504
505 /* Initialize nfsv4 server */
506 if (NFS_PROT_VERSION(ng->nfs_versmax) == NFS_V4)
507 rfs4_server_start(ng, STRUCT_FGET(uap, delegation));
508
509 /* Create a transport handle. */
510 error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
511 sctp, NULL, NFS_SVCPOOL_ID, TRUE);
512
513 if (error)
514 kmem_free(addrmask.buf, addrmask.maxlen);
515
516 releasef(STRUCT_FGET(uap, fd));
517
518 /* HA-NFSv4: save the cluster nodeid */
519 if (cluster_bootflags & CLUSTER_BOOTED)
520 lm_global_nlmid = clconf_get_nodeid();
521
522 return (error);
523 }
524
525 static void
rfs4_server_start(nfs_globals_t * ng,int nfs4_srv_delegation)526 rfs4_server_start(nfs_globals_t *ng, int nfs4_srv_delegation)
527 {
528 nfs4_minor_t nfs4_minor_max;
529
530 nfs4_minor_max = NFS_PROT_V4_MINORVERSION(ng->nfs_versmax);
531
532 /*
533 * Determine if the server has previously been "started" and
534 * if not, do the per instance initialization
535 */
536 mutex_enter(&ng->nfs_server_upordown_lock);
537
538 if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
539 /* Do we need to stop and wait on the previous server? */
540 while (ng->nfs_server_upordown == NFS_SERVER_STOPPING ||
541 ng->nfs_server_upordown == NFS_SERVER_OFFLINE)
542 cv_wait(&ng->nfs_server_upordown_cv,
543 &ng->nfs_server_upordown_lock);
544
545 if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
546 (void) svc_pool_control(NFS_SVCPOOL_ID,
547 SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
548 (void) svc_pool_control(NFS_SVCPOOL_ID,
549 SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
550
551 rfs4_do_server_start(ng->nfs_server_upordown,
552 nfs4_srv_delegation, nfs4_minor_max,
553 cluster_bootflags & CLUSTER_BOOTED);
554
555 ng->nfs_server_upordown = NFS_SERVER_RUNNING;
556 }
557 cv_signal(&ng->nfs_server_upordown_cv);
558 }
559 mutex_exit(&ng->nfs_server_upordown_lock);
560 }
561
562 /*
563 * If RDMA device available,
564 * start RDMA listener.
565 */
566 int
rdma_start(struct rdma_svc_args * rsa)567 rdma_start(struct rdma_svc_args *rsa)
568 {
569 nfs_globals_t *ng;
570 int error;
571 rdma_xprt_group_t started_rdma_xprts;
572 rdma_stat stat;
573 int svc_state = 0;
574
575 /* Double check the vers min/max ranges */
576 if ((rsa->nfs_versmin > rsa->nfs_versmax) ||
577 (rsa->nfs_versmin < NFS_SRV_VERS_MIN) ||
578 (rsa->nfs_versmax > NFS_SRV_VERS_MAX)) {
579 rsa->nfs_versmin = NFS_SRV_VERSMIN_DEFAULT;
580 rsa->nfs_versmax = NFS_SRV_VERSMAX_DEFAULT;
581 }
582
583 ng = nfs_srv_getzg();
584 ng->nfs_versmin = rsa->nfs_versmin;
585 ng->nfs_versmax = rsa->nfs_versmax;
586
587 /* Set the versions in the callout table */
588 __nfs_sc_rdma[0].sc_versmin = NFS_PROT_VERSION(rsa->nfs_versmin);
589 __nfs_sc_rdma[0].sc_versmax = NFS_PROT_VERSION(rsa->nfs_versmax);
590 /* For the NFS_ACL program, check the max version */
591 __nfs_sc_rdma[1].sc_versmin = NFS_PROT_VERSION(rsa->nfs_versmin);
592 __nfs_sc_rdma[1].sc_versmax =
593 MIN(NFS_PROT_VERSION(rsa->nfs_versmax), NFS_ACL_VERSMAX);
594
595 /* Initialize nfsv4 server */
596 if (NFS_PROT_VERSION(rsa->nfs_versmax) == NFS_V4)
597 rfs4_server_start(ng, rsa->delegation);
598
599 started_rdma_xprts.rtg_count = 0;
600 started_rdma_xprts.rtg_listhead = NULL;
601 started_rdma_xprts.rtg_poolid = rsa->poolid;
602
603 restart:
604 error = svc_rdma_kcreate(rsa->netid, &nfs_sct_rdma, rsa->poolid,
605 &started_rdma_xprts);
606
607 svc_state = !error;
608
609 while (!error) {
610
611 /*
612 * wait till either interrupted by a signal on
613 * nfs service stop/restart or signalled by a
614 * rdma attach/detatch.
615 */
616
617 stat = rdma_kwait();
618
619 /*
620 * stop services if running -- either on a HCA detach event
621 * or if the nfs service is stopped/restarted.
622 */
623
624 if ((stat == RDMA_HCA_DETACH || stat == RDMA_INTR) &&
625 svc_state) {
626 rdma_stop(&started_rdma_xprts);
627 svc_state = 0;
628 }
629
630 /*
631 * nfs service stop/restart, break out of the
632 * wait loop and return;
633 */
634 if (stat == RDMA_INTR)
635 return (0);
636
637 /*
638 * restart stopped services on a HCA attach event
639 * (if not already running)
640 */
641
642 if ((stat == RDMA_HCA_ATTACH) && (svc_state == 0))
643 goto restart;
644
645 /*
646 * loop until a nfs service stop/restart
647 */
648 }
649
650 return (error);
651 }
652
653 /* ARGSUSED */
654 void
rpc_null(caddr_t * argp,caddr_t * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)655 rpc_null(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
656 struct svc_req *req, cred_t *cr, bool_t ro)
657 {
658 }
659
660 /* ARGSUSED */
661 void
rpc_null_v3(caddr_t * argp,caddr_t * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)662 rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
663 struct svc_req *req, cred_t *cr, bool_t ro)
664 {
665 DTRACE_NFSV3_4(op__null__start, struct svc_req *, req,
666 cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
667 DTRACE_NFSV3_4(op__null__done, struct svc_req *, req,
668 cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
669 }
670
671 /* ARGSUSED */
672 static void
rfs_error(caddr_t * argp,caddr_t * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)673 rfs_error(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
674 struct svc_req *req, cred_t *cr, bool_t ro)
675 {
676 /* return (EOPNOTSUPP); */
677 }
678
/*
 * No-op result-free routine, used by dispatch table entries whose
 * results need no cleanup.
 */
static void
nullfree(void)
{
	/* nothing to free */
}
683
/*
 * Printable names of the NFS version 2 procedures, indexed by procedure
 * number (same numbering as the comments in rfsdisptab_v2).
 */
static char *rfscallnames_v2[] = {
	[0] = "RFS2_NULL",
	[1] = "RFS2_GETATTR",
	[2] = "RFS2_SETATTR",
	[3] = "RFS2_ROOT",
	[4] = "RFS2_LOOKUP",
	[5] = "RFS2_READLINK",
	[6] = "RFS2_READ",
	[7] = "RFS2_WRITECACHE",
	[8] = "RFS2_WRITE",
	[9] = "RFS2_CREATE",
	[10] = "RFS2_REMOVE",
	[11] = "RFS2_RENAME",
	[12] = "RFS2_LINK",
	[13] = "RFS2_SYMLINK",
	[14] = "RFS2_MKDIR",
	[15] = "RFS2_RMDIR",
	[16] = "RFS2_READDIR",
	[17] = "RFS2_STATFS"
};
704
705 static struct rpcdisp rfsdisptab_v2[] = {
706 /*
707 * NFS VERSION 2
708 */
709
710 /* RFS_NULL = 0 */
711 {rpc_null,
712 xdr_void, NULL_xdrproc_t, 0,
713 xdr_void, NULL_xdrproc_t, 0,
714 nullfree, RPC_IDEMPOTENT,
715 0},
716
717 /* RFS_GETATTR = 1 */
718 {rfs_getattr,
719 xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
720 xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
721 nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
722 rfs_getattr_getfh},
723
724 /* RFS_SETATTR = 2 */
725 {rfs_setattr,
726 xdr_saargs, NULL_xdrproc_t, sizeof (struct nfssaargs),
727 xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
728 nullfree, RPC_MAPRESP,
729 rfs_setattr_getfh},
730
731 /* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
732 {rfs_error,
733 xdr_void, NULL_xdrproc_t, 0,
734 xdr_void, NULL_xdrproc_t, 0,
735 nullfree, RPC_IDEMPOTENT,
736 0},
737
738 /* RFS_LOOKUP = 4 */
739 {rfs_lookup,
740 xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
741 xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
742 nullfree, RPC_IDEMPOTENT|RPC_MAPRESP|RPC_PUBLICFH_OK,
743 rfs_lookup_getfh},
744
745 /* RFS_READLINK = 5 */
746 {rfs_readlink,
747 xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
748 xdr_rdlnres, NULL_xdrproc_t, sizeof (struct nfsrdlnres),
749 rfs_rlfree, RPC_IDEMPOTENT,
750 rfs_readlink_getfh},
751
752 /* RFS_READ = 6 */
753 {rfs_read,
754 xdr_readargs, NULL_xdrproc_t, sizeof (struct nfsreadargs),
755 xdr_rdresult, NULL_xdrproc_t, sizeof (struct nfsrdresult),
756 rfs_rdfree, RPC_IDEMPOTENT,
757 rfs_read_getfh},
758
759 /* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
760 {rfs_error,
761 xdr_void, NULL_xdrproc_t, 0,
762 xdr_void, NULL_xdrproc_t, 0,
763 nullfree, RPC_IDEMPOTENT,
764 0},
765
766 /* RFS_WRITE = 8 */
767 {rfs_write,
768 xdr_writeargs, NULL_xdrproc_t, sizeof (struct nfswriteargs),
769 xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
770 nullfree, RPC_MAPRESP,
771 rfs_write_getfh},
772
773 /* RFS_CREATE = 9 */
774 {rfs_create,
775 xdr_creatargs, NULL_xdrproc_t, sizeof (struct nfscreatargs),
776 xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
777 nullfree, RPC_MAPRESP,
778 rfs_create_getfh},
779
780 /* RFS_REMOVE = 10 */
781 {rfs_remove,
782 xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
783 #ifdef _LITTLE_ENDIAN
784 xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
785 #else
786 xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
787 #endif
788 nullfree, RPC_MAPRESP,
789 rfs_remove_getfh},
790
791 /* RFS_RENAME = 11 */
792 {rfs_rename,
793 xdr_rnmargs, NULL_xdrproc_t, sizeof (struct nfsrnmargs),
794 #ifdef _LITTLE_ENDIAN
795 xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
796 #else
797 xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
798 #endif
799 nullfree, RPC_MAPRESP,
800 rfs_rename_getfh},
801
802 /* RFS_LINK = 12 */
803 {rfs_link,
804 xdr_linkargs, NULL_xdrproc_t, sizeof (struct nfslinkargs),
805 #ifdef _LITTLE_ENDIAN
806 xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
807 #else
808 xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
809 #endif
810 nullfree, RPC_MAPRESP,
811 rfs_link_getfh},
812
813 /* RFS_SYMLINK = 13 */
814 {rfs_symlink,
815 xdr_slargs, NULL_xdrproc_t, sizeof (struct nfsslargs),
816 #ifdef _LITTLE_ENDIAN
817 xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
818 #else
819 xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
820 #endif
821 nullfree, RPC_MAPRESP,
822 rfs_symlink_getfh},
823
824 /* RFS_MKDIR = 14 */
825 {rfs_mkdir,
826 xdr_creatargs, NULL_xdrproc_t, sizeof (struct nfscreatargs),
827 xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
828 nullfree, RPC_MAPRESP,
829 rfs_mkdir_getfh},
830
831 /* RFS_RMDIR = 15 */
832 {rfs_rmdir,
833 xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
834 #ifdef _LITTLE_ENDIAN
835 xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
836 #else
837 xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
838 #endif
839 nullfree, RPC_MAPRESP,
840 rfs_rmdir_getfh},
841
842 /* RFS_READDIR = 16 */
843 {rfs_readdir,
844 xdr_rddirargs, NULL_xdrproc_t, sizeof (struct nfsrddirargs),
845 xdr_putrddirres, NULL_xdrproc_t, sizeof (struct nfsrddirres),
846 rfs_rddirfree, RPC_IDEMPOTENT,
847 rfs_readdir_getfh},
848
849 /* RFS_STATFS = 17 */
850 {rfs_statfs,
851 xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
852 xdr_statfs, xdr_faststatfs, sizeof (struct nfsstatfs),
853 nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
854 rfs_statfs_getfh},
855 };
856
/*
 * Printable names of the NFS version 3 procedures, indexed by procedure
 * number (same numbering as the comments in rfsdisptab_v3).
 */
static char *rfscallnames_v3[] = {
	[0] = "RFS3_NULL",
	[1] = "RFS3_GETATTR",
	[2] = "RFS3_SETATTR",
	[3] = "RFS3_LOOKUP",
	[4] = "RFS3_ACCESS",
	[5] = "RFS3_READLINK",
	[6] = "RFS3_READ",
	[7] = "RFS3_WRITE",
	[8] = "RFS3_CREATE",
	[9] = "RFS3_MKDIR",
	[10] = "RFS3_SYMLINK",
	[11] = "RFS3_MKNOD",
	[12] = "RFS3_REMOVE",
	[13] = "RFS3_RMDIR",
	[14] = "RFS3_RENAME",
	[15] = "RFS3_LINK",
	[16] = "RFS3_READDIR",
	[17] = "RFS3_READDIRPLUS",
	[18] = "RFS3_FSSTAT",
	[19] = "RFS3_FSINFO",
	[20] = "RFS3_PATHCONF",
	[21] = "RFS3_COMMIT"
};
881
882 static struct rpcdisp rfsdisptab_v3[] = {
883 /*
884 * NFS VERSION 3
885 */
886
887 /* RFS_NULL = 0 */
888 {rpc_null_v3,
889 xdr_void, NULL_xdrproc_t, 0,
890 xdr_void, NULL_xdrproc_t, 0,
891 nullfree, RPC_IDEMPOTENT,
892 0},
893
894 /* RFS3_GETATTR = 1 */
895 {rfs3_getattr,
896 xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (GETATTR3args),
897 xdr_GETATTR3res, NULL_xdrproc_t, sizeof (GETATTR3res),
898 nullfree, (RPC_IDEMPOTENT | RPC_ALLOWANON),
899 rfs3_getattr_getfh},
900
901 /* RFS3_SETATTR = 2 */
902 {rfs3_setattr,
903 xdr_SETATTR3args, NULL_xdrproc_t, sizeof (SETATTR3args),
904 xdr_SETATTR3res, NULL_xdrproc_t, sizeof (SETATTR3res),
905 nullfree, 0,
906 rfs3_setattr_getfh},
907
908 /* RFS3_LOOKUP = 3 */
909 {rfs3_lookup,
910 xdr_diropargs3, NULL_xdrproc_t, sizeof (LOOKUP3args),
911 xdr_LOOKUP3res, NULL_xdrproc_t, sizeof (LOOKUP3res),
912 nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
913 rfs3_lookup_getfh},
914
915 /* RFS3_ACCESS = 4 */
916 {rfs3_access,
917 xdr_ACCESS3args, NULL_xdrproc_t, sizeof (ACCESS3args),
918 xdr_ACCESS3res, NULL_xdrproc_t, sizeof (ACCESS3res),
919 nullfree, RPC_IDEMPOTENT,
920 rfs3_access_getfh},
921
922 /* RFS3_READLINK = 5 */
923 {rfs3_readlink,
924 xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (READLINK3args),
925 xdr_READLINK3res, NULL_xdrproc_t, sizeof (READLINK3res),
926 rfs3_readlink_free, RPC_IDEMPOTENT,
927 rfs3_readlink_getfh},
928
929 /* RFS3_READ = 6 */
930 {rfs3_read,
931 xdr_READ3args, NULL_xdrproc_t, sizeof (READ3args),
932 xdr_READ3res, NULL_xdrproc_t, sizeof (READ3res),
933 rfs3_read_free, RPC_IDEMPOTENT,
934 rfs3_read_getfh},
935
936 /* RFS3_WRITE = 7 */
937 {rfs3_write,
938 xdr_WRITE3args, NULL_xdrproc_t, sizeof (WRITE3args),
939 xdr_WRITE3res, NULL_xdrproc_t, sizeof (WRITE3res),
940 nullfree, 0,
941 rfs3_write_getfh},
942
943 /* RFS3_CREATE = 8 */
944 {rfs3_create,
945 xdr_CREATE3args, NULL_xdrproc_t, sizeof (CREATE3args),
946 xdr_CREATE3res, NULL_xdrproc_t, sizeof (CREATE3res),
947 nullfree, 0,
948 rfs3_create_getfh},
949
950 /* RFS3_MKDIR = 9 */
951 {rfs3_mkdir,
952 xdr_MKDIR3args, NULL_xdrproc_t, sizeof (MKDIR3args),
953 xdr_MKDIR3res, NULL_xdrproc_t, sizeof (MKDIR3res),
954 nullfree, 0,
955 rfs3_mkdir_getfh},
956
957 /* RFS3_SYMLINK = 10 */
958 {rfs3_symlink,
959 xdr_SYMLINK3args, NULL_xdrproc_t, sizeof (SYMLINK3args),
960 xdr_SYMLINK3res, NULL_xdrproc_t, sizeof (SYMLINK3res),
961 nullfree, 0,
962 rfs3_symlink_getfh},
963
964 /* RFS3_MKNOD = 11 */
965 {rfs3_mknod,
966 xdr_MKNOD3args, NULL_xdrproc_t, sizeof (MKNOD3args),
967 xdr_MKNOD3res, NULL_xdrproc_t, sizeof (MKNOD3res),
968 nullfree, 0,
969 rfs3_mknod_getfh},
970
971 /* RFS3_REMOVE = 12 */
972 {rfs3_remove,
973 xdr_diropargs3, NULL_xdrproc_t, sizeof (REMOVE3args),
974 xdr_REMOVE3res, NULL_xdrproc_t, sizeof (REMOVE3res),
975 nullfree, 0,
976 rfs3_remove_getfh},
977
978 /* RFS3_RMDIR = 13 */
979 {rfs3_rmdir,
980 xdr_diropargs3, NULL_xdrproc_t, sizeof (RMDIR3args),
981 xdr_RMDIR3res, NULL_xdrproc_t, sizeof (RMDIR3res),
982 nullfree, 0,
983 rfs3_rmdir_getfh},
984
985 /* RFS3_RENAME = 14 */
986 {rfs3_rename,
987 xdr_RENAME3args, NULL_xdrproc_t, sizeof (RENAME3args),
988 xdr_RENAME3res, NULL_xdrproc_t, sizeof (RENAME3res),
989 nullfree, 0,
990 rfs3_rename_getfh},
991
992 /* RFS3_LINK = 15 */
993 {rfs3_link,
994 xdr_LINK3args, NULL_xdrproc_t, sizeof (LINK3args),
995 xdr_LINK3res, NULL_xdrproc_t, sizeof (LINK3res),
996 nullfree, 0,
997 rfs3_link_getfh},
998
999 /* RFS3_READDIR = 16 */
1000 {rfs3_readdir,
1001 xdr_READDIR3args, NULL_xdrproc_t, sizeof (READDIR3args),
1002 xdr_READDIR3res, NULL_xdrproc_t, sizeof (READDIR3res),
1003 rfs3_readdir_free, RPC_IDEMPOTENT,
1004 rfs3_readdir_getfh},
1005
1006 /* RFS3_READDIRPLUS = 17 */
1007 {rfs3_readdirplus,
1008 xdr_READDIRPLUS3args, NULL_xdrproc_t, sizeof (READDIRPLUS3args),
1009 xdr_READDIRPLUS3res, NULL_xdrproc_t, sizeof (READDIRPLUS3res),
1010 rfs3_readdirplus_free, RPC_AVOIDWORK,
1011 rfs3_readdirplus_getfh},
1012
1013 /* RFS3_FSSTAT = 18 */
1014 {rfs3_fsstat,
1015 xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (FSSTAT3args),
1016 xdr_FSSTAT3res, NULL_xdrproc_t, sizeof (FSSTAT3res),
1017 nullfree, RPC_IDEMPOTENT,
1018 rfs3_fsstat_getfh},
1019
1020 /* RFS3_FSINFO = 19 */
1021 {rfs3_fsinfo,
1022 xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (FSINFO3args),
1023 xdr_FSINFO3res, NULL_xdrproc_t, sizeof (FSINFO3res),
1024 nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON,
1025 rfs3_fsinfo_getfh},
1026
1027 /* RFS3_PATHCONF = 20 */
1028 {rfs3_pathconf,
1029 xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (PATHCONF3args),
1030 xdr_PATHCONF3res, NULL_xdrproc_t, sizeof (PATHCONF3res),
1031 nullfree, RPC_IDEMPOTENT,
1032 rfs3_pathconf_getfh},
1033
1034 /* RFS3_COMMIT = 21 */
1035 {rfs3_commit,
1036 xdr_COMMIT3args, NULL_xdrproc_t, sizeof (COMMIT3args),
1037 xdr_COMMIT3res, NULL_xdrproc_t, sizeof (COMMIT3res),
1038 nullfree, RPC_IDEMPOTENT,
1039 rfs3_commit_getfh},
1040 };
1041
/*
 * Printable names for NFS version 4, indexed by procedure number.
 * Slots 2 through 7 repeat "RFS4_NULL" and slot 8 is "RFS4_CREATE",
 * exactly as the table has always been laid out.
 */
static char *rfscallnames_v4[] = {
	[0] = "RFS4_NULL",
	[1] = "RFS4_COMPOUND",
	[2] = "RFS4_NULL",
	[3] = "RFS4_NULL",
	[4] = "RFS4_NULL",
	[5] = "RFS4_NULL",
	[6] = "RFS4_NULL",
	[7] = "RFS4_NULL",
	[8] = "RFS4_CREATE"
};
1053
1054 static struct rpcdisp rfsdisptab_v4[] = {
1055 /*
1056 * NFS VERSION 4
1057 */
1058
1059 /* RFS_NULL = 0 */
1060 [NFSPROC4_NULL] = {
1061 .dis_proc = NULL,
1062 .dis_xdrargs = xdr_void,
1063 .dis_fastxdrargs = NULL_xdrproc_t,
1064 .dis_argsz = 0,
1065 .dis_xdrres = xdr_void,
1066 .dis_fastxdrres = NULL_xdrproc_t,
1067 .dis_ressz = 0,
1068 .dis_resfree = nullfree,
1069 .dis_flags = RPC_IDEMPOTENT,
1070 .dis_getfh = NULL
1071 },
1072
1073 /* RFS4_compound = 1 */
1074 [NFSPROC4_COMPOUND] = {
1075 .dis_proc = NULL,
1076 .dis_xdrargs = xdr_COMPOUND4args_srv,
1077 .dis_fastxdrargs = NULL_xdrproc_t,
1078 .dis_argsz = sizeof (COMPOUND4args),
1079 .dis_xdrres = xdr_COMPOUND4res_srv,
1080 .dis_fastxdrres = NULL_xdrproc_t,
1081 .dis_ressz = sizeof (COMPOUND4res),
1082 .dis_resfree = rfs4_compound_free,
1083 .dis_flags = 0,
1084 .dis_getfh = NULL
1085 },
1086 };
1087
/*
 * Union of the argument structures of every NFS version 2, 3 and 4
 * procedure.  common_dispatch() embeds it (next to union acl_args) in its
 * on-stack argument buffer, so the union only has to be large enough for
 * the biggest member; the members are not referenced by name there.
 */
union rfs_args {
	/*
	 * NFS VERSION 2
	 */

	/* RFS_NULL = 0 */

	/* RFS_GETATTR = 1 */
	fhandle_t nfs2_getattr_args;

	/* RFS_SETATTR = 2 */
	struct nfssaargs nfs2_setattr_args;

	/* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */

	/* RFS_LOOKUP = 4 */
	struct nfsdiropargs nfs2_lookup_args;

	/* RFS_READLINK = 5 */
	fhandle_t nfs2_readlink_args;

	/* RFS_READ = 6 */
	struct nfsreadargs nfs2_read_args;

	/* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */

	/* RFS_WRITE = 8 */
	struct nfswriteargs nfs2_write_args;

	/* RFS_CREATE = 9 */
	struct nfscreatargs nfs2_create_args;

	/* RFS_REMOVE = 10 */
	struct nfsdiropargs nfs2_remove_args;

	/* RFS_RENAME = 11 */
	struct nfsrnmargs nfs2_rename_args;

	/* RFS_LINK = 12 */
	struct nfslinkargs nfs2_link_args;

	/* RFS_SYMLINK = 13 */
	struct nfsslargs nfs2_symlink_args;

	/* RFS_MKDIR = 14 */
	struct nfscreatargs nfs2_mkdir_args;

	/* RFS_RMDIR = 15 */
	struct nfsdiropargs nfs2_rmdir_args;

	/* RFS_READDIR = 16 */
	struct nfsrddirargs nfs2_readdir_args;

	/* RFS_STATFS = 17 */
	fhandle_t nfs2_statfs_args;

	/*
	 * NFS VERSION 3
	 */

	/* RFS_NULL = 0 */

	/* RFS3_GETATTR = 1 */
	GETATTR3args nfs3_getattr_args;

	/* RFS3_SETATTR = 2 */
	SETATTR3args nfs3_setattr_args;

	/* RFS3_LOOKUP = 3 */
	LOOKUP3args nfs3_lookup_args;

	/* RFS3_ACCESS = 4 */
	ACCESS3args nfs3_access_args;

	/* RFS3_READLINK = 5 */
	READLINK3args nfs3_readlink_args;

	/* RFS3_READ = 6 */
	READ3args nfs3_read_args;

	/* RFS3_WRITE = 7 */
	WRITE3args nfs3_write_args;

	/* RFS3_CREATE = 8 */
	CREATE3args nfs3_create_args;

	/* RFS3_MKDIR = 9 */
	MKDIR3args nfs3_mkdir_args;

	/* RFS3_SYMLINK = 10 */
	SYMLINK3args nfs3_symlink_args;

	/* RFS3_MKNOD = 11 */
	MKNOD3args nfs3_mknod_args;

	/* RFS3_REMOVE = 12 */
	REMOVE3args nfs3_remove_args;

	/* RFS3_RMDIR = 13 */
	RMDIR3args nfs3_rmdir_args;

	/* RFS3_RENAME = 14 */
	RENAME3args nfs3_rename_args;

	/* RFS3_LINK = 15 */
	LINK3args nfs3_link_args;

	/* RFS3_READDIR = 16 */
	READDIR3args nfs3_readdir_args;

	/* RFS3_READDIRPLUS = 17 */
	READDIRPLUS3args nfs3_readdirplus_args;

	/* RFS3_FSSTAT = 18 */
	FSSTAT3args nfs3_fsstat_args;

	/* RFS3_FSINFO = 19 */
	FSINFO3args nfs3_fsinfo_args;

	/* RFS3_PATHCONF = 20 */
	PATHCONF3args nfs3_pathconf_args;

	/* RFS3_COMMIT = 21 */
	COMMIT3args nfs3_commit_args;

	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */

	/* RFS4_COMPOUND = 1 */
	COMPOUND4args nfs4_compound_args;
};
1222
/*
 * Union of the result structures of every NFS version 2, 3 and 4
 * procedure.  common_dispatch() embeds it (next to union acl_res) in its
 * on-stack result buffer, so the union only has to be large enough for
 * the biggest member; the members are not referenced by name there.
 */
union rfs_res {
	/*
	 * NFS VERSION 2
	 */

	/* RFS_NULL = 0 */

	/* RFS_GETATTR = 1 */
	struct nfsattrstat nfs2_getattr_res;

	/* RFS_SETATTR = 2 */
	struct nfsattrstat nfs2_setattr_res;

	/* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */

	/* RFS_LOOKUP = 4 */
	struct nfsdiropres nfs2_lookup_res;

	/* RFS_READLINK = 5 */
	struct nfsrdlnres nfs2_readlink_res;

	/* RFS_READ = 6 */
	struct nfsrdresult nfs2_read_res;

	/* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */

	/* RFS_WRITE = 8 */
	struct nfsattrstat nfs2_write_res;

	/* RFS_CREATE = 9 */
	struct nfsdiropres nfs2_create_res;

	/* RFS_REMOVE = 10 */
	enum nfsstat nfs2_remove_res;

	/* RFS_RENAME = 11 */
	enum nfsstat nfs2_rename_res;

	/* RFS_LINK = 12 */
	enum nfsstat nfs2_link_res;

	/* RFS_SYMLINK = 13 */
	enum nfsstat nfs2_symlink_res;

	/* RFS_MKDIR = 14 */
	struct nfsdiropres nfs2_mkdir_res;

	/* RFS_RMDIR = 15 */
	enum nfsstat nfs2_rmdir_res;

	/* RFS_READDIR = 16 */
	struct nfsrddirres nfs2_readdir_res;

	/* RFS_STATFS = 17 */
	struct nfsstatfs nfs2_statfs_res;

	/*
	 * NFS VERSION 3
	 */

	/* RFS_NULL = 0 */

	/* RFS3_GETATTR = 1 */
	GETATTR3res nfs3_getattr_res;

	/* RFS3_SETATTR = 2 */
	SETATTR3res nfs3_setattr_res;

	/* RFS3_LOOKUP = 3 */
	LOOKUP3res nfs3_lookup_res;

	/* RFS3_ACCESS = 4 */
	ACCESS3res nfs3_access_res;

	/* RFS3_READLINK = 5 */
	READLINK3res nfs3_readlink_res;

	/* RFS3_READ = 6 */
	READ3res nfs3_read_res;

	/* RFS3_WRITE = 7 */
	WRITE3res nfs3_write_res;

	/* RFS3_CREATE = 8 */
	CREATE3res nfs3_create_res;

	/* RFS3_MKDIR = 9 */
	MKDIR3res nfs3_mkdir_res;

	/* RFS3_SYMLINK = 10 */
	SYMLINK3res nfs3_symlink_res;

	/* RFS3_MKNOD = 11 */
	MKNOD3res nfs3_mknod_res;

	/* RFS3_REMOVE = 12 */
	REMOVE3res nfs3_remove_res;

	/* RFS3_RMDIR = 13 */
	RMDIR3res nfs3_rmdir_res;

	/* RFS3_RENAME = 14 */
	RENAME3res nfs3_rename_res;

	/* RFS3_LINK = 15 */
	LINK3res nfs3_link_res;

	/* RFS3_READDIR = 16 */
	READDIR3res nfs3_readdir_res;

	/* RFS3_READDIRPLUS = 17 */
	READDIRPLUS3res nfs3_readdirplus_res;

	/* RFS3_FSSTAT = 18 */
	FSSTAT3res nfs3_fsstat_res;

	/* RFS3_FSINFO = 19 */
	FSINFO3res nfs3_fsinfo_res;

	/* RFS3_PATHCONF = 20 */
	PATHCONF3res nfs3_pathconf_res;

	/* RFS3_COMMIT = 21 */
	COMMIT3res nfs3_commit_res;

	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */

	/* RFS4_COMPOUND = 1 */
	COMPOUND4res nfs4_compound_res;

};
1358
1359 static struct rpc_disptable rfs_disptable[] = {
1360 {sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
1361 rfscallnames_v2,
1362 rfsdisptab_v2},
1363 {sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
1364 rfscallnames_v3,
1365 rfsdisptab_v3},
1366 {sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
1367 rfscallnames_v4,
1368 rfsdisptab_v4},
1369 };
1370
/*
 * If nfs_portmon is set, then clients are required to use privileged
 * ports (ports < IPPORT_RESERVED) in order to get NFS services.
 * The check itself is made in checkauth() and checkauth4(), for AF_INET
 * and AF_INET6 callers.
 *
 * N.B.: this attempt to carry forward the already ill-conceived notion
 * of privileged ports for TCP/UDP is really quite ineffectual.  Not only
 * is it transport-dependent, it's laughably easy to spoof.  If you're
 * really interested in security, you must start with secure RPC instead.
 */
static int nfs_portmon = 0;
1381
#ifdef DEBUG
/*
 * Debug code to allow disabling of rfs_dispatch() use of
 * fastxdrargs() and fastxdrres() calls for testing purposes.
 *
 * A non-zero rfs_no_fast_xdrargs makes common_dispatch() skip
 * dis_fastxdrargs and decode with dis_xdrargs; a non-zero
 * rfs_no_fast_xdrres makes it send the reply with dis_xdrres rather
 * than dis_fastxdrres.
 */
static int rfs_no_fast_xdrargs = 0;
static int rfs_no_fast_xdrres = 0;
#endif
1390
/*
 * Union of the argument structures of every NFS_ACL version 2 and 3
 * procedure.  common_dispatch() embeds it (next to union rfs_args) in its
 * on-stack argument buffer, so only the size of the largest member
 * matters there.
 */
union acl_args {
	/*
	 * ACL VERSION 2
	 */

	/* ACL2_NULL = 0 */

	/* ACL2_GETACL = 1 */
	GETACL2args acl2_getacl_args;

	/* ACL2_SETACL = 2 */
	SETACL2args acl2_setacl_args;

	/* ACL2_GETATTR = 3 */
	GETATTR2args acl2_getattr_args;

	/* ACL2_ACCESS = 4 */
	ACCESS2args acl2_access_args;

	/* ACL2_GETXATTRDIR = 5 */
	GETXATTRDIR2args acl2_getxattrdir_args;

	/*
	 * ACL VERSION 3
	 */

	/* ACL3_NULL = 0 */

	/* ACL3_GETACL = 1 */
	GETACL3args acl3_getacl_args;

	/* ACL3_SETACL = 2 */
	/* NOTE(review): member name lacks the "_args" suffix its siblings use */
	SETACL3args acl3_setacl;

	/* ACL3_GETXATTRDIR = 3 */
	GETXATTRDIR3args acl3_getxattrdir_args;

};
1429
1430 union acl_res {
1431 /*
1432 * ACL VERSION 2
1433 */
1434
1435 /* ACL2_NULL = 0 */
1436
1437 /* ACL2_GETACL = 1 */
1438 GETACL2res acl2_getacl_res;
1439
1440 /* ACL2_SETACL = 2 */
1441 SETACL2res acl2_setacl_res;
1442
1443 /* ACL2_GETATTR = 3 */
1444 GETATTR2res acl2_getattr_res;
1445
1446 /* ACL2_ACCESS = 4 */
1447 ACCESS2res acl2_access_res;
1448
1449 /* ACL2_GETXATTRDIR = 5 */
1450 GETXATTRDIR2args acl2_getxattrdir_res;
1451
1452 /*
1453 * ACL VERSION 3
1454 */
1455
1456 /* ACL3_NULL = 0 */
1457
1458 /* ACL3_GETACL = 1 */
1459 GETACL3res acl3_getacl_res;
1460
1461 /* ACL3_SETACL = 2 */
1462 SETACL3res acl3_setacl_res;
1463
1464 /* ACL3_GETXATTRDIR = 3 */
1465 GETXATTRDIR3res acl3_getxattrdir_res;
1466
1467 };
1468
1469 static bool_t
auth_tooweak(struct svc_req * req,char * res)1470 auth_tooweak(struct svc_req *req, char *res)
1471 {
1472
1473 if (req->rq_vers == NFS_VERSION && req->rq_proc == RFS_LOOKUP) {
1474 struct nfsdiropres *dr = (struct nfsdiropres *)res;
1475 if ((enum wnfsstat)dr->dr_status == WNFSERR_CLNT_FLAVOR)
1476 return (TRUE);
1477 } else if (req->rq_vers == NFS_V3 && req->rq_proc == NFSPROC3_LOOKUP) {
1478 LOOKUP3res *resp = (LOOKUP3res *)res;
1479 if ((enum wnfsstat)resp->status == WNFSERR_CLNT_FLAVOR)
1480 return (TRUE);
1481 }
1482 return (FALSE);
1483 }
1484
/*
 * Common RPC dispatch routine shared by the NFS and NFS_ACL programs.
 *
 *	req		the incoming RPC request
 *	xprt		transport handle used to decode the arguments and
 *			to send the reply
 *	min_vers,
 *	max_vers	inclusive range of program versions accepted
 *	pgmname		program name used in console messages
 *	disptable	per-version dispatch tables, indexed by
 *			(request version - min_vers)
 *
 * The routine validates the version and procedure number, decodes the
 * arguments and hands version 4 requests to rfs4_dispatch().  For the
 * other versions it looks up the export, checks authentication, consults
 * the duplicate request cache for non-idempotent procedures, calls the
 * procedure, sends the reply, optionally writes an nfslog record and
 * finally frees the results and arguments.  The per-version call and
 * bad-call counters are updated on every exit path.
 */
static void
common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
    rpcvers_t max_vers, char *pgmname, struct rpc_disptable *disptable)
{
	int which;
	rpcvers_t vers;
	char *args;
	union {
			union rfs_args ra;
			union acl_args aa;
		} args_buf;
	char *res;
	union {
			union rfs_res rr;
			union acl_res ar;
		} res_buf;
	struct rpcdisp *disp = NULL;
	int dis_flags = 0;
	cred_t *cr;
	int error = 0;
	int anon_ok;
	struct exportinfo *exi = NULL;
	unsigned int nfslog_rec_id;
	int dupstat;
	struct dupreq *dr;
	int authres;
	bool_t publicfh_ok = FALSE;
	enum_t auth_flavor;
	bool_t dupcached = FALSE;
	struct netbuf nb;
	bool_t logging_enabled = FALSE;
	struct exportinfo *nfslog_exi = NULL;
	char **procnames;
	char cbuf[INET6_ADDRSTRLEN];	/* to hold both IPv4 and IPv6 addr */
	bool_t ro = FALSE;
	nfs_globals_t *ng = nfs_srv_getzg();
	nfs_export_t *ne = ng->nfs_export;
	kstat_named_t *svstat, *procstat;

	ASSERT(req->rq_prog == NFS_PROGRAM || req->rq_prog == NFS_ACL_PROGRAM);

	vers = req->rq_vers;

	/*
	 * NOTE(review): the statistics arrays are indexed with the raw
	 * request version before the range check below -- presumably the
	 * RPC layer only delivers registered versions; confirm.
	 */
	svstat = ng->svstat[req->rq_vers];
	procstat = (req->rq_prog == NFS_PROGRAM) ?
	    ng->rfsproccnt[vers] : ng->aclproccnt[vers];

	if (vers < min_vers || vers > max_vers) {
		svcerr_progvers(req->rq_xprt, min_vers, max_vers);
		error++;
		cmn_err(CE_NOTE, "%s: bad version number %u", pgmname, vers);
		goto done;
	}
	/* From here on vers is an index into disptable. */
	vers -= min_vers;

	which = req->rq_proc;
	if (which < 0 || which >= disptable[(int)vers].dis_nprocs) {
		svcerr_noproc(req->rq_xprt);
		error++;
		goto done;
	}

	procstat[which].value.ui64++;

	disp = &disptable[(int)vers].dis_table[which];
	procnames = disptable[(int)vers].dis_procnames;

	auth_flavor = req->rq_cred.oa_flavor;

	/*
	 * Deserialize into the args struct.
	 *
	 * The fast decoder is tried first unless it is disabled, absent or
	 * the request uses RPCSEC_GSS; if it is skipped or fails, the
	 * regular decoder is run on a zeroed buffer.
	 *
	 * NOTE(review): the fast decoder is handed the address of the args
	 * pointer rather than the buffer itself -- presumably so that it can
	 * repoint args at data decoded in place; confirm.
	 */
	args = (char *)&args_buf;

#ifdef DEBUG
	if (rfs_no_fast_xdrargs || (auth_flavor == RPCSEC_GSS) ||
	    disp->dis_fastxdrargs == NULL_xdrproc_t ||
	    !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
#else
	if ((auth_flavor == RPCSEC_GSS) ||
	    disp->dis_fastxdrargs == NULL_xdrproc_t ||
	    !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
#endif
	{
		bzero(args, disp->dis_argsz);
		if (!SVC_GETARGS(xprt, disp->dis_xdrargs, args)) {
			error++;
			/*
			 * Check if we are outside our capabilities.
			 */
			if (rfs4_minorvers_mismatch(req, xprt, (void *)args))
				goto done;

			svcerr_decode(xprt);
			cmn_err(CE_NOTE,
			    "Failed to decode arguments for %s version %u "
			    "procedure %s client %s%s",
			    pgmname, vers + min_vers, procnames[which],
			    client_name(req), client_addr(req, cbuf));
			goto done;
		}
	}

	/*
	 * If Version 4 use that specific dispatch function.
	 */
	if (req->rq_vers == 4) {
		error += rfs4_dispatch(disp, req, xprt, args);
		goto done;
	}

	dis_flags = disp->dis_flags;

	/*
	 * Find export information and check authentication,
	 * setting the credential if everything is ok.
	 */
	if (disp->dis_getfh != NULL) {
		void *fh;
		fsid_t *fsid;
		fid_t *fid, *xfid;
		fhandle_t *fh2;
		nfs_fh3 *fh3;

		/* Pull the file handle out of the decoded arguments. */
		fh = (*disp->dis_getfh)(args);
		switch (req->rq_vers) {
		case NFS_VERSION:
			fh2 = (fhandle_t *)fh;
			fsid = &fh2->fh_fsid;
			fid = (fid_t *)&fh2->fh_len;
			xfid = (fid_t *)&fh2->fh_xlen;
			break;
		case NFS_V3:
			fh3 = (nfs_fh3 *)fh;
			fsid = &fh3->fh3_fsid;
			fid = FH3TOFIDP(fh3);
			xfid = FH3TOXFIDP(fh3);
			break;
		}

		/*
		 * Fix for bug 1038302 - corbin
		 * There is a problem here if anonymous access is
		 * disallowed.  If the current request is part of the
		 * client's mount process for the requested filesystem,
		 * then it will carry root (uid 0) credentials on it, and
		 * will be denied by checkauth if that client does not
		 * have explicit root=0 permission.  This will cause the
		 * client's mount operation to fail.  As a work-around,
		 * we check here to see if the request is a getattr or
		 * statfs operation on the exported vnode itself, and
		 * pass a flag to checkauth with the result of this test.
		 *
		 * The filehandle refers to the mountpoint itself if
		 * the fh_data and fh_xdata portions of the filehandle
		 * are equal.
		 *
		 * Added anon_ok argument to checkauth().
		 */

		if ((dis_flags & RPC_ALLOWANON) && EQFID(fid, xfid))
			anon_ok = 1;
		else
			anon_ok = 0;

		cr = svc_xprt_cred(xprt);

		exi = checkexport(fsid, xfid);

		if (exi != NULL) {
			publicfh_ok = PUBLICFH_CHECK(ne, disp, exi, fsid, xfid);

			/*
			 * Don't allow non-V4 clients access
			 * to pseudo exports
			 */
			if (PSEUDO(exi)) {
				svcerr_weakauth(xprt);
				error++;
				goto done;
			}

			authres = checkauth(exi, req, cr, anon_ok, publicfh_ok,
			    &ro);
			/*
			 * authres >  0: authentication OK - proceed
			 * authres == 0: authentication weak - return error
			 * authres <  0: authentication timeout - drop
			 */
			if (authres <= 0) {
				if (authres == 0) {
					svcerr_weakauth(xprt);
					error++;
				}
				goto done;
			}
		}
	} else
		cr = NULL;

	/*
	 * Choose the result buffer: for RPC_MAPRESP procedures (and not
	 * RPCSEC_GSS) ask the transport for one, falling back to the
	 * on-stack res_buf if none is returned or in every other case.
	 */
	if ((dis_flags & RPC_MAPRESP) && (auth_flavor != RPCSEC_GSS)) {
		res = (char *)SVC_GETRES(xprt, disp->dis_ressz);
		if (res == NULL)
			res = (char *)&res_buf;
	} else
		res = (char *)&res_buf;

	if (!(dis_flags & RPC_IDEMPOTENT)) {
		/*
		 * Non-idempotent procedure: consult the duplicate request
		 * cache before doing any work.
		 */
		dupstat = SVC_DUP_EXT(xprt, req, res, disp->dis_ressz, &dr,
		    &dupcached);

		switch (dupstat) {
		case DUP_ERROR:
			svcerr_systemerr(xprt);
			error++;
			goto done;
			/* NOTREACHED */
		case DUP_INPROGRESS:
			if (res != (char *)&res_buf)
				SVC_FREERES(xprt);
			error++;
			goto done;
			/* NOTREACHED */
		case DUP_NEW:
		case DUP_DROP:
			curthread->t_flag |= T_DONTPEND;

			(*disp->dis_proc)(args, res, exi, req, cr, ro);

			curthread->t_flag &= ~T_DONTPEND;
			/*
			 * T_WOULDBLOCK set after the call: mark the cache
			 * entry DUP_DROP and leave without sending a reply.
			 */
			if (curthread->t_flag & T_WOULDBLOCK) {
				curthread->t_flag &= ~T_WOULDBLOCK;
				SVC_DUPDONE_EXT(xprt, dr, res, NULL,
				    disp->dis_ressz, DUP_DROP);
				if (res != (char *)&res_buf)
					SVC_FREERES(xprt);
				error++;
				goto done;
			}
			if (dis_flags & RPC_AVOIDWORK) {
				SVC_DUPDONE_EXT(xprt, dr, res, NULL,
				    disp->dis_ressz, DUP_DROP);
			} else {
				SVC_DUPDONE_EXT(xprt, dr, res,
				    disp->dis_resfree == nullfree ? NULL :
				    disp->dis_resfree,
				    disp->dis_ressz, DUP_DONE);
				/* results are not freed at the end when set */
				dupcached = TRUE;
			}
			break;
		case DUP_DONE:
			break;
		}

	} else {
		curthread->t_flag |= T_DONTPEND;

		(*disp->dis_proc)(args, res, exi, req, cr, ro);

		curthread->t_flag &= ~T_DONTPEND;
		if (curthread->t_flag & T_WOULDBLOCK) {
			curthread->t_flag &= ~T_WOULDBLOCK;
			if (res != (char *)&res_buf)
				SVC_FREERES(xprt);
			error++;
			goto done;
		}
	}

	/*
	 * A lookup that ended with WNFSERR_CLNT_FLAVOR is answered with a
	 * weak-authentication error instead of the procedure's reply.
	 */
	if (auth_tooweak(req, res)) {
		svcerr_weakauth(xprt);
		error++;
		goto done;
	}

	/*
	 * Check to see if logging has been enabled on the server.
	 * If so, then obtain the export info struct to be used for
	 * the later writing of the log record.  This is done for
	 * the case that a lookup is done across a non-logged public
	 * file system.
	 */
	if (nfslog_buffer_list != NULL) {
		nfslog_exi = nfslog_get_exi(ne, exi, req, res, &nfslog_rec_id);
		/*
		 * Is logging enabled?
		 */
		logging_enabled = (nfslog_exi != NULL);

		/*
		 * Copy the netbuf for logging purposes, before it is
		 * freed by svc_sendreply().
		 */
		if (logging_enabled) {
			NFSLOG_COPY_NETBUF(nfslog_exi, xprt, &nb);
			/*
			 * If RPC_MAPRESP flag set (i.e. in V2 ops) the
			 * res gets copied directly into the mbuf and
			 * may be freed soon after the sendreply. So we
			 * must copy it here to a safe place...
			 */
			if (res != (char *)&res_buf) {
				bcopy(res, (char *)&res_buf, disp->dis_ressz);
			}
		}
	}

	/*
	 * Serialize and send results struct
	 *
	 * The fast encoder is used when the results live in a
	 * transport-supplied buffer, the regular encoder otherwise.
	 */
#ifdef DEBUG
	if (rfs_no_fast_xdrres == 0 && res != (char *)&res_buf)
#else
	if (res != (char *)&res_buf)
#endif
	{
		if (!svc_sendreply(xprt, disp->dis_fastxdrres, res)) {
			cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
			svcerr_systemerr(xprt);
			error++;
		}
	} else {
		if (!svc_sendreply(xprt, disp->dis_xdrres, res)) {
			cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
			svcerr_systemerr(xprt);
			error++;
		}
	}

	/*
	 * Log if needed
	 *
	 * The record is written from res_buf, which holds a copy of the
	 * results when they were produced in a transport buffer.
	 */
	if (logging_enabled) {
		nfslog_write_record(nfslog_exi, req, args, (char *)&res_buf,
		    cr, &nb, nfslog_rec_id, NFSLOG_ONE_BUFFER);
		exi_rele(nfslog_exi);
		kmem_free((&nb)->buf, (&nb)->len);
	}

	/*
	 * Free results struct. With the addition of NFS V4 we can
	 * have non-idempotent procedures with functions.
	 */
	if (disp->dis_resfree != nullfree && dupcached == FALSE) {
		(*disp->dis_resfree)(res);
	}

done:
	/*
	 * Free arguments struct
	 *
	 * disp is still NULL when the version or procedure number was
	 * rejected, in which case there is no decoder to free with.
	 */
	if (disp) {
		if (!SVC_FREEARGS(xprt, disp->dis_xdrargs, args)) {
			cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
			error++;
		}
	} else {
		if (!SVC_FREEARGS(xprt, (xdrproc_t)0, (caddr_t)0)) {
			cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
			error++;
		}
	}

	if (exi != NULL)
		exi_rele(exi);

	/* Account for this call and for any errors seen along the way. */
	svstat[NFS_BADCALLS].value.ui64 += error;
	svstat[NFS_CALLS].value.ui64++;
}
1854
/*
 * Dispatch entry point for the NFS program: runs common_dispatch() with
 * the NFS version range (NFS_VERSMIN through NFS_VERSMAX), the "NFS"
 * message prefix and the NFS dispatch tables.
 */
static void
rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
	common_dispatch(req, xprt, NFS_VERSMIN, NFS_VERSMAX,
	    "NFS", rfs_disptable);
}
1861
/*
 * Procedure names for NFS_ACL version 2, in the same order as the entries
 * of acldisptab_v2.  common_dispatch() uses them when it reports an
 * argument decode failure.
 */
static char *aclcallnames_v2[] = {
	"ACL2_NULL",
	"ACL2_GETACL",
	"ACL2_SETACL",
	"ACL2_GETATTR",
	"ACL2_ACCESS",
	"ACL2_GETXATTRDIR"
};
1870
1871 static struct rpcdisp acldisptab_v2[] = {
1872 /*
1873 * ACL VERSION 2
1874 */
1875
1876 /* ACL2_NULL = 0 */
1877 {rpc_null,
1878 xdr_void, NULL_xdrproc_t, 0,
1879 xdr_void, NULL_xdrproc_t, 0,
1880 nullfree, RPC_IDEMPOTENT,
1881 0},
1882
1883 /* ACL2_GETACL = 1 */
1884 {acl2_getacl,
1885 xdr_GETACL2args, xdr_fastGETACL2args, sizeof (GETACL2args),
1886 xdr_GETACL2res, NULL_xdrproc_t, sizeof (GETACL2res),
1887 acl2_getacl_free, RPC_IDEMPOTENT,
1888 acl2_getacl_getfh},
1889
1890 /* ACL2_SETACL = 2 */
1891 {acl2_setacl,
1892 xdr_SETACL2args, NULL_xdrproc_t, sizeof (SETACL2args),
1893 #ifdef _LITTLE_ENDIAN
1894 xdr_SETACL2res, xdr_fastSETACL2res, sizeof (SETACL2res),
1895 #else
1896 xdr_SETACL2res, NULL_xdrproc_t, sizeof (SETACL2res),
1897 #endif
1898 nullfree, RPC_MAPRESP,
1899 acl2_setacl_getfh},
1900
1901 /* ACL2_GETATTR = 3 */
1902 {acl2_getattr,
1903 xdr_GETATTR2args, xdr_fastGETATTR2args, sizeof (GETATTR2args),
1904 #ifdef _LITTLE_ENDIAN
1905 xdr_GETATTR2res, xdr_fastGETATTR2res, sizeof (GETATTR2res),
1906 #else
1907 xdr_GETATTR2res, NULL_xdrproc_t, sizeof (GETATTR2res),
1908 #endif
1909 nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
1910 acl2_getattr_getfh},
1911
1912 /* ACL2_ACCESS = 4 */
1913 {acl2_access,
1914 xdr_ACCESS2args, xdr_fastACCESS2args, sizeof (ACCESS2args),
1915 #ifdef _LITTLE_ENDIAN
1916 xdr_ACCESS2res, xdr_fastACCESS2res, sizeof (ACCESS2res),
1917 #else
1918 xdr_ACCESS2res, NULL_xdrproc_t, sizeof (ACCESS2res),
1919 #endif
1920 nullfree, RPC_IDEMPOTENT|RPC_MAPRESP,
1921 acl2_access_getfh},
1922
1923 /* ACL2_GETXATTRDIR = 5 */
1924 {acl2_getxattrdir,
1925 xdr_GETXATTRDIR2args, NULL_xdrproc_t, sizeof (GETXATTRDIR2args),
1926 xdr_GETXATTRDIR2res, NULL_xdrproc_t, sizeof (GETXATTRDIR2res),
1927 nullfree, RPC_IDEMPOTENT,
1928 acl2_getxattrdir_getfh},
1929 };
1930
/*
 * Procedure names for NFS_ACL version 3, in the same order as the entries
 * of acldisptab_v3.  common_dispatch() uses them when it reports an
 * argument decode failure.
 */
static char *aclcallnames_v3[] = {
	"ACL3_NULL",
	"ACL3_GETACL",
	"ACL3_SETACL",
	"ACL3_GETXATTRDIR"
};
1937
1938 static struct rpcdisp acldisptab_v3[] = {
1939 /*
1940 * ACL VERSION 3
1941 */
1942
1943 /* ACL3_NULL = 0 */
1944 {rpc_null,
1945 xdr_void, NULL_xdrproc_t, 0,
1946 xdr_void, NULL_xdrproc_t, 0,
1947 nullfree, RPC_IDEMPOTENT,
1948 0},
1949
1950 /* ACL3_GETACL = 1 */
1951 {acl3_getacl,
1952 xdr_GETACL3args, NULL_xdrproc_t, sizeof (GETACL3args),
1953 xdr_GETACL3res, NULL_xdrproc_t, sizeof (GETACL3res),
1954 acl3_getacl_free, RPC_IDEMPOTENT,
1955 acl3_getacl_getfh},
1956
1957 /* ACL3_SETACL = 2 */
1958 {acl3_setacl,
1959 xdr_SETACL3args, NULL_xdrproc_t, sizeof (SETACL3args),
1960 xdr_SETACL3res, NULL_xdrproc_t, sizeof (SETACL3res),
1961 nullfree, 0,
1962 acl3_setacl_getfh},
1963
1964 /* ACL3_GETXATTRDIR = 3 */
1965 {acl3_getxattrdir,
1966 xdr_GETXATTRDIR3args, NULL_xdrproc_t, sizeof (GETXATTRDIR3args),
1967 xdr_GETXATTRDIR3res, NULL_xdrproc_t, sizeof (GETXATTRDIR3res),
1968 nullfree, RPC_IDEMPOTENT,
1969 acl3_getxattrdir_getfh},
1970 };
1971
1972 static struct rpc_disptable acl_disptable[] = {
1973 {sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
1974 aclcallnames_v2,
1975 acldisptab_v2},
1976 {sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
1977 aclcallnames_v3,
1978 acldisptab_v3},
1979 };
1980
/*
 * Dispatch entry point for the NFS_ACL program: runs common_dispatch()
 * with the ACL version range (NFS_ACL_VERSMIN through NFS_ACL_VERSMAX),
 * the "ACL" message prefix and the ACL dispatch tables.
 */
static void
acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
	common_dispatch(req, xprt, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,
	    "ACL", acl_disptable);
}
1987
1988 int
checkwin(int flavor,int window,struct svc_req * req)1989 checkwin(int flavor, int window, struct svc_req *req)
1990 {
1991 struct authdes_cred *adc;
1992
1993 switch (flavor) {
1994 case AUTH_DES:
1995 adc = (struct authdes_cred *)req->rq_clntcred;
1996 CTASSERT(sizeof (struct authdes_cred) <= RQCRED_SIZE);
1997 if (adc->adc_fullname.window > window)
1998 return (0);
1999 break;
2000
2001 default:
2002 break;
2003 }
2004 return (1);
2005 }
2006
2007
/*
 * checkauth() will check the access permission against the export
 * information.  Then map root uid/gid to appropriate uid/gid.
 *
 * This routine is used by NFS V3 and V2 code.
 *
 *	exi		export the request refers to
 *	req		the RPC request being authenticated
 *	cr		credential to fill in; it is modified in place
 *	anon_ok		non-zero if the request may proceed with anonymous
 *			credentials even when access would otherwise be
 *			denied
 *	publicfh_ok	TRUE if the request uses the public filehandle on an
 *			operation that supports it; the export access check
 *			is then skipped
 *	ro		set to TRUE when the access result carries
 *			NFSAUTH_RO; left untouched otherwise
 *
 * Returns 1 when the request may proceed, 0 when it must be rejected and
 * -1 when it should be dropped without a reply.
 */
static int
checkauth(struct exportinfo *exi, struct svc_req *req, cred_t *cr, int anon_ok,
    bool_t publicfh_ok, bool_t *ro)
{
	int i, nfsflavor, rpcflavor, stat, access;
	struct secinfo *secp;
	caddr_t principal;
	char buf[INET6_ADDRSTRLEN]; /* to hold both IPv4 and IPv6 addr */
	int anon_res = 0;

	uid_t uid;
	gid_t gid;
	uint_t ngids;
	gid_t *gids;

	/*
	 * Check for privileged port number
	 * N.B.:  this assumes that we know the format of a netbuf.
	 */
	if (nfs_portmon) {
		struct sockaddr *ca;
		ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;

		if (ca == NULL)
			return (0);

		if ((ca->sa_family == AF_INET &&
		    ntohs(((struct sockaddr_in *)ca)->sin_port) >=
		    IPPORT_RESERVED) ||
		    (ca->sa_family == AF_INET6 &&
		    ntohs(((struct sockaddr_in6 *)ca)->sin6_port) >=
		    IPPORT_RESERVED)) {
			cmn_err(CE_NOTE,
			    "nfs_server: client %s%ssent NFS request from "
			    "unprivileged port",
			    client_name(req), client_addr(req, buf));
			return (0);
		}
	}

	/*
	 *  return 1 on success or 0 on failure
	 */
	stat = sec_svc_getcred(req, cr, &principal, &nfsflavor);

	/*
	 * A failed AUTH_UNIX sec_svc_getcred() implies we couldn't set
	 * the credentials; below we map that to anonymous.
	 */
	if (!stat && nfsflavor != AUTH_UNIX) {
		cmn_err(CE_NOTE,
		    "nfs_server: couldn't get unix cred for %s",
		    client_name(req));
		return (0);
	}

	/*
	 * Short circuit checkauth() on operations that support the
	 * public filehandle, and if the request for that operation
	 * is using the public filehandle. Note that we must call
	 * sec_svc_getcred() first so that xp_cookie is set to the
	 * right value. Normally xp_cookie is just the RPC flavor
	 * of the the request, but in the case of RPCSEC_GSS it
	 * could be a pseudo flavor.
	 */
	if (publicfh_ok)
		return (1);

	rpcflavor = req->rq_cred.oa_flavor;
	/*
	 * Check if the auth flavor is valid for this export
	 */
	access = nfsauth_access(exi, req, cr, &uid, &gid, &ngids, &gids);
	if (access & NFSAUTH_DROP)
		return (-1);	/* drop the request */

	if (access & NFSAUTH_RO)
		*ro = TRUE;

	if (access & NFSAUTH_DENIED) {
		/*
		 * If anon_ok == 1 and we got NFSAUTH_DENIED, it was
		 * probably due to the flavor not matching during
		 * the mount attempt. So map the flavor to AUTH_NONE
		 * so that the credentials get mapped to the anonymous
		 * user.
		 */
		if (anon_ok == 1)
			rpcflavor = AUTH_NONE;
		else
			return (0);	/* deny access */

	} else if (access & NFSAUTH_MAPNONE) {
		/*
		 * Access was granted even though the flavor mismatched
		 * because AUTH_NONE was one of the exported flavors.
		 */
		rpcflavor = AUTH_NONE;

	} else if (access & NFSAUTH_WRONGSEC) {
		/*
		 * NFSAUTH_WRONGSEC is used for NFSv4. If we get here,
		 * it means a client ignored the list of allowed flavors
		 * returned via the MOUNT protocol. So we just disallow it!
		 */
		return (0);
	}

	/*
	 * The group list returned by nfsauth_access() is only consumed in
	 * the AUTH_UNIX case below, which frees it itself; release it here
	 * for every other flavor.
	 */
	if (rpcflavor != AUTH_SYS)
		kmem_free(gids, ngids * sizeof (gid_t));

	switch (rpcflavor) {
	case AUTH_NONE:
		/* Map the caller to the export's anonymous id. */
		anon_res = crsetugid(cr, exi->exi_export.ex_anon,
		    exi->exi_export.ex_anon);
		(void) crsetgroups(cr, 0, NULL);
		break;

	case AUTH_UNIX:
		if (!stat || (crgetuid(cr) == 0 &&
		    !(access & NFSAUTH_UIDMAP))) {
			/*
			 * No usable credential, or uid 0 without a uid
			 * mapping: use the anonymous id.
			 */
			anon_res = crsetugid(cr, exi->exi_export.ex_anon,
			    exi->exi_export.ex_anon);
			(void) crsetgroups(cr, 0, NULL);
		} else if (crgetuid(cr) == 0 && access & NFSAUTH_ROOT) {
			/*
			 * It is root, so apply rootid to get real UID
			 * Find the secinfo structure.  We should be able
			 * to find it by the time we reach here.
			 * nfsauth_access() has done the checking.
			 */
			secp = NULL;
			for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
				struct secinfo *sptr;
				sptr = &exi->exi_export.ex_secinfo[i];
				if (sptr->s_secinfo.sc_nfsnum == nfsflavor) {
					secp = sptr;
					break;
				}
			}
			if (secp != NULL) {
				(void) crsetugid(cr, secp->s_rootid,
				    secp->s_rootid);
				(void) crsetgroups(cr, 0, NULL);
			}
		} else if (crgetuid(cr) != uid || crgetgid(cr) != gid) {
			/*
			 * nfsauth_access() returned a different uid/gid:
			 * apply it, falling back to the anonymous id if
			 * that fails.
			 */
			if (crsetugid(cr, uid, gid) != 0)
				anon_res = crsetugid(cr,
				    exi->exi_export.ex_anon,
				    exi->exi_export.ex_anon);
			(void) crsetgroups(cr, 0, NULL);
		} else if (access & NFSAUTH_GROUPS) {
			(void) crsetgroups(cr, ngids, gids);
		}

		kmem_free(gids, ngids * sizeof (gid_t));

		break;

	case AUTH_DES:
	case RPCSEC_GSS:
		/*
		 *  Find the secinfo structure.  We should be able
		 *  to find it by the time we reach here.
		 *  nfsauth_access() has done the checking.
		 */
		secp = NULL;
		for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
			if (exi->exi_export.ex_secinfo[i].s_secinfo.sc_nfsnum ==
			    nfsflavor) {
				secp = &exi->exi_export.ex_secinfo[i];
				break;
			}
		}

		if (!secp) {
			cmn_err(CE_NOTE, "nfs_server: client %s%shad "
			    "no secinfo data for flavor %d",
			    client_name(req), client_addr(req, buf),
			    nfsflavor);
			return (0);
		}

		if (!checkwin(rpcflavor, secp->s_window, req)) {
			cmn_err(CE_NOTE,
			    "nfs_server: client %s%sused invalid "
			    "auth window value",
			    client_name(req), client_addr(req, buf));
			return (0);
		}

		/*
		 * Map root principals listed in the share's root= list to root,
		 * and map any others principals that were mapped to root by RPC
		 * to anon.
		 */
		if (principal && sec_svc_inrootlist(rpcflavor, principal,
		    secp->s_rootcnt, secp->s_rootnames)) {
			if (crgetuid(cr) == 0 && secp->s_rootid == 0)
				return (1);


			(void) crsetugid(cr, secp->s_rootid, secp->s_rootid);

			/*
			 * NOTE: If and when kernel-land privilege tracing is
			 * added this may have to be replaced with code that
			 * retrieves root's supplementary groups (e.g., using
			 * kgss_get_group_info().  In the meantime principals
			 * mapped to uid 0 get all privileges, so setting cr's
			 * supplementary groups for them does nothing.
			 */
			(void) crsetgroups(cr, 0, NULL);

			return (1);
		}

		/*
		 * Not a root princ, or not in root list, map UID 0/nobody to
		 * the anon ID for the share.  (RPC sets cr's UIDs and GIDs to
		 * UID_NOBODY and GID_NOBODY, respectively.)
		 */
		if (crgetuid(cr) != 0 &&
		    (crgetuid(cr) != UID_NOBODY || crgetgid(cr) != GID_NOBODY))
			return (1);

		anon_res = crsetugid(cr, exi->exi_export.ex_anon,
		    exi->exi_export.ex_anon);
		(void) crsetgroups(cr, 0, NULL);
		break;
	default:
		return (0);
	} /* switch on rpcflavor */

	/*
	 * Even if anon access is disallowed via ex_anon == -1, we allow
	 * this access if anon_ok is set.  So set creds to the default
	 * "nobody" id.
	 */
	if (anon_res != 0) {
		if (anon_ok == 0) {
			cmn_err(CE_NOTE,
			    "nfs_server: client %s%ssent wrong "
			    "authentication for %s",
			    client_name(req), client_addr(req, buf),
			    exi->exi_export.ex_path ?
			    exi->exi_export.ex_path : "?");
			return (0);
		}

		if (crsetugid(cr, UID_NOBODY, GID_NOBODY) != 0)
			return (0);
	}

	return (1);
}
2270
2271 /*
2272 * returns 0 on failure, -1 on a drop, -2 on wrong security flavor,
2273 * and 1 on success
2274 */
2275 int
checkauth4(struct compound_state * cs,struct svc_req * req)2276 checkauth4(struct compound_state *cs, struct svc_req *req)
2277 {
2278 int i, rpcflavor, access;
2279 struct secinfo *secp;
2280 char buf[MAXHOST + 1];
2281 int anon_res = 0, nfsflavor;
2282 struct exportinfo *exi;
2283 cred_t *cr;
2284 caddr_t principal;
2285
2286 uid_t uid;
2287 gid_t gid;
2288 uint_t ngids;
2289 gid_t *gids;
2290
2291 exi = cs->exi;
2292 cr = cs->cr;
2293 principal = cs->principal;
2294 nfsflavor = cs->nfsflavor;
2295
2296 ASSERT(cr != NULL);
2297
2298 rpcflavor = req->rq_cred.oa_flavor;
2299 cs->access &= ~CS_ACCESS_LIMITED;
2300
2301 /*
2302 * Check for privileged port number
2303 * N.B.: this assumes that we know the format of a netbuf.
2304 */
2305 if (nfs_portmon) {
2306 struct sockaddr *ca;
2307 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2308
2309 if (ca == NULL)
2310 return (0);
2311
2312 if ((ca->sa_family == AF_INET &&
2313 ntohs(((struct sockaddr_in *)ca)->sin_port) >=
2314 IPPORT_RESERVED) ||
2315 (ca->sa_family == AF_INET6 &&
2316 ntohs(((struct sockaddr_in6 *)ca)->sin6_port) >=
2317 IPPORT_RESERVED)) {
2318 cmn_err(CE_NOTE,
2319 "nfs_server: client %s%ssent NFSv4 request from "
2320 "unprivileged port",
2321 client_name(req), client_addr(req, buf));
2322 return (0);
2323 }
2324 }
2325
2326 /*
2327 * Check the access right per auth flavor on the vnode of
2328 * this export for the given request.
2329 */
2330 access = nfsauth4_access(cs->exi, cs->vp, req, cr, &uid, &gid, &ngids,
2331 &gids);
2332
2333 if (access & NFSAUTH_WRONGSEC)
2334 return (-2); /* no access for this security flavor */
2335
2336 if (access & NFSAUTH_DROP)
2337 return (-1); /* drop the request */
2338
2339 if (access & NFSAUTH_DENIED) {
2340
2341 if (exi->exi_export.ex_seccnt > 0)
2342 return (0); /* deny access */
2343
2344 } else if (access & NFSAUTH_LIMITED) {
2345
2346 cs->access |= CS_ACCESS_LIMITED;
2347
2348 } else if (access & NFSAUTH_MAPNONE) {
2349 /*
2350 * Access was granted even though the flavor mismatched
2351 * because AUTH_NONE was one of the exported flavors.
2352 */
2353 rpcflavor = AUTH_NONE;
2354 }
2355
2356 /*
2357 * XXX probably need to redo some of it for nfsv4?
2358 * return 1 on success or 0 on failure
2359 */
2360
2361 if (rpcflavor != AUTH_SYS)
2362 kmem_free(gids, ngids * sizeof (gid_t));
2363
2364 switch (rpcflavor) {
2365 case AUTH_NONE:
2366 anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2367 exi->exi_export.ex_anon);
2368 (void) crsetgroups(cr, 0, NULL);
2369 break;
2370
2371 case AUTH_UNIX:
2372 if (crgetuid(cr) == 0 && !(access & NFSAUTH_UIDMAP)) {
2373 anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2374 exi->exi_export.ex_anon);
2375 (void) crsetgroups(cr, 0, NULL);
2376 } else if (crgetuid(cr) == 0 && access & NFSAUTH_ROOT) {
2377 /*
2378 * It is root, so apply rootid to get real UID
2379 * Find the secinfo structure. We should be able
2380 * to find it by the time we reach here.
2381 * nfsauth_access() has done the checking.
2382 */
2383 secp = NULL;
2384 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2385 struct secinfo *sptr;
2386 sptr = &exi->exi_export.ex_secinfo[i];
2387 if (sptr->s_secinfo.sc_nfsnum == nfsflavor) {
2388 secp = &exi->exi_export.ex_secinfo[i];
2389 break;
2390 }
2391 }
2392 if (secp != NULL) {
2393 (void) crsetugid(cr, secp->s_rootid,
2394 secp->s_rootid);
2395 (void) crsetgroups(cr, 0, NULL);
2396 }
2397 } else if (crgetuid(cr) != uid || crgetgid(cr) != gid) {
2398 if (crsetugid(cr, uid, gid) != 0)
2399 anon_res = crsetugid(cr,
2400 exi->exi_export.ex_anon,
2401 exi->exi_export.ex_anon);
2402 (void) crsetgroups(cr, 0, NULL);
2403 } if (access & NFSAUTH_GROUPS) {
2404 (void) crsetgroups(cr, ngids, gids);
2405 }
2406
2407 kmem_free(gids, ngids * sizeof (gid_t));
2408
2409 break;
2410
2411 default:
2412 /*
2413 * Find the secinfo structure. We should be able
2414 * to find it by the time we reach here.
2415 * nfsauth_access() has done the checking.
2416 */
2417 secp = NULL;
2418 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2419 if (exi->exi_export.ex_secinfo[i].s_secinfo.sc_nfsnum ==
2420 nfsflavor) {
2421 secp = &exi->exi_export.ex_secinfo[i];
2422 break;
2423 }
2424 }
2425
2426 if (!secp) {
2427 cmn_err(CE_NOTE, "nfs_server: client %s%shad "
2428 "no secinfo data for flavor %d",
2429 client_name(req), client_addr(req, buf),
2430 nfsflavor);
2431 return (0);
2432 }
2433
2434 if (!checkwin(rpcflavor, secp->s_window, req)) {
2435 cmn_err(CE_NOTE,
2436 "nfs_server: client %s%sused invalid "
2437 "auth window value",
2438 client_name(req), client_addr(req, buf));
2439 return (0);
2440 }
2441
2442 /*
2443 * Map root principals listed in the share's root= list to root,
2444 * and map any others principals that were mapped to root by RPC
2445 * to anon. If not going to anon, set to rootid (root_mapping).
2446 */
2447 if (principal && sec_svc_inrootlist(rpcflavor, principal,
2448 secp->s_rootcnt, secp->s_rootnames)) {
2449 if (crgetuid(cr) == 0 && secp->s_rootid == 0)
2450 return (1);
2451
2452 (void) crsetugid(cr, secp->s_rootid, secp->s_rootid);
2453
2454 /*
2455 * NOTE: If and when kernel-land privilege tracing is
2456 * added this may have to be replaced with code that
2457 * retrieves root's supplementary groups (e.g., using
2458 * kgss_get_group_info(). In the meantime principals
2459 * mapped to uid 0 get all privileges, so setting cr's
2460 * supplementary groups for them does nothing.
2461 */
2462 (void) crsetgroups(cr, 0, NULL);
2463
2464 return (1);
2465 }
2466
2467 /*
2468 * Not a root princ, or not in root list, map UID 0/nobody to
2469 * the anon ID for the share. (RPC sets cr's UIDs and GIDs to
2470 * UID_NOBODY and GID_NOBODY, respectively.)
2471 */
2472 if (crgetuid(cr) != 0 &&
2473 (crgetuid(cr) != UID_NOBODY || crgetgid(cr) != GID_NOBODY))
2474 return (1);
2475
2476 anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2477 exi->exi_export.ex_anon);
2478 (void) crsetgroups(cr, 0, NULL);
2479 break;
2480 } /* switch on rpcflavor */
2481
2482 /*
2483 * Even if anon access is disallowed via ex_anon == -1, we allow
2484 * this access if anon_ok is set. So set creds to the default
2485 * "nobody" id.
2486 */
2487
2488 if (anon_res != 0) {
2489 cmn_err(CE_NOTE,
2490 "nfs_server: client %s%ssent wrong "
2491 "authentication for %s",
2492 client_name(req), client_addr(req, buf),
2493 exi->exi_export.ex_path ?
2494 exi->exi_export.ex_path : "?");
2495 return (0);
2496 }
2497
2498 return (1);
2499 }
2500
2501
2502 static char *
client_name(struct svc_req * req)2503 client_name(struct svc_req *req)
2504 {
2505 char *hostname = NULL;
2506
2507 /*
2508 * If it's a Unix cred then use the
2509 * hostname from the credential.
2510 */
2511 if (req->rq_cred.oa_flavor == AUTH_UNIX) {
2512 hostname = ((struct authunix_parms *)
2513 req->rq_clntcred)->aup_machname;
2514 }
2515 if (hostname == NULL)
2516 hostname = "";
2517
2518 return (hostname);
2519 }
2520
2521 static char *
client_addr(struct svc_req * req,char * buf)2522 client_addr(struct svc_req *req, char *buf)
2523 {
2524 struct sockaddr *ca;
2525 uchar_t *b;
2526 char *frontspace = "";
2527
2528 /*
2529 * We assume we are called in tandem with client_name and the
2530 * format string looks like "...client %s%sblah blah..."
2531 *
2532 * If it's a Unix cred then client_name returned
2533 * a host name, so we need insert a space between host name
2534 * and IP address.
2535 */
2536 if (req->rq_cred.oa_flavor == AUTH_UNIX)
2537 frontspace = " ";
2538
2539 /*
2540 * Convert the caller's IP address to a dotted string
2541 */
2542 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2543
2544 if (ca->sa_family == AF_INET) {
2545 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
2546 (void) sprintf(buf, "%s(%d.%d.%d.%d) ", frontspace,
2547 b[0] & 0xFF, b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
2548 } else if (ca->sa_family == AF_INET6) {
2549 struct sockaddr_in6 *sin6;
2550 sin6 = (struct sockaddr_in6 *)ca;
2551 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
2552 buf, INET6_ADDRSTRLEN);
2553
2554 } else {
2555
2556 /*
2557 * No IP address to print. If there was a host name
2558 * printed, then we print a space.
2559 */
2560 (void) sprintf(buf, frontspace);
2561 }
2562
2563 return (buf);
2564 }
2565
/*
 * NFS Server initialization routine.  This routine should only be called
 * once.  It performs the following tasks:
 *	- Initialize the module-wide lock, the list of per-zone globals and
 *	  the thread-specific-data key
 *	- Call sub-initialization routines (localize access to variables)
 *	- Initialize all locks
 *	- initialize the version 3 write verifier (presumably inside one of
 *	  the sub-initialization routines; not visible in this function)
 *	- Register the per-zone init/shutdown/fini callbacks
 *
 * nfs_srvfini() undoes these steps in reverse order.
 */
void
nfs_srvinit(void)
{

	/* Truly global stuff in this module (not per zone) */
	rw_init(&nfssrv_globals_rwl, NULL, RW_DEFAULT, NULL);
	list_create(&nfssrv_globals_list, sizeof (nfs_globals_t),
	    offsetof(nfs_globals_t, nfs_g_link));
	tsd_create(&nfs_server_tsd_key, NULL);

	/* The order here is important */
	nfs_exportinit();
	rfs_srvrinit();
	rfs3_srvrinit();
	rfs4_srvrinit();
	nfsauth_init();

	/*
	 * NFS server zone-specific global variables
	 * Note the zone_init is called for the GZ here.
	 *
	 * This is done last, after the list and lock that
	 * nfs_server_zone_init() uses have been set up above.
	 */
	zone_key_create(&nfssrv_zone_key, nfs_server_zone_init,
	    nfs_server_zone_shutdown, nfs_server_zone_fini);
}
2597
/*
 * NFS Server finalization routine.  This routine is called to clean up the
 * initialization work previously performed if the NFS server module could
 * not be loaded correctly.
 *
 * Everything set up by nfs_srvinit() is torn down in the opposite order:
 * first the zone key, then the sub-modules, then the module-wide TSD key,
 * list and lock.
 */
void
nfs_srvfini(void)
{

	/*
	 * NFS server zone-specific global variables
	 * Note the zone_fini is called for the GZ here.
	 */
	(void) zone_key_delete(nfssrv_zone_key);

	/* The order here is important (reverse of init) */
	nfsauth_fini();
	rfs4_srvrfini();
	rfs3_srvrfini();
	rfs_srvrfini();
	nfs_exportfini();

	/* Truly global stuff in this module (not per zone) */
	tsd_destroy(&nfs_server_tsd_key);
	list_destroy(&nfssrv_globals_list);
	rw_destroy(&nfssrv_globals_rwl);
}
2625
2626 /*
2627 * Zone init, shutdown, fini functions for the NFS server
2628 *
2629 * This design is careful to create the entire hierarhcy of
2630 * NFS server "globals" (including those created by various
2631 * per-module *_zone_init functions, etc.) so that all these
2632 * objects have exactly the same lifetime.
2633 *
2634 * These objects are also kept on a list for two reasons:
2635 * 1: It makes finding these in mdb _much_ easier.
2636 * 2: It allows operating across all zone globals for
2637 * functions like nfs_auth.c:exi_cache_reclaim
2638 */
2639 static void *
nfs_server_zone_init(zoneid_t zoneid)2640 nfs_server_zone_init(zoneid_t zoneid)
2641 {
2642 nfs_globals_t *ng;
2643
2644 ng = kmem_zalloc(sizeof (*ng), KM_SLEEP);
2645
2646 ng->nfs_versmin = NFS_SRV_VERSMIN_DEFAULT;
2647 ng->nfs_versmax = NFS_SRV_VERSMAX_DEFAULT;
2648
2649 /* Init the stuff to control start/stop */
2650 ng->nfs_server_upordown = NFS_SERVER_STOPPED;
2651 mutex_init(&ng->nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
2652 cv_init(&ng->nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
2653 mutex_init(&ng->rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
2654 cv_init(&ng->rdma_wait_cv, NULL, CV_DEFAULT, NULL);
2655
2656 ng->nfs_zoneid = zoneid;
2657
2658 /*
2659 * Order here is important.
2660 * export init must precede srv init calls.
2661 */
2662 nfs_export_zone_init(ng);
2663 rfs_stat_zone_init(ng);
2664 rfs_srv_zone_init(ng);
2665 rfs3_srv_zone_init(ng);
2666 rfs4_srv_zone_init(ng);
2667 nfsauth_zone_init(ng);
2668
2669 rw_enter(&nfssrv_globals_rwl, RW_WRITER);
2670 list_insert_tail(&nfssrv_globals_list, ng);
2671 rw_exit(&nfssrv_globals_rwl);
2672
2673 return (ng);
2674 }
2675
2676 /* ARGSUSED */
2677 static void
nfs_server_zone_shutdown(zoneid_t zoneid,void * data)2678 nfs_server_zone_shutdown(zoneid_t zoneid, void *data)
2679 {
2680 nfs_globals_t *ng;
2681
2682 ng = (nfs_globals_t *)data;
2683
2684 /*
2685 * Order is like _fini, but only
2686 * some modules need this hook.
2687 */
2688 nfsauth_zone_shutdown(ng);
2689 nfs_export_zone_shutdown(ng);
2690 }
2691
2692 /* ARGSUSED */
2693 static void
nfs_server_zone_fini(zoneid_t zoneid,void * data)2694 nfs_server_zone_fini(zoneid_t zoneid, void *data)
2695 {
2696 nfs_globals_t *ng;
2697
2698 ng = (nfs_globals_t *)data;
2699
2700 rw_enter(&nfssrv_globals_rwl, RW_WRITER);
2701 list_remove(&nfssrv_globals_list, ng);
2702 rw_exit(&nfssrv_globals_rwl);
2703
2704 /*
2705 * Order here is important.
2706 * reverse order from init
2707 */
2708 nfsauth_zone_fini(ng);
2709 rfs4_srv_zone_fini(ng);
2710 rfs3_srv_zone_fini(ng);
2711 rfs_srv_zone_fini(ng);
2712 rfs_stat_zone_fini(ng);
2713 nfs_export_zone_fini(ng);
2714
2715 mutex_destroy(&ng->nfs_server_upordown_lock);
2716 cv_destroy(&ng->nfs_server_upordown_cv);
2717 mutex_destroy(&ng->rdma_wait_mutex);
2718 cv_destroy(&ng->rdma_wait_cv);
2719
2720 kmem_free(ng, sizeof (*ng));
2721 }
2722
2723 /*
2724 * Set up an iovec array of up to cnt pointers.
2725 */
2726 void
mblk_to_iov(mblk_t * m,int cnt,struct iovec * iovp)2727 mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
2728 {
2729 while (m != NULL && cnt-- > 0) {
2730 iovp->iov_base = (caddr_t)m->b_rptr;
2731 iovp->iov_len = (m->b_wptr - m->b_rptr);
2732 iovp++;
2733 m = m->b_cont;
2734 }
2735 }
2736
2737 /*
2738 * Common code between NFS Version 2 and NFS Version 3 for the public
2739 * filehandle multicomponent lookups.
2740 */
2741
2742 /*
2743 * Public filehandle evaluation of a multi-component lookup, following
2744 * symbolic links, if necessary. This may result in a vnode in another
2745 * filesystem, which is OK as long as the other filesystem is exported.
2746 *
2747 * Note that the exi will be set either to NULL or a new reference to the
2748 * exportinfo struct that corresponds to the vnode of the multi-component path.
2749 * It is the callers responsibility to release this reference.
2750 */
2751 int
rfs_publicfh_mclookup(char * p,vnode_t * dvp,cred_t * cr,vnode_t ** vpp,struct exportinfo ** exi,struct sec_ol * sec)2752 rfs_publicfh_mclookup(char *p, vnode_t *dvp, cred_t *cr, vnode_t **vpp,
2753 struct exportinfo **exi, struct sec_ol *sec)
2754 {
2755 int pathflag;
2756 vnode_t *mc_dvp = NULL;
2757 vnode_t *realvp;
2758 int error;
2759
2760 *exi = NULL;
2761
2762 /*
2763 * check if the given path is a url or native path. Since p is
2764 * modified by MCLpath(), it may be empty after returning from
2765 * there, and should be checked.
2766 */
2767 if ((pathflag = MCLpath(&p)) == -1)
2768 return (EIO);
2769
2770 /*
2771 * If pathflag is SECURITY_QUERY, turn the SEC_QUERY bit
2772 * on in sec->sec_flags. This bit will later serve as an
2773 * indication in makefh_ol() or makefh3_ol() to overload the
2774 * filehandle to contain the sec modes used by the server for
2775 * the path.
2776 */
2777 if (pathflag == SECURITY_QUERY) {
2778 if ((sec->sec_index = (uint_t)(*p)) > 0) {
2779 sec->sec_flags |= SEC_QUERY;
2780 p++;
2781 if ((pathflag = MCLpath(&p)) == -1)
2782 return (EIO);
2783 } else {
2784 cmn_err(CE_NOTE,
2785 "nfs_server: invalid security index %d, "
2786 "violating WebNFS SNEGO protocol.", sec->sec_index);
2787 return (EIO);
2788 }
2789 }
2790
2791 if (p[0] == '\0') {
2792 error = ENOENT;
2793 goto publicfh_done;
2794 }
2795
2796 error = rfs_pathname(p, &mc_dvp, vpp, dvp, cr, pathflag);
2797
2798 /*
2799 * If name resolves to "/" we get EINVAL since we asked for
2800 * the vnode of the directory that the file is in. Try again
2801 * with NULL directory vnode.
2802 */
2803 if (error == EINVAL) {
2804 error = rfs_pathname(p, NULL, vpp, dvp, cr, pathflag);
2805 if (!error) {
2806 ASSERT(*vpp != NULL);
2807 if ((*vpp)->v_type == VDIR) {
2808 VN_HOLD(*vpp);
2809 mc_dvp = *vpp;
2810 } else {
2811 /*
2812 * This should not happen, the filesystem is
2813 * in an inconsistent state. Fail the lookup
2814 * at this point.
2815 */
2816 VN_RELE(*vpp);
2817 error = EINVAL;
2818 }
2819 }
2820 }
2821
2822 if (error)
2823 goto publicfh_done;
2824
2825 if (*vpp == NULL) {
2826 error = ENOENT;
2827 goto publicfh_done;
2828 }
2829
2830 ASSERT(mc_dvp != NULL);
2831 ASSERT(*vpp != NULL);
2832
2833 if ((*vpp)->v_type == VDIR) {
2834 do {
2835 /*
2836 * *vpp may be an AutoFS node, so we perform
2837 * a VOP_ACCESS() to trigger the mount of the intended
2838 * filesystem, so we can perform the lookup in the
2839 * intended filesystem.
2840 */
2841 (void) VOP_ACCESS(*vpp, 0, 0, cr, NULL);
2842
2843 /*
2844 * If vnode is covered, get the
2845 * the topmost vnode.
2846 */
2847 if (vn_mountedvfs(*vpp) != NULL) {
2848 error = traverse(vpp);
2849 if (error) {
2850 VN_RELE(*vpp);
2851 goto publicfh_done;
2852 }
2853 }
2854
2855 if (VOP_REALVP(*vpp, &realvp, NULL) == 0 &&
2856 realvp != *vpp) {
2857 /*
2858 * If realvp is different from *vpp
2859 * then release our reference on *vpp, so that
2860 * the export access check be performed on the
2861 * real filesystem instead.
2862 */
2863 VN_HOLD(realvp);
2864 VN_RELE(*vpp);
2865 *vpp = realvp;
2866 } else {
2867 break;
2868 }
2869 /* LINTED */
2870 } while (TRUE);
2871
2872 /*
2873 * Let nfs_vptexi() figure what the real parent is.
2874 */
2875 VN_RELE(mc_dvp);
2876 mc_dvp = NULL;
2877
2878 } else {
2879 /*
2880 * If vnode is covered, get the
2881 * the topmost vnode.
2882 */
2883 if (vn_mountedvfs(mc_dvp) != NULL) {
2884 error = traverse(&mc_dvp);
2885 if (error) {
2886 VN_RELE(*vpp);
2887 goto publicfh_done;
2888 }
2889 }
2890
2891 if (VOP_REALVP(mc_dvp, &realvp, NULL) == 0 &&
2892 realvp != mc_dvp) {
2893 /*
2894 * *vpp is a file, obtain realvp of the parent
2895 * directory vnode.
2896 */
2897 VN_HOLD(realvp);
2898 VN_RELE(mc_dvp);
2899 mc_dvp = realvp;
2900 }
2901 }
2902
2903 /*
2904 * The pathname may take us from the public filesystem to another.
2905 * If that's the case then just set the exportinfo to the new export
2906 * and build filehandle for it. Thanks to per-access checking there's
2907 * no security issues with doing this. If the client is not allowed
2908 * access to this new export then it will get an access error when it
2909 * tries to use the filehandle
2910 */
2911 error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi);
2912 if (error != 0) {
2913 VN_RELE(*vpp);
2914 goto publicfh_done;
2915 }
2916
2917 /*
2918 * Not allowed access to pseudo exports.
2919 */
2920 if (PSEUDO(*exi)) {
2921 error = ENOENT;
2922 VN_RELE(*vpp);
2923 goto publicfh_done;
2924 }
2925
2926 /*
2927 * Do a lookup for the index file. We know the index option doesn't
2928 * allow paths through handling in the share command, so mc_dvp will
2929 * be the parent for the index file vnode, if its present. Use
2930 * temporary pointers to preserve and reuse the vnode pointers of the
2931 * original directory in case there's no index file. Note that the
2932 * index file is a native path, and should not be interpreted by
2933 * the URL parser in rfs_pathname()
2934 */
2935 if (((*exi)->exi_export.ex_flags & EX_INDEX) &&
2936 ((*vpp)->v_type == VDIR) && (pathflag == URLPATH)) {
2937 vnode_t *tvp, *tmc_dvp; /* temporary vnode pointers */
2938
2939 tmc_dvp = mc_dvp;
2940 mc_dvp = tvp = *vpp;
2941
2942 error = rfs_pathname((*exi)->exi_export.ex_index, NULL, vpp,
2943 mc_dvp, cr, NATIVEPATH);
2944
2945 if (error == ENOENT) {
2946 *vpp = tvp;
2947 mc_dvp = tmc_dvp;
2948 error = 0;
2949 } else { /* ok or error other than ENOENT */
2950 if (tmc_dvp)
2951 VN_RELE(tmc_dvp);
2952 if (error)
2953 goto publicfh_done;
2954
2955 /*
2956 * Found a valid vp for index "filename". Sanity check
2957 * for odd case where a directory is provided as index
2958 * option argument and leads us to another filesystem
2959 */
2960
2961 /* Release the reference on the old exi value */
2962 ASSERT(*exi != NULL);
2963 exi_rele(*exi);
2964 *exi = NULL;
2965
2966 error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi);
2967 if (error != 0) {
2968 VN_RELE(*vpp);
2969 goto publicfh_done;
2970 }
2971 /* Have a new *exi */
2972 }
2973 }
2974
2975 publicfh_done:
2976 if (mc_dvp)
2977 VN_RELE(mc_dvp);
2978
2979 return (error);
2980 }
2981
2982 /*
2983 * Evaluate a multi-component path
2984 */
2985 int
rfs_pathname(char * path,vnode_t ** dirvpp,vnode_t ** compvpp,vnode_t * startdvp,cred_t * cr,int pathflag)2986 rfs_pathname(
2987 char *path, /* pathname to evaluate */
2988 vnode_t **dirvpp, /* ret for ptr to parent dir vnode */
2989 vnode_t **compvpp, /* ret for ptr to component vnode */
2990 vnode_t *startdvp, /* starting vnode */
2991 cred_t *cr, /* user's credential */
2992 int pathflag) /* flag to identify path, e.g. URL */
2993 {
2994 char namebuf[TYPICALMAXPATHLEN];
2995 struct pathname pn;
2996 int error;
2997
2998 ASSERT3U(crgetzoneid(cr), ==, curzone->zone_id);
2999
3000 /*
3001 * If pathname starts with '/', then set startdvp to root.
3002 */
3003 if (*path == '/') {
3004 while (*path == '/')
3005 path++;
3006
3007 startdvp = ZONE_ROOTVP();
3008 }
3009
3010 error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
3011 if (error == 0) {
3012 /*
3013 * Call the URL parser for URL paths to modify the original
3014 * string to handle any '%' encoded characters that exist.
3015 * Done here to avoid an extra bcopy in the lookup.
3016 * We need to be careful about pathlen's. We know that
3017 * rfs_pathname() is called with a non-empty path. However,
3018 * it could be emptied due to the path simply being all /'s,
3019 * which is valid to proceed with the lookup, or due to the
3020 * URL parser finding an encoded null character at the
3021 * beginning of path which should not proceed with the lookup.
3022 */
3023 if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
3024 URLparse(pn.pn_path);
3025 if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0)
3026 return (ENOENT);
3027 }
3028 VN_HOLD(startdvp);
3029 error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
3030 ZONE_ROOTVP(), startdvp, cr);
3031 }
3032 if (error == ENAMETOOLONG) {
3033 /*
3034 * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
3035 */
3036 error = pn_get(path, UIO_SYSSPACE, &pn);
3037 if (error != 0)
3038 return (error);
3039 if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
3040 URLparse(pn.pn_path);
3041 if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0) {
3042 pn_free(&pn);
3043 return (ENOENT);
3044 }
3045 }
3046 VN_HOLD(startdvp);
3047 error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
3048 ZONE_ROOTVP(), startdvp, cr);
3049 pn_free(&pn);
3050 }
3051
3052 return (error);
3053 }
3054
3055 /*
3056 * Adapt the multicomponent lookup path depending on the pathtype
3057 */
3058 static int
MCLpath(char ** path)3059 MCLpath(char **path)
3060 {
3061 unsigned char c = (unsigned char)**path;
3062
3063 /*
3064 * If the MCL path is between 0x20 and 0x7E (graphic printable
3065 * character of the US-ASCII coded character set), its a URL path,
3066 * per RFC 1738.
3067 */
3068 if (c >= 0x20 && c <= 0x7E)
3069 return (URLPATH);
3070
3071 /*
3072 * If the first octet of the MCL path is not an ASCII character
3073 * then it must be interpreted as a tag value that describes the
3074 * format of the remaining octets of the MCL path.
3075 *
3076 * If the first octet of the MCL path is 0x81 it is a query
3077 * for the security info.
3078 */
3079 switch (c) {
3080 case 0x80: /* native path, i.e. MCL via mount protocol */
3081 (*path)++;
3082 return (NATIVEPATH);
3083 case 0x81: /* security query */
3084 (*path)++;
3085 return (SECURITY_QUERY);
3086 default:
3087 return (-1);
3088 }
3089 }
3090
/*
 * Value of the hexadecimal digit c, or 0 if c is not a hexadecimal digit.
 * The argument is evaluated more than once, so it must be free of side
 * effects.
 */
#define	fromhex(c)	(((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
			(((c) >= 'A' && (c) <= 'F') ? ((c) - 'A' + 10) : \
			(((c) >= 'a' && (c) <= 'f') ? ((c) - 'a' + 10) : 0)))

/*
 * The implementation of URLparse guarantees that the final string will
 * fit in the original one. Replaces '%' occurrences followed by 2 characters
 * with its corresponding hexadecimal character.
 *
 * The string is rewritten in place.  Edge cases:
 *	- a '%' that is the last character is copied through unchanged;
 *	- a '%' followed by only one character decodes to that digit's
 *	  value times 16;
 *	- characters that are not hexadecimal digits count as 0, so "%zz"
 *	  (like "%00") yields a NUL byte that ends the resulting string early.
 */
static void
URLparse(char *str)
{
	char *p, *q;

	/* p reads ahead of (or level with) q, which writes the result */
	p = q = str;
	while (*p) {
		*q = *p;
		if (*p++ == '%') {
			if (*p) {
				/* high nibble replaces the copied '%' */
				*q = fromhex(*p) * 16;
				p++;
				if (*p) {
					*q += fromhex(*p);
					p++;
				}
			}
		}
		q++;
	}
	*q = '\0';
}
3122
3123
3124 /*
3125 * Get the export information for the lookup vnode, and verify its
3126 * useable.
3127 */
3128 int
nfs_check_vpexi(vnode_t * mc_dvp,vnode_t * vp,cred_t * cr,struct exportinfo ** exi)3129 nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr,
3130 struct exportinfo **exi)
3131 {
3132 int walk;
3133 int error = 0;
3134
3135 *exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE);
3136 if (*exi == NULL)
3137 error = EACCES;
3138 else {
3139 /*
3140 * If nosub is set for this export then
3141 * a lookup relative to the public fh
3142 * must not terminate below the
3143 * exported directory.
3144 */
3145 if ((*exi)->exi_export.ex_flags & EX_NOSUB && walk > 0)
3146 error = EACCES;
3147 }
3148
3149 return (error);
3150 }
3151
3152 /*
3153 * Used by NFSv3 and NFSv4 server to query label of
3154 * a pathname component during lookup/access ops.
3155 */
3156 ts_label_t *
nfs_getflabel(vnode_t * vp,struct exportinfo * exi)3157 nfs_getflabel(vnode_t *vp, struct exportinfo *exi)
3158 {
3159 zone_t *zone;
3160 ts_label_t *zone_label;
3161 char *path;
3162
3163 mutex_enter(&vp->v_lock);
3164 if (vp->v_path != vn_vpath_empty) {
3165 zone = zone_find_by_any_path(vp->v_path, B_FALSE);
3166 mutex_exit(&vp->v_lock);
3167 } else {
3168 /*
3169 * v_path not cached. Fall back on pathname of exported
3170 * file system as we rely on pathname from which we can
3171 * derive a label. The exported file system portion of
3172 * path is sufficient to obtain a label.
3173 */
3174 path = exi->exi_export.ex_path;
3175 if (path == NULL) {
3176 mutex_exit(&vp->v_lock);
3177 return (NULL);
3178 }
3179 zone = zone_find_by_any_path(path, B_FALSE);
3180 mutex_exit(&vp->v_lock);
3181 }
3182 /*
3183 * Caller has verified that the file is either
3184 * exported or visible. So if the path falls in
3185 * global zone, admin_low is returned; otherwise
3186 * the zone's label is returned.
3187 */
3188 zone_label = zone->zone_slabel;
3189 label_hold(zone_label);
3190 zone_rele(zone);
3191 return (zone_label);
3192 }
3193
3194 /*
3195 * TX NFS routine used by NFSv3 and NFSv4 to do label check
3196 * on client label and server's file object lable.
3197 */
3198 boolean_t
do_rfs_label_check(bslabel_t * clabel,vnode_t * vp,int flag,struct exportinfo * exi)3199 do_rfs_label_check(bslabel_t *clabel, vnode_t *vp, int flag,
3200 struct exportinfo *exi)
3201 {
3202 bslabel_t *slabel;
3203 ts_label_t *tslabel;
3204 boolean_t result;
3205
3206 if ((tslabel = nfs_getflabel(vp, exi)) == NULL) {
3207 return (B_FALSE);
3208 }
3209 slabel = label2bslabel(tslabel);
3210 DTRACE_PROBE4(tx__rfs__log__info__labelcheck, char *,
3211 "comparing server's file label(1) with client label(2) (vp(3))",
3212 bslabel_t *, slabel, bslabel_t *, clabel, vnode_t *, vp);
3213
3214 if (flag == EQUALITY_CHECK)
3215 result = blequal(clabel, slabel);
3216 else
3217 result = bldominates(clabel, slabel);
3218 label_rele(tslabel);
3219 return (result);
3220 }
3221
3222 /*
3223 * Callback function to return the loaned buffers.
3224 * Calls VOP_RETZCBUF() only after all uio_iov[]
3225 * buffers are returned. nu_ref maintains the count.
3226 */
3227 void
rfs_free_xuio(void * free_arg)3228 rfs_free_xuio(void *free_arg)
3229 {
3230 uint_t ref;
3231 nfs_xuio_t *nfsuiop = (nfs_xuio_t *)free_arg;
3232
3233 ref = atomic_dec_uint_nv(&nfsuiop->nu_ref);
3234
3235 /*
3236 * Call VOP_RETZCBUF() only when all the iov buffers
3237 * are sent OTW.
3238 */
3239 if (ref != 0)
3240 return;
3241
3242 if (((uio_t *)nfsuiop)->uio_extflg & UIO_XUIO) {
3243 (void) VOP_RETZCBUF(nfsuiop->nu_vp, (xuio_t *)free_arg, NULL,
3244 NULL);
3245 VN_RELE(nfsuiop->nu_vp);
3246 }
3247
3248 kmem_cache_free(nfs_xuio_cache, free_arg);
3249 }
3250
3251 xuio_t *
rfs_setup_xuio(vnode_t * vp)3252 rfs_setup_xuio(vnode_t *vp)
3253 {
3254 nfs_xuio_t *nfsuiop;
3255
3256 nfsuiop = kmem_cache_alloc(nfs_xuio_cache, KM_SLEEP);
3257
3258 bzero(nfsuiop, sizeof (nfs_xuio_t));
3259 nfsuiop->nu_vp = vp;
3260
3261 /*
3262 * ref count set to 1. more may be added
3263 * if multiple mblks refer to multiple iov's.
3264 * This is done in uio_to_mblk().
3265 */
3266
3267 nfsuiop->nu_ref = 1;
3268
3269 nfsuiop->nu_frtn.free_func = rfs_free_xuio;
3270 nfsuiop->nu_frtn.free_arg = (char *)nfsuiop;
3271
3272 nfsuiop->nu_uio.xu_type = UIOTYPE_ZEROCOPY;
3273
3274 return (&nfsuiop->nu_uio);
3275 }
3276
3277 mblk_t *
uio_to_mblk(uio_t * uiop)3278 uio_to_mblk(uio_t *uiop)
3279 {
3280 struct iovec *iovp;
3281 int i;
3282 mblk_t *mp, *mp1;
3283 nfs_xuio_t *nfsuiop = (nfs_xuio_t *)uiop;
3284
3285 if (uiop->uio_iovcnt == 0)
3286 return (NULL);
3287
3288 iovp = uiop->uio_iov;
3289 mp = mp1 = esballoca((uchar_t *)iovp->iov_base, iovp->iov_len,
3290 BPRI_MED, &nfsuiop->nu_frtn);
3291 ASSERT(mp != NULL);
3292
3293 mp->b_wptr += iovp->iov_len;
3294 mp->b_datap->db_type = M_DATA;
3295
3296 for (i = 1; i < uiop->uio_iovcnt; i++) {
3297 iovp = (uiop->uio_iov + i);
3298
3299 mp1->b_cont = esballoca(
3300 (uchar_t *)iovp->iov_base, iovp->iov_len, BPRI_MED,
3301 &nfsuiop->nu_frtn);
3302
3303 mp1 = mp1->b_cont;
3304 ASSERT(mp1 != NULL);
3305 mp1->b_wptr += iovp->iov_len;
3306 mp1->b_datap->db_type = M_DATA;
3307 }
3308
3309 nfsuiop->nu_ref = uiop->uio_iovcnt;
3310
3311 return (mp);
3312 }
3313
3314 /*
3315 * Allocate memory to hold data for a read request of len bytes.
3316 *
3317 * We don't allocate buffers greater than kmem_max_cached in size to avoid
3318 * allocating memory from the kmem_oversized arena. If we allocate oversized
3319 * buffers, we incur heavy cross-call activity when freeing these large buffers
3320 * in the TCP receive path. Note that we can't set b_wptr here since the
3321 * length of the data returned may differ from the length requested when
3322 * reading the end of a file; we set b_wptr in rfs_rndup_mblks() once the
3323 * length of the read is known.
3324 */
3325 mblk_t *
rfs_read_alloc(uint_t len,struct iovec ** iov,int * iovcnt)3326 rfs_read_alloc(uint_t len, struct iovec **iov, int *iovcnt)
3327 {
3328 struct iovec *iovarr;
3329 mblk_t *mp, **mpp = ∓
3330 size_t mpsize;
3331 uint_t remain = len;
3332 int i, err = 0;
3333
3334 *iovcnt = howmany(len, kmem_max_cached);
3335
3336 iovarr = kmem_alloc(*iovcnt * sizeof (struct iovec), KM_SLEEP);
3337 *iov = iovarr;
3338
3339 for (i = 0; i < *iovcnt; remain -= mpsize, i++) {
3340 ASSERT(remain <= len);
3341 /*
3342 * We roundup the size we allocate to a multiple of
3343 * BYTES_PER_XDR_UNIT (4 bytes) so that the call to
3344 * xdrmblk_putmblk() never fails.
3345 */
3346 ASSERT(kmem_max_cached % BYTES_PER_XDR_UNIT == 0);
3347 mpsize = MIN(kmem_max_cached, remain);
3348 *mpp = allocb_wait(RNDUP(mpsize), BPRI_MED, STR_NOSIG, &err);
3349 ASSERT(*mpp != NULL);
3350 ASSERT(err == 0);
3351
3352 iovarr[i].iov_base = (caddr_t)(*mpp)->b_rptr;
3353 iovarr[i].iov_len = mpsize;
3354 mpp = &(*mpp)->b_cont;
3355 }
3356 return (mp);
3357 }
3358
3359 void
rfs_rndup_mblks(mblk_t * mp,uint_t len,int buf_loaned)3360 rfs_rndup_mblks(mblk_t *mp, uint_t len, int buf_loaned)
3361 {
3362 int i;
3363 int alloc_err = 0;
3364 mblk_t *rmp;
3365 uint_t mpsize, remainder;
3366
3367 remainder = P2NPHASE(len, BYTES_PER_XDR_UNIT);
3368
3369 /*
3370 * Non copy-reduction case. This function assumes that blocks were
3371 * allocated in multiples of BYTES_PER_XDR_UNIT bytes, which makes this
3372 * padding safe without bounds checking.
3373 */
3374 if (!buf_loaned) {
3375 /*
3376 * Set the size of each mblk in the chain until we've consumed
3377 * the specified length for all but the last one.
3378 */
3379 while ((mpsize = MBLKSIZE(mp)) < len) {
3380 ASSERT(mpsize % BYTES_PER_XDR_UNIT == 0);
3381 mp->b_wptr += mpsize;
3382 len -= mpsize;
3383 mp = mp->b_cont;
3384 ASSERT(mp != NULL);
3385 }
3386
3387 ASSERT(len + remainder <= mpsize);
3388 mp->b_wptr += len;
3389 for (i = 0; i < remainder; i++)
3390 *mp->b_wptr++ = '\0';
3391 return;
3392 }
3393
3394 /*
3395 * No remainder mblk required.
3396 */
3397 if (remainder == 0)
3398 return;
3399
3400 /*
3401 * Get to the last mblk in the chain.
3402 */
3403 while (mp->b_cont != NULL)
3404 mp = mp->b_cont;
3405
3406 /*
3407 * In case of copy-reduction mblks, the size of the mblks are fixed
3408 * and are of the size of the loaned buffers. Allocate a remainder
3409 * mblk and chain it to the data buffers. This is sub-optimal, but not
3410 * expected to happen commonly.
3411 */
3412 rmp = allocb_wait(remainder, BPRI_MED, STR_NOSIG, &alloc_err);
3413 ASSERT(rmp != NULL);
3414 ASSERT(alloc_err == 0);
3415
3416 for (i = 0; i < remainder; i++)
3417 *rmp->b_wptr++ = '\0';
3418
3419 rmp->b_datap->db_type = M_DATA;
3420 mp->b_cont = rmp;
3421 }
3422