1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 */
35
36 #include <sys/capsicum.h>
37 #include <sys/extattr.h>
38
39 /*
40 * Functions that perform the vfs operations required by the routines in
41 * nfsd_serv.c. It is hoped that this change will make the server more
42 * portable.
43 */
44
45 #include <fs/nfs/nfsport.h>
46 #include <security/mac/mac_framework.h>
47 #include <sys/callout.h>
48 #include <sys/filio.h>
49 #include <sys/hash.h>
50 #include <sys/netexport.h>
51 #include <sys/osd.h>
52 #include <sys/sysctl.h>
53 #include <nlm/nlm_prot.h>
54 #include <nlm/nlm.h>
55 #include <vm/vm_param.h>
56 #include <vm/vnode_pager.h>
57
58 FEATURE(nfsd, "NFSv4 server");
59
60 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
61 extern int nfsrv_useacl;
62 extern int newnfs_numnfsd;
63 extern int nfsrv_sessionhashsize;
64 extern struct nfslayouthash *nfslayouthash;
65 extern int nfsrv_layouthashsize;
66 extern struct mtx nfsrv_dslock_mtx;
67 extern int nfs_pnfsiothreads;
68 extern volatile int nfsrv_devidcnt;
69 extern int nfsrv_maxpnfsmirror;
70 extern uint32_t nfs_srvmaxio;
71 extern int nfs_bufpackets;
72 extern u_long sb_max_adj;
73 extern struct nfsv4lock nfsv4rootfs_lock;
74
75 uint64_t nfsrv_stripesiz = 0;
76 static int nfsrv_maxstripecnt = 1;
77
78 VNET_DECLARE(int, nfsrv_numnfsd);
79 VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst);
80 VNET_DECLARE(SVCPOOL *, nfsrvd_pool);
81 VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash);
82 VNET_DECLARE(struct nfslockhashhead *, nfslockhash);
83 VNET_DECLARE(struct nfssessionhash *, nfssessionhash);
84 VNET_DECLARE(struct nfsv4lock, nfsd_suspend_lock);
85 VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p);
86
87 NFSDLOCKMUTEX;
88 NFSSTATESPINLOCK;
89 struct mtx nfsrc_udpmtx;
90 struct mtx nfs_v4root_mutex;
91 struct mtx nfsrv_dontlistlock_mtx;
92 struct mtx nfsrv_recalllock_mtx;
93 struct nfsrvfh nfs_pubfh;
94 int nfs_pubfhset = 0;
95 int nfsd_debuglevel = 0;
96 static pid_t nfsd_master_pid = (pid_t)-1;
97 static char nfsd_master_comm[MAXCOMLEN + 1];
98 static struct timeval nfsd_master_start;
99 static uint32_t nfsv4_sysid = 0;
100 static fhandle_t zerofh;
101
102 VNET_DEFINE(struct proc *, nfsd_master_proc) = NULL;
103 VNET_DEFINE(struct nfsrvhashhead *, nfsrvudphashtbl);
104 VNET_DEFINE(struct nfsrchash_bucket *, nfsrchash_table);
105 VNET_DEFINE(struct nfsrchash_bucket *, nfsrcahash_table);
106 VNET_DEFINE(struct nfsrvfh, nfs_rootfh);
107 VNET_DEFINE(int, nfs_rootfhset) = 0;
108 VNET_DEFINE(struct callout, nfsd_callout);
109 VNET_DEFINE_STATIC(struct mount *, nfsv4root_mnt);
110 VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_opt);
111 VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_newopt);
112 VNET_DEFINE_STATIC(bool, nfsrv_suspend_nfsd) = false;
113 VNET_DEFINE_STATIC(bool, nfsrv_mntinited) = false;
114
115 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
116 struct ucred *);
117 static void nfsvno_updateds(struct vnode *, struct ucred *, struct thread *);
118 static void nfsvno_pnfsreplenish(void *);
119 static int nfsvno_pnfsusenumfile(struct nameidata *, struct vattr *);
120
121 int nfsrv_enable_crossmntpt = 1;
122 static int nfs_commit_blks;
123 static int nfs_commit_miss;
124 extern int nfsrv_issuedelegs;
125 extern int nfsrv_dolocallocks;
126 extern struct nfsdevicehead nfsrv_devidhead;
127
128 /* Map d_type to vnode type. */
129 static uint8_t dtype_to_vnode[DT_WHT + 1] = { VNON, VFIFO, VCHR, VNON, VDIR,
130 VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON };
131
132 static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **,
133 struct iovec **);
134 static int nfsrv_createiovec_extpgs(int, int, struct mbuf **,
135 struct mbuf **, struct iovec **);
136 static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **,
137 int *);
138 static void nfs_dtypetovtype(struct nfsvattr *, struct vnode *, uint8_t);
139 static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *,
140 NFSPROC_T *);
141 static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode ***,
142 int *, char *, fhandle_t *);
143 static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *,
144 NFSPROC_T *);
145 static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *,
146 struct thread *, int, struct mbuf **, char *, struct mbuf **,
147 struct nfsvattr *, struct acl *, off_t *, int, bool *);
148 static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *);
149 static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *,
150 NFSPROC_T *, struct nfsmount **, int, int, uint64_t, struct mbuf **,
151 struct mbuf **, int *);
152 static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *,
153 NFSPROC_T *, struct vnode *, struct nfsmount **, int, int, uint64_t,
154 struct mbuf **, char *, int *);
155 #ifdef notnow
156 static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
157 NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
158 static int nfsrv_deallocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
159 NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
160 static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
161 struct vnode *, struct nfsmount **, int, struct acl *, int *);
162 #endif
163 static int nfsrv_setattrdsrpc(fhandle_t *, struct vnode *, struct ucred *,
164 NFSPROC_T *, struct nfsmount **, int, int, struct nfsvattr *, int *);
165 static int nfsrv_setattrdsdorpc(fhandle_t *, struct vnode *, struct ucred *,
166 NFSPROC_T *, struct nfsmount *, struct nfsvattr *, struct nfsvattr *);
167 static int nfsrv_getattrdsrpc(fhandle_t *, struct vnode *, struct ucred *,
168 NFSPROC_T *, struct nfsmount **, int, struct nfsvattr *, int *);
169 static int nfsrv_getattrdsdorpc(fhandle_t *, struct vnode *, struct ucred *,
170 NFSPROC_T *, struct nfsmount *, struct nfsvattr *);
171 #ifdef notnow
172 static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *,
173 NFSPROC_T *, struct nfsmount *);
174 #endif
175 static int nfsrv_putfhname(fhandle_t *, char *);
176 static int nfsrv_pnfslookupds(struct vnode *, struct vnode *,
177 struct pnfsdsfile *, struct vnode **, NFSPROC_T *);
178 static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char **,
179 char *, struct vnode *, NFSPROC_T *);
180 static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *);
181 static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *,
182 NFSPROC_T *);
183 static int nfsrv_pnfsstatfs(struct statfs *, struct mount *);
184
185 int nfs_pnfsio(task_fn_t *, void *);
186
187 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
188 "NFS server");
189 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
190 &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
191 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
192 0, "");
193 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
194 0, "");
195 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
196 &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
197 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
198 0, "Debug level for NFS server");
199 VNET_DECLARE(int, nfsd_enable_stringtouid);
200 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid,
201 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nfsd_enable_stringtouid),
202 0, "Enable nfsd to accept numeric owner_names");
203 /*
204 * vfs.nfsd.pnfsswitchforw and vfs.nfsd.pnfsnumfilemiss are writable so that
205 * statistics can be reset.
206 */
207 static uint64_t nfsrv_pnfsswitchforw = 0;
208 SYSCTL_U64(_vfs_nfsd, OID_AUTO, pnfsswitchforw, CTLFLAG_RW,
209 &nfsrv_pnfsswitchforw, 0, "Number of times replenish switches to forward");
210 static uint64_t nfsrv_pnfsnumfilemiss = 0;
211 SYSCTL_U64(_vfs_nfsd, OID_AUTO, pnfsnumfilemiss, CTLFLAG_RW,
212 &nfsrv_pnfsnumfilemiss, 0, "Number of numfile misses");
213 static u_int nfsrv_pnfsforwcnt = 5;
214 SYSCTL_UINT(_vfs_nfsd, OID_AUTO, pnfsreplenishforwcnt, CTLFLAG_RW,
215 &nfsrv_pnfsforwcnt, 0, "Forward replenish cnt before switch to back");
216 static int nfsrv_pnfsgetdsattr = 1;
217 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW,
218 &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC");
219 static bool nfsrv_recalldeleg = false;
220 SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, recalldeleg, CTLFLAG_RW,
221 &nfsrv_recalldeleg, 0,
222 "When set remove/rename recalls delegations for same client");
223
224 /*
225 * nfsrv_dsdirsize can only be increased and only when the nfsd threads are
226 * not running.
227 * The dsN subdirectories for the increased values must have been created
228 * on all DS servers before this increase is done.
229 */
230 u_int nfsrv_dsdirsize = 20;
231 static int
sysctl_dsdirsize(SYSCTL_HANDLER_ARGS)232 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS)
233 {
234 int error, newdsdirsize;
235
236 newdsdirsize = nfsrv_dsdirsize;
237 error = sysctl_handle_int(oidp, &newdsdirsize, 0, req);
238 if (error != 0 || req->newptr == NULL)
239 return (error);
240 if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 ||
241 newnfs_numnfsd != 0)
242 return (EINVAL);
243 nfsrv_dsdirsize = newdsdirsize;
244 return (0);
245 }
246 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize,
247 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize),
248 sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers");
249
250 /*
251 * nfsrv_pnfsmaxnumfiles can only be decreased when the nfsd is not
252 * running. It can be increased when the nfsd is running, but the
253 * additional numfiles should have been precreated in .pnfshide/numfiles
254 * for all file systems before it is increased.
255 */
256 static u_int nfsrv_pnfsmaxnumfiles = 1000;
257 static int
sysctl_pnfsmaxnumfiles(SYSCTL_HANDLER_ARGS)258 sysctl_pnfsmaxnumfiles(SYSCTL_HANDLER_ARGS)
259 {
260 int error, new_maxnumfiles;
261
262 new_maxnumfiles = nfsrv_pnfsmaxnumfiles;
263 error = sysctl_handle_int(oidp, &new_maxnumfiles, 0, req);
264 if (error != 0 || req->newptr == NULL)
265 return (error);
266 if (new_maxnumfiles < nfsrv_pnfsmaxnumfiles && newnfs_numnfsd != 0)
267 return (EBUSY);
268 if (new_maxnumfiles > 10000 || new_maxnumfiles < 100)
269 return (EINVAL);
270 nfsrv_pnfsmaxnumfiles = new_maxnumfiles;
271 return (0);
272 }
273 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, pnfsmaxnumfiles,
274 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0,
275 sizeof(nfsrv_pnfsmaxnumfiles), sysctl_pnfsmaxnumfiles,
276 "IU", "Maximum number of entries in .pnfshide/numfiles");
277
278 /*
279 * nfs_srvmaxio can only be increased and only when the nfsd threads are
280 * not running. The setting must be a power of 2, with the current limit of
281 * 1Mbyte.
282 */
283 static int
sysctl_srvmaxio(SYSCTL_HANDLER_ARGS)284 sysctl_srvmaxio(SYSCTL_HANDLER_ARGS)
285 {
286 int error;
287 u_int newsrvmaxio;
288 uint64_t tval;
289
290 newsrvmaxio = nfs_srvmaxio;
291 error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req);
292 if (error != 0 || req->newptr == NULL)
293 return (error);
294 if (newsrvmaxio == nfs_srvmaxio)
295 return (0);
296 if (newsrvmaxio < nfs_srvmaxio) {
297 printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n");
298 return (EINVAL);
299 }
300 if (newsrvmaxio > NFS_SRVLIMITIO) {
301 printf("nfsd: vfs.nfsd.srvmaxio cannot be > %d\n",
302 NFS_SRVLIMITIO);
303 return (EINVAL);
304 }
305 if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) {
306 printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n");
307 return (EINVAL);
308 }
309
310 /*
311 * Check that kern.ipc.maxsockbuf is large enough for
312 * newsrviomax, given the setting of vfs.nfs.bufpackets.
313 */
314 if ((newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets >
315 sb_max_adj) {
316 /*
317 * Suggest vfs.nfs.bufpackets * maximum RPC message for
318 * sb_max_adj.
319 */
320 tval = (newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets;
321
322 /*
323 * Convert suggested sb_max_adj value to a suggested
324 * sb_max value, which is what is set via kern.ipc.maxsockbuf.
325 * Perform the inverse calculation of (from uipc_sockbuf.c):
326 * sb_max_adj = (u_quad_t)sb_max * MCLBYTES /
327 * (MSIZE + MCLBYTES);
328 * XXX If the calculation of sb_max_adj from sb_max changes,
329 * this calculation must be changed as well.
330 */
331 tval *= (MSIZE + MCLBYTES); /* Brackets for readability. */
332 tval += MCLBYTES - 1; /* Round up divide. */
333 tval /= MCLBYTES;
334 printf("nfsd: set kern.ipc.maxsockbuf to a minimum of "
335 "%ju to support %ubyte NFS I/O\n", (uintmax_t)tval,
336 newsrvmaxio);
337 return (EINVAL);
338 }
339
340 NFSD_LOCK();
341 if (newnfs_numnfsd != 0) {
342 NFSD_UNLOCK();
343 printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd "
344 "threads are running\n");
345 return (EINVAL);
346 }
347
348
349 nfs_srvmaxio = newsrvmaxio;
350 NFSD_UNLOCK();
351 return (0);
352 }
353 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio,
354 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
355 sysctl_srvmaxio, "IU", "Maximum I/O size in bytes");
356
357 static int
sysctl_dolocallocks(SYSCTL_HANDLER_ARGS)358 sysctl_dolocallocks(SYSCTL_HANDLER_ARGS)
359 {
360 int error, igotlock, newdolocallocks;
361
362 newdolocallocks = nfsrv_dolocallocks;
363 error = sysctl_handle_int(oidp, &newdolocallocks, 0, req);
364 if (error != 0 || req->newptr == NULL)
365 return (error);
366 if (newdolocallocks == nfsrv_dolocallocks)
367 return (0);
368 if (jailed(curthread->td_ucred))
369 return (EINVAL);
370
371 NFSLOCKV4ROOTMUTEX();
372 do {
373 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
374 NFSV4ROOTLOCKMUTEXPTR, NULL);
375 } while (!igotlock);
376 NFSUNLOCKV4ROOTMUTEX();
377
378 nfsrv_dolocallocks = newdolocallocks;
379
380 NFSLOCKV4ROOTMUTEX();
381 nfsv4_unlock(&nfsv4rootfs_lock, 0);
382 NFSUNLOCKV4ROOTMUTEX();
383 return (0);
384 }
385 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, enable_locallocks,
386 CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
387 sysctl_dolocallocks, "IU", "Enable nfsd to acquire local locks on files");
388
389 static int
sysctl_stripecnt(SYSCTL_HANDLER_ARGS)390 sysctl_stripecnt(SYSCTL_HANDLER_ARGS)
391 {
392 int error, newmaxstripecnt;
393
394 newmaxstripecnt = nfsrv_maxstripecnt;
395 error = sysctl_handle_int(oidp, &newmaxstripecnt, 0, req);
396 if (error != 0 || req->newptr == NULL)
397 return (error);
398 if (newmaxstripecnt == nfsrv_maxstripecnt)
399 return (0);
400 if (newnfs_numnfsd > 0)
401 return (EPERM);
402 if (jailed(curthread->td_ucred))
403 return (EINVAL);
404 if (newmaxstripecnt <= 0 || newmaxstripecnt > NFSDEV_MAXSTRIPE)
405 return (EINVAL);
406 nfsrv_maxstripecnt = newmaxstripecnt;
407 return (0);
408 }
409 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, pnfsstripecnt,
410 CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
411 sysctl_stripecnt, "IU", "Set the #stripes for a pNFS server");
412
413 static int
sysctl_stripeunit(SYSCTL_HANDLER_ARGS)414 sysctl_stripeunit(SYSCTL_HANDLER_ARGS)
415 {
416 uint64_t newstripesiz;
417 int error;
418
419 newstripesiz = nfsrv_stripesiz;
420 error = sysctl_handle_64(oidp, &newstripesiz, 0, req);
421 if (error != 0 || req->newptr == NULL)
422 return (error);
423 if (newstripesiz == nfsrv_stripesiz)
424 return (0);
425 if (newnfs_numnfsd > 0)
426 return (EPERM);
427 if (jailed(curthread->td_ucred))
428 return (EINVAL);
429 nfsrv_stripesiz = newstripesiz;
430 if (newstripesiz == 0)
431 nfsrv_maxstripecnt = 1;
432 return (0);
433 }
434 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, pnfsstripeunit,
435 CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
436 sysctl_stripeunit, "QU", "Set the stripe unit length for a pNFS server");
437
438 #define MAX_REORDERED_RPC 16
439 #define NUM_HEURISTIC 1031
440 #define NHUSE_INIT 64
441 #define NHUSE_INC 16
442 #define NHUSE_MAX 2048
443
444 static struct nfsheur {
445 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */
446 off_t nh_nextoff; /* next offset for sequential detection */
447 int nh_use; /* use count for selection */
448 int nh_seqcount; /* heuristic */
449 } nfsheur[NUM_HEURISTIC];
450
451 /*
452 * Heuristic to detect sequential operation.
453 */
454 static struct nfsheur *
nfsrv_sequential_heuristic(struct uio * uio,struct vnode * vp)455 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
456 {
457 struct nfsheur *nh;
458 int hi, try;
459
460 /* Locate best candidate. */
461 try = 32;
462 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
463 nh = &nfsheur[hi];
464 while (try--) {
465 if (nfsheur[hi].nh_vp == vp) {
466 nh = &nfsheur[hi];
467 break;
468 }
469 if (nfsheur[hi].nh_use > 0)
470 --nfsheur[hi].nh_use;
471 hi = (hi + 1) % NUM_HEURISTIC;
472 if (nfsheur[hi].nh_use < nh->nh_use)
473 nh = &nfsheur[hi];
474 }
475
476 /* Initialize hint if this is a new file. */
477 if (nh->nh_vp != vp) {
478 nh->nh_vp = vp;
479 nh->nh_nextoff = uio->uio_offset;
480 nh->nh_use = NHUSE_INIT;
481 if (uio->uio_offset == 0)
482 nh->nh_seqcount = 4;
483 else
484 nh->nh_seqcount = 1;
485 }
486
487 /* Calculate heuristic. */
488 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
489 uio->uio_offset == nh->nh_nextoff) {
490 /* See comments in vfs_vnops.c:sequential_heuristic(). */
491 nh->nh_seqcount += howmany(uio->uio_resid, 16384);
492 if (nh->nh_seqcount > IO_SEQMAX)
493 nh->nh_seqcount = IO_SEQMAX;
494 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
495 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
496 /* Probably a reordered RPC, leave seqcount alone. */
497 } else if (nh->nh_seqcount > 1) {
498 nh->nh_seqcount /= 2;
499 } else {
500 nh->nh_seqcount = 0;
501 }
502 nh->nh_use += NHUSE_INC;
503 if (nh->nh_use > NHUSE_MAX)
504 nh->nh_use = NHUSE_MAX;
505 return (nh);
506 }
507
508 /*
509 * Get attributes into nfsvattr structure.
510 */
511 int
nfsvno_getattr(struct vnode * vp,struct nfsvattr * nvap,struct nfsrv_descript * nd,struct thread * p,int vpislocked,nfsattrbit_t * attrbitp)512 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap,
513 struct nfsrv_descript *nd, struct thread *p, int vpislocked,
514 nfsattrbit_t *attrbitp)
515 {
516 int error, gotattr, lockedit = 0;
517 struct nfsvattr na;
518
519 if (vpislocked == 0) {
520 /*
521 * When vpislocked == 0, the vnode is either exclusively
522 * locked by this thread or not locked by this thread.
523 * As such, shared lock it, if not exclusively locked.
524 */
525 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
526 lockedit = 1;
527 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
528 }
529 }
530
531 /*
532 * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed
533 * attributes, as required.
534 * This needs to be done for regular files if:
535 * - non-NFSv4 RPCs or
536 * - when attrbitp == NULL or
537 * - an NFSv4 RPC with any of the above attributes in attrbitp.
538 * A return of 0 for nfsrv_proxyds() indicates that it has acquired
539 * these attributes. nfsrv_proxyds() will return an error if the
540 * server is not a pNFS one.
541 */
542 gotattr = 0;
543 if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL ||
544 (nd->nd_flag & ND_NFSV4) == 0 ||
545 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) ||
546 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) ||
547 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) ||
548 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) ||
549 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) {
550 error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p,
551 NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0,
552 NULL);
553 if (error == 0)
554 gotattr = 1;
555 }
556
557 nvap->na_bsdflags = 0;
558 nvap->na_flags = 0;
559 error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred);
560 if (lockedit != 0)
561 NFSVOPUNLOCK(vp);
562
563 /*
564 * If we got the Change, Size and Modify Time from the DS,
565 * replace them.
566 */
567 if (gotattr != 0) {
568 nvap->na_atime = na.na_atime;
569 nvap->na_mtime = na.na_mtime;
570 nvap->na_filerev = na.na_filerev;
571 nvap->na_size = na.na_size;
572 nvap->na_bytes = na.na_bytes;
573 }
574 NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr,
575 error, (uintmax_t)na.na_filerev);
576
577 NFSEXITCODE(error);
578 return (error);
579 }
580
581 /*
582 * Get a file handle for a vnode.
583 */
584 int
nfsvno_getfh(struct vnode * vp,fhandle_t * fhp,struct thread * p)585 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
586 {
587 int error;
588
589 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
590 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
591 error = VOP_VPTOFH(vp, &fhp->fh_fid);
592
593 NFSEXITCODE(error);
594 return (error);
595 }
596
597 /*
598 * Perform access checking for vnodes obtained from file handles that would
599 * refer to files already opened by a Unix client. You cannot just use
600 * vn_writechk() and VOP_ACCESSX() for two reasons.
601 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
602 * case.
603 * 2 - The owner is to be given access irrespective of mode bits for some
604 * operations, so that processes that chmod after opening a file don't
605 * break.
606 */
607 int
nfsvno_accchk(struct vnode * vp,accmode_t accmode,struct ucred * cred,struct nfsexstuff * exp,struct thread * p,int override,int vpislocked,u_int32_t * supportedtypep)608 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
609 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
610 u_int32_t *supportedtypep)
611 {
612 struct vattr vattr;
613 int error = 0, getret = 0;
614
615 if (vpislocked == 0) {
616 if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
617 error = EPERM;
618 goto out;
619 }
620 }
621 if (accmode & VWRITE) {
622 /* Just vn_writechk() changed to check rdonly */
623 /*
624 * Disallow write attempts on read-only file systems;
625 * unless the file is a socket or a block or character
626 * device resident on the file system.
627 */
628 if (NFSVNO_EXRDONLY(exp) ||
629 (vp->v_mount->mnt_flag & MNT_RDONLY)) {
630 switch (vp->v_type) {
631 case VREG:
632 case VDIR:
633 case VLNK:
634 error = EROFS;
635 default:
636 break;
637 }
638 }
639 /*
640 * If there's shared text associated with
641 * the inode, try to free it up once. If
642 * we fail, we can't allow writing.
643 */
644 if (VOP_IS_TEXT(vp) && error == 0)
645 error = ETXTBSY;
646 }
647 if (error != 0) {
648 if (vpislocked == 0)
649 NFSVOPUNLOCK(vp);
650 goto out;
651 }
652
653 /*
654 * Should the override still be applied when ACLs are enabled?
655 */
656 error = VOP_ACCESSX(vp, accmode, cred, p);
657 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
658 /*
659 * Try again with VEXPLICIT_DENY, to see if the test for
660 * deletion is supported.
661 */
662 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
663 if (error == 0) {
664 if (vp->v_type == VDIR) {
665 accmode &= ~(VDELETE | VDELETE_CHILD);
666 accmode |= VWRITE;
667 error = VOP_ACCESSX(vp, accmode, cred, p);
668 } else if (supportedtypep != NULL) {
669 *supportedtypep &= ~NFSACCESS_DELETE;
670 }
671 }
672 }
673
674 /*
675 * Allow certain operations for the owner (reads and writes
676 * on files that are already open).
677 */
678 if (override != NFSACCCHK_NOOVERRIDE &&
679 (error == EPERM || error == EACCES)) {
680 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
681 error = 0;
682 else if (override & NFSACCCHK_ALLOWOWNER) {
683 getret = VOP_GETATTR(vp, &vattr, cred);
684 if (getret == 0 && cred->cr_uid == vattr.va_uid)
685 error = 0;
686 }
687 }
688 if (vpislocked == 0)
689 NFSVOPUNLOCK(vp);
690
691 out:
692 NFSEXITCODE(error);
693 return (error);
694 }
695
696 /*
697 * Set attribute(s) vnop.
698 */
699 int
nfsvno_setattr(struct vnode * vp,struct nfsvattr * nvap,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)700 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
701 struct thread *p, struct nfsexstuff *exp)
702 {
703 u_quad_t savsize = 0;
704 int error, savedit;
705 time_t savbtime;
706
707 /*
708 * If this is an exported file system and a pNFS service is running,
709 * don't VOP_SETATTR() of size for the MDS file system.
710 */
711 savedit = 0;
712 error = 0;
713 if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 &&
714 nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL &&
715 nvap->na_vattr.va_size > 0) {
716 savsize = nvap->na_vattr.va_size;
717 nvap->na_vattr.va_size = VNOVAL;
718 if (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
719 nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
720 nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
721 nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
722 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)
723 savedit = 1;
724 else
725 savedit = 2;
726 }
727 if (savedit != 2)
728 error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
729 if (savedit != 0)
730 nvap->na_vattr.va_size = savsize;
731 if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
732 nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
733 nvap->na_vattr.va_size != VNOVAL ||
734 nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
735 nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
736 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) {
737 /* Never modify birthtime on a DS file. */
738 savbtime = nvap->na_vattr.va_birthtime.tv_sec;
739 nvap->na_vattr.va_birthtime.tv_sec = VNOVAL;
740 /* For a pNFS server, set the attributes on the DS file. */
741 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR,
742 NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL);
743 nvap->na_vattr.va_birthtime.tv_sec = savbtime;
744 if (error == ENOENT)
745 error = 0;
746 }
747 NFSEXITCODE(error);
748 return (error);
749 }
750
751 /*
752 * Set up nameidata for a lookup() call and do it.
753 */
754 int
nfsvno_namei(struct nfsrv_descript * nd,struct nameidata * ndp,struct vnode * dp,int islocked,struct nfsexstuff * exp,struct vnode ** retdirp)755 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
756 struct vnode *dp, int islocked, struct nfsexstuff *exp,
757 struct vnode **retdirp)
758 {
759 struct componentname *cnp = &ndp->ni_cnd;
760 int i;
761 struct iovec aiov;
762 struct uio auio;
763 int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
764 int error = 0;
765 char *cp;
766
767 *retdirp = NULL;
768 cnp->cn_nameptr = cnp->cn_pnbuf;
769 ndp->ni_lcf = 0;
770 /*
771 * Extract and set starting directory.
772 */
773 if (dp->v_type != VDIR) {
774 if (islocked)
775 vput(dp);
776 else
777 vrele(dp);
778 nfsvno_relpathbuf(ndp);
779 error = ENOTDIR;
780 goto out1;
781 }
782 if (islocked)
783 NFSVOPUNLOCK(dp);
784 vref(dp);
785 *retdirp = dp;
786 if (NFSVNO_EXRDONLY(exp))
787 cnp->cn_flags |= RDONLY;
788 ndp->ni_segflg = UIO_SYSSPACE;
789
790 if (nd->nd_flag & ND_PUBLOOKUP) {
791 ndp->ni_loopcnt = 0;
792 if (cnp->cn_pnbuf[0] == '/') {
793 vrele(dp);
794 /*
795 * Check for degenerate pathnames here, since lookup()
796 * panics on them.
797 */
798 for (i = 1; i < ndp->ni_pathlen; i++)
799 if (cnp->cn_pnbuf[i] != '/')
800 break;
801 if (i == ndp->ni_pathlen) {
802 error = NFSERR_ACCES;
803 goto out;
804 }
805 dp = rootvnode;
806 vref(dp);
807 }
808 } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
809 (nd->nd_flag & ND_NFSV4) == 0) {
810 /*
811 * Only cross mount points for NFSv4 when doing a
812 * mount while traversing the file system above
813 * the mount point, unless nfsrv_enable_crossmntpt is set.
814 */
815 cnp->cn_flags |= NOCROSSMOUNT;
816 }
817
818 /*
819 * Initialize for scan, set ni_startdir and bump ref on dp again
820 * because lookup() will dereference ni_startdir.
821 */
822
823 ndp->ni_startdir = dp;
824 ndp->ni_rootdir = rootvnode;
825 ndp->ni_topdir = NULL;
826
827 if (!lockleaf)
828 cnp->cn_flags |= LOCKLEAF;
829 for (;;) {
830 cnp->cn_nameptr = cnp->cn_pnbuf;
831 /*
832 * Call lookup() to do the real work. If an error occurs,
833 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
834 * we do not have to dereference anything before returning.
835 * In either case ni_startdir will be dereferenced and NULLed
836 * out.
837 */
838 error = vfs_lookup(ndp);
839 if (error)
840 break;
841
842 /*
843 * Check for encountering a symbolic link. Trivial
844 * termination occurs if no symlink encountered.
845 */
846 if ((cnp->cn_flags & ISSYMLINK) == 0) {
847 if (ndp->ni_vp && !lockleaf)
848 NFSVOPUNLOCK(ndp->ni_vp);
849 break;
850 }
851
852 /*
853 * Validate symlink
854 */
855 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
856 NFSVOPUNLOCK(ndp->ni_dvp);
857 if (!(nd->nd_flag & ND_PUBLOOKUP)) {
858 error = EINVAL;
859 goto badlink2;
860 }
861
862 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
863 error = ELOOP;
864 goto badlink2;
865 }
866 if (ndp->ni_pathlen > 1)
867 cp = uma_zalloc(namei_zone, M_WAITOK);
868 else
869 cp = cnp->cn_pnbuf;
870 aiov.iov_base = cp;
871 aiov.iov_len = MAXPATHLEN;
872 auio.uio_iov = &aiov;
873 auio.uio_iovcnt = 1;
874 auio.uio_offset = 0;
875 auio.uio_rw = UIO_READ;
876 auio.uio_segflg = UIO_SYSSPACE;
877 auio.uio_td = NULL;
878 auio.uio_resid = MAXPATHLEN;
879 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
880 if (error) {
881 badlink1:
882 if (ndp->ni_pathlen > 1)
883 uma_zfree(namei_zone, cp);
884 badlink2:
885 vrele(ndp->ni_dvp);
886 vput(ndp->ni_vp);
887 break;
888 }
889 linklen = MAXPATHLEN - auio.uio_resid;
890 if (linklen == 0) {
891 error = ENOENT;
892 goto badlink1;
893 }
894 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
895 error = ENAMETOOLONG;
896 goto badlink1;
897 }
898
899 /*
900 * Adjust or replace path
901 */
902 if (ndp->ni_pathlen > 1) {
903 NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
904 uma_zfree(namei_zone, cnp->cn_pnbuf);
905 cnp->cn_pnbuf = cp;
906 } else
907 cnp->cn_pnbuf[linklen] = '\0';
908 ndp->ni_pathlen += linklen;
909
910 /*
911 * Cleanup refs for next loop and check if root directory
912 * should replace current directory. Normally ni_dvp
913 * becomes the new base directory and is cleaned up when
914 * we loop. Explicitly null pointers after invalidation
915 * to clarify operation.
916 */
917 vput(ndp->ni_vp);
918 ndp->ni_vp = NULL;
919
920 if (cnp->cn_pnbuf[0] == '/') {
921 vrele(ndp->ni_dvp);
922 ndp->ni_dvp = ndp->ni_rootdir;
923 vref(ndp->ni_dvp);
924 }
925 ndp->ni_startdir = ndp->ni_dvp;
926 ndp->ni_dvp = NULL;
927 }
928 if (!lockleaf)
929 cnp->cn_flags &= ~LOCKLEAF;
930
931 out:
932 if (error) {
933 nfsvno_relpathbuf(ndp);
934 ndp->ni_vp = NULL;
935 ndp->ni_dvp = NULL;
936 ndp->ni_startdir = NULL;
937 } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
938 ndp->ni_dvp = NULL;
939 }
940
941 out1:
942 NFSEXITCODE2(error, nd);
943 return (error);
944 }
945
946 /*
947 * Set up a pathname buffer and return a pointer to it and, optionally
948 * set a hash pointer.
949 */
950 void
nfsvno_setpathbuf(struct nameidata * ndp,char ** bufpp,u_long ** hashpp)951 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
952 {
953 struct componentname *cnp = &ndp->ni_cnd;
954
955 cnp->cn_flags |= (NOMACCHECK);
956 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
957 if (hashpp != NULL)
958 *hashpp = NULL;
959 *bufpp = cnp->cn_pnbuf;
960 }
961
962 /*
963 * Release the above path buffer, if not released by nfsvno_namei().
964 */
965 void
nfsvno_relpathbuf(struct nameidata * ndp)966 nfsvno_relpathbuf(struct nameidata *ndp)
967 {
968
969 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
970 ndp->ni_cnd.cn_pnbuf = NULL;
971 }
972
973 /*
974 * Readlink vnode op into an mbuf list.
975 */
976 int
nfsvno_readlink(struct vnode * vp,struct ucred * cred,int maxextsiz,struct thread * p,struct mbuf ** mpp,struct mbuf ** mpendp,int * lenp)977 nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz,
978 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
979 {
980 struct iovec *iv;
981 struct uio io, *uiop = &io;
982 struct mbuf *mp, *mp3;
983 int len, tlen, error = 0;
984
985 len = NFS_MAXPATHLEN;
986 if (maxextsiz > 0)
987 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz,
988 &mp3, &mp, &iv);
989 else
990 uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv);
991 uiop->uio_iov = iv;
992 uiop->uio_offset = 0;
993 uiop->uio_resid = len;
994 uiop->uio_rw = UIO_READ;
995 uiop->uio_segflg = UIO_SYSSPACE;
996 uiop->uio_td = NULL;
997 error = VOP_READLINK(vp, uiop, cred);
998 free(iv, M_TEMP);
999 if (error) {
1000 m_freem(mp3);
1001 *lenp = 0;
1002 goto out;
1003 }
1004 if (uiop->uio_resid > 0) {
1005 len -= uiop->uio_resid;
1006 tlen = NFSM_RNDUP(len);
1007 if (tlen == 0) {
1008 m_freem(mp3);
1009 mp3 = mp = NULL;
1010 } else if (tlen != NFS_MAXPATHLEN || tlen != len)
1011 mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen,
1012 tlen - len);
1013 }
1014 *lenp = len;
1015 *mpp = mp3;
1016 *mpendp = mp;
1017
1018 out:
1019 NFSEXITCODE(error);
1020 return (error);
1021 }
1022
1023 /*
1024 * Create an mbuf chain and an associated iovec that can be used to Read
1025 * or Getextattr of data.
1026 * Upon success, return pointers to the first and last mbufs in the chain
1027 * plus the malloc'd iovec and its iovlen.
1028 */
1029 static int
nfsrv_createiovec(int len,struct mbuf ** mpp,struct mbuf ** mpendp,struct iovec ** ivp)1030 nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp,
1031 struct iovec **ivp)
1032 {
1033 struct mbuf *m, *m2 = NULL, *m3;
1034 struct iovec *iv;
1035 int i, left, siz;
1036
1037 left = len;
1038 m3 = NULL;
1039 /*
1040 * Generate the mbuf list with the uio_iov ref. to it.
1041 */
1042 i = 0;
1043 while (left > 0) {
1044 NFSMGET(m);
1045 MCLGET(m, M_WAITOK);
1046 m->m_len = 0;
1047 siz = min(M_TRAILINGSPACE(m), left);
1048 left -= siz;
1049 i++;
1050 if (m3)
1051 m2->m_next = m;
1052 else
1053 m3 = m;
1054 m2 = m;
1055 }
1056 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
1057 m = m3;
1058 left = len;
1059 i = 0;
1060 while (left > 0) {
1061 if (m == NULL)
1062 panic("nfsrv_createiovec iov");
1063 siz = min(M_TRAILINGSPACE(m), left);
1064 if (siz > 0) {
1065 iv->iov_base = mtod(m, caddr_t) + m->m_len;
1066 iv->iov_len = siz;
1067 m->m_len += siz;
1068 left -= siz;
1069 iv++;
1070 i++;
1071 }
1072 m = m->m_next;
1073 }
1074 *mpp = m3;
1075 *mpendp = m2;
1076 return (i);
1077 }
1078
1079 /*
1080 * Create an mbuf chain and an associated iovec that can be used to Read
1081 * or Getextattr of data.
1082 * Upon success, return pointers to the first and last mbufs in the chain
1083 * plus the malloc'd iovec and its iovlen.
1084 * Same as above, but creates ext_pgs mbuf(s).
1085 */
1086 static int
nfsrv_createiovec_extpgs(int len,int maxextsiz,struct mbuf ** mpp,struct mbuf ** mpendp,struct iovec ** ivp)1087 nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp,
1088 struct mbuf **mpendp, struct iovec **ivp)
1089 {
1090 struct mbuf *m, *m2 = NULL, *m3;
1091 struct iovec *iv;
1092 int i, left, pgno, siz;
1093
1094 left = len;
1095 m3 = NULL;
1096 /*
1097 * Generate the mbuf list with the uio_iov ref. to it.
1098 */
1099 i = 0;
1100 while (left > 0) {
1101 siz = min(left, maxextsiz);
1102 m = mb_alloc_ext_plus_pages(siz, M_WAITOK);
1103 left -= siz;
1104 i += m->m_epg_npgs;
1105 if (m3 != NULL)
1106 m2->m_next = m;
1107 else
1108 m3 = m;
1109 m2 = m;
1110 }
1111 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
1112 m = m3;
1113 left = len;
1114 i = 0;
1115 pgno = 0;
1116 while (left > 0) {
1117 if (m == NULL)
1118 panic("nfsvno_createiovec_extpgs iov");
1119 siz = min(PAGE_SIZE, left);
1120 if (siz > 0) {
1121 iv->iov_base = PHYS_TO_DMAP(m->m_epg_pa[pgno]);
1122 iv->iov_len = siz;
1123 m->m_len += siz;
1124 if (pgno == m->m_epg_npgs - 1)
1125 m->m_epg_last_len = siz;
1126 left -= siz;
1127 iv++;
1128 i++;
1129 pgno++;
1130 }
1131 if (pgno == m->m_epg_npgs && left > 0) {
1132 m = m->m_next;
1133 if (m == NULL)
1134 panic("nfsvno_createiovec_extpgs iov");
1135 pgno = 0;
1136 }
1137 }
1138 *mpp = m3;
1139 *mpendp = m2;
1140 return (i);
1141 }
1142
1143 /*
1144 * Read vnode op call into mbuf list.
1145 */
1146 int
nfsvno_read(struct vnode * vp,off_t off,int cnt,struct ucred * cred,int maxextsiz,struct thread * p,struct mbuf ** mpp,struct mbuf ** mpendp)1147 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
1148 int maxextsiz, struct thread *p, struct mbuf **mpp,
1149 struct mbuf **mpendp)
1150 {
1151 struct mbuf *m;
1152 struct iovec *iv;
1153 int error = 0, len, tlen, ioflag = 0;
1154 struct mbuf *m3;
1155 struct uio io, *uiop = &io;
1156 struct nfsheur *nh;
1157
1158 /*
1159 * Attempt to read from a DS file. A return of ENOENT implies
1160 * there is no DS file to read.
1161 */
1162 error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp,
1163 NULL, mpendp, NULL, NULL, NULL, 0, NULL);
1164 if (error != ENOENT)
1165 return (error);
1166
1167 len = NFSM_RNDUP(cnt);
1168 if (maxextsiz > 0)
1169 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz,
1170 &m3, &m, &iv);
1171 else
1172 uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv);
1173 uiop->uio_iov = iv;
1174 uiop->uio_offset = off;
1175 uiop->uio_resid = len;
1176 uiop->uio_rw = UIO_READ;
1177 uiop->uio_segflg = UIO_SYSSPACE;
1178 uiop->uio_td = NULL;
1179 nh = nfsrv_sequential_heuristic(uiop, vp);
1180 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
1181 /* XXX KDM make this more systematic? */
1182 VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_READ] += uiop->uio_resid;
1183 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
1184 free(iv, M_TEMP);
1185 if (error) {
1186 m_freem(m3);
1187 *mpp = NULL;
1188 goto out;
1189 }
1190 nh->nh_nextoff = uiop->uio_offset;
1191 tlen = len - uiop->uio_resid;
1192 cnt = cnt < tlen ? cnt : tlen;
1193 tlen = NFSM_RNDUP(cnt);
1194 if (tlen == 0) {
1195 m_freem(m3);
1196 m3 = m = NULL;
1197 } else if (len != tlen || tlen != cnt)
1198 m = nfsrv_adj(m3, len - tlen, tlen - cnt);
1199 *mpp = m3;
1200 *mpendp = m;
1201
1202 out:
1203 NFSEXITCODE(error);
1204 return (error);
1205 }
1206
1207 /*
1208 * Create the iovec for the mbuf chain passed in as an argument.
1209 * The "cp" argument is where the data starts within the first mbuf in
1210 * the chain. It returns the iovec and the iovcnt.
1211 */
1212 static int
nfsrv_createiovecw(int retlen,struct mbuf * m,char * cp,struct iovec ** ivpp,int * iovcntp)1213 nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp,
1214 int *iovcntp)
1215 {
1216 struct mbuf *mp;
1217 struct iovec *ivp;
1218 int cnt, i, len;
1219
1220 /*
1221 * Loop through the mbuf chain, counting how many mbufs are a
1222 * part of this write operation, so the iovec size is known.
1223 */
1224 cnt = 0;
1225 len = retlen;
1226 mp = m;
1227 i = mtod(mp, caddr_t) + mp->m_len - cp;
1228 while (len > 0) {
1229 if (i > 0) {
1230 len -= i;
1231 cnt++;
1232 }
1233 mp = mp->m_next;
1234 if (!mp) {
1235 if (len > 0)
1236 return (EBADRPC);
1237 } else
1238 i = mp->m_len;
1239 }
1240
1241 /* Now, create the iovec. */
1242 mp = m;
1243 *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP,
1244 M_WAITOK);
1245 *iovcntp = cnt;
1246 i = mtod(mp, caddr_t) + mp->m_len - cp;
1247 len = retlen;
1248 while (len > 0) {
1249 if (mp == NULL)
1250 panic("nfsrv_createiovecw");
1251 if (i > 0) {
1252 i = min(i, len);
1253 ivp->iov_base = cp;
1254 ivp->iov_len = i;
1255 ivp++;
1256 len -= i;
1257 }
1258 mp = mp->m_next;
1259 if (mp) {
1260 i = mp->m_len;
1261 cp = mtod(mp, caddr_t);
1262 }
1263 }
1264 return (0);
1265 }
1266
1267 /*
1268 * Write vnode op from an mbuf list.
1269 */
1270 int
nfsvno_write(struct vnode * vp,off_t off,int retlen,int * stable,struct mbuf * mp,char * cp,struct ucred * cred,struct thread * p)1271 nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable,
1272 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
1273 {
1274 struct iovec *iv;
1275 int cnt, ioflags, error;
1276 struct uio io, *uiop = &io;
1277 struct nfsheur *nh;
1278
1279 /*
1280 * Attempt to write to a DS file. A return of ENOENT implies
1281 * there is no DS file to write.
1282 */
1283 error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS,
1284 &mp, cp, NULL, NULL, NULL, NULL, 0, NULL);
1285 if (error != ENOENT) {
1286 *stable = NFSWRITE_FILESYNC;
1287 return (error);
1288 }
1289
1290 if (*stable == NFSWRITE_UNSTABLE)
1291 ioflags = IO_NODELOCKED;
1292 else
1293 ioflags = (IO_SYNC | IO_NODELOCKED);
1294 error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt);
1295 if (error != 0)
1296 return (error);
1297 uiop->uio_iov = iv;
1298 uiop->uio_iovcnt = cnt;
1299 uiop->uio_resid = retlen;
1300 uiop->uio_rw = UIO_WRITE;
1301 uiop->uio_segflg = UIO_SYSSPACE;
1302 NFSUIOPROC(uiop, p);
1303 uiop->uio_offset = off;
1304 nh = nfsrv_sequential_heuristic(uiop, vp);
1305 ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
1306 /* XXX KDM make this more systematic? */
1307 VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_WRITE] += uiop->uio_resid;
1308 error = VOP_WRITE(vp, uiop, ioflags, cred);
1309 if (error == 0)
1310 nh->nh_nextoff = uiop->uio_offset;
1311 free(iv, M_TEMP);
1312
1313 NFSEXITCODE(error);
1314 return (error);
1315 }
1316
/*
 * Common code for creating a regular file (plus special files for V2).
 *
 * Starts from nd->nd_repstat.  When that is 0 and the lookup found no
 * existing object (ndp->ni_vp == NULL), an object of type nvap->na_type is
 * created in ndp->ni_dvp.  Otherwise the lookup state is cleaned up and,
 * if there is no error and nvap->na_size is set, the existing file is
 * resized to that size.
 *
 * On the paths that reach the end of either main branch, *vpp is set to
 * ndp->ni_vp; the early "goto out" error paths leave *vpp untouched.
 * The pathname buffer of ndp is released on every path.
 * Returns 0 or an error number.
 */
int
nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
    int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp)
{
	u_quad_t tempsize;
	int error;
	struct thread *p = curthread;

	error = nd->nd_repstat;
	if (!error && ndp->ni_vp == NULL) {
		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
			/*
			 * ENOENT here means "no numfile was used", so the
			 * normal VOP_CREATE() path below is taken.
			 * NOTE(review): what nfsvno_pnfsusenumfile() does
			 * with ni_dvp/ni_vp on other return values is not
			 * visible here - confirm against its definition.
			 */
			error = ENOENT;
			if (nvap->na_type == VREG &&
			    !TAILQ_EMPTY(&nfsrv_devidhead))
				error = nfsvno_pnfsusenumfile(ndp,
				    &nvap->na_vattr);
			if (error == ENOENT) {
				error = VOP_CREATE(ndp->ni_dvp,
				    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
				/*
				 * For a pNFS server, create the data file
				 * on a DS.
				 */
				if (error == 0 && nvap->na_type == VREG) {
					/*
					 * Create a data file on a DS for a
					 * pNFS server.
					 * This function just returns if not
					 * running a pNFS DS or the creation
					 * fails.
					 */
					nfsrv_pnfscreate(ndp->ni_vp,
					    &nvap->na_vattr, nd->nd_cred, p);
				}
				/*
				 * Release the directory; the new vnode is
				 * only passed along when the create worked.
				 */
				VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ?
				    &ndp->ni_vp : NULL, false);
			}
			nfsvno_relpathbuf(ndp);
			if (!error) {
				if (*exclusive_flagp) {
					/*
					 * Exclusive create: store the create
					 * verifier in the file's atime.  A
					 * failure to do so is reported as
					 * NFSERR_NOTSUPP.
					 */
					*exclusive_flagp = 0;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_atime.tv_sec = cverf[0];
					nvap->na_atime.tv_nsec = cverf[1];
					error = VOP_SETATTR(ndp->ni_vp,
					    &nvap->na_vattr, nd->nd_cred);
					if (error != 0) {
						vput(ndp->ni_vp);
						ndp->ni_vp = NULL;
						error = NFSERR_NOTSUPP;
					}
				}
			}
		/*
		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
		 * (This implies, just get out on an error.)
		 */
		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
		    nvap->na_type == VFIFO) {
			/* A VCHR with rdev 0xffffffff is treated as a FIFO. */
			if (nvap->na_type == VCHR && rdev == 0xffffffff)
				nvap->na_type = VFIFO;
			/* Device nodes need the PRIV_VFS_MKNOD_DEV privilege. */
			if (nvap->na_type != VFIFO &&
			    (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) {
				nfsvno_relpathbuf(ndp);
				vput(ndp->ni_dvp);
				goto out;
			}
			nvap->na_rdev = rdev;
			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
			    &ndp->ni_cnd, &nvap->na_vattr);
			VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp :
			    NULL, false);
			nfsvno_relpathbuf(ndp);
			if (error)
				goto out;
		} else {
			/* Any other file type cannot be created here. */
			nfsvno_relpathbuf(ndp);
			vput(ndp->ni_dvp);
			error = ENXIO;
			goto out;
		}
		*vpp = ndp->ni_vp;
	} else {
		/*
		 * Handle cases where error is already set and/or
		 * the file exists.
		 * 1 - clean up the lookup
		 * 2 - iff !error and na_size set, truncate it
		 */
		nfsvno_relpathbuf(ndp);
		*vpp = ndp->ni_vp;
		/* Parent and leaf may be the same vnode; drop only the ref. */
		if (ndp->ni_dvp == *vpp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		if (!error && nvap->na_size != VNOVAL) {
			error = nfsvno_accchk(*vpp, VWRITE,
			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
			    NFSACCCHK_VPISLOCKED, NULL);
			if (!error) {
				/* Reset all attributes except the size. */
				tempsize = nvap->na_size;
				NFSVNO_ATTRINIT(nvap);
				nvap->na_size = tempsize;
				error = nfsvno_setattr(*vpp, nvap,
				    nd->nd_cred, p, exp);
			}
		}
		if (error)
			vput(*vpp);
	}

out:
	NFSEXITCODE(error);
	return (error);
}
1436
1437 /*
1438 * Do a mknod vnode op.
1439 */
1440 int
nfsvno_mknod(struct nameidata * ndp,struct nfsvattr * nvap,struct ucred * cred,struct thread * p)1441 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
1442 struct thread *p)
1443 {
1444 int error = 0;
1445 __enum_uint8(vtype) vtyp;
1446
1447 vtyp = nvap->na_type;
1448 /*
1449 * Iff doesn't exist, create it.
1450 */
1451 if (ndp->ni_vp) {
1452 nfsvno_relpathbuf(ndp);
1453 vput(ndp->ni_dvp);
1454 vrele(ndp->ni_vp);
1455 error = EEXIST;
1456 goto out;
1457 }
1458 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1459 nfsvno_relpathbuf(ndp);
1460 vput(ndp->ni_dvp);
1461 error = NFSERR_BADTYPE;
1462 goto out;
1463 }
1464 if (vtyp == VSOCK) {
1465 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
1466 &ndp->ni_cnd, &nvap->na_vattr);
1467 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL,
1468 false);
1469 nfsvno_relpathbuf(ndp);
1470 } else {
1471 if (nvap->na_type != VFIFO &&
1472 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) {
1473 nfsvno_relpathbuf(ndp);
1474 vput(ndp->ni_dvp);
1475 goto out;
1476 }
1477 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
1478 &ndp->ni_cnd, &nvap->na_vattr);
1479 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL,
1480 false);
1481 nfsvno_relpathbuf(ndp);
1482 /*
1483 * Since VOP_MKNOD returns the ni_vp, I can't
1484 * see any reason to do the lookup.
1485 */
1486 }
1487
1488 out:
1489 NFSEXITCODE(error);
1490 return (error);
1491 }
1492
1493 /*
1494 * Mkdir vnode op.
1495 */
1496 int
nfsvno_mkdir(struct nameidata * ndp,struct nfsvattr * nvap,uid_t saved_uid,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)1497 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
1498 struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
1499 {
1500 int error = 0;
1501
1502 if (ndp->ni_vp != NULL) {
1503 if (ndp->ni_dvp == ndp->ni_vp)
1504 vrele(ndp->ni_dvp);
1505 else
1506 vput(ndp->ni_dvp);
1507 vrele(ndp->ni_vp);
1508 nfsvno_relpathbuf(ndp);
1509 error = EEXIST;
1510 goto out;
1511 }
1512 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
1513 &nvap->na_vattr);
1514 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false);
1515 nfsvno_relpathbuf(ndp);
1516
1517 out:
1518 NFSEXITCODE(error);
1519 return (error);
1520 }
1521
1522 /*
1523 * symlink vnode op.
1524 */
1525 int
nfsvno_symlink(struct nameidata * ndp,struct nfsvattr * nvap,char * pathcp,int pathlen,int not_v2,uid_t saved_uid,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)1526 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
1527 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
1528 struct nfsexstuff *exp)
1529 {
1530 int error = 0;
1531
1532 if (ndp->ni_vp) {
1533 nfsvno_relpathbuf(ndp);
1534 if (ndp->ni_dvp == ndp->ni_vp)
1535 vrele(ndp->ni_dvp);
1536 else
1537 vput(ndp->ni_dvp);
1538 vrele(ndp->ni_vp);
1539 error = EEXIST;
1540 goto out;
1541 }
1542
1543 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
1544 &nvap->na_vattr, pathcp);
1545 /*
1546 * Although FreeBSD still had the lookup code in
1547 * it for 7/current, there doesn't seem to be any
1548 * point, since VOP_SYMLINK() returns the ni_vp.
1549 * Just vput it for v2.
1550 */
1551 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, !not_v2 && error == 0);
1552 nfsvno_relpathbuf(ndp);
1553
1554 out:
1555 NFSEXITCODE(error);
1556 return (error);
1557 }
1558
1559 /*
1560 * Parse symbolic link arguments.
1561 * This function has an ugly side effect. It will malloc() an area for
1562 * the symlink and set iov_base to point to it, only if it succeeds.
1563 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
1564 * be FREE'd later.
1565 */
1566 int
nfsvno_getsymlink(struct nfsrv_descript * nd,struct nfsvattr * nvap,struct thread * p,char ** pathcpp,int * lenp)1567 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
1568 struct thread *p, char **pathcpp, int *lenp)
1569 {
1570 u_int32_t *tl;
1571 char *pathcp = NULL;
1572 int error = 0, len;
1573 struct nfsv2_sattr *sp;
1574
1575 *pathcpp = NULL;
1576 *lenp = 0;
1577 if ((nd->nd_flag & ND_NFSV3) &&
1578 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, NULL, p)))
1579 goto nfsmout;
1580 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1581 len = fxdr_unsigned(int, *tl);
1582 if (len > NFS_MAXPATHLEN || len <= 0) {
1583 error = EBADRPC;
1584 goto nfsmout;
1585 }
1586 pathcp = malloc(len + 1, M_TEMP, M_WAITOK);
1587 error = nfsrv_mtostr(nd, pathcp, len);
1588 if (error)
1589 goto nfsmout;
1590 if (nd->nd_flag & ND_NFSV2) {
1591 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1592 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
1593 }
1594 *pathcpp = pathcp;
1595 *lenp = len;
1596 NFSEXITCODE2(0, nd);
1597 return (0);
1598 nfsmout:
1599 if (pathcp)
1600 free(pathcp, M_TEMP);
1601 NFSEXITCODE2(error, nd);
1602 return (error);
1603 }
1604
1605 /*
1606 * Remove a non-directory object.
1607 */
1608 int
nfsvno_removesub(struct nameidata * ndp,bool is_v4,struct nfsrv_descript * nd,struct thread * p,struct nfsexstuff * exp)1609 nfsvno_removesub(struct nameidata *ndp, bool is_v4, struct nfsrv_descript *nd,
1610 struct thread *p, struct nfsexstuff *exp)
1611 {
1612 struct vnode *vp, **dsdvpp, *newvp;
1613 struct mount *mp;
1614 int error = 0, dsfilecnt, ret;
1615 char fname[PNFS_FILENAME_LEN + 1];
1616 fhandle_t fh;
1617
1618 vp = ndp->ni_vp;
1619 dsdvpp = NULL;
1620 if (vp->v_type == VDIR) {
1621 error = NFSERR_ISDIR;
1622 } else if (is_v4) {
1623 if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0)
1624 error = nfsrv_checkremove(vp, 1, NULL,
1625 (nfsquad_t)((u_quad_t)0), p);
1626 else
1627 error = nfsrv_checkremove(vp, 1, NULL, nd->nd_clientid,
1628 p);
1629 }
1630 if (error == 0) {
1631 nfsrv_pnfsremovesetup(vp, p, &dsdvpp, &dsfilecnt, fname, &fh);
1632 NFSD_DEBUG(4, "nfsrv_pnfsremovesetup err=%d dsfilecnt=%d\n",
1633 error, dsfilecnt);
1634 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
1635 }
1636 if (error == 0 && dsdvpp != NULL) {
1637 nfsrv_pnfsremove(dsdvpp, dsfilecnt, fname, &fh, p);
1638 NFSD_DEBUG(4, "aft nfsrv_pnfsremove dsfilecnt=%d fname=%s\n",
1639 dsfilecnt, fname);
1640 }
1641 free(dsdvpp, M_TEMP);
1642 if (is_v4 && (nd->nd_flag & ND_NFSV41) != 0 && error == 0)
1643 error = nfsvno_getfh(vp, &fh, p);
1644 if (ndp->ni_dvp == vp)
1645 vrele(ndp->ni_dvp);
1646 else
1647 vput(ndp->ni_dvp);
1648 vput(vp);
1649
1650 /* Use ret to determine if the file still exists. */
1651 if (is_v4 && (nd->nd_flag & ND_NFSV41) != 0 && error == 0) {
1652 mp = vfs_busyfs(&fh.fh_fsid);
1653 if (mp != NULL) {
1654 /* Find out if the file still exists. */
1655 ret = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &newvp);
1656 if (ret == 0)
1657 vput(newvp);
1658 else
1659 ret = ESTALE;
1660 vfs_unbusy(mp);
1661 } else {
1662 ret = ESTALE;
1663 }
1664 if (ret == ESTALE) {
1665 /* Get rid of any delegation. */
1666 nfsrv_removedeleg(&fh, nd, p);
1667 }
1668 }
1669
1670 nfsvno_relpathbuf(ndp);
1671 NFSEXITCODE(error);
1672 return (error);
1673 }
1674
1675 /*
1676 * Remove a directory.
1677 */
1678 int
nfsvno_rmdirsub(struct nameidata * ndp,int is_v4,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)1679 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
1680 struct thread *p, struct nfsexstuff *exp)
1681 {
1682 struct vnode *vp;
1683 int error = 0;
1684
1685 vp = ndp->ni_vp;
1686 if (vp->v_type != VDIR) {
1687 error = ENOTDIR;
1688 goto out;
1689 }
1690 /*
1691 * No rmdir "." please.
1692 */
1693 if (ndp->ni_dvp == vp) {
1694 error = EINVAL;
1695 goto out;
1696 }
1697 /*
1698 * The root of a mounted filesystem cannot be deleted.
1699 */
1700 if (vp->v_vflag & VV_ROOT)
1701 error = EBUSY;
1702 out:
1703 if (!error)
1704 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
1705 if (ndp->ni_dvp == vp)
1706 vrele(ndp->ni_dvp);
1707 else
1708 vput(ndp->ni_dvp);
1709 vput(vp);
1710 nfsvno_relpathbuf(ndp);
1711 NFSEXITCODE(error);
1712 return (error);
1713 }
1714
1715 /*
1716 * Rename vnode op.
1717 */
1718 int
nfsvno_rename(struct nameidata * fromndp,struct nameidata * tondp,struct nfsrv_descript * nd,struct thread * p)1719 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
1720 struct nfsrv_descript *nd, struct thread *p)
1721 {
1722 struct vnode *fvp, *tvp, *tdvp, **dsdvpp, *newvp;
1723 struct mount *mp;
1724 int error = 0, dsfilecnt, ret;
1725 char fname[PNFS_FILENAME_LEN + 1];
1726 fhandle_t fh, fh2;
1727
1728 dsdvpp = NULL;
1729 fvp = fromndp->ni_vp;
1730 if (nd->nd_repstat != 0) {
1731 vrele(fromndp->ni_dvp);
1732 vrele(fvp);
1733 error = nd->nd_repstat;
1734 goto out1;
1735 }
1736 tdvp = tondp->ni_dvp;
1737 tvp = tondp->ni_vp;
1738 if (tvp != NULL) {
1739 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
1740 error = (nd->nd_flag & ND_NFSV2) ? EISDIR : EEXIST;
1741 goto out;
1742 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
1743 error = (nd->nd_flag & ND_NFSV2) ? ENOTDIR : EEXIST;
1744 goto out;
1745 }
1746 if (tvp->v_type == VDIR && tvp->v_mountedhere) {
1747 error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1748 goto out;
1749 }
1750
1751 /*
1752 * A rename to '.' or '..' results in a prematurely
1753 * unlocked vnode on FreeBSD5, so I'm just going to fail that
1754 * here.
1755 */
1756 if ((tondp->ni_cnd.cn_namelen == 1 &&
1757 tondp->ni_cnd.cn_nameptr[0] == '.') ||
1758 (tondp->ni_cnd.cn_namelen == 2 &&
1759 tondp->ni_cnd.cn_nameptr[0] == '.' &&
1760 tondp->ni_cnd.cn_nameptr[1] == '.')) {
1761 error = EINVAL;
1762 goto out;
1763 }
1764 }
1765 if (fvp->v_type == VDIR && fvp->v_mountedhere) {
1766 error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1767 goto out;
1768 }
1769 if (fvp->v_mount != tdvp->v_mount) {
1770 error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1771 goto out;
1772 }
1773 if (fvp == tdvp) {
1774 error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
1775 goto out;
1776 }
1777 if (fvp == tvp) {
1778 /*
1779 * If source and destination are the same, there is
1780 * nothing to do. Set error to EJUSTRETURN to indicate
1781 * this.
1782 */
1783 error = EJUSTRETURN;
1784 goto out;
1785 }
1786 if (nd->nd_flag & ND_NFSV4) {
1787 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
1788 if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0)
1789 error = nfsrv_checkremove(fvp, 0, NULL,
1790 (nfsquad_t)((u_quad_t)0), p);
1791 else
1792 error = nfsrv_checkremove(fvp, 0, NULL,
1793 nd->nd_clientid, p);
1794 NFSVOPUNLOCK(fvp);
1795 } else
1796 error = EPERM;
1797 if (tvp && !error) {
1798 if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0)
1799 error = nfsrv_checkremove(tvp, 1, NULL,
1800 (nfsquad_t)((u_quad_t)0), p);
1801 else
1802 error = nfsrv_checkremove(tvp, 1, NULL,
1803 nd->nd_clientid, p);
1804 }
1805 } else {
1806 /*
1807 * For NFSv2 and NFSv3, try to get rid of the delegation, so
1808 * that the NFSv4 client won't be confused by the rename.
1809 * Since nfsd_recalldelegation() can only be called on an
1810 * unlocked vnode at this point and fvp is the file that will
1811 * still exist after the rename, just do fvp.
1812 */
1813 nfsd_recalldelegation(fvp, p);
1814 }
1815 if (error == 0 && tvp != NULL) {
1816 if ((nd->nd_flag & ND_NFSV41) != 0)
1817 error = nfsvno_getfh(tvp, &fh2, p);
1818 if (error == 0)
1819 nfsrv_pnfsremovesetup(tvp, p, &dsdvpp, &dsfilecnt,
1820 fname, &fh);
1821 NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup"
1822 " dsdvpp=%p\n", dsdvpp);
1823 }
1824 out:
1825 mp = NULL;
1826 if (error == 0) {
1827 error = VOP_GETWRITEMOUNT(tondp->ni_dvp, &mp);
1828 if (error == 0) {
1829 if (mp == NULL) {
1830 error = ENOENT;
1831 } else {
1832 error = lockmgr(&mp->mnt_renamelock,
1833 LK_EXCLUSIVE | LK_NOWAIT, NULL);
1834 if (error != 0)
1835 error = ERELOOKUP;
1836 }
1837 }
1838 }
1839 if (error == 0) {
1840 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
1841 &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
1842 &tondp->ni_cnd, 0);
1843 lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0);
1844 vfs_rel(mp);
1845 } else {
1846 if (tdvp == tvp)
1847 vrele(tdvp);
1848 else
1849 vput(tdvp);
1850 if (tvp)
1851 vput(tvp);
1852 vrele(fromndp->ni_dvp);
1853 vrele(fvp);
1854 if (error == EJUSTRETURN) {
1855 error = 0;
1856 } else if (error == ERELOOKUP && mp != NULL) {
1857 lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, 0);
1858 lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0);
1859 vfs_rel(mp);
1860 }
1861 }
1862
1863 /*
1864 * If dsdvpp != NULL, it was set up by nfsrv_pnfsremovesetup() and
1865 * if the rename succeeded, the DS file for the tvp needs to be
1866 * removed.
1867 */
1868 if (error == 0 && dsdvpp != NULL) {
1869 nfsrv_pnfsremove(dsdvpp, dsfilecnt, fname, &fh, p);
1870 NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n");
1871 free(dsdvpp, M_TEMP);
1872 }
1873
1874 /* Use ret to determine if the file still exists. */
1875 if ((nd->nd_flag & ND_NFSV41) != 0 && error == 0) {
1876 mp = vfs_busyfs(&fh2.fh_fsid);
1877 if (mp != NULL) {
1878 /* Find out if the file still exists. */
1879 ret = VFS_FHTOVP(mp, &fh2.fh_fid, LK_SHARED, &newvp);
1880 if (ret == 0)
1881 vput(newvp);
1882 else
1883 ret = ESTALE;
1884 vfs_unbusy(mp);
1885 } else {
1886 ret = ESTALE;
1887 }
1888 if (ret == ESTALE) {
1889 /* Get rid of any delegation. */
1890 nfsrv_removedeleg(&fh2, nd, p);
1891 }
1892 }
1893
1894 nfsvno_relpathbuf(tondp);
1895 out1:
1896 nfsvno_relpathbuf(fromndp);
1897 NFSEXITCODE(error);
1898 return (error);
1899 }
1900
1901 /*
1902 * Link vnode op.
1903 */
1904 int
nfsvno_link(struct nameidata * ndp,struct vnode * vp,nfsquad_t clientid,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)1905 nfsvno_link(struct nameidata *ndp, struct vnode *vp, nfsquad_t clientid,
1906 struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
1907 {
1908 struct vnode *xp;
1909 int error = 0;
1910
1911 xp = ndp->ni_vp;
1912 if (xp != NULL) {
1913 error = EEXIST;
1914 } else {
1915 xp = ndp->ni_dvp;
1916 if (vp->v_mount != xp->v_mount)
1917 error = EXDEV;
1918 }
1919 if (!error) {
1920 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
1921 if (!VN_IS_DOOMED(vp)) {
1922 error = nfsrv_checkremove(vp, 0, NULL, clientid, p);
1923 if (error == 0)
1924 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
1925 } else
1926 error = EPERM;
1927 if (ndp->ni_dvp == vp) {
1928 vrele(ndp->ni_dvp);
1929 NFSVOPUNLOCK(vp);
1930 } else {
1931 vref(vp);
1932 VOP_VPUT_PAIR(ndp->ni_dvp, &vp, true);
1933 }
1934 } else {
1935 if (ndp->ni_dvp == ndp->ni_vp)
1936 vrele(ndp->ni_dvp);
1937 else
1938 vput(ndp->ni_dvp);
1939 if (ndp->ni_vp)
1940 vrele(ndp->ni_vp);
1941 }
1942 nfsvno_relpathbuf(ndp);
1943 NFSEXITCODE(error);
1944 return (error);
1945 }
1946
1947 /*
1948 * Do the fsync() appropriate for the commit.
1949 */
1950 int
nfsvno_fsync(struct vnode * vp,u_int64_t off,int cnt,struct ucred * cred,struct thread * td)1951 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
1952 struct thread *td)
1953 {
1954 int error = 0;
1955
1956 /*
1957 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of
1958 * file is done. At this time VOP_FSYNC does not accept offset and
1959 * byte count parameters so call VOP_FSYNC the whole file for now.
1960 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
1961 * File systems that do not use the buffer cache (as indicated
1962 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC().
1963 */
1964 if (cnt == 0 || cnt > MAX_COMMIT_COUNT ||
1965 (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) {
1966 /*
1967 * Give up and do the whole thing
1968 */
1969 vnode_pager_clean_sync(vp);
1970 error = VOP_FSYNC(vp, MNT_WAIT, td);
1971 } else {
1972 /*
1973 * Locate and synchronously write any buffers that fall
1974 * into the requested range. Note: we are assuming that
1975 * f_iosize is a power of 2.
1976 */
1977 int iosize = vp->v_mount->mnt_stat.f_iosize;
1978 int iomask = iosize - 1;
1979 struct bufobj *bo;
1980 daddr_t lblkno;
1981
1982 /*
1983 * Align to iosize boundary, super-align to page boundary.
1984 */
1985 if (off & iomask) {
1986 cnt += off & iomask;
1987 off &= ~(u_quad_t)iomask;
1988 }
1989 if (off & PAGE_MASK) {
1990 cnt += off & PAGE_MASK;
1991 off &= ~(u_quad_t)PAGE_MASK;
1992 }
1993 lblkno = off / iosize;
1994
1995 if (vp->v_object && vm_object_mightbedirty(vp->v_object)) {
1996 VM_OBJECT_WLOCK(vp->v_object);
1997 vm_object_page_clean(vp->v_object, off, off + cnt,
1998 OBJPC_SYNC);
1999 VM_OBJECT_WUNLOCK(vp->v_object);
2000 }
2001
2002 bo = &vp->v_bufobj;
2003 BO_LOCK(bo);
2004 while (cnt > 0) {
2005 struct buf *bp;
2006
2007 /*
2008 * If we have a buffer and it is marked B_DELWRI we
2009 * have to lock and write it. Otherwise the prior
2010 * write is assumed to have already been committed.
2011 *
2012 * gbincore() can return invalid buffers now so we
2013 * have to check that bit as well (though B_DELWRI
2014 * should not be set if B_INVAL is set there could be
2015 * a race here since we haven't locked the buffer).
2016 */
2017 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
2018 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
2019 LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
2020 BO_LOCK(bo);
2021 continue; /* retry */
2022 }
2023 if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
2024 B_DELWRI) {
2025 bremfree(bp);
2026 bp->b_flags &= ~B_ASYNC;
2027 bwrite(bp);
2028 ++nfs_commit_miss;
2029 } else
2030 BUF_UNLOCK(bp);
2031 BO_LOCK(bo);
2032 }
2033 ++nfs_commit_blks;
2034 if (cnt < iosize)
2035 break;
2036 cnt -= iosize;
2037 ++lblkno;
2038 }
2039 BO_UNLOCK(bo);
2040 }
2041 NFSEXITCODE(error);
2042 return (error);
2043 }
2044
2045 /*
2046 * Statfs vnode op.
2047 */
2048 int
nfsvno_statfs(struct vnode * vp,struct statfs * sf)2049 nfsvno_statfs(struct vnode *vp, struct statfs *sf)
2050 {
2051 struct statfs *tsf;
2052 int error;
2053
2054 tsf = NULL;
2055 if (nfsrv_devidcnt > 0) {
2056 /* For a pNFS service, get the DS numbers. */
2057 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO);
2058 error = nfsrv_pnfsstatfs(tsf, vp->v_mount);
2059 if (error != 0) {
2060 free(tsf, M_TEMP);
2061 tsf = NULL;
2062 }
2063 }
2064 error = VFS_STATFS(vp->v_mount, sf);
2065 if (error == 0) {
2066 if (tsf != NULL) {
2067 sf->f_blocks = tsf->f_blocks;
2068 sf->f_bavail = tsf->f_bavail;
2069 sf->f_bfree = tsf->f_bfree;
2070 sf->f_bsize = tsf->f_bsize;
2071 }
2072 /*
2073 * Since NFS handles these values as unsigned on the
2074 * wire, there is no way to represent negative values,
2075 * so set them to 0. Without this, they will appear
2076 * to be very large positive values for clients like
2077 * Solaris10.
2078 */
2079 if (sf->f_bavail < 0)
2080 sf->f_bavail = 0;
2081 if (sf->f_ffree < 0)
2082 sf->f_ffree = 0;
2083 }
2084 free(tsf, M_TEMP);
2085 NFSEXITCODE(error);
2086 return (error);
2087 }
2088
2089 /*
2090 * Replenish the numfiles in .pnfshide/numfiles directory.
2091 * These files are used for the pNFS server when an Open/Create needs a
2092 * new regular file. By creating them here asynchronously, we can avoid
2093 * the delay of doing do for Open/Create, since creation requires RPCs to
2094 * the DSs be done.
2095 * (A) - When the sleep times out, work backwards creating
2096 * one new numfile for each cycle.
2097 * (Use a timeout of 10msec for now.)
2098 * (B) - When the sleep returns 0, this indicates that a
2099 * nfsd thread didn't find a numfiles. For this case
2100 * be more agressive and create numfiles going forward.
2101 * (Use a timeout of 1msec for now.)
2102 * Runs as a kernel process.
2103 */
2104 static char pnfshide_name[] = ".pnfshide";
2105 static char numfiles_name[] = "numfiles";
2106
2107 static void
nfsvno_pnfsreplenish(void * arg)2108 nfsvno_pnfsreplenish(void *arg)
2109 {
2110 struct componentname cn;
2111 struct vattr va;
2112 char name[11];
2113 struct timespec ts;
2114 struct mount *mp = (struct mount *)arg, *temp_mp;
2115 struct ucred *cred;
2116 struct vnode *numfiledvp, *vp;
2117 struct netexport *nep;
2118 uint64_t prevcnt;
2119 time_t prevsec;
2120 u_int cnt, last_back, next_back, next_forw, prevrate[4];
2121 int averate, error, i, timo;
2122 bool back, use_same_num;
2123
2124 cred = curthread->td_ucred;
2125 if (cred->cr_uid != 0)
2126 printf("nfsvno_pnfsreplenish: not root\n");
2127
2128 /*
2129 * Do a lookup for ".pnfshide" in the root dir
2130 * of the file system.
2131 */
2132 cn.cn_nameiop = LOOKUP;
2133 cn.cn_lkflags = LK_SHARED;
2134 cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF | NOCROSSMOUNT;
2135 cn.cn_cred = cred;
2136 cn.cn_nameptr = pnfshide_name;
2137 cn.cn_namelen = sizeof(pnfshide_name) - 1;
2138 vp = NULL;
2139 numfiledvp = NULL;
2140 error = vn_lock(mp->mnt_rootvnode, LK_SHARED);
2141 if (error == 0) {
2142 error = VOP_LOOKUP(mp->mnt_rootvnode, &vp, &cn);
2143 VOP_UNLOCK(mp->mnt_rootvnode);
2144 }
2145
2146 /*
2147 * Do a lookup for "numfiles" in the ".pnfshide" dir
2148 * of the file system.
2149 */
2150 if (error == 0) {
2151 cn.cn_nameiop = LOOKUP;
2152 cn.cn_lkflags = LK_SHARED;
2153 cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF | NOCROSSMOUNT;
2154 cn.cn_cred = cred;
2155 cn.cn_nameptr = numfiles_name;
2156 cn.cn_namelen = sizeof(numfiles_name) - 1;
2157 error = VOP_LOOKUP(vp, &numfiledvp, &cn);
2158 vput(vp);
2159 if (error == 0)
2160 VOP_UNLOCK(numfiledvp);
2161 }
2162 lockmgr(&mp->mnt_explock, LK_SHARED, NULL);
2163 nep = mp->mnt_export;
2164 if (nep != NULL)
2165 (void)vfs_netexport_acquire(nep);
2166 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
2167
2168 /*
2169 * The states for ne_pnfsnumfile are as follows:
2170 * NULL - Initial state for normal operation.
2171 * PNFSD_START - Transition state when the
2172 * replenisher kernel process is starting up.
2173 * non-NULL valid pointer - Points to the directory vnode for the
2174 * "numfiles" directory.
2175 * PNFSD_STOP - Transition state when the
2176 * replenisher kernel process is shutting down.
2177 * PNFSD_STOPPED - Replenisher kernel process has stopped and
2178 * vrele()'d the "numfiles" directory vnode.
2179 */
2180 if (error != 0 || nep == NULL) {
2181 if (error == 0)
2182 error = ENOENT;
2183 goto out;
2184 }
2185 MNTEXP_LOCK(nep);
2186 KASSERT(nep->ne_pnfsnumfile == PNFSD_START ||
2187 nep->ne_pnfsnumfile == PNFSD_STOP,
2188 ("nfsvno_pnfsreplenish: ne_pnfsnumfile not PNFSD_START/STOP"));
2189 if (nep->ne_pnfsnumfile == PNFSD_START) {
2190 nep->ne_pnfsnumfile = numfiledvp;
2191 wakeup(&nep->ne_pnfsnumfile);
2192 }
2193 MNTEXP_UNLOCK(nep);
2194
2195 VATTR_NULL(&va);
2196 va.va_mode = 0644;
2197 va.va_type = VREG;
2198 timo = hz / 1000;
2199 if (timo == 0)
2200 timo = 1;
2201 cnt = 5;
2202 back = false;
2203 use_same_num = false;
2204 prevcnt = 0;
2205 for (i = 0; i < 4; i++)
2206 prevrate[i] = 1;
2207 prevsec = 0;
2208 averate = 100;
2209 last_back = next_back = next_forw = UINT_MAX;
2210
2211 /* Loop around sleeping and then doing (A) or (B) */
2212 for (;;) {
2213 /* Sample replenish rate once/sec. */
2214 getnanouptime(&ts);
2215 if (ts.tv_sec != prevsec) {
2216 /* Calculate a moving ave. of creates/sec. */
2217 prevsec = ts.tv_sec;
2218 for (i = 0; i < 3; i++)
2219 prevrate[i + 1] = prevrate[i];
2220 prevrate[0] = atomic_load_int(&nep->ne_pnfsnumcnt) -
2221 prevcnt;
2222 if (prevrate[0] < 1)
2223 prevrate[0] = 1;
2224 prevcnt = atomic_load_int(&nep->ne_pnfsnumcnt);
2225 averate = prevrate[0] * 4 / 10 + prevrate[1] * 3 / 10 +
2226 prevrate[2] * 2 / 10 + prevrate[3] / 10;
2227 if (averate < 1)
2228 averate = 1;
2229 averate *= 2;
2230 }
2231
2232 if (cnt == 0) {
2233 error = tsleep(&mp->mnt_export, PVFS, "pnfsrpl", timo);
2234 if (error == ETIMEDOUT || error == EAGAIN) {
2235 if (!back)
2236 next_back = last_back;
2237 back = true;
2238 timo = hz / averate;
2239 if (timo == 0)
2240 timo = 1;
2241 } else {
2242 if (back) {
2243 next_forw = UINT_MAX;
2244 last_back = next_back;
2245 }
2246 back = false;
2247 cnt = nfsrv_pnfsforwcnt;
2248 timo = hz / 1000;
2249 if (timo == 0)
2250 timo = 1;
2251 nfsrv_pnfsswitchforw++;
2252 }
2253 }
2254
2255 /* Check for exports having gone away. */
2256 if (mp->mnt_export == NULL)
2257 break;
2258 /* And check for replenisher being stopped. */
2259 MNTEXP_LOCK(nep);
2260 if (nep->ne_pnfsnumfile != PNFSD_START &&
2261 nep->ne_pnfsnumfile != PNFSD_STOP) {
2262 KASSERT(numfiledvp == nep->ne_pnfsnumfile,
2263 ("nfsvno_pnfsreplenish: numfiledvp changed"));
2264 MNTEXP_UNLOCK(nep);
2265 } else {
2266 MNTEXP_UNLOCK(nep);
2267 break;
2268 }
2269
2270 if (back) {
2271 /* This is (A) in this function's comment above. */
2272 error = vn_start_write(numfiledvp, &temp_mp, V_NOWAIT);
2273 if (error == 0)
2274 error = vn_lock(numfiledvp, LK_EXCLUSIVE |
2275 LK_NOWAIT);
2276 if (error != 0 && temp_mp != NULL)
2277 vn_finished_write(temp_mp);
2278 if (error == EBUSY || error == EWOULDBLOCK)
2279 continue;
2280 if (error != 0)
2281 break;
2282 if (next_back == UINT_MAX) {
2283 if (nep->ne_pnfsnextfile == 0)
2284 next_back = nfsrv_pnfsmaxnumfiles - 1;
2285 else
2286 next_back = nep->ne_pnfsnextfile - 1;
2287 } else if (!use_same_num) {
2288 if (next_back == 0)
2289 next_back = nfsrv_pnfsmaxnumfiles - 1;
2290 else
2291 next_back--;
2292 }
2293 snprintf(name, sizeof(name), "%d", next_back);
2294 } else {
2295 /* This is (B) in this function's comment, above. */
2296 vn_start_write(numfiledvp, &temp_mp, V_WAIT);
2297 error = vn_lock(numfiledvp, LK_EXCLUSIVE);
2298 if (error != 0 && temp_mp != NULL)
2299 vn_finished_write(temp_mp);
2300 if (error != 0)
2301 break;
2302 if (next_forw == UINT_MAX)
2303 next_forw = nep->ne_pnfsnextfile;
2304 else if (!use_same_num)
2305 next_forw = (next_forw + 1) %
2306 nfsrv_pnfsmaxnumfiles;
2307 snprintf(name, sizeof(name), "%d", next_forw);
2308 }
2309 use_same_num = false;
2310
2311 /* Do a lookup for the file. */
2312 cn.cn_nameiop = CREATE;
2313 cn.cn_lkflags = LK_EXCLUSIVE;
2314 cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF | LOCKPARENT |
2315 NOCROSSMOUNT | MAKEENTRY;
2316 cn.cn_cred = cred;
2317 cn.cn_nameptr = name;
2318 cn.cn_namelen = strlen(name);
2319 vref(numfiledvp);
2320 error = VOP_LOOKUP(numfiledvp, &vp, &cn);
2321 if (error == 0) {
2322 VOP_VPUT_PAIR(numfiledvp, &vp, true);
2323 if (temp_mp != NULL)
2324 vn_finished_write(temp_mp);
2325 if (back) {
2326 last_back = next_back = UINT_MAX;
2327 timo = hz / 10;
2328 if (timo == 0)
2329 timo = 1;
2330 } else {
2331 timo = hz / 100;
2332 if (timo == 0)
2333 timo = 1;
2334 }
2335 cnt = 0;
2336 continue;
2337 } else if (error != ENOENT && error != EJUSTRETURN) {
2338 VOP_VPUT_PAIR(numfiledvp, NULL, true);
2339 if (temp_mp != NULL)
2340 vn_finished_write(temp_mp);
2341 if (error == ERELOOKUP) {
2342 use_same_num = true;
2343 continue;
2344 }
2345 printf("nfsvno_pnfsreplenish: lookup failed %d\n",
2346 error);
2347 break;
2348 }
2349
2350 /* Create the numfile and its DS file(s). */
2351 error = VOP_CREATE(numfiledvp, &vp, &cn, &va);
2352 if (error == 0) {
2353 /*
2354 * Create a data file on a DS for a pNFS
2355 * server. This function just returns if
2356 * not running a pNFS DS or the creation
2357 * fails.
2358 */
2359 nfsrv_pnfscreate(vp, &va, cred, curthread);
2360 } else
2361 printf("nfsvno_pnfsreplenish: vop_create failed %d\n",
2362 error);
2363 VOP_VPUT_PAIR(numfiledvp, error == 0 ? &vp : NULL, true);
2364 if (temp_mp != NULL)
2365 vn_finished_write(temp_mp);
2366 cnt = cnt > 0 ? cnt - 1 : 0;
2367 }
2368 out:
2369 if (numfiledvp != NULL)
2370 vrele(numfiledvp);
2371 if (nep != NULL) {
2372 MNTEXP_LOCK(nep);
2373 nep->ne_pnfsnumfile = PNFSD_STOPPED;
2374 wakeup(&mp->mnt_explock);
2375 MNTEXP_UNLOCK(nep);
2376 vfs_netexport_release(nep);
2377 } else
2378 wakeup(&mp->mnt_explock);
2379 kproc_exit(0);
2380 }
2381
2382 /*
2383 * Do a lookup of a file in the .numfiles directory.
2384 * If successful, use VOP_SETATTR() to set the uid/gid/mode and
2385 * then VOP_LINK()/VOP_REMOVE() the num file.
2386 * Return ENOENT to indicate that nfsvno_open() should fall back to
2387 * doing VOP_CREATE(), other errors for failure.
2388 * XXX This code probably is not correct for a stacked file
2389 * system, but should never be used for that case.
2390 */
2391 static int
nfsvno_pnfsusenumfile(struct nameidata * ndp,struct vattr * vap)2392 nfsvno_pnfsusenumfile(struct nameidata *ndp, struct vattr *vap)
2393 {
2394 struct componentname cn;
2395 struct vattr va;
2396 char name[11];
2397 gid_t gid;
2398 struct ucred *cred, *savcred;
2399 struct vnode *numfiledvp;
2400 struct mount *mp;
2401 struct netexport *nep;
2402 u_int nextf;
2403 int error;
2404
2405 cred = newnfs_getcred();
2406 /*
2407 * Not sure if this is necessary. If all VOP calls use
2408 * cn_cred, it is not.
2409 */
2410 savcred = curthread->td_ucred;
2411 curthread->td_ucred = cred;
2412
2413 /*
2414 * If the replenish kernel process is not yet running,
2415 * start it up now.
2416 */
2417 numfiledvp = NULL;
2418 ndp->ni_vp = NULL;
2419 mp = ndp->ni_dvp->v_mount;
2420 lockmgr(&mp->mnt_explock, LK_SHARED, NULL);
2421 nep = mp->mnt_export;
2422 if (nep == NULL) {
2423 error = ENOENT;
2424 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
2425 goto out;
2426 }
2427 (void)vfs_netexport_acquire(nep);
2428 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
2429 MNTEXP_LOCK(nep);
2430 if (nep->ne_pnfsnumfile == NULL) {
2431 /* Mark kernel process startup in-progress. */
2432 nep->ne_pnfsnumfile = PNFSD_START;
2433 MNTEXP_UNLOCK(nep);
2434
2435 /* Create the replenish kernel process. */
2436 error = kproc_create(nfsvno_pnfsreplenish, mp, NULL, RFHIGHPID,
2437 0, "pnfsreplenish");
2438 if (error != 0) {
2439 printf("nfsvno_pnfsusenumfile: replenish won't start"
2440 " %d\n", error);
2441 error = ENOENT;
2442 goto out;
2443 }
2444
2445 /* And wait for it to set up ne_pnfsnumfile. */
2446 MNTEXP_LOCK(nep);
2447 (void)msleep(&nep->ne_pnfsnumfile, MNTEXP_MTX(nep), PVFS,
2448 "pnfsnumf", hz);
2449 }
2450
2451 if (nep->ne_pnfsnumfile == NULL ||
2452 nep->ne_pnfsnumfile == PNFSD_START ||
2453 nep->ne_pnfsnumfile == PNFSD_STOP) {
2454 MNTEXP_UNLOCK(nep);
2455 error = ENOENT;
2456 goto out;
2457 } else {
2458 numfiledvp = nep->ne_pnfsnumfile;
2459 MNTEXP_UNLOCK(nep);
2460 /*
2461 * Check to ensure the new file is not in ".pnfshide/numfiles".
2462 */
2463 if (numfiledvp == ndp->ni_dvp) {
2464 error = ENOENT;
2465 numfiledvp = NULL;
2466 goto out;
2467 }
2468 }
2469
2470 error = vn_lock(numfiledvp, LK_EXCLUSIVE);
2471 if (error != 0) {
2472 error = ENOENT;
2473 numfiledvp = NULL;
2474 goto out;
2475 }
2476 vref(numfiledvp);
2477
2478 /* Get the next filenum. */
2479 nextf = nep->ne_pnfsnextfile;
2480 snprintf(name, sizeof(name), "%d", nextf);
2481
2482 /* Now, look up the numbered file. */
2483 cn.cn_nameiop = DELETE;
2484 cn.cn_lkflags = LK_EXCLUSIVE;
2485 cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF | LOCKPARENT |
2486 NOCROSSMOUNT;
2487 cn.cn_cred = cred;
2488 cn.cn_nameptr = name;
2489 cn.cn_namelen = strlen(name);
2490 error = VOP_LOOKUP(numfiledvp, &ndp->ni_vp, &cn);
2491 if (error != 0) {
2492 nfsrv_pnfsnumfilemiss++;
2493 VOP_UNLOCK(numfiledvp);
2494 ndp->ni_vp = NULL;
2495 if (error == ENOENT || error == EJUSTRETURN)
2496 wakeup(&mp->mnt_export);
2497 else
2498 VOP_VPUT_PAIR(ndp->ni_dvp, NULL, true);
2499 goto out;
2500 }
2501
2502 /*
2503 * Set the new file's attributes to what VOP_CREATE() would
2504 * have set them to.
2505 */
2506 gid = GID_NOGROUP;
2507 if (vap->va_gid == VNOVAL &&
2508 VOP_GETATTR(ndp->ni_dvp, &va, cred) == 0)
2509 gid = va.va_gid;
2510 VATTR_NULL(&va);
2511 va.va_gid = gid;
2512 va.va_uid = ndp->ni_cnd.cn_cred->cr_uid;
2513 va.va_mode = vap->va_mode;
2514 error = VOP_SETATTR(ndp->ni_vp, &va, cred);
2515 if (error != 0) {
2516 VOP_UNLOCK(numfiledvp);
2517 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, true);
2518 ndp->ni_vp = NULL;
2519 printf("nfsvno_pnfsusenumfile: setattr failed %d\n",
2520 error);
2521 if (error == ENOENT)
2522 error = ENXIO;
2523 goto out;
2524 }
2525
2526 /*
2527 * Link the numbered file to the name VOP_CREATE() would have
2528 * created in the correct directory and then VOP_REMOVE() the
2529 * numbered file.
2530 * Use VOP_LINK()/VOP_REMOVE() so that the numbered file
2531 * directory can remain locked.
2532 */
2533 error = VOP_LINK(ndp->ni_dvp, ndp->ni_vp, &ndp->ni_cnd);
2534 /* Remove the file in .numfiles. */
2535 if (error == 0) {
2536 nep->ne_pnfsnextfile = (nextf + 1) %
2537 nfsrv_pnfsmaxnumfiles;
2538 error = VOP_REMOVE(numfiledvp, ndp->ni_vp, &cn);
2539 if (error != 0) {
2540 /* Shut down the numfiles stuff. */
2541 MNTEXP_LOCK(nep);
2542 nep->ne_pnfsnumfile = PNFSD_STOP;
2543 MNTEXP_UNLOCK(nep);
2544 printf("nfsvno_pnfsusenumfile: remove failed "
2545 "%d %s\n", error, name);
2546 }
2547 }
2548 VOP_UNLOCK(numfiledvp);
2549 if (error != 0) {
2550 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, true);
2551 ndp->ni_vp = NULL;
2552 } else {
2553 atomic_add_int(&nep->ne_pnfsnumcnt, 1);
2554 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, false);
2555 }
2556 if (error == ENOENT)
2557 error = ENXIO;
2558
2559 out:
2560 if (numfiledvp != NULL)
2561 vrele(numfiledvp);
2562 if (nep != NULL)
2563 vfs_netexport_release(nep);
2564 curthread->td_ucred = savcred; /* Reset the thread's cred. */
2565 NFSFREECRED(cred);
2566 return (error);
2567 }
2568
/*
 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
 * must handle nfsrv_opencheck() calls after any other access checks.
 *
 * ndp holds the results of the name lookup.  When ndp->ni_vp is NULL,
 * the file is created in ndp->ni_dvp, otherwise the existing file
 * ndp->ni_vp is used.
 * The result status is returned in nd->nd_repstat and the vnode for the
 * file (or NULL) is returned in *vpp.
 * done_namei indicates that the nfsvno_namei() results still need to be
 * cleaned up here when nd->nd_repstat is already set.
 */
void
nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
    nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
    int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
    NFSACL_T *aclp, NFSACL_T *daclp, nfsattrbit_t *attrbitp, struct ucred *cred,
    bool done_namei, struct nfsexstuff *exp, struct vnode **vpp)
{
	struct vattr va;
	struct vnode *vp = NULL;
	u_quad_t tempsize;
	struct nfsexstuff nes;
	struct thread *p = curthread;
	uint32_t oldrepstat;
	u_long savflags;
	int error;

	if (ndp->ni_vp == NULL) {
		/*
		 * If nfsrv_opencheck() sets nd_repstat, done_namei needs to be
		 * set true, since cleanup after nfsvno_namei() is needed.
		 */
		oldrepstat = nd->nd_repstat;
		nd->nd_repstat = nfsrv_opencheck(clientid,
		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
		if (nd->nd_repstat != 0 && oldrepstat == 0)
			done_namei = true;
	}
	if (!nd->nd_repstat) {
		if (ndp->ni_vp == NULL) {
			/* The file does not exist yet, so create it. */
			struct sockaddr_in *sin;
			struct sockaddr_in6 *sin6;
			bool try_pnfs;

			sin = (struct sockaddr_in *)nd->nd_nam;
			sin6 = (struct sockaddr_in6 *)nd->nd_nam;
			error = ENOENT;
			try_pnfs = !TAILQ_EMPTY(&nfsrv_devidhead);

			/*
			 * When DSs are configured and the request is not
			 * from a loopback address, try to use one of the
			 * pre-created numfiles.  ENOENT from
			 * nfsvno_pnfsusenumfile() means "fall back to
			 * VOP_CREATE()", any other value is the result.
			 */
			if (try_pnfs && !(sin->sin_family == AF_INET &&
			    IN_LOOPBACK(ntohl(sin->sin_addr.s_addr))) &&
			    !(sin6->sin6_family == AF_INET6 &&
			    IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) {
				error = nfsvno_pnfsusenumfile(ndp,
				    &nvap->na_vattr);
				if (error != ENOENT)
					nd->nd_repstat = error;
			}
			if (error == ENOENT) {
				/*
				 * Most file systems ignore va_flags for
				 * VOP_CREATE(), however setting va_flags
				 * for VOP_CREATE() causes problems for ZFS.
				 * So disable them and let nfsrv_fixattr()
				 * do them, as required.
				 */
				savflags = nvap->na_flags;
				nvap->na_flags = VNOVAL;
				nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
				    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
				if (try_pnfs && nd->nd_repstat == 0) {
					/*
					 * Create a data file on a DS for a pNFS
					 * server.  This function just returns if
					 * not running a pNFS DS or the creation
					 * fails.
					 */
					nfsrv_pnfscreate(ndp->ni_vp,
					    &nvap->na_vattr, cred, p);
				}
				VOP_VPUT_PAIR(ndp->ni_dvp, nd->nd_repstat == 0 ?
				    &ndp->ni_vp : NULL, false);
				nvap->na_flags = savflags;
			}
			nfsvno_relpathbuf(ndp);
			if (!nd->nd_repstat) {
				if (*exclusive_flagp != NFSV4_EXCLUSIVE_NONE) {
					/*
					 * For an exclusive create, store the
					 * create verifier in the new file's
					 * atime.
					 */
					VATTR_NULL(&va);
					va.va_atime.tv_sec = cverf[0];
					va.va_atime.tv_nsec = cverf[1];
					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
					    &va, cred);
					if (nd->nd_repstat != 0) {
						vput(ndp->ni_vp);
						ndp->ni_vp = NULL;
						nd->nd_repstat = NFSERR_NOTSUPP;
					} else {
						/*
						 * Few clients set these
						 * attributes in Open/Create
						 * Exclusive_41.  If this
						 * changes, this should include
						 * setting atime, instead of
						 * the above.
						 */
						if (*exclusive_flagp ==
						    NFSV4_EXCLUSIVE_41 &&
						    (NFSISSET_ATTRBIT(attrbitp,
						    NFSATTRBIT_OWNER) ||
						    NFSISSET_ATTRBIT(attrbitp,
						    NFSATTRBIT_OWNERGROUP) ||
						    NFSISSET_ATTRBIT(attrbitp,
						    NFSATTRBIT_TIMEMODIFYSET)||
						    NFSISSET_ATTRBIT(attrbitp,
						    NFSATTRBIT_ARCHIVE) ||
						    NFSISSET_ATTRBIT(attrbitp,
						    NFSATTRBIT_HIDDEN) ||
						    NFSISSET_ATTRBIT(attrbitp,
						    NFSATTRBIT_SYSTEM) ||
						    aclp != NULL ||
						    daclp != NULL))
							nfsrv_fixattr(nd,
							    ndp->ni_vp, nvap,
							    aclp, daclp, p,
							    attrbitp, true);
						NFSSETBIT_ATTRBIT(attrbitp,
						    NFSATTRBIT_TIMEACCESS);
					}
					*exclusive_flagp = NFSV4_EXCLUSIVE_NONE;
				} else {
					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
					    aclp, daclp, p, attrbitp, false);
				}
			}
			vp = ndp->ni_vp;
		} else {
			/* The file already exists. */
			nfsvno_relpathbuf(ndp);
			vp = ndp->ni_vp;
			if (create == NFSV4OPEN_CREATE) {
				/* The directory is no longer needed. */
				if (ndp->ni_dvp == vp)
					vrele(ndp->ni_dvp);
				else
					vput(ndp->ni_dvp);
			}
			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
				/*
				 * A size was specified, so check for write
				 * access and then set only the size.
				 */
				if (ndp->ni_cnd.cn_flags & RDONLY)
					NFSVNO_SETEXRDONLY(&nes);
				else
					NFSVNO_EXINIT(&nes);
				nd->nd_repstat = nfsvno_accchk(vp,
				    VWRITE, cred, &nes, p,
				    NFSACCCHK_NOOVERRIDE,
				    NFSACCCHK_VPISLOCKED, NULL);
				nd->nd_repstat = nfsrv_opencheck(clientid,
				    stateidp, stp, vp, nd, p, nd->nd_repstat);
				if (!nd->nd_repstat) {
					tempsize = nvap->na_size;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_size = tempsize;
					nd->nd_repstat = nfsvno_setattr(vp,
					    nvap, cred, p, exp);
				}
			} else if (vp->v_type == VREG) {
				nd->nd_repstat = nfsrv_opencheck(clientid,
				    stateidp, stp, vp, nd, p, nd->nd_repstat);
			}
		}
	} else if (done_namei) {
		KASSERT(create == NFSV4OPEN_CREATE,
		    ("nfsvno_open: not create"));
		/*
		 * done_namei is set when nfsvno_namei() has completed
		 * successfully, but a subsequent error was set in
		 * nd_repstat.  As such, cleanup of the nfsvno_namei()
		 * results is required.
		 */
		nfsvno_relpathbuf(ndp);
		if (ndp->ni_dvp == ndp->ni_vp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		if (ndp->ni_vp)
			vput(ndp->ni_vp);
	}
	/* vp is still NULL here, unless one of the cases above set it. */
	*vpp = vp;

	NFSEXITCODE2(0, nd);
}
2750
2751 /*
2752 * Updates the file rev and sets the mtime and ctime
2753 * to the current clock time, returning the va_filerev and va_Xtime
2754 * values.
2755 * Return ESTALE to indicate the vnode is VIRF_DOOMED.
2756 */
2757 int
nfsvno_updfilerev(struct vnode * vp,struct nfsvattr * nvap,struct nfsrv_descript * nd,struct thread * p)2758 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
2759 struct nfsrv_descript *nd, struct thread *p)
2760 {
2761 struct vattr va;
2762
2763 VATTR_NULL(&va);
2764 vfs_timestamp(&va.va_mtime);
2765 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
2766 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
2767 if (VN_IS_DOOMED(vp))
2768 return (ESTALE);
2769 }
2770 (void) VOP_SETATTR(vp, &va, nd->nd_cred);
2771 (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL);
2772 return (0);
2773 }
2774
2775 /*
2776 * Glue routine to nfsv4_fillattr().
2777 */
2778 int
nfsvno_fillattr(struct nfsrv_descript * nd,struct mount * mp,struct vnode * vp,struct nfsvattr * nvap,fhandle_t * fhp,int rderror,nfsattrbit_t * attrbitp,struct ucred * cred,struct thread * p,int isdgram,int reterr,int supports_nfsv4acls,int at_root,uint64_t mounted_on_fileno,bool xattrsupp,bool has_hiddensystem,bool has_namedattr,uint32_t clone_blksize,bool has_caseinsensitive)2779 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
2780 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
2781 struct ucred *cred, struct thread *p, int isdgram, int reterr,
2782 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno,
2783 bool xattrsupp, bool has_hiddensystem, bool has_namedattr,
2784 uint32_t clone_blksize, bool has_caseinsensitive)
2785 {
2786 struct statfs *sf;
2787 int error;
2788
2789 sf = NULL;
2790 if (nfsrv_devidcnt > 0 &&
2791 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) ||
2792 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) ||
2793 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) {
2794 sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO);
2795 error = nfsrv_pnfsstatfs(sf, mp);
2796 if (error != 0) {
2797 free(sf, M_TEMP);
2798 sf = NULL;
2799 }
2800 }
2801
2802 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
2803 attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
2804 mounted_on_fileno, sf, xattrsupp, has_hiddensystem, has_namedattr,
2805 clone_blksize, NULL, has_caseinsensitive);
2806 free(sf, M_TEMP);
2807 NFSEXITCODE2(0, nd);
2808 return (error);
2809 }
2810
2811 /*
2812 * Convert a dirent d_type to a vnode type.
2813 */
nfs_dtypetovtype(struct nfsvattr * nvap,struct vnode * vp,uint8_t dtype)2814 static void nfs_dtypetovtype(struct nfsvattr *nvap, struct vnode *vp,
2815 uint8_t dtype)
2816 {
2817
2818 if ((vn_irflag_read(vp) & VIRF_NAMEDDIR) != 0) {
2819 nvap->na_type = VREG;
2820 nvap->na_bsdflags |= SFBSD_NAMEDATTR;
2821 } else if (dtype <= DT_WHT) {
2822 nvap->na_type = dtype_to_vnode[dtype];
2823 } else {
2824 nvap->na_type = VNON;
2825 }
2826 }
2827
/* Since the Readdir vnode ops vary, put the entire functions in here. */
/*
 * nfs readdir service
 * - mallocs what it thinks is enough to read
 *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
 * - calls VOP_READDIR()
 * - loops around building the reply
 *	if the output generated exceeds count break out of loop
 *	The NFSM_CLGET macro is used here so that the reply will be packed
 *	tightly in mbuf clusters.
 * - it trims out records with d_fileno == 0
 *	this doesn't matter for Unix clients, but they might confuse clients
 *	for other operating systems.
 * - it trims out records with d_type == DT_WHT
 *	these cannot be seen through NFS (unless we extend the protocol)
 *     The alternate call nfsrvd_readdirplus() does lookups as well.
 * PS: The NFS protocol spec. does not clarify what the "count" byte
 *	argument is a count of.. just name strings and file id's or the
 *	entire reply rpc or ...
 *	I tried just file name and id sizes and it confused the Sun client,
 *	so I am using the full rpc size now. The "paranoia.." comment refers
 *	to including the status longwords that are not a part of the dir.
 *	"entry" structures, but are in the rpc.
 *
 * Handles NFSv2 and NFSv3 requests.  vp is vput() on every path past the
 * initial nd_repstat check.  Errors for the RPC are reported via
 * nd->nd_repstat with a return value of 0; a non-zero return only happens
 * via the nfsmout label.
 */
int
nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
    struct vnode *vp, struct nfsexstuff *exp)
{
	struct dirent *dp;
	u_int32_t *tl;
	int dirlen;
	char *cpos, *cend, *rbuf;
	struct nfsvattr at;
	int nlen, error = 0, getret = 1;
	int siz, cnt, fullsiz, eofflag, ncookies;
	u_int64_t off, toff, verf __unused;
	uint64_t *cookies = NULL, *cookiep;
	struct uio io;
	struct iovec iv;
	int is_ufs;
	struct thread *p = curthread;

	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/* Get the directory offset cookie (plus verifier for V3) and count. */
	if (nd->nd_flag & ND_NFSV2) {
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		off = fxdr_unsigned(u_quad_t, *tl++);
	} else {
		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
		off = fxdr_hyper(tl);
		tl += 2;
		verf = fxdr_hyper(tl);
		tl += 2;
	}
	toff = off;
	cnt = fxdr_unsigned(int, *tl);
	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
		cnt = NFS_SRVMAXDATA(nd);
	/* Round the read size up to a multiple of DIRBLKSIZ. */
	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
	fullsiz = siz;
	if (nd->nd_flag & ND_NFSV3) {
		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1,
		    NULL);
#if 0
		/*
		 * va_filerev is not sufficient as a cookie verifier,
		 * since it is not supposed to change when entries are
		 * removed/added unless that offset cookies returned to
		 * the client are no longer valid.
		 */
		if (!nd->nd_repstat && toff && verf != at.na_filerev)
			nd->nd_repstat = NFSERR_BAD_COOKIE;
#endif
	}
	if (!nd->nd_repstat && vp->v_type != VDIR)
		nd->nd_repstat = NFSERR_NOTDIR;
	if (nd->nd_repstat == 0 && cnt == 0) {
		if (nd->nd_flag & ND_NFSV2)
			/* NFSv2 does not have NFSERR_TOOSMALL */
			nd->nd_repstat = EPERM;
		else
			nd->nd_repstat = NFSERR_TOOSMALL;
	}
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
		    NFSACCCHK_VPISLOCKED, NULL);
	if (nd->nd_repstat) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
	rbuf = malloc(siz, M_TEMP, M_WAITOK);
again:
	/* Each pass reads a fresh block of entries, starting at "off". */
	eofflag = 0;
	if (cookies) {
		free(cookies, M_TEMP);
		cookies = NULL;
	}

	iv.iov_base = rbuf;
	iv.iov_len = siz;
	io.uio_iov = &iv;
	io.uio_iovcnt = 1;
	io.uio_offset = (off_t)off;
	io.uio_resid = siz;
	io.uio_segflg = UIO_SYSSPACE;
	io.uio_rw = UIO_READ;
	io.uio_td = NULL;
	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
	    &cookies);
	off = (u_int64_t)io.uio_offset;
	/* siz becomes the number of bytes actually read. */
	if (io.uio_resid)
		siz -= io.uio_resid;

	/* The offset cookies are required to build the reply. */
	if (!cookies && !nd->nd_repstat)
		nd->nd_repstat = NFSERR_PERM;
	if (nd->nd_flag & ND_NFSV3) {
		getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
		if (!nd->nd_repstat)
			nd->nd_repstat = getret;
	}

	/*
	 * Handles the failed cases. nd->nd_repstat == 0 past here.
	 */
	if (nd->nd_repstat) {
		vput(vp);
		free(rbuf, M_TEMP);
		if (cookies)
			free(cookies, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/*
	 * If nothing read, return eof
	 * rpc reply
	 */
	if (siz == 0) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV2) {
			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		} else {
			nfsrv_postopattr(nd, getret, &at);
			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
			txdr_hyper(at.na_filerev, tl);
			tl += 2;
		}
		/* No entry follows and this is the end of the directory. */
		*tl++ = newnfs_false;
		*tl = newnfs_true;
		free(rbuf, M_TEMP);
		free(cookies, M_TEMP);
		goto out;
	}

	/*
	 * Check for degenerate cases of nothing useful read.
	 * If so go try again
	 */
	cpos = rbuf;
	cend = rbuf + siz;
	dp = (struct dirent *)cpos;
	cookiep = cookies;

	/*
	 * For some reason FreeBSD's ufs_readdir() chooses to back the
	 * directory offset up to a block boundary, so it is necessary to
	 * skip over the records that precede the requested offset. This
	 * requires the assumption that file offset cookies monotonically
	 * increase.
	 */
	while (cpos < cend && ncookies > 0 &&
	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
	     (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) {
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	if (cpos >= cend || ncookies == 0) {
		/* Nothing useful in this block, so read the next one. */
		siz = fullsiz;
		toff = off;
		goto again;
	}
	vput(vp);

	/*
	 * If cnt > MCLBYTES and the reply will not be saved, use
	 * ext_pgs mbufs for TLS.
	 * For NFSv4.0, we do not know for sure if the reply will
	 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
	 */
	if (cnt > MCLBYTES && siz > MCLBYTES &&
	    (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
	    (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
		nd->nd_flag |= ND_EXTPG;

	/*
	 * dirlen is the size of the reply, including all XDR and must
	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
	 * if the XDR should be included in "count", but to be safe, we do.
	 * (Include the two booleans at the end of the reply in dirlen now.)
	 */
	if (nd->nd_flag & ND_NFSV3) {
		nfsrv_postopattr(nd, getret, &at);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		txdr_hyper(at.na_filerev, tl);
		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
	} else {
		dirlen = 2 * NFSX_UNSIGNED;
	}

	/* Loop through the records and build reply */
	while (cpos < cend && ncookies > 0) {
		nlen = dp->d_namlen;
		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
			nlen <= NFS_MAXNAMLEN) {
			if (nd->nd_flag & ND_NFSV3)
				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			else
				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			if (dirlen > cnt) {
				/* This entry will not fit, so not at eof. */
				eofflag = 0;
				break;
			}

			/*
			 * Build the directory record xdr from
			 * the dirent entry.
			 */
			if (nd->nd_flag & ND_NFSV3) {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				txdr_hyper(dp->d_fileno, tl);
			} else {
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				*tl = txdr_unsigned(dp->d_fileno);
			}
			(void) nfsm_strtom(nd, dp->d_name, nlen);
			if (nd->nd_flag & ND_NFSV3) {
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				txdr_hyper(*cookiep, tl);
			} else {
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(*cookiep);
			}
		}
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	/* Entries left in the buffer means the client must ask for more. */
	if (cpos < cend)
		eofflag = 0;
	/* Terminate the entry list and add the eof flag. */
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = newnfs_false;
	if (eofflag)
		*tl = newnfs_true;
	else
		*tl = newnfs_false;
	free(rbuf, M_TEMP);
	free(cookies, M_TEMP);

out:
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	vput(vp);
	NFSEXITCODE2(error, nd);
	return (error);
}
3105
/*
 * Readdirplus for V3 and Readdir for V4.
 *
 * Dissects the directory offset cookie, the cookie verifier and the two
 * size arguments (plus the requested attribute bitmap for NFSv4) from the
 * request, reads a chunk of the directory with VOP_READDIR() and then
 * builds one reply entry, including attributes, for each usable directory
 * entry until the reply would exceed "cnt" bytes.
 *
 * nd      - request/reply descriptor; the RPC status is left in nd_repstat.
 * isdgram - passed through to nfsvno_fillattr().
 * vp      - the directory vnode.  It is vput()/vrele()'d on every path
 *           below except when nd_repstat is already non-zero on entry.
 *           NOTE(review): presumably passed in locked, since vput() is
 *           used and NFSACCCHK_VPISLOCKED is given to nfsvno_accchk().
 * exp     - export information, used for the VEXEC access check.
 *
 * Returns 0 once a reply (success or failure status in nd_repstat) has
 * been built, or the argument parsing error via the nfsmout label.
 */
int
nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
    struct vnode *vp, struct nfsexstuff *exp)
{
	struct dirent *dp;
	uint32_t clone_blksize, *tl;
	int dirlen;
	char *cpos, *cend, *rbuf;
	struct vnode *nvp;
	fhandle_t nfh;
	struct nfsvattr nva, at, *nvap = &nva;
	struct mbuf *mb0, *mb1;
	struct nfsreferral *refp;
	int nlen, r, error = 0, getret = 1, ret, usevget = 1;
	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
	caddr_t bpos0, bpos1;
	u_int64_t off, toff, verf __unused;
	uint64_t *cookies = NULL, *cookiep;
	nfsattrbit_t attrbits, rderrbits, savbits, refbits;
	struct uio io;
	struct iovec iv;
	struct componentname cn;
	int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls;
	struct mount *mp, *new_mp;
	uint64_t mounted_on_fileno;
	struct thread *p = curthread;
	int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1;
	size_t atsiz;
	long pathval;
	bool has_caseinsensitive, has_hiddensystem, has_namedattr, xattrsupp;

	NFSZERO_ATTRBIT(&savbits);	/* Shut up gcc. */
	if (nd->nd_repstat) {
		/* An earlier failure: only the post-op attributes are added. */
		nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/* Arguments: cookie (2 words), verifier (2 words), siz, cnt. */
	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
	off = fxdr_hyper(tl);
	toff = off;
	tl += 2;
	verf = fxdr_hyper(tl);
	tl += 2;
	siz = fxdr_unsigned(int, *tl++);
	cnt = fxdr_unsigned(int, *tl);

	/*
	 * Use the server's maximum data transfer size as the upper bound
	 * on reply datalen.
	 */
	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
		cnt = NFS_SRVMAXDATA(nd);

	/*
	 * siz is a "hint" of how much directory information (name, fileid,
	 * cookie) should be in the reply. At least one client "hints" 0,
	 * so I set it to cnt for that case. I also round it up to the
	 * next multiple of DIRBLKSIZ.
	 * Since the size of a Readdirplus directory entry reply will always
	 * be greater than a directory entry returned by VOP_READDIR(), it
	 * does not make sense to read more than NFS_SRVMAXDATA() via
	 * VOP_READDIR().
	 */
	if (siz <= 0)
		siz = cnt;
	else if (siz > NFS_SRVMAXDATA(nd))
		siz = NFS_SRVMAXDATA(nd);
	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));

	if (nd->nd_flag & ND_NFSV4) {
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error)
			goto nfsmout;
		/*
		 * savbits and refbits start out as copies of the requested
		 * bits; attrbits is then reduced by
		 * NFSCLRNOTFILLABLE_ATTRBIT().
		 */
		NFSSET_ATTRBIT(&savbits, &attrbits);
		NFSSET_ATTRBIT(&refbits, &attrbits);
		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd);
		NFSZERO_ATTRBIT(&rderrbits);
		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
		/*
		 * If these 4 bits are the only attributes requested by the
		 * client, they can be satisfied without acquiring the vnode
		 * for the file object unless it is a directory.
		 * This will be indicated by savbits being all 0s.
		 */
		NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_TYPE);
		NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_FILEID);
		NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_MOUNTEDONFILEID);
		NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_RDATTRERROR);
	} else {
		NFSZERO_ATTRBIT(&attrbits);
	}
	/* Remember the full read size; siz is reduced after each read. */
	fullsiz = siz;
	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
#if 0
	if (!nd->nd_repstat) {
	    if (off && verf != at.na_filerev) {
		/*
		 * va_filerev is not sufficient as a cookie verifier,
		 * since it is not supposed to change when entries are
		 * removed/added unless that offset cookies returned to
		 * the client are no longer valid.
		 */
		if (nd->nd_flag & ND_NFSV4) {
			nd->nd_repstat = NFSERR_NOTSAME;
		} else {
			nd->nd_repstat = NFSERR_BAD_COOKIE;
		}
	    }
	}
#endif
	if (!nd->nd_repstat && vp->v_type != VDIR)
		nd->nd_repstat = NFSERR_NOTDIR;
	if (!nd->nd_repstat && cnt == 0)
		nd->nd_repstat = NFSERR_TOOSMALL;
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
		    NFSACCCHK_VPISLOCKED, NULL);
	if (nd->nd_repstat) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
	is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0;

	rbuf = malloc(siz, M_TEMP, M_WAITOK);
again:
	eofflag = 0;
	/* Drop the cookie array left over from a previous pass, if any. */
	if (cookies) {
		free(cookies, M_TEMP);
		cookies = NULL;
	}

	iv.iov_base = rbuf;
	iv.iov_len = siz;
	io.uio_iov = &iv;
	io.uio_iovcnt = 1;
	io.uio_offset = (off_t)off;
	io.uio_resid = siz;
	io.uio_segflg = UIO_SYSSPACE;
	io.uio_rw = UIO_READ;
	io.uio_td = NULL;
	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
	    &cookies);
	off = (u_int64_t)io.uio_offset;
	/* siz becomes the number of bytes actually read into rbuf. */
	if (io.uio_resid)
		siz -= io.uio_resid;

	getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);

	/* A successful read that returned no cookie array is an error. */
	if (!cookies && !nd->nd_repstat)
		nd->nd_repstat = NFSERR_PERM;
	if (!nd->nd_repstat)
		nd->nd_repstat = getret;
	if (nd->nd_repstat) {
		vput(vp);
		if (cookies)
			free(cookies, M_TEMP);
		free(rbuf, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/*
	 * If nothing read, return eof
	 * rpc reply
	 */
	if (siz == 0) {
ateof:
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		/* Verifier, "no more entries" and "eof" booleans. */
		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		txdr_hyper(at.na_filerev, tl);
		tl += 2;
		*tl++ = newnfs_false;
		*tl = newnfs_true;
		free(cookies, M_TEMP);
		free(rbuf, M_TEMP);
		goto out;
	}

	/*
	 * Check for degenerate cases of nothing useful read.
	 * If so go try again
	 */
	cpos = rbuf;
	cend = rbuf + siz;
	dp = (struct dirent *)cpos;
	cookiep = cookies;

	/*
	 * For some reason FreeBSD's ufs_readdir() chooses to back the
	 * directory offset up to a block boundary, so it is necessary to
	 * skip over the records that precede the requested offset. This
	 * requires the assumption that file offset cookies monotonically
	 * increase.
	 */
	while (cpos < cend && ncookies > 0 &&
	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
	   (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) ||
	   ((nd->nd_flag & ND_NFSV4) &&
	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	if (cpos >= cend || ncookies == 0) {
		/* Everything read was skipped: finish at eof, else read on. */
		if (eofflag != 0)
			goto ateof;
		siz = fullsiz;
		toff = off;
		goto again;
	}

	/*
	 * Busy the file system so that the mount point won't go away
	 * and, as such, VFS_VGET() can be used safely.
	 */
	mp = vp->v_mount;
	vfs_ref(mp);
	NFSVOPUNLOCK(vp);
	nd->nd_repstat = vfs_busy(mp, 0);
	vfs_rel(mp);
	if (nd->nd_repstat != 0) {
		/* vp was unlocked above, so only the reference is dropped. */
		vrele(vp);
		free(cookies, M_TEMP);
		free(rbuf, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}

	/*
	 * Check to see if entries in this directory can be safely acquired
	 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required.
	 * ZFS snapshot directories need VOP_LOOKUP(), so that any
	 * automount of the snapshot directory that is required will
	 * be done.
	 * This needs to be done here for NFSv4, since NFSv4 never does
	 * a VFS_VGET() for "." or "..".
	 */
	if (is_zfs == 1) {
		r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp);
		if (r == EOPNOTSUPP) {
			usevget = 0;
			cn.cn_nameiop = LOOKUP;
			cn.cn_lkflags = LK_SHARED | LK_RETRY;
			cn.cn_cred = nd->nd_cred;
		} else if (r == 0)
			vput(nvp);
	}

	/*
	 * If the reply is likely to exceed MCLBYTES and the reply will
	 * not be saved, use ext_pgs mbufs for TLS.
	 * It is difficult to predict how large each entry will be and
	 * how many entries have been read, so just assume the directory
	 * entries grow by a factor of 4 when attributes are included.
	 * For NFSv4.0, we do not know for sure if the reply will
	 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
	 */
	if (cnt > MCLBYTES && siz > MCLBYTES / 4 &&
	    (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
	    (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
		nd->nd_flag |= ND_EXTPG;

	/*
	 * Save this position, in case there is an error before one entry
	 * is created.
	 */
	mb0 = nd->nd_mb;
	bpos0 = nd->nd_bpos;
	bextpg0 = nd->nd_bextpg;
	bextpgsiz0 = nd->nd_bextpgsiz;

	/*
	 * Fill in the first part of the reply.
	 * dirlen is the reply length in bytes and cannot exceed cnt.
	 * (Include the two booleans at the end of the reply in dirlen now,
	 * so we recognize when we have exceeded cnt.)
	 */
	if (nd->nd_flag & ND_NFSV3) {
		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
		nfsrv_postopattr(nd, getret, &at);
	} else {
		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
	}
	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
	txdr_hyper(at.na_filerev, tl);

	/*
	 * Save this position, in case there is an empty reply needed.
	 */
	mb1 = nd->nd_mb;
	bpos1 = nd->nd_bpos;
	bextpg1 = nd->nd_bextpg;
	bextpgsiz1 = nd->nd_bextpgsiz;

	/* Loop through the records and build reply */
	entrycnt = 0;
	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
		nlen = dp->d_namlen;
		/*
		 * Skip empty slots, whiteouts and over-long names and,
		 * for NFSv4 only, the "." and ".." entries.
		 */
		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
		    nlen <= NFS_MAXNAMLEN &&
		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
		      || (nlen == 1 && dp->d_name[0] != '.'))) {
			/*
			 * Save the current position in the reply, in case
			 * this entry exceeds cnt.
			 */
			mb1 = nd->nd_mb;
			bpos1 = nd->nd_bpos;
			bextpg1 = nd->nd_bextpg;
			bextpgsiz1 = nd->nd_bextpgsiz;

			/*
			 * For readdir_and_lookup get the vnode using
			 * the file number.
			 */
			nvp = NULL;
			refp = NULL;
			r = 0;
			at_root = 0;
			needs_unbusy = 0;
			new_mp = mp;
			mounted_on_fileno = (uint64_t)dp->d_fileno;
			if ((nd->nd_flag & ND_NFSV3) ||
			    NFSNONZERO_ATTRBIT(&savbits) ||
			    dp->d_type == DT_UNKNOWN ||
			    (dp->d_type == DT_DIR &&
			     nfsrv_enable_crossmntpt != 0)) {
				if (nd->nd_flag & ND_NFSV4)
					refp = nfsv4root_getreferral(NULL,
					    vp, dp->d_fileno);
				if (refp == NULL) {
					if (usevget)
						r = VFS_VGET(mp, dp->d_fileno,
						    LK_SHARED, &nvp);
					else
						r = EOPNOTSUPP;
					if (r == 0 && (vn_irflag_read(vp) &
					    VIRF_NAMEDDIR) != 0)
						vn_irflag_set_cond(nvp,
						    VIRF_NAMEDATTR);
					if (r == EOPNOTSUPP) {
						/*
						 * First EOPNOTSUPP from
						 * VFS_VGET(): switch to
						 * VOP_LOOKUP() for the rest
						 * of this request.
						 */
						if (usevget) {
							usevget = 0;
							cn.cn_nameiop = LOOKUP;
							cn.cn_lkflags =
							    LK_SHARED |
							    LK_RETRY;
							cn.cn_cred =
							    nd->nd_cred;
						}
						cn.cn_nameptr = dp->d_name;
						cn.cn_namelen = nlen;
						cn.cn_flags = ISLASTCN |
						    NOFOLLOW | LOCKLEAF;
						if ((vn_irflag_read(vp) &
						    VIRF_NAMEDDIR) != 0)
							cn.cn_flags |=
							    OPENNAMED;
						if (nlen == 2 &&
						    dp->d_name[0] == '.' &&
						    dp->d_name[1] == '.')
							cn.cn_flags |=
							    ISDOTDOT;
						/*
						 * vp was unlocked before
						 * vfs_busy(); relock it for
						 * the lookup.
						 */
						if (NFSVOPLOCK(vp, LK_SHARED)
						    != 0) {
							nd->nd_repstat = EPERM;
							break;
						}
						if ((vp->v_vflag & VV_ROOT) != 0
						    && (cn.cn_flags & ISDOTDOT)
						    != 0) {
							/*
							 * ".." of a file
							 * system root is
							 * answered with vp
							 * itself.
							 */
							vref(vp);
							nvp = vp;
							r = 0;
						} else {
							r = VOP_LOOKUP(vp, &nvp,
							    &cn);
							if (vp != nvp)
								NFSVOPUNLOCK(vp);
						}
					}

					/*
					 * For NFSv4, check to see if nvp is
					 * a mount point and get the mount
					 * point vnode, as required.
					 */
					if (r == 0 &&
					    nfsrv_enable_crossmntpt != 0 &&
					    (nd->nd_flag & ND_NFSV4) != 0 &&
					    nvp->v_type == VDIR &&
					    nvp->v_mountedhere != NULL) {
						new_mp = nvp->v_mountedhere;
						r = vfs_busy(new_mp, 0);
						vput(nvp);
						nvp = NULL;
						if (r == 0) {
							r = VFS_ROOT(new_mp,
							    LK_SHARED, &nvp);
							needs_unbusy = 1;
							if (r == 0)
								at_root = 1;
						}
					}
				}

				/*
				 * If we failed to look up the entry, then it
				 * has become invalid, most likely removed.
				 */
				if (r != 0) {
					if (needs_unbusy)
						vfs_unbusy(new_mp);
					goto invalid;
				}
				KASSERT(refp != NULL || nvp != NULL,
				    ("%s: undetected lookup error", __func__));

				if (refp == NULL &&
				    ((nd->nd_flag & ND_NFSV3) ||
				     NFSNONZERO_ATTRBIT(&attrbits))) {
					r = nfsvno_getfh(nvp, &nfh, p);
					if (!r)
					    r = nfsvno_getattr(nvp, nvap, nd, p,
						1, &attrbits);
					if (r == 0 && is_zfs == 1 &&
					    nfsrv_enable_crossmntpt != 0 &&
					    (nd->nd_flag & ND_NFSV4) != 0 &&
					    nvp->v_type == VDIR &&
					    vp->v_mount != nvp->v_mount) {
					    /*
					     * For a ZFS snapshot, there is a
					     * pseudo mount that does not set
					     * v_mountedhere, so it needs to
					     * be detected via a different
					     * mount structure.
					     */
					    at_root = 1;
					    if (new_mp == mp)
						new_mp = nvp->v_mount;
					}
				}

				/*
				 * If we failed to get attributes of the entry,
				 * then just skip it for NFSv3 (the traditional
				 * behavior in the old NFS server).
				 * For NFSv4 the behavior is controlled by
				 * RDATTRERROR: we either ignore the error or
				 * fail the request.
				 * The exception is EOPNOTSUPP, which can be
				 * returned by nfsvno_getfh() for certain
				 * file systems, such as devfs.  This indicates
				 * that the file system cannot be exported,
				 * so just skip over the entry.
				 * Note that RDATTRERROR is never set for NFSv3.
				 */
				if (r != 0) {
					if (!NFSISSET_ATTRBIT(&attrbits,
					    NFSATTRBIT_RDATTRERROR) ||
					    r == EOPNOTSUPP) {
						vput(nvp);
						if (needs_unbusy != 0)
							vfs_unbusy(new_mp);
						if ((nd->nd_flag & ND_NFSV3) ||
						    r == EOPNOTSUPP)
							goto invalid;
						nd->nd_repstat = r;
						break;
					}
				}
			} else if (NFSNONZERO_ATTRBIT(&attrbits)) {
				/* Only need Type and/or Fileid. */
				VATTR_NULL(&nvap->na_vattr);
				nvap->na_fileid = dp->d_fileno;
				nfs_dtypetovtype(nvap, vp, dp->d_type);
			}

			/*
			 * Build the directory record xdr
			 */
			if (nd->nd_flag & ND_NFSV3) {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				txdr_hyper(dp->d_fileno, tl);
				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				txdr_hyper(*cookiep, tl);
				nfsrv_postopattr(nd, 0, nvap);
				dirlen += nfsm_fhtom(NULL, nd, (u_int8_t *)&nfh,
				    0, 1);
				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
				if (nvp != NULL)
					vput(nvp);
			} else {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				txdr_hyper(*cookiep, tl);
				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
				xattrsupp = false;
				has_hiddensystem = false;
				has_namedattr = false;
				has_caseinsensitive = false;
				clone_blksize = 0;
				if (nvp != NULL) {
					/*
					 * Collect the per-file capabilities
					 * handed to nfsvno_fillattr() while
					 * nvp is still locked.
					 */
					supports_nfsv4acls =
					    nfs_supportsacls(nvp);
					if (NFSISSET_ATTRBIT(&attrbits,
					    NFSATTRBIT_XATTRSUPPORT)) {
						ret = VOP_GETEXTATTR(nvp,
						    EXTATTR_NAMESPACE_USER,
						    "xxx", NULL, &atsiz,
						    nd->nd_cred, p);
						xattrsupp = ret != EOPNOTSUPP;
					}
					if (VOP_PATHCONF(nvp,
					    _PC_HAS_HIDDENSYSTEM, &pathval) !=
					    0)
						pathval = 0;
					has_hiddensystem = pathval > 0;
					pathval = 0;
					if (NFSISSET_ATTRBIT(&attrbits,
					    NFSATTRBIT_NAMEDATTR) &&
					    VOP_PATHCONF(nvp, _PC_HAS_NAMEDATTR,
					    &pathval) != 0)
						pathval = 0;
					has_namedattr = pathval > 0;
					pathval = 0;
					if (VOP_PATHCONF(nvp, _PC_CLONE_BLKSIZE,
					    &pathval) != 0)
						pathval = 0;
					clone_blksize = pathval;
					if (VOP_PATHCONF(nvp,
					    _PC_CASE_INSENSITIVE,
					    &pathval) != 0)
						pathval = 0;
					has_caseinsensitive = pathval > 0;
					/*
					 * Unlock only; the reference is
					 * dropped by vrele() below.
					 */
					NFSVOPUNLOCK(nvp);
				} else
					supports_nfsv4acls = 0;
				if (refp != NULL) {
					dirlen += nfsrv_putreferralattr(nd,
					    &refbits, refp, 0,
					    &nd->nd_repstat);
					if (nd->nd_repstat) {
						if (nvp != NULL)
							vrele(nvp);
						if (needs_unbusy != 0)
							vfs_unbusy(new_mp);
						break;
					}
				} else if (r) {
					/*
					 * Attribute fetch failed but
					 * RDATTRERROR was requested: reply
					 * with rderrbits only.
					 */
					dirlen += nfsvno_fillattr(nd, new_mp,
					    nvp, nvap, &nfh, r, &rderrbits,
					    nd->nd_cred, p, isdgram, 0,
					    supports_nfsv4acls, at_root,
					    mounted_on_fileno, xattrsupp,
					    has_hiddensystem, has_namedattr,
					    clone_blksize, has_caseinsensitive);
				} else {
					dirlen += nfsvno_fillattr(nd, new_mp,
					    nvp, nvap, &nfh, r, &attrbits,
					    nd->nd_cred, p, isdgram, 0,
					    supports_nfsv4acls, at_root,
					    mounted_on_fileno, xattrsupp,
					    has_hiddensystem, has_namedattr,
					    clone_blksize, has_caseinsensitive);
				}
				if (nvp != NULL)
					vrele(nvp);
				dirlen += (3 * NFSX_UNSIGNED);
			}
			if (needs_unbusy != 0)
				vfs_unbusy(new_mp);
			/* Count only entries that still fit within cnt. */
			if (dirlen <= cnt)
				entrycnt++;
		}
invalid:
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	vrele(vp);
	vfs_unbusy(mp);

	/*
	 * If dirlen > cnt, we must strip off the last entry. If that
	 * results in an empty reply, report NFSERR_TOOSMALL.
	 */
	if (dirlen > cnt || nd->nd_repstat) {
		if (!nd->nd_repstat && entrycnt == 0)
			nd->nd_repstat = NFSERR_TOOSMALL;
		if (nd->nd_repstat) {
			/* Failure: discard everything built for this reply. */
			nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0);
			if (nd->nd_flag & ND_NFSV3)
				nfsrv_postopattr(nd, getret, &at);
		} else
			nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1);
		eofflag = 0;
	} else if (cpos < cend)
		eofflag = 0;
	if (!nd->nd_repstat) {
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		*tl++ = newnfs_false;
		if (eofflag)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
	}
	free(cookies, M_TEMP);
	free(rbuf, M_TEMP);

out:
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	vput(vp);
	NFSEXITCODE2(error, nd);
	return (error);
}
3739
3740 /*
3741 * Get the settable attributes out of the mbuf list.
3742 * (Return 0 or EBADRPC)
3743 */
3744 int
nfsrv_sattr(struct nfsrv_descript * nd,vnode_t vp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,NFSACL_T * aclp,NFSACL_T * daclp,struct thread * p)3745 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
3746 nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSACL_T *daclp, struct thread *p)
3747 {
3748 u_int32_t *tl;
3749 struct nfsv2_sattr *sp;
3750 int error = 0, toclient = 0;
3751
3752 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
3753 case ND_NFSV2:
3754 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
3755 /*
3756 * Some old clients didn't fill in the high order 16bits.
3757 * --> check the low order 2 bytes for 0xffff
3758 */
3759 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
3760 nvap->na_mode = nfstov_mode(sp->sa_mode);
3761 if (sp->sa_uid != newnfs_xdrneg1)
3762 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
3763 if (sp->sa_gid != newnfs_xdrneg1)
3764 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
3765 if (sp->sa_size != newnfs_xdrneg1)
3766 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
3767 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
3768 #ifdef notyet
3769 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
3770 #else
3771 nvap->na_atime.tv_sec =
3772 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
3773 nvap->na_atime.tv_nsec = 0;
3774 #endif
3775 }
3776 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
3777 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
3778 break;
3779 case ND_NFSV3:
3780 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3781 if (*tl == newnfs_true) {
3782 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3783 nvap->na_mode = nfstov_mode(*tl);
3784 }
3785 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3786 if (*tl == newnfs_true) {
3787 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3788 nvap->na_uid = fxdr_unsigned(uid_t, *tl);
3789 }
3790 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3791 if (*tl == newnfs_true) {
3792 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3793 nvap->na_gid = fxdr_unsigned(gid_t, *tl);
3794 }
3795 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3796 if (*tl == newnfs_true) {
3797 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3798 nvap->na_size = fxdr_hyper(tl);
3799 }
3800 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3801 switch (fxdr_unsigned(int, *tl)) {
3802 case NFSV3SATTRTIME_TOCLIENT:
3803 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3804 fxdr_nfsv3time(tl, &nvap->na_atime);
3805 toclient = 1;
3806 break;
3807 case NFSV3SATTRTIME_TOSERVER:
3808 vfs_timestamp(&nvap->na_atime);
3809 nvap->na_vaflags |= VA_UTIMES_NULL;
3810 break;
3811 }
3812 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3813 switch (fxdr_unsigned(int, *tl)) {
3814 case NFSV3SATTRTIME_TOCLIENT:
3815 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3816 fxdr_nfsv3time(tl, &nvap->na_mtime);
3817 nvap->na_vaflags &= ~VA_UTIMES_NULL;
3818 break;
3819 case NFSV3SATTRTIME_TOSERVER:
3820 vfs_timestamp(&nvap->na_mtime);
3821 if (!toclient)
3822 nvap->na_vaflags |= VA_UTIMES_NULL;
3823 break;
3824 }
3825 break;
3826 case ND_NFSV4:
3827 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, daclp, p);
3828 }
3829 nfsmout:
3830 NFSEXITCODE2(error, nd);
3831 return (error);
3832 }
3833
3834 /*
3835 * Handle the setable attributes for V4.
3836 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
3837 */
3838 int
nfsv4_sattr(struct nfsrv_descript * nd,vnode_t vp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,NFSACL_T * aclp,NFSACL_T * daclp,struct thread * p)3839 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
3840 nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSACL_T *daclp, struct thread *p)
3841 {
3842 u_int32_t *tl;
3843 int attrsum = 0;
3844 int i, j;
3845 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
3846 int moderet, toclient = 0;
3847 u_char *cp, namestr[NFSV4_SMALLSTR + 1];
3848 uid_t uid;
3849 gid_t gid;
3850 u_short mode, mask; /* Same type as va_mode. */
3851 struct vattr va;
3852
3853 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
3854 if (error)
3855 goto nfsmout;
3856 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3857 attrsize = fxdr_unsigned(int, *tl);
3858
3859 /*
3860 * Loop around getting the setable attributes. If an unsupported
3861 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
3862 * Once nd_repstat != 0, do not set the attribute value, but keep
3863 * parsing the attribute(s).
3864 */
3865 if (retnotsup) {
3866 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3867 bitpos = NFSATTRBIT_MAX;
3868 } else {
3869 bitpos = 0;
3870 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ARCHIVE) ||
3871 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_HIDDEN) ||
3872 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SYSTEM))
3873 nvap->na_flags = 0;
3874 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL) &&
3875 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_POSIXDEFAULTACL) ||
3876 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_POSIXACCESSACL)))
3877 nd->nd_repstat = NFSERR_INVAL;
3878 }
3879 moderet = 0;
3880 for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
3881 if (attrsum > attrsize) {
3882 error = NFSERR_BADXDR;
3883 goto nfsmout;
3884 }
3885 if (NFSISSET_ATTRBIT(attrbitp, bitpos))
3886 switch (bitpos) {
3887 case NFSATTRBIT_SIZE:
3888 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3889 if (!nd->nd_repstat) {
3890 if (vp != NULL && vp->v_type != VREG)
3891 nd->nd_repstat = (vp->v_type == VDIR) ?
3892 NFSERR_ISDIR : NFSERR_INVAL;
3893 else
3894 nvap->na_size = fxdr_hyper(tl);
3895 }
3896 attrsum += NFSX_HYPER;
3897 break;
3898 case NFSATTRBIT_ACL:
3899 error = nfsrv_dissectacl(nd, aclp, true, false, &aceerr,
3900 &aclsize);
3901 if (error)
3902 goto nfsmout;
3903 if (aceerr && !nd->nd_repstat)
3904 nd->nd_repstat = aceerr;
3905 attrsum += aclsize;
3906 break;
3907 case NFSATTRBIT_ARCHIVE:
3908 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
3909 if (nd->nd_repstat == 0) {
3910 if (*tl == newnfs_true)
3911 nvap->na_flags |= UF_ARCHIVE;
3912 }
3913 attrsum += NFSX_UNSIGNED;
3914 break;
3915 case NFSATTRBIT_HIDDEN:
3916 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
3917 if (nd->nd_repstat == 0) {
3918 if (*tl == newnfs_true)
3919 nvap->na_flags |= UF_HIDDEN;
3920 }
3921 attrsum += NFSX_UNSIGNED;
3922 break;
3923 case NFSATTRBIT_MIMETYPE:
3924 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3925 i = fxdr_unsigned(int, *tl);
3926 error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3927 if (error)
3928 goto nfsmout;
3929 if (!nd->nd_repstat)
3930 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3931 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
3932 break;
3933 case NFSATTRBIT_MODE:
3934 moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. */
3935 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3936 if (!nd->nd_repstat)
3937 nvap->na_mode = nfstov_mode(*tl);
3938 attrsum += NFSX_UNSIGNED;
3939 break;
3940 case NFSATTRBIT_OWNER:
3941 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3942 j = fxdr_unsigned(int, *tl);
3943 if (j < 0) {
3944 error = NFSERR_BADXDR;
3945 goto nfsmout;
3946 }
3947 if (j > NFSV4_SMALLSTR)
3948 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
3949 else
3950 cp = namestr;
3951 error = nfsrv_mtostr(nd, cp, j);
3952 if (error) {
3953 if (j > NFSV4_SMALLSTR)
3954 free(cp, M_NFSSTRING);
3955 goto nfsmout;
3956 }
3957 if (!nd->nd_repstat) {
3958 nd->nd_repstat = nfsv4_strtouid(nd, cp, j,
3959 &uid);
3960 if (!nd->nd_repstat)
3961 nvap->na_uid = uid;
3962 }
3963 if (j > NFSV4_SMALLSTR)
3964 free(cp, M_NFSSTRING);
3965 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
3966 break;
3967 case NFSATTRBIT_OWNERGROUP:
3968 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3969 j = fxdr_unsigned(int, *tl);
3970 if (j < 0) {
3971 error = NFSERR_BADXDR;
3972 goto nfsmout;
3973 }
3974 if (j > NFSV4_SMALLSTR)
3975 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
3976 else
3977 cp = namestr;
3978 error = nfsrv_mtostr(nd, cp, j);
3979 if (error) {
3980 if (j > NFSV4_SMALLSTR)
3981 free(cp, M_NFSSTRING);
3982 goto nfsmout;
3983 }
3984 if (!nd->nd_repstat) {
3985 nd->nd_repstat = nfsv4_strtogid(nd, cp, j,
3986 &gid);
3987 if (!nd->nd_repstat)
3988 nvap->na_gid = gid;
3989 }
3990 if (j > NFSV4_SMALLSTR)
3991 free(cp, M_NFSSTRING);
3992 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
3993 break;
3994 case NFSATTRBIT_SYSTEM:
3995 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
3996 if (nd->nd_repstat == 0) {
3997 if (*tl == newnfs_true)
3998 nvap->na_flags |= UF_SYSTEM;
3999 }
4000 attrsum += NFSX_UNSIGNED;
4001 break;
4002 case NFSATTRBIT_TIMEACCESSSET:
4003 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4004 attrsum += NFSX_UNSIGNED;
4005 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
4006 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
4007 if (!nd->nd_repstat)
4008 fxdr_nfsv4time(tl, &nvap->na_atime);
4009 toclient = 1;
4010 attrsum += NFSX_V4TIME;
4011 } else if (!nd->nd_repstat) {
4012 vfs_timestamp(&nvap->na_atime);
4013 nvap->na_vaflags |= VA_UTIMES_NULL;
4014 }
4015 break;
4016 case NFSATTRBIT_TIMEBACKUP:
4017 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
4018 if (!nd->nd_repstat)
4019 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
4020 attrsum += NFSX_V4TIME;
4021 break;
4022 case NFSATTRBIT_TIMECREATE:
4023 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
4024 if (!nd->nd_repstat)
4025 fxdr_nfsv4time(tl, &nvap->na_btime);
4026 attrsum += NFSX_V4TIME;
4027 break;
4028 case NFSATTRBIT_TIMEMODIFYSET:
4029 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4030 attrsum += NFSX_UNSIGNED;
4031 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
4032 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
4033 if (!nd->nd_repstat)
4034 fxdr_nfsv4time(tl, &nvap->na_mtime);
4035 nvap->na_vaflags &= ~VA_UTIMES_NULL;
4036 attrsum += NFSX_V4TIME;
4037 } else if (!nd->nd_repstat) {
4038 vfs_timestamp(&nvap->na_mtime);
4039 if (!toclient)
4040 nvap->na_vaflags |= VA_UTIMES_NULL;
4041 }
4042 break;
4043 case NFSATTRBIT_MODESETMASKED:
4044 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4045 mode = fxdr_unsigned(u_short, *tl++);
4046 mask = fxdr_unsigned(u_short, *tl);
4047 /*
4048 * vp == NULL implies an Open/Create operation.
4049 * This attribute can only be used for Setattr and
4050 * only for NFSv4.1 or higher.
4051 * If moderet != 0, a mode attribute has also been
4052 * specified and this attribute cannot be done in the
4053 * same Setattr operation.
4054 */
4055 if (!nd->nd_repstat) {
4056 if ((nd->nd_flag & ND_NFSV41) == 0)
4057 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
4058 else if ((mode & ~07777) != 0 ||
4059 (mask & ~07777) != 0 || vp == NULL)
4060 nd->nd_repstat = NFSERR_INVAL;
4061 else if (moderet == 0)
4062 moderet = VOP_GETATTR(vp, &va,
4063 nd->nd_cred);
4064 if (moderet == 0)
4065 nvap->na_mode = (mode & mask) |
4066 (va.va_mode & ~mask);
4067 else
4068 nd->nd_repstat = moderet;
4069 }
4070 attrsum += 2 * NFSX_UNSIGNED;
4071 break;
4072 case NFSATTRBIT_MODEUMASK:
4073 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4074 mode = fxdr_unsigned(u_short, *tl++);
4075 mask = fxdr_unsigned(u_short, *tl);
4076 /*
4077 * If moderet != 0, mode has already been done.
4078 * If vp != NULL, this is not a file object creation.
4079 */
4080 if (!nd->nd_repstat) {
4081 if ((nd->nd_flag & ND_NFSV42) == 0)
4082 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
4083 else if ((mask & ~0777) != 0 || vp != NULL ||
4084 moderet != 0)
4085 nd->nd_repstat = NFSERR_INVAL;
4086 else
4087 nvap->na_mode = (mode & ~mask);
4088 }
4089 attrsum += 2 * NFSX_UNSIGNED;
4090 break;
4091 case NFSATTRBIT_POSIXACCESSACL:
4092 error = nfsrv_dissectacl(nd, aclp, true, true, &aceerr,
4093 &aclsize);
4094 if (error != 0)
4095 goto nfsmout;
4096 if (!nd->nd_repstat) {
4097 if ((nd->nd_flag & ND_NFSV42) == 0)
4098 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
4099 else if (aclp != NULL && aclp->acl_cnt == 0)
4100 nd->nd_repstat = NFSERR_INVAL;
4101 else if (aceerr != 0)
4102 nd->nd_repstat = aceerr;
4103 }
4104 attrsum += aclsize;
4105 break;
4106 case NFSATTRBIT_POSIXDEFAULTACL:
4107 error = nfsrv_dissectacl(nd, daclp, true, true, &aceerr,
4108 &aclsize);
4109 if (error != 0)
4110 goto nfsmout;
4111 if (!nd->nd_repstat) {
4112 if ((nd->nd_flag & ND_NFSV42) == 0)
4113 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
4114 else if (aclp != NULL && aclp->acl_cnt == 0)
4115 nd->nd_repstat = NFSERR_INVAL;
4116 else if (aceerr != 0)
4117 nd->nd_repstat = aceerr;
4118 else if (vp != NULL && vp->v_type != VDIR)
4119 nd->nd_repstat = NFSERR_INVAL;
4120 }
4121 attrsum += aclsize;
4122 break;
4123 default:
4124 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
4125 /*
4126 * set bitpos so we drop out of the loop.
4127 */
4128 bitpos = NFSATTRBIT_MAX;
4129 break;
4130 }
4131 }
4132
4133 /*
4134 * some clients pad the attrlist, so we need to skip over the
4135 * padding. This also skips over unparsed non-supported attributes.
4136 */
4137 if (attrsum > attrsize) {
4138 error = NFSERR_BADXDR;
4139 } else {
4140 attrsize = NFSM_RNDUP(attrsize);
4141 if (attrsum < attrsize)
4142 error = nfsm_advance(nd, attrsize - attrsum, -1);
4143 }
4144 nfsmout:
4145 NFSEXITCODE2(error, nd);
4146 return (error);
4147 }
4148
4149 /*
4150 * Check/setup export credentials.
4151 */
4152 int
nfsd_excred(struct nfsrv_descript * nd,struct nfsexstuff * exp,struct ucred * credanon,bool testsec)4153 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
4154 struct ucred *credanon, bool testsec)
4155 {
4156 int error;
4157
4158 /*
4159 * Check/setup credentials.
4160 */
4161 if (nd->nd_flag & ND_GSS)
4162 exp->nes_exflag &= ~MNT_EXPORTANON;
4163
4164 /*
4165 * Check to see if the operation is allowed for this security flavor.
4166 */
4167 error = 0;
4168 if (testsec) {
4169 error = nfsvno_testexp(nd, exp);
4170 if (error != 0)
4171 goto out;
4172 }
4173
4174 /*
4175 * Check to see if the file system is exported V4 only.
4176 */
4177 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
4178 error = NFSERR_PROGNOTV4;
4179 goto out;
4180 }
4181
4182 /*
4183 * Now, map the user credentials.
4184 * (Note that ND_AUTHNONE will only be set for an NFSv3
4185 * Fsinfo RPC. If set for anything else, this code might need
4186 * to change.)
4187 */
4188 if (NFSVNO_EXPORTED(exp)) {
4189 if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) ||
4190 NFSVNO_EXPORTANON(exp) ||
4191 (nd->nd_flag & ND_AUTHNONE) != 0) {
4192 nd->nd_cred->cr_uid = credanon->cr_uid;
4193 nd->nd_cred->cr_gid = credanon->cr_gid;
4194 crsetgroups(nd->nd_cred, credanon->cr_ngroups,
4195 credanon->cr_groups);
4196 } else if ((nd->nd_flag & ND_GSS) == 0) {
4197 /*
4198 * If using AUTH_SYS, call nfsrv_getgrpscred() to see
4199 * if there is a replacement credential with a group
4200 * list set up by "nfsuserd -manage-gids".
4201 * If there is no replacement, nfsrv_getgrpscred()
4202 * simply returns its argument.
4203 */
4204 nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred);
4205 }
4206 }
4207
4208 out:
4209 NFSEXITCODE2(error, nd);
4210 return (error);
4211 }
4212
4213 /*
4214 * Check exports.
4215 */
4216 int
nfsvno_checkexp(struct mount * mp,struct sockaddr * nam,struct nfsexstuff * exp,struct ucred ** credp)4217 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
4218 struct ucred **credp)
4219 {
4220 int error;
4221
4222 error = 0;
4223 *credp = NULL;
4224 MNT_ILOCK(mp);
4225 if (mp->mnt_exjail == NULL ||
4226 mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison)
4227 error = EACCES;
4228 MNT_IUNLOCK(mp);
4229 if (error == 0)
4230 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
4231 &exp->nes_numsecflavor, exp->nes_secflavors);
4232 if (error) {
4233 if (VNET(nfs_rootfhset)) {
4234 exp->nes_exflag = 0;
4235 exp->nes_numsecflavor = 0;
4236 error = 0;
4237 }
4238 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
4239 MAXSECFLAVORS) {
4240 printf("nfsvno_checkexp: numsecflavors out of range\n");
4241 exp->nes_numsecflavor = 0;
4242 error = EACCES;
4243 }
4244 NFSEXITCODE(error);
4245 return (error);
4246 }
4247
4248 /*
4249 * Get a vnode for a file handle and export stuff.
4250 */
4251 int
nfsvno_fhtovp(struct mount * mp,fhandle_t * fhp,struct sockaddr * nam,int lktype,struct vnode ** vpp,struct nfsexstuff * exp,struct ucred ** credp)4252 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
4253 int lktype, struct vnode **vpp, struct nfsexstuff *exp,
4254 struct ucred **credp)
4255 {
4256 int error;
4257
4258 *credp = NULL;
4259 exp->nes_numsecflavor = 0;
4260 error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
4261 if (error != 0)
4262 /* Make sure the server replies ESTALE to the client. */
4263 error = ESTALE;
4264 if (nam && !error) {
4265 MNT_ILOCK(mp);
4266 if (mp->mnt_exjail == NULL ||
4267 mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison)
4268 error = EACCES;
4269 MNT_IUNLOCK(mp);
4270 if (error == 0)
4271 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
4272 &exp->nes_numsecflavor, exp->nes_secflavors);
4273 if (error) {
4274 if (VNET(nfs_rootfhset)) {
4275 exp->nes_exflag = 0;
4276 exp->nes_numsecflavor = 0;
4277 error = 0;
4278 } else {
4279 vput(*vpp);
4280 }
4281 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
4282 MAXSECFLAVORS) {
4283 printf("nfsvno_fhtovp: numsecflavors out of range\n");
4284 exp->nes_numsecflavor = 0;
4285 error = EACCES;
4286 vput(*vpp);
4287 }
4288 }
4289 NFSEXITCODE(error);
4290 return (error);
4291 }
4292
4293 /*
4294 * nfsd_fhtovp() - convert a fh to a vnode ptr
4295 * - look up fsid in mount list (if not found ret error)
4296 * - get vp and export rights by calling nfsvno_fhtovp()
4297 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
4298 * for AUTH_SYS
4299 * - if mpp != NULL, return the mount point so that it can
4300 * be used for vn_finished_write() by the caller
4301 */
4302 void
nfsd_fhtovp(struct nfsrv_descript * nd,struct nfsrvfh * nfp,int lktype,struct vnode ** vpp,struct nfsexstuff * exp,struct mount ** mpp,int startwrite,int nextop)4303 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
4304 struct vnode **vpp, struct nfsexstuff *exp,
4305 struct mount **mpp, int startwrite, int nextop)
4306 {
4307 struct mount *mp, *mpw;
4308 struct ucred *credanon;
4309 fhandle_t *fhp;
4310 int error;
4311
4312 if (mpp != NULL)
4313 *mpp = NULL;
4314 *vpp = NULL;
4315 fhp = (fhandle_t *)nfp->nfsrvfh_data;
4316 mp = vfs_busyfs(&fhp->fh_fsid);
4317 if (mp == NULL) {
4318 nd->nd_repstat = ESTALE;
4319 goto out;
4320 }
4321
4322 if (startwrite) {
4323 mpw = mp;
4324 error = vn_start_write(NULL, &mpw, V_WAIT);
4325 if (error != 0) {
4326 mpw = NULL;
4327 vfs_unbusy(mp);
4328 nd->nd_repstat = ESTALE;
4329 goto out;
4330 }
4331 if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
4332 lktype = LK_EXCLUSIVE;
4333 } else
4334 mpw = NULL;
4335
4336 nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
4337 &credanon);
4338 vfs_unbusy(mp);
4339
4340 if (nd->nd_repstat == 0 &&
4341 nfp->nfsrvfh_len >= NFSX_MYFH + NFSX_V4NAMEDDIRFH &&
4342 nfp->nfsrvfh_len <= NFSX_MYFH + NFSX_V4NAMEDATTRFH) {
4343 if (nfp->nfsrvfh_len == NFSX_MYFH + NFSX_V4NAMEDDIRFH)
4344 vn_irflag_set_cond(*vpp, VIRF_NAMEDDIR);
4345 else
4346 vn_irflag_set_cond(*vpp, VIRF_NAMEDATTR);
4347 }
4348
4349 /*
4350 * For NFSv4 without a pseudo root fs, unexported file handles
4351 * can be returned, so that Lookup works everywhere.
4352 */
4353 if (!nd->nd_repstat && exp->nes_exflag == 0 &&
4354 !(nd->nd_flag & ND_NFSV4)) {
4355 vput(*vpp);
4356 *vpp = NULL;
4357 nd->nd_repstat = EACCES;
4358 }
4359
4360 /*
4361 * Personally, I've never seen any point in requiring a
4362 * reserved port#, since only in the rare case where the
4363 * clients are all boxes with secure system privileges,
4364 * does it provide any enhanced security, but... some people
4365 * believe it to be useful and keep putting this code back in.
4366 * (There is also some "security checker" out there that
4367 * complains if the nfs server doesn't enforce this.)
4368 * However, note the following:
4369 * RFC3530 (NFSv4) specifies that a reserved port# not be
4370 * required.
4371 * RFC2623 recommends that, if a reserved port# is checked for,
4372 * that there be a way to turn that off--> ifdef'd.
4373 */
4374 #ifdef NFS_REQRSVPORT
4375 if (!nd->nd_repstat) {
4376 struct sockaddr_in *saddr;
4377 struct sockaddr_in6 *saddr6;
4378
4379 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
4380 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
4381 if (!(nd->nd_flag & ND_NFSV4) &&
4382 ((saddr->sin_family == AF_INET &&
4383 ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
4384 (saddr6->sin6_family == AF_INET6 &&
4385 ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
4386 vput(*vpp);
4387 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
4388 }
4389 }
4390 #endif /* NFS_REQRSVPORT */
4391
4392 /*
4393 * Check/setup credentials.
4394 */
4395 if (!nd->nd_repstat) {
4396 nd->nd_saveduid = nd->nd_cred->cr_uid;
4397 nd->nd_repstat = nfsd_excred(nd, exp, credanon,
4398 nfsrv_checkwrongsec(nd, nextop, (*vpp)->v_type));
4399 if (nd->nd_repstat)
4400 vput(*vpp);
4401 }
4402 if (credanon != NULL)
4403 crfree(credanon);
4404 if (nd->nd_repstat) {
4405 vn_finished_write(mpw);
4406 *vpp = NULL;
4407 } else if (mpp != NULL) {
4408 *mpp = mpw;
4409 }
4410
4411 out:
4412 NFSEXITCODE2(0, nd);
4413 }
4414
4415 /*
4416 * glue for fp.
4417 */
4418 static int
fp_getfvp(struct thread * p,int fd,struct file ** fpp,struct vnode ** vpp)4419 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
4420 {
4421 struct filedesc *fdp;
4422 struct file *fp;
4423 int error = 0;
4424
4425 fdp = p->td_proc->p_fd;
4426 if (fd < 0 || fd >= fdp->fd_nfiles ||
4427 (fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
4428 error = EBADF;
4429 goto out;
4430 }
4431 *fpp = fp;
4432
4433 out:
4434 NFSEXITCODE(error);
4435 return (error);
4436 }
4437
4438 /*
4439 * Called from nfssvc() to update the exports list. Just call
4440 * vfs_export(). This has to be done, since the v4 root fake fs isn't
4441 * in the mount list.
4442 */
4443 int
nfsrv_v4rootexport(void * argp,struct ucred * cred,struct thread * p)4444 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
4445 {
4446 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
4447 int error = 0;
4448 struct nameidata nd;
4449 fhandle_t fh;
4450
4451 error = vfs_export(VNET(nfsv4root_mnt), &nfsexargp->export, false);
4452 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
4453 VNET(nfs_rootfhset) = 0;
4454 else if (error == 0) {
4455 if (nfsexargp->fspec == NULL) {
4456 error = EPERM;
4457 goto out;
4458 }
4459 /*
4460 * If fspec != NULL, this is the v4root path.
4461 */
4462 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec);
4463 if ((error = namei(&nd)) != 0)
4464 goto out;
4465 NDFREE_PNBUF(&nd);
4466 error = nfsvno_getfh(nd.ni_vp, &fh, p);
4467 vrele(nd.ni_vp);
4468 if (!error) {
4469 VNET(nfs_rootfh).nfsrvfh_len = NFSX_MYFH;
4470 NFSBCOPY((caddr_t)&fh,
4471 VNET(nfs_rootfh).nfsrvfh_data,
4472 sizeof (fhandle_t));
4473 VNET(nfs_rootfhset) = 1;
4474 }
4475 }
4476
4477 out:
4478 NFSEXITCODE(error);
4479 return (error);
4480 }
4481
/*
 * This function is meant to test whether the system is near its limit
 * for memory allocation via malloc() or mget() and return non-zero iff
 * either of these resources is near its limit.
 * XXX (For now, this is just a stub.)
 *
 * The stub only does anything when nfsrv_testmalloclimit is non-zero:
 * it then returns 1 on every 1000th call (counted only while the knob is
 * set) and prints a message on the 1st, 101st, ... of those hits.
 */
int nfsrv_testmalloclimit = 0;

int
nfsrv_mallocmget_limit(void)
{
	static int printmesg = 0;
	static int testval = 1;

	/* Knob off: no counters are touched. */
	if (nfsrv_testmalloclimit == 0)
		return (0);

	if ((testval++ % 1000) != 0)
		return (0);

	if ((printmesg++ % 100) == 0)
		printf("nfsd: malloc/mget near limit\n");
	return (1);
}
4502
4503 /*
4504 * BSD specific initialization of a mount point.
4505 */
4506 void
nfsd_mntinit(void)4507 nfsd_mntinit(void)
4508 {
4509
4510 NFSD_LOCK();
4511 if (VNET(nfsrv_mntinited)) {
4512 NFSD_UNLOCK();
4513 return;
4514 }
4515 VNET(nfsrv_mntinited) = true;
4516 nfsrvd_init(0);
4517 NFSD_UNLOCK();
4518
4519 VNET(nfsv4root_mnt) = malloc(sizeof(struct mount), M_TEMP,
4520 M_WAITOK | M_ZERO);
4521 VNET(nfsv4root_mnt)->mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
4522 mtx_init(&VNET(nfsv4root_mnt)->mnt_mtx, "nfs4mnt", NULL, MTX_DEF);
4523 lockinit(&VNET(nfsv4root_mnt)->mnt_explock, PVFS, "explock", 0, 0);
4524 TAILQ_INIT(&VNET(nfsv4root_mnt)->mnt_nvnodelist);
4525 TAILQ_INIT(&VNET(nfsv4root_mnt)->mnt_lazyvnodelist);
4526 VNET(nfsv4root_mnt)->mnt_export = NULL;
4527 TAILQ_INIT(&VNET(nfsv4root_opt));
4528 TAILQ_INIT(&VNET(nfsv4root_newopt));
4529 VNET(nfsv4root_mnt)->mnt_opt = &VNET(nfsv4root_opt);
4530 VNET(nfsv4root_mnt)->mnt_optnew = &VNET(nfsv4root_newopt);
4531 VNET(nfsv4root_mnt)->mnt_nvnodelistsize = 0;
4532 VNET(nfsv4root_mnt)->mnt_lazyvnodelistsize = 0;
4533 callout_init(&VNET(nfsd_callout), 1);
4534
4535 nfsrvd_initcache();
4536 nfsd_init();
4537 }
4538
4539 static void
nfsd_timer(void * arg)4540 nfsd_timer(void *arg)
4541 {
4542 struct vnet *vnetp;
4543
4544 vnetp = (struct vnet *)arg;
4545 CURVNET_SET_QUIET(vnetp);
4546 nfsrv_servertimer(vnetp);
4547 callout_reset_sbt(&VNET(nfsd_callout), SBT_1S, SBT_1S, nfsd_timer,
4548 arg, 0);
4549 CURVNET_RESTORE();
4550 }
4551
4552 /*
4553 * Get a vnode for a file handle, without checking exports, etc.
4554 */
4555 struct vnode *
nfsvno_getvp(fhandle_t * fhp)4556 nfsvno_getvp(fhandle_t *fhp)
4557 {
4558 struct mount *mp;
4559 struct vnode *vp;
4560 int error;
4561
4562 mp = vfs_busyfs(&fhp->fh_fsid);
4563 if (mp == NULL)
4564 return (NULL);
4565 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp);
4566 vfs_unbusy(mp);
4567 if (error)
4568 return (NULL);
4569 return (vp);
4570 }
4571
4572 /*
4573 * Do a local VOP_ADVLOCK().
4574 */
4575 int
nfsvno_advlock(struct vnode * vp,int ftype,u_int64_t first,u_int64_t end,struct thread * td)4576 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
4577 u_int64_t end, struct thread *td)
4578 {
4579 int error = 0;
4580 struct flock fl;
4581 u_int64_t tlen;
4582
4583 if (nfsrv_dolocallocks == 0)
4584 goto out;
4585 ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked");
4586
4587 fl.l_whence = SEEK_SET;
4588 fl.l_type = ftype;
4589 fl.l_start = (off_t)first;
4590 if (end == NFS64BITSSET) {
4591 fl.l_len = 0;
4592 } else {
4593 tlen = end - first;
4594 fl.l_len = (off_t)tlen;
4595 }
4596 /*
4597 * For FreeBSD8, the l_pid and l_sysid must be set to the same
4598 * values for all calls, so that all locks will be held by the
4599 * nfsd server. (The nfsd server handles conflicts between the
4600 * various clients.)
4601 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
4602 * bytes, so it can't be put in l_sysid.
4603 */
4604 if (nfsv4_sysid == 0)
4605 nfsv4_sysid = nlm_acquire_next_sysid();
4606 fl.l_pid = (pid_t)0;
4607 fl.l_sysid = (int)nfsv4_sysid;
4608
4609 if (ftype == F_UNLCK)
4610 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
4611 (F_POSIX | F_REMOTE));
4612 else
4613 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
4614 (F_POSIX | F_REMOTE));
4615
4616 out:
4617 NFSEXITCODE(error);
4618 return (error);
4619 }
4620
4621 /*
4622 * Check the nfsv4 root exports.
4623 */
4624 int
nfsvno_v4rootexport(struct nfsrv_descript * nd)4625 nfsvno_v4rootexport(struct nfsrv_descript *nd)
4626 {
4627 struct ucred *credanon;
4628 int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i;
4629 uint64_t exflags;
4630
4631 error = vfs_stdcheckexp(VNET(nfsv4root_mnt), nd->nd_nam, &exflags,
4632 &credanon, &numsecflavor, secflavors);
4633 if (error) {
4634 error = NFSERR_PROGUNAVAIL;
4635 goto out;
4636 }
4637 if (credanon != NULL)
4638 crfree(credanon);
4639 for (i = 0; i < numsecflavor; i++) {
4640 if (secflavors[i] == AUTH_SYS)
4641 nd->nd_flag |= ND_EXAUTHSYS;
4642 else if (secflavors[i] == RPCSEC_GSS_KRB5)
4643 nd->nd_flag |= ND_EXGSS;
4644 else if (secflavors[i] == RPCSEC_GSS_KRB5I)
4645 nd->nd_flag |= ND_EXGSSINTEGRITY;
4646 else if (secflavors[i] == RPCSEC_GSS_KRB5P)
4647 nd->nd_flag |= ND_EXGSSPRIVACY;
4648 }
4649
4650 /* And set ND_EXxx flags for TLS. */
4651 if ((exflags & MNT_EXTLS) != 0) {
4652 nd->nd_flag |= ND_EXTLS;
4653 if ((exflags & MNT_EXTLSCERT) != 0)
4654 nd->nd_flag |= ND_EXTLSCERT;
4655 if ((exflags & MNT_EXTLSCERTUSER) != 0)
4656 nd->nd_flag |= ND_EXTLSCERTUSER;
4657 }
4658
4659 out:
4660 NFSEXITCODE(error);
4661 return (error);
4662 }
4663
/*
 * Nfs server pseudo system call for the nfsd's (MPSAFE).
 *
 * Dispatches on uap->flag:
 *  - NFSSVC_NFSDADDSOCK: hand a socket descriptor to nfsrvd_addsock().
 *  - NFSSVC_NFSDNFSD: copy in the nfsd arguments (old or new layout) and
 *    run nfsrvd_nfsd() with the nfsd timer armed.
 *  - NFSSVC_PNFSDS: pNFS operations (delete a DS server, copy a mirror).
 *  - anything else is passed on to nfssvc_srvcall().
 * Returns 0 or an errno value.  All exits go through "out" so that
 * CURVNET_RESTORE() pairs with the CURVNET_SET() at the top.
 */
static int
nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
{
	struct file *fp;
	struct nfsd_addsock_args sockarg;
	struct nfsd_nfsd_args nfsdarg;
	struct nfsd_nfsd_oargs onfsdarg;
	struct nfsd_pnfsd_args pnfsdarg;
	struct vnode *vp, *nvp, *curdvp;
	struct pnfsdsfile *pf;
	struct nfsdevice *ds, *fds;
	cap_rights_t rights;
	int buflen, error, ret;
	char *buf, *cp, *cp2, *cp3;
	char fname[PNFS_FILENAME_LEN + 1];

	CURVNET_SET(TD_TO_VNET(td));
	if (uap->flag & NFSSVC_NFSDADDSOCK) {
		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
		if (error)
			goto out;
		/*
		 * Get the file for the descriptor, requesting the
		 * CAP_SOCK_SERVER right.
		 * NOTE(review): an earlier comment here spoke of asking for
		 * all rights; the code requests only CAP_SOCK_SERVER.
		 */
		error = fget(td, sockarg.sock,
		    cap_rights_init_one(&rights, CAP_SOCK_SERVER), &fp);
		if (error != 0)
			goto out;
		/* Only sockets are accepted. */
		if (fp->f_type != DTYPE_SOCKET) {
			fdrop(fp, td);
			error = EPERM;
			goto out;
		}
		error = nfsrvd_addsock(fp);
		fdrop(fp, td);
	} else if (uap->flag & NFSSVC_NFSDNFSD) {
		if (uap->argp == NULL) {
			error = EINVAL;
			goto out;
		}
		if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) {
			/*
			 * Old argument layout: copy the three fields it has
			 * and fill in the rest with "no pNFS" values.
			 */
			error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg));
			if (error == 0) {
				nfsdarg.principal = onfsdarg.principal;
				nfsdarg.minthreads = onfsdarg.minthreads;
				nfsdarg.maxthreads = onfsdarg.maxthreads;
				nfsdarg.version = 1;
				nfsdarg.addr = NULL;
				nfsdarg.addrlen = 0;
				nfsdarg.dnshost = NULL;
				nfsdarg.dnshostlen = 0;
				nfsdarg.dspath = NULL;
				nfsdarg.dspathlen = 0;
				nfsdarg.mdspath = NULL;
				nfsdarg.mdspathlen = 0;
				nfsdarg.mirrorcnt = 1;
			}
		} else
			error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg));
		if (error)
			goto out;
		/*
		 * The four strings are only copied in when every one of
		 * them is non-NULL with a length in (0, 10000) and mirrorcnt
		 * is within [1, NFSDEV_MAXMIRRORS].  Otherwise they are all
		 * reset to NULL/0 and mirrorcnt to 1 (else branch below).
		 */
		if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 &&
		    nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 &&
		    nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 &&
		    nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 &&
		    nfsdarg.mirrorcnt >= 1 &&
		    nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS &&
		    nfsdarg.addr != NULL && nfsdarg.dnshost != NULL &&
		    nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) {
			NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d"
			    " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen,
			    nfsdarg.dspathlen, nfsdarg.dnshostlen,
			    nfsdarg.mdspathlen, nfsdarg.mirrorcnt);
			/*
			 * Each user pointer in nfsdarg is replaced by a
			 * kernel copy in turn.  On a copyin() failure the
			 * copies made so far are freed before bailing out.
			 */
			cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen);
			if (error != 0) {
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.addrlen] = '\0';	/* Ensure nul term. */
			nfsdarg.addr = cp;
			cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen);
			if (error != 0) {
				free(nfsdarg.addr, M_TEMP);
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.dnshostlen] = '\0';	/* Ensure nul term. */
			nfsdarg.dnshost = cp;
			cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen);
			if (error != 0) {
				free(nfsdarg.addr, M_TEMP);
				free(nfsdarg.dnshost, M_TEMP);
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.dspathlen] = '\0';	/* Ensure nul term. */
			nfsdarg.dspath = cp;
			cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen);
			if (error != 0) {
				free(nfsdarg.addr, M_TEMP);
				free(nfsdarg.dnshost, M_TEMP);
				free(nfsdarg.dspath, M_TEMP);
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.mdspathlen] = '\0';	/* Ensure nul term. */
			nfsdarg.mdspath = cp;
		} else {
			nfsdarg.addr = NULL;
			nfsdarg.addrlen = 0;
			nfsdarg.dnshost = NULL;
			nfsdarg.dnshostlen = 0;
			nfsdarg.dspath = NULL;
			nfsdarg.dspathlen = 0;
			nfsdarg.mdspath = NULL;
			nfsdarg.mdspathlen = 0;
			nfsdarg.mirrorcnt = 1;
		}
		/*
		 * nfsd_timer() re-arms its own callout, so this call starts
		 * the timer; it is drained once nfsrvd_nfsd() has returned.
		 */
		nfsd_timer(TD_TO_VNET(td));
		error = nfsrvd_nfsd(td, &nfsdarg);
		callout_drain(&VNET(nfsd_callout));
		/* These are either the kernel copies made above or NULL. */
		free(nfsdarg.addr, M_TEMP);
		free(nfsdarg.dnshost, M_TEMP);
		free(nfsdarg.dspath, M_TEMP);
		free(nfsdarg.mdspath, M_TEMP);
	} else if (uap->flag & NFSSVC_PNFSDS) {
		error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg));
		if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER ||
		    pnfsdarg.op == PNFSDOP_FORCEDELDS)) {
			/* Delete a DS server identified by its path. */
			cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
			error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1,
			    NULL);
			if (error == 0)
				error = nfsrv_deldsserver(pnfsdarg.op, cp, td);
			free(cp, M_TEMP);
		} else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) {
			/*
			 * Copy a mirror.  mdspath is required; dspath and
			 * curdspath are optional.  cp2 and cp3 are set to
			 * NULL when not copied in, so the free() calls at
			 * the end are safe on every path.
			 */
			cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
			buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS;
			buf = malloc(buflen, M_TEMP, M_WAITOK);
			error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1,
			    NULL);
			NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error);
			if (error == 0 && pnfsdarg.dspath != NULL) {
				cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
				error = copyinstr(pnfsdarg.dspath, cp2,
				    PATH_MAX + 1, NULL);
				NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n",
				    error);
			} else
				cp2 = NULL;
			if (error == 0 && pnfsdarg.curdspath != NULL) {
				cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
				error = copyinstr(pnfsdarg.curdspath, cp3,
				    PATH_MAX + 1, NULL);
				NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n",
				    error);
			} else
				cp3 = NULL;
			curdvp = NULL;
			fds = NULL;
			if (error == 0)
				error = nfsrv_mdscopymr(cp, cp2, cp3, buf,
				    &buflen, fname, td, &vp, &nvp, &pf, &ds,
				    &fds);
			NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error);
			if (error == 0) {
				/* Clamp an out of range DS directory index. */
				if (pf->dsf_dir >= nfsrv_dsdirsize) {
					printf("copymr: dsdir out of range\n");
					pf->dsf_dir = 0;
				}
				NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen);
				error = nfsrv_copymr(vp, nvp,
				    ds->nfsdev_dsdir[pf->dsf_dir], ds, pf,
				    (struct pnfsdsfile *)buf,
				    buflen / sizeof(*pf), td->td_ucred, td);
				vput(vp);
				vput(nvp);
				/*
				 * After a successful copy, if nfsrv_mdscopymr()
				 * returned a device in fds, remove fname from
				 * that device's directory.  A failure to lock
				 * the directory is silently skipped.
				 */
				if (fds != NULL && error == 0) {
					curdvp = fds->nfsdev_dsdir[pf->dsf_dir];
					ret = vn_lock(curdvp, LK_EXCLUSIVE);
					if (ret == 0) {
						nfsrv_dsremove(curdvp, fname,
						    td->td_ucred, td);
						NFSVOPUNLOCK(curdvp);
					}
				}
				NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error);
			}
			free(cp, M_TEMP);
			free(cp2, M_TEMP);
			free(cp3, M_TEMP);
			free(buf, M_TEMP);
		}
	} else {
		error = nfssvc_srvcall(td, uap, td->td_ucred);
	}

out:
	CURVNET_RESTORE();
	NFSEXITCODE(error);
	return (error);
}
4878
/*
 * Handle the nfssvc() server calls that nfssvc_nfsd() does not handle
 * itself.  Exactly one branch is taken, chosen by uap->flag; when no
 * flag matches, the initial EINVAL is returned.
 * Returns 0 or an errno value.
 */
static int
nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
{
	struct nfsex_args export;
	struct nfsex_oldargs oexp;
	struct file *fp = NULL;
	int stablefd, i, len;
	struct nfsd_clid adminrevoke;
	struct nfsd_dumplist dumplist;
	struct nfsd_dumpclients *dumpclients;
	struct nfsd_dumplocklist dumplocklist;
	struct nfsd_dumplocks *dumplocks;
	struct nameidata nd;
	vnode_t vp;
	int error = EINVAL, igotlock;
	struct proc *procp;
	gid_t *grps;

	if (uap->flag & NFSSVC_PUBLICFH) {
		/* Set the public file handle from user space. */
		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
		    sizeof (fhandle_t));
		error = copyin(uap->argp,
		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
		if (!error)
			nfs_pubfhset = 1;
	} else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
	    (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) {
		/* NFSv4 root export, new argument layout. */
		error = copyin(uap->argp,(caddr_t)&export,
		    sizeof (struct nfsex_args));
		if (!error) {
			grps = NULL;
			if (export.export.ex_ngroups > NGROUPS_MAX ||
			    export.export.ex_ngroups < 0)
				error = EINVAL;
			else if (export.export.ex_ngroups > 0) {
				/*
				 * Replace the user space group list pointer
				 * with a kernel copy.
				 */
				grps = malloc(export.export.ex_ngroups *
				    sizeof(gid_t), M_TEMP, M_WAITOK);
				error = copyin(export.export.ex_groups, grps,
				    export.export.ex_ngroups * sizeof(gid_t));
				export.export.ex_groups = grps;
			} else
				export.export.ex_groups = NULL;
			if (!error)
				error = nfsrv_v4rootexport(&export, cred, p);
			free(grps, M_TEMP);
		}
	} else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
	    NFSSVC_V4ROOTEXPORT) {
		/*
		 * NFSv4 root export, old argument layout: convert it field
		 * by field into the new layout before use.
		 */
		error = copyin(uap->argp,(caddr_t)&oexp,
		    sizeof (struct nfsex_oldargs));
		if (!error) {
			memset(&export.export, 0, sizeof(export.export));
			export.export.ex_flags = (uint64_t)oexp.export.ex_flags;
			export.export.ex_root = oexp.export.ex_root;
			export.export.ex_uid = oexp.export.ex_anon.cr_uid;
			export.export.ex_ngroups =
			    oexp.export.ex_anon.cr_ngroups;
			export.export.ex_groups = NULL;
			if (export.export.ex_ngroups > XU_NGROUPS ||
			    export.export.ex_ngroups < 0)
				error = EINVAL;
			else if (export.export.ex_ngroups > 0) {
				export.export.ex_groups = malloc(
				    export.export.ex_ngroups * sizeof(gid_t),
				    M_TEMP, M_WAITOK);
				for (i = 0; i < export.export.ex_ngroups; i++)
					export.export.ex_groups[i] =
					    oexp.export.ex_anon.cr_groups[i];
			}
			export.export.ex_addr = oexp.export.ex_addr;
			export.export.ex_addrlen = oexp.export.ex_addrlen;
			export.export.ex_mask = oexp.export.ex_mask;
			export.export.ex_masklen = oexp.export.ex_masklen;
			export.export.ex_indexfile = oexp.export.ex_indexfile;
			export.export.ex_numsecflavors =
			    oexp.export.ex_numsecflavors;
			/* The flavor count is validated before copying. */
			if (export.export.ex_numsecflavors >= MAXSECFLAVORS ||
			    export.export.ex_numsecflavors < 0)
				error = EINVAL;
			else {
				for (i = 0; i < export.export.ex_numsecflavors;
				    i++)
					export.export.ex_secflavors[i] =
					    oexp.export.ex_secflavors[i];
			}
			export.fspec = oexp.fspec;
			if (error == 0)
				error = nfsrv_v4rootexport(&export, cred, p);
			free(export.export.ex_groups, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
		nfs_pubfhset = 0;
		error = 0;
	} else if (uap->flag & NFSSVC_STABLERESTART) {
		/*
		 * Record the stable restart file.  The descriptor must be
		 * open for both reading and writing (else EBADF) and no
		 * nfsd may be running (else ENXIO).
		 */
		error = copyin(uap->argp, (caddr_t)&stablefd,
		    sizeof (int));
		if (!error)
			error = fp_getfvp(p, stablefd, &fp, &vp);
		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
			error = EBADF;
		if (!error && VNET(nfsrv_numnfsd) != 0)
			error = ENXIO;
		if (!error) {
			VNET(nfsrv_stablefirst).nsf_fp = fp;
			nfsrv_setupstable(p);
		}
	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
		error = copyin(uap->argp, (caddr_t)&adminrevoke,
		    sizeof (struct nfsd_clid));
		if (!error)
			error = nfsrv_adminrevoke(&adminrevoke, p);
	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
		/*
		 * Dump the client list to user space.  The requested entry
		 * count must be within [1, NFSRV_MAXDUMPLIST] (else EPERM).
		 */
		error = copyin(uap->argp, (caddr_t)&dumplist,
		    sizeof (struct nfsd_dumplist));
		if (!error && (dumplist.ndl_size < 1 ||
		    dumplist.ndl_size > NFSRV_MAXDUMPLIST))
			error = EPERM;
		if (!error) {
			len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
			/* Zeroed, since the whole buffer is copied out. */
			dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
			nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
			error = copyout(dumpclients, dumplist.ndl_list, len);
			free(dumpclients, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
		/*
		 * Dump the locks for the named file to user space.  The
		 * requested entry count must be within
		 * [1, NFSRV_MAXDUMPLIST] (else EPERM).
		 */
		error = copyin(uap->argp, (caddr_t)&dumplocklist,
		    sizeof (struct nfsd_dumplocklist));
		if (!error && (dumplocklist.ndllck_size < 1 ||
		    dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
			error = EPERM;
		if (!error)
			error = nfsrv_lookupfilename(&nd,
			    dumplocklist.ndllck_fname, p);
		if (!error) {
			len = sizeof (struct nfsd_dumplocks) *
			    dumplocklist.ndllck_size;
			/* Zeroed, since the whole buffer is copied out. */
			dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
			nfsrv_dumplocks(nd.ni_vp, dumplocks,
			    dumplocklist.ndllck_size, p);
			vput(nd.ni_vp);
			error = copyout(dumplocks, dumplocklist.ndllck_list,
			    len);
			free(dumplocks, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_BACKUPSTABLE) {
		/*
		 * Remember the calling process (pid, command name and start
		 * time) as the master nfsd; nfsrv_backupstable() uses these
		 * to find and signal it.
		 * NOTE(review): error is not set in this branch, so the
		 * initial EINVAL is returned even though the state was
		 * recorded -- confirm whether callers rely on that.
		 */
		procp = p->td_proc;
		PROC_LOCK(procp);
		nfsd_master_pid = procp->p_pid;
		bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
		nfsd_master_start = procp->p_stats->p_start;
		VNET(nfsd_master_proc) = procp;
		PROC_UNLOCK(procp);
	} else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
		NFSLOCKV4ROOTMUTEX();
		if (!VNET(nfsrv_suspend_nfsd)) {
			/*
			 * Lock out all nfsd threads.  Keep retrying
			 * nfsv4_lock() until it succeeds or the suspend
			 * flag is found set.
			 */
			do {
				igotlock = nfsv4_lock(
				    &VNET(nfsd_suspend_lock), 1, NULL,
				    NFSV4ROOTLOCKMUTEXPTR, NULL);
			} while (igotlock == 0 &&
			    !VNET(nfsrv_suspend_nfsd));
			VNET(nfsrv_suspend_nfsd) = true;
		}
		NFSUNLOCKV4ROOTMUTEX();
		error = 0;
	} else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
		/* Undo NFSSVC_SUSPENDNFSD, if a suspend is in effect. */
		NFSLOCKV4ROOTMUTEX();
		if (VNET(nfsrv_suspend_nfsd)) {
			nfsv4_unlock(&VNET(nfsd_suspend_lock), 0);
			VNET(nfsrv_suspend_nfsd) = false;
		}
		NFSUNLOCKV4ROOTMUTEX();
		error = 0;
	}

	NFSEXITCODE(error);
	return (error);
}
5058
5059 /*
5060 * Check exports.
5061 * Returns 0 if ok, 1 otherwise.
5062 */
5063 int
nfsvno_testexp(struct nfsrv_descript * nd,struct nfsexstuff * exp)5064 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
5065 {
5066 int i;
5067
5068 if ((NFSVNO_EXTLS(exp) && (nd->nd_flag & ND_TLS) == 0) ||
5069 (NFSVNO_EXTLSCERT(exp) &&
5070 (nd->nd_flag & ND_TLSCERT) == 0) ||
5071 (NFSVNO_EXTLSCERTUSER(exp) &&
5072 (nd->nd_flag & ND_TLSCERTUSER) == 0)) {
5073 if ((nd->nd_flag & ND_NFSV4) != 0)
5074 return (NFSERR_WRONGSEC);
5075 #ifdef notnow
5076 /* There is currently no auth_stat for this. */
5077 else if ((nd->nd_flag & ND_TLS) == 0)
5078 return (NFSERR_AUTHERR | AUTH_NEEDS_TLS);
5079 else
5080 return (NFSERR_AUTHERR | AUTH_NEEDS_TLS_MUTUAL_HOST);
5081 #endif
5082 else
5083 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
5084 }
5085
5086 /*
5087 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to use
5088 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
5089 */
5090 if ((nd->nd_flag & ND_NFSV3) != 0 && nd->nd_procnum == NFSPROC_FSINFO)
5091 return (0);
5092
5093 /*
5094 * This seems odd, but allow the case where the security flavor
5095 * list is empty. This happens when NFSv4 is traversing non-exported
5096 * file systems. Exported file systems should always have a non-empty
5097 * security flavor list.
5098 */
5099 if (exp->nes_numsecflavor == 0)
5100 return (0);
5101
5102 for (i = 0; i < exp->nes_numsecflavor; i++) {
5103 /*
5104 * The tests for privacy and integrity must be first,
5105 * since ND_GSS is set for everything but AUTH_SYS.
5106 */
5107 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
5108 (nd->nd_flag & ND_GSSPRIVACY))
5109 return (0);
5110 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
5111 (nd->nd_flag & ND_GSSINTEGRITY))
5112 return (0);
5113 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
5114 (nd->nd_flag & ND_GSS))
5115 return (0);
5116 if (exp->nes_secflavors[i] == AUTH_SYS &&
5117 (nd->nd_flag & ND_GSS) == 0)
5118 return (0);
5119 }
5120 if ((nd->nd_flag & ND_NFSV4) != 0)
5121 return (NFSERR_WRONGSEC);
5122 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
5123 }
5124
5125 /*
5126 * Calculate a hash value for the fid in a file handle.
5127 */
5128 uint32_t
nfsrv_hashfh(fhandle_t * fhp)5129 nfsrv_hashfh(fhandle_t *fhp)
5130 {
5131 uint32_t hashval;
5132
5133 hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
5134 return (hashval);
5135 }
5136
5137 /*
5138 * Calculate a hash value for the sessionid.
5139 */
5140 uint32_t
nfsrv_hashsessionid(uint8_t * sessionid)5141 nfsrv_hashsessionid(uint8_t *sessionid)
5142 {
5143 uint32_t hashval;
5144
5145 hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0);
5146 return (hashval);
5147 }
5148
5149 /*
5150 * Signal the userland master nfsd to backup the stable restart file.
5151 */
5152 void
nfsrv_backupstable(void)5153 nfsrv_backupstable(void)
5154 {
5155 struct proc *procp;
5156
5157 if (VNET(nfsd_master_proc) != NULL) {
5158 procp = pfind(nfsd_master_pid);
5159 /* Try to make sure it is the correct process. */
5160 if (procp == VNET(nfsd_master_proc) &&
5161 procp->p_stats->p_start.tv_sec ==
5162 nfsd_master_start.tv_sec &&
5163 procp->p_stats->p_start.tv_usec ==
5164 nfsd_master_start.tv_usec &&
5165 strcmp(procp->p_comm, nfsd_master_comm) == 0)
5166 kern_psignal(procp, SIGUSR2);
5167 else
5168 VNET(nfsd_master_proc) = NULL;
5169
5170 if (procp != NULL)
5171 PROC_UNLOCK(procp);
5172 }
5173 }
5174
5175 /*
5176 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror.
5177 * The arguments are in a structure, so that they can be passed through
5178 * taskqueue for a kernel process to execute this function.
5179 */
/*
 * Per-request argument block for start_dscreate()/nfsrv_dscreate().
 * nfsrv_pnfscreate() fills in one of these for every DS file except the
 * last one, hands it to nfs_pnfsio() and later polls inprog/done.
 * NOTE(review): done, inprog and tsk lead the structure exactly as they do
 * in struct nfsrvdsremove; presumably nfs_pnfsio() relies on that common
 * prefix -- confirm before reordering members.
 */
5180 struct nfsrvdscreate {
5181 int done; /* Set to 1 by start_dscreate() once nfsrv_dscreate() returned. */
5182 int inprog; /* Cleared before queuing; the waiter sleeps only while != 0. */
5183 struct task tsk; /* Its address is the tsleep() channel of the waiter. */
5184 struct ucred *tcred; /* Credentials passed to nfsrv_dscreate(). */
5185 struct vnode *dvp; /* DS directory vnode to create the file in. */
5186 NFSPROC_T *p; /* Thread pointer passed to nfsrv_dscreate(). */
5187 struct pnfsdsfile *pf; /* Entry filled in by nfsrv_dscreate(). */
5188 int err; /* Return value of nfsrv_dscreate(). */
5189 fhandle_t fh; /* Copy of the MDS file handle. */
5190 struct vattr va; /* Passed as nvap (attributes for VOP_SETATTR()). */
5191 struct vattr createva; /* Passed as vap (attributes for VOP_CREATE()). */
5192 };
5193
5194 int
nfsrv_dscreate(struct vnode * dvp,struct vattr * vap,struct vattr * nvap,fhandle_t * fhp,struct pnfsdsfile * pf,struct pnfsdsattr * dsa,char * fnamep,struct ucred * tcred,NFSPROC_T * p,struct vnode ** nvpp)5195 nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap,
5196 fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa,
5197 char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp)
5198 {
5199 struct vnode *nvp;
5200 struct nameidata named;
5201 struct vattr va;
5202 char *bufp;
5203 u_long *hashp;
5204 struct nfsnode *np;
5205 struct nfsmount *nmp;
5206 int error;
5207
5208 NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE,
5209 LOCKPARENT | LOCKLEAF | NOCACHE);
5210 nfsvno_setpathbuf(&named, &bufp, &hashp);
5211 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
5212 named.ni_cnd.cn_nameptr = bufp;
5213 if (fnamep != NULL) {
5214 strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1);
5215 named.ni_cnd.cn_namelen = strlen(bufp);
5216 } else
5217 named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp);
5218 NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp);
5219
5220 /* Create the date file in the DS mount. */
5221 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
5222 if (error == 0) {
5223 error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap);
5224 vref(dvp);
5225 VOP_VPUT_PAIR(dvp, error == 0 ? &nvp : NULL, false);
5226 if (error == 0) {
5227 /* Set the ownership of the file. */
5228 error = VOP_SETATTR(nvp, nvap, tcred);
5229 NFSD_DEBUG(4, "nfsrv_dscreate:"
5230 " setattr-uid=%d\n", error);
5231 if (error != 0)
5232 vput(nvp);
5233 }
5234 if (error != 0)
5235 printf("pNFS: pnfscreate failed=%d\n", error);
5236 } else
5237 printf("pNFS: pnfscreate vnlock=%d\n", error);
5238 if (error == 0) {
5239 np = VTONFS(nvp);
5240 nmp = VFSTONFS(nvp->v_mount);
5241 if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs")
5242 != 0 || nmp->nm_nam->sa_len > sizeof(
5243 struct sockaddr_in6) ||
5244 np->n_fhp->nfh_len != NFSX_MYFH) {
5245 printf("Bad DS file: fstype=%s salen=%d"
5246 " fhlen=%d\n",
5247 nvp->v_mount->mnt_vfc->vfc_name,
5248 nmp->nm_nam->sa_len, np->n_fhp->nfh_len);
5249 error = ENOENT;
5250 }
5251
5252 /* Set extattrs for the DS on the MDS file. */
5253 if (error == 0) {
5254 if (dsa != NULL) {
5255 error = VOP_GETATTR(nvp, &va, tcred);
5256 if (error == 0) {
5257 dsa->dsa_filerev = va.va_filerev;
5258 dsa->dsa_size = va.va_size;
5259 dsa->dsa_atime = va.va_atime;
5260 dsa->dsa_mtime = va.va_mtime;
5261 dsa->dsa_bytes = va.va_bytes;
5262 }
5263 }
5264 if (error == 0) {
5265 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh,
5266 NFSX_MYFH);
5267 NFSBCOPY(nmp->nm_nam, &pf->dsf_sin,
5268 nmp->nm_nam->sa_len);
5269 NFSBCOPY(named.ni_cnd.cn_nameptr,
5270 pf->dsf_filename,
5271 sizeof(pf->dsf_filename));
5272 }
5273 } else
5274 printf("pNFS: pnfscreate can't get DS"
5275 " attr=%d\n", error);
5276 if (nvpp != NULL && error == 0)
5277 *nvpp = nvp;
5278 else
5279 vput(nvp);
5280 }
5281 nfsvno_relpathbuf(&named);
5282 return (error);
5283 }
5284
5285 /*
5286 * Start up the thread that will execute nfsrv_dscreate().
5287 */
5288 static void
start_dscreate(void * arg,int pending)5289 start_dscreate(void *arg, int pending)
5290 {
5291 struct nfsrvdscreate *dsc;
5292
5293 dsc = (struct nfsrvdscreate *)arg;
5294 dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh,
5295 dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL);
5296 dsc->done = 1;
5297 NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err);
5298 }
5299
5300 /*
5301 * Create a pNFS data file on the Data Server(s).
5302 */
/*
 * vp   - the MDS file's vnode; it must be exclusively locked (asserted
 *        below before the extended attributes are set).
 * vap  - attributes used for VOP_CREATE() of each DS file.
 * cred - credentials used to VOP_GETATTR() vp.
 * p    - thread pointer passed through to the helpers.
 *
 * Nothing is returned; failures are reported with printf().  On success the
 * "pnfsd.dsfile" and "pnfsd.dsattr" system extended attributes are set on vp.
 */
5303 static void
nfsrv_pnfscreate(struct vnode * vp,struct vattr * vap,struct ucred * cred,NFSPROC_T * p)5304 nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
5305 NFSPROC_T *p)
5306 {
5307 struct nfsrvdscreate *dsc, *tdsc = NULL; /* Shut up gcc. */
5308 struct nfsdevice *ds, *tds, *fds;
5309 struct mount *mp;
5310 struct pnfsdsfile *pf, *tpf;
5311 struct pnfsdsattr dsattr;
5312 struct vattr va;
5313 struct vnode **dvp;
5314 struct nfsmount *nmp;
5315 fhandle_t fh;
5316 uid_t vauid;
5317 gid_t vagid;
5318 u_short vamode;
5319 struct ucred *tcred;
5320 int *dsdir, error, i, j, mirrorcnt, ret, stripecnt;
5321 int failpos, timo;
5322 uint64_t stripesiz;
5323
5324 /* Get a DS server directory in a round-robin order. */
5325 mirrorcnt = 1;
5326 mp = vp->v_mount;
5327 ds = fds = NULL;
5328 i = j = 0;
/* dvp[] and dsdir[] hold one slot per possible mirror/stripe combination. */
5329 dvp = malloc(sizeof(*dvp) * nfsrv_maxpnfsmirror * nfsrv_maxstripecnt,
5330 M_TEMP, M_WAITOK);
5331 dsdir = malloc(sizeof(*dsdir) * nfsrv_maxpnfsmirror *
5332 nfsrv_maxstripecnt, M_TEMP, M_WAITOK);
5333 stripesiz = nfsrv_stripesiz;
5334 NFSDDSLOCK();
5335 /*
5336 * Search for the first entry that handles this MDS fs, but use the
5337 * first entry for all MDS fs's otherwise.
5338 */
/*
 * In this loop i counts every device that has a mount and j counts the
 * devices whose MDS fsid matches this file system.
 */
5339 TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) {
5340 if (tds->nfsdev_nmp != NULL) {
5341 i++;
5342 if (tds->nfsdev_mdsisset == 0 && ds == NULL)
5343 ds = tds;
5344 else if (tds->nfsdev_mdsisset != 0 && fsidcmp(
5345 &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) {
5346 if (j == 0) {
5347 ds = fds = tds;
5348 if (nfsrv_maxstripecnt > 1)
5349 stripesiz =
5350 tds->nfsdev_mdsstripesiz;
5351 }
5352 if (stripesiz == 0)
5353 break;
5354 j++;
5355 }
5356 }
5357 }
5358 if (ds == NULL) {
5359 NFSDDSUNLOCK();
5360 free(dvp, M_TEMP);
5361 free(dsdir, M_TEMP);
5362 NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n");
5363 return;
5364 }
5365
5366 /*
5367 * i or j is the count of devices. The stripecnt is that number
5368 * of devices divided by the number of mirrors.
5369 */
5370 stripecnt = 0;
5371 if (stripesiz > 0) {
5372 if (j > 0)
5373 stripecnt = j / nfsrv_maxpnfsmirror;
5374 else
5375 stripecnt = i / nfsrv_maxpnfsmirror;
5376 if (stripecnt > nfsrv_maxstripecnt)
5377 stripecnt = nfsrv_maxstripecnt;
5378 }
5379 if (stripecnt == 0)
5380 stripecnt = 1;
5381
5382 /* Set the first device as found above. */
5383 i = dsdir[0] = ds->nfsdev_nextdir;
5384 ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize;
5385 dvp[0] = ds->nfsdev_dsdir[i];
5386 tds = TAILQ_NEXT(ds, nfsdev_list);
/*
 * Collect further devices for the remaining stripes and mirrors.
 * dsdir[] and dvp[] are indexed by mirror# * stripecnt + stripe#.
 * NOTE(review): with stripecnt == 1 the first extra device is stored at
 * index 1 (mirrorcnt == 0, j == 1) and the next match (mirrorcnt == 1,
 * j == 0) appears to target index 1 again; confirm the mirror-only case.
 */
5387 if ((nfsrv_maxpnfsmirror > 1 || stripecnt > 1) && tds != NULL) {
5388 j = 1; /* Stripe number */
5389 mirrorcnt = 0;
5390 TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) {
5391 if (tds->nfsdev_nmp != NULL &&
5392 ((tds->nfsdev_mdsisset == 0 && fds == NULL) ||
5393 (tds->nfsdev_mdsisset != 0 && fds != NULL &&
5394 fsidcmp(&mp->mnt_stat.f_fsid,
5395 &tds->nfsdev_mdsfsid) == 0))) {
5396 dsdir[mirrorcnt * stripecnt + j] = i;
5397 dvp[mirrorcnt * stripecnt + j] =
5398 tds->nfsdev_dsdir[i];
5399 j++;
5400 if (j >= stripecnt) {
5401 mirrorcnt++;
5402 if (mirrorcnt >= nfsrv_maxpnfsmirror)
5403 break;
5404 j = 0;
5405 }
5406 }
5407 }
/* No complete mirror was filled: use one mirror with the j stripes found. */
5408 if (mirrorcnt == 0) {
5409 mirrorcnt = 1;
5410 stripecnt = j;
5411 }
5412 } else
5413 stripecnt = 1;
5414 /* Put at end of list to implement round-robin usage. */
5415 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
5416 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
5417 NFSDDSUNLOCK();
5418 dsc = NULL;
/* From here on j is the total number of DS files to create. */
5419 j = mirrorcnt * stripecnt;
5420 if (j > 1)
5421 tdsc = dsc = malloc(sizeof(*dsc) * (j - 1), M_TEMP,
5422 M_WAITOK | M_ZERO);
5423 tpf = pf = malloc(sizeof(*pf) * j, M_TEMP, M_WAITOK | M_ZERO);
5424
5425 error = nfsvno_getfh(vp, &fh, p);
5426 if (error == 0)
5427 error = VOP_GETATTR(vp, &va, cred);
5428 if (error == 0) {
5429 /* Set the attributes for "vp" to Setattr the DS vp. */
5430 vauid = va.va_uid;
5431 vagid = va.va_gid;
5432 vamode = va.va_mode;
5433 VATTR_NULL(&va);
5434 va.va_uid = vauid;
5435 va.va_gid = vagid;
5436 va.va_mode = vamode;
5437 va.va_size = 0;
5438 } else
5439 printf("pNFS: pnfscreate getfh+attr=%d\n", error);
5440
5441 NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid,
5442 cred->cr_gid);
5443 /* Make data file name based on FH. */
5444 tcred = newnfs_getcred();
5445
5446 /*
5447 * Create the file on each DS mirror, using kernel process(es) for the
5448 * additional mirrors.
5449 */
/*
 * Entries 0 .. j - 2 are handed to nfs_pnfsio(); when that fails, or no
 * pNFS I/O threads are configured, the create is done here synchronously.
 * Only the first failure for which nfsds_failerr() is true is recorded in
 * failpos; any other failure becomes the function's error.
 */
5450 failpos = -1;
5451 for (i = 0; i < j - 1 && error == 0; i++, tpf++, tdsc++) {
5452 tpf->dsf_stripecnt = stripecnt;
5453 tpf->dsf_stripesiz = stripesiz;
5454 tpf->dsf_dir = dsdir[i];
5455 tdsc->tcred = tcred;
5456 tdsc->p = p;
5457 tdsc->pf = tpf;
5458 tdsc->createva = *vap;
5459 NFSBCOPY(&fh, &tdsc->fh, sizeof(fh));
5460 tdsc->va = va;
5461 tdsc->dvp = dvp[i];
5462 tdsc->done = 0;
5463 tdsc->inprog = 0;
5464 tdsc->err = 0;
5465 ret = EIO;
5466 if (nfs_pnfsiothreads != 0) {
5467 ret = nfs_pnfsio(start_dscreate, tdsc);
5468 NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret);
5469 }
5470 if (ret != 0) {
5471 ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL,
5472 NULL, tcred, p, NULL);
5473 if (ret != 0) {
5474 KASSERT(error == 0, ("nfsrv_dscreate err=%d",
5475 error));
5476 if (failpos == -1 && nfsds_failerr(ret))
5477 failpos = i;
5478 else
5479 error = ret;
5480 }
5481 }
5482 }
/* The last DS file is always created directly and is the one that fills dsattr. */
5483 if (error == 0) {
5484 tpf->dsf_stripecnt = stripecnt;
5485 tpf->dsf_stripesiz = stripesiz;
5486 tpf->dsf_dir = dsdir[j - 1];
5487 error = nfsrv_dscreate(dvp[j - 1], vap, &va, &fh, tpf,
5488 &dsattr, NULL, tcred, p, NULL);
5489 if (failpos == -1 && j > 1 && nfsds_failerr(error)) {
5490 failpos = j - 1;
5491 error = 0;
5492 }
5493 }
5494 timo = hz / 50; /* Wait for 20msec. */
5495 if (timo < 1)
5496 timo = 1;
5497 /* Wait for kernel task(s) to complete. */
5498 for (tdsc = dsc, i = 0; i < j - 1; i++, tdsc++) {
5499 while (tdsc->inprog != 0 && tdsc->done == 0)
5500 tsleep(&tdsc->tsk, PVFS, "srvdcr", timo);
5501 if (tdsc->err != 0) {
5502 if (failpos == -1 && nfsds_failerr(tdsc->err))
5503 failpos = i;
5504 else if (error == 0)
5505 error = tdsc->err;
5506 }
5507 }
5508
5509 /*
5510 * If failpos has been set, that DS has failed, so it needs
5511 * to be disabled.
5512 */
5513 if (failpos >= 0) {
5514 nmp = VFSTONFS(dvp[failpos]->v_mount);
5515 NFSLOCKMNT(nmp);
/* Skip this when the mount already has FORCEDISM or CANCELRPCS set. */
5516 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
5517 NFSMNTP_CANCELRPCS)) == 0) {
5518 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
5519 NFSUNLOCKMNT(nmp);
5520 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
5521 NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos,
5522 ds);
5523 if (ds != NULL)
5524 nfsrv_killrpcs(nmp);
5525 NFSLOCKMNT(nmp);
5526 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
5527 wakeup(nmp);
5528 }
5529 NFSUNLOCKMNT(nmp);
5530 }
5531
5532 NFSFREECRED(tcred);
5533 if (error == 0) {
5534 ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp");
5535 NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d stripecnt=%d\n",
5536 mirrorcnt, stripecnt);
5537
/* Store the whole pf[] array, then the attributes of the last DS file. */
5538 error = vn_extattr_set(vp, IO_NODELOCKED,
5539 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
5540 sizeof(*pf) * mirrorcnt * stripecnt, (char *)pf, p);
5541 if (error == 0)
5542 error = vn_extattr_set(vp, IO_NODELOCKED,
5543 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
5544 sizeof(dsattr), (char *)&dsattr, p);
5545 if (error != 0)
5546 printf("pNFS: pnfscreate setextattr=%d\n",
5547 error);
5548 } else
5549 printf("pNFS: pnfscreate=%d\n", error);
5550 free(dvp, M_TEMP);
5551 free(dsdir, M_TEMP);
5552 free(pf, M_TEMP);
5553 free(dsc, M_TEMP);
5554 }
5555
5556 /*
5557 * Get the information needed to remove the pNFS Data Server file from the
5558 * Metadata file. Upon success, *dvppp is set to an array of locked
5559 * DS directory vnode(s). The caller must unlock this array of *dvp when done
5560 * with it.
5561 */
5562 static void
nfsrv_pnfsremovesetup(struct vnode * vp,NFSPROC_T * p,struct vnode *** dvppp,int * dsfilecntp,char * fname,fhandle_t * fhp)5563 nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode ***dvppp,
5564 int *dsfilecntp, char *fname, fhandle_t *fhp)
5565 {
5566 struct vattr va;
5567 struct ucred *tcred;
5568 char *buf;
5569 int buflen, error;
5570
5571 *dvppp = NULL;
5572 /* If not an exported regular file or not a pNFS server, just return. */
5573 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
5574 nfsrv_devidcnt == 0)
5575 return;
5576
5577 /* Check to see if this is the last hard link. */
5578 tcred = newnfs_getcred();
5579 error = VOP_GETATTR(vp, &va, tcred);
5580 NFSFREECRED(tcred);
5581 if (error != 0) {
5582 printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error);
5583 return;
5584 }
5585 if (va.va_nlink > 1)
5586 return;
5587
5588 error = nfsvno_getfh(vp, fhp, p);
5589 if (error != 0) {
5590 printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error);
5591 return;
5592 }
5593
5594 buflen = sizeof(struct pnfsdsfile) * NFSDEV_MAXMIRRORS *
5595 NFSDEV_MAXSTRIPE;
5596 buf = malloc(buflen, M_TEMP, M_WAITOK);
5597 /* Get the directory vnode for the DS mount and the file handle. */
5598 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, dsfilecntp, NULL, NULL,
5599 p, dvppp, NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL);
5600 free(buf, M_TEMP);
5601 if (error != 0)
5602 printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error);
5603 }
5604
5605 /*
5606 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror.
5607 * The arguments are in a structure, so that they can be passed through
5608 * taskqueue for a kernel process to execute this function.
5609 */
/*
 * Per-request argument block for start_dsremove()/nfsrv_dsremove().
 * nfsrv_pnfsremove() fills in one of these for every DS file except the
 * last one, hands it to nfs_pnfsio() and later polls inprog/done.
 * NOTE(review): done, inprog and tsk lead the structure exactly as they do
 * in struct nfsrvdscreate; presumably nfs_pnfsio() relies on that common
 * prefix -- confirm before reordering members.
 */
5610 struct nfsrvdsremove {
5611 int done; /* Set to 1 by start_dsremove() once nfsrv_dsremove() returned. */
5612 int inprog; /* Cleared before queuing; the waiter sleeps only while != 0. */
5613 struct task tsk; /* Its address is the tsleep() channel of the waiter. */
5614 struct ucred *tcred; /* Credentials passed to nfsrv_dsremove(). */
5615 struct vnode *dvp; /* DS directory vnode holding the file. */
5616 NFSPROC_T *p; /* Thread pointer passed to nfsrv_dsremove(). */
5617 int err; /* Return value of nfsrv_dsremove(). */
5618 char fname[PNFS_FILENAME_LEN + 1]; /* NUL-terminated copy of the file name. */
5619 };
5620
5621 static int
nfsrv_dsremove(struct vnode * dvp,char * fname,struct ucred * tcred,NFSPROC_T * p)5622 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred,
5623 NFSPROC_T *p)
5624 {
5625 struct nameidata named;
5626 struct vnode *nvp;
5627 char *bufp;
5628 u_long *hashp;
5629 int error;
5630
5631 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
5632 if (error != 0)
5633 return (error);
5634 named.ni_cnd.cn_nameiop = DELETE;
5635 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
5636 named.ni_cnd.cn_cred = tcred;
5637 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF;
5638 nfsvno_setpathbuf(&named, &bufp, &hashp);
5639 named.ni_cnd.cn_nameptr = bufp;
5640 named.ni_cnd.cn_namelen = strlen(fname);
5641 strlcpy(bufp, fname, NAME_MAX);
5642 NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp);
5643 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
5644 NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error);
5645 if (error == 0) {
5646 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd);
5647 vput(nvp);
5648 }
5649 NFSVOPUNLOCK(dvp);
5650 nfsvno_relpathbuf(&named);
5651 if (error != 0)
5652 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error);
5653 return (error);
5654 }
5655
5656 /*
5657 * Start up the thread that will execute nfsrv_dsremove().
5658 */
5659 static void
start_dsremove(void * arg,int pending)5660 start_dsremove(void *arg, int pending)
5661 {
5662 struct nfsrvdsremove *dsrm;
5663
5664 dsrm = (struct nfsrvdsremove *)arg;
5665 dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred,
5666 dsrm->p);
5667 dsrm->done = 1;
5668 NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err);
5669 }
5670
5671 /*
5672 * Remove a pNFS data file from a Data Server.
5673 * nfsrv_pnfsremovesetup() must have been called before the MDS file was
5674 * removed to set up the dvp and fill in the FH.
5675 */
5676 static void
nfsrv_pnfsremove(struct vnode ** dvpp,int dsfilecnt,char * fname,fhandle_t * fhp,NFSPROC_T * p)5677 nfsrv_pnfsremove(struct vnode **dvpp, int dsfilecnt, char *fname,
5678 fhandle_t *fhp, NFSPROC_T *p)
5679 {
5680 struct ucred *tcred;
5681 struct nfsrvdsremove *dsrm, *tdsrm = NULL; /* Shut up gcc. */
5682 struct nfsdevice *ds;
5683 struct nfsmount *nmp;
5684 struct vnode **tdvpp;
5685 int failpos, i, ret, timo;
5686
5687 tcred = newnfs_getcred();
5688 dsrm = NULL;
5689 if (dsfilecnt > 1)
5690 tdsrm = dsrm = malloc(sizeof(*dsrm) * dsfilecnt - 1, M_TEMP,
5691 M_WAITOK);
5692 /*
5693 * Remove the file on each DS mirror, using kernel process(es) for the
5694 * additional mirrors.
5695 */
5696 failpos = -1;
5697 tdvpp = dvpp;
5698 for (i = 0; i < dsfilecnt - 1; i++, tdsrm++, tdvpp++) {
5699 tdsrm->tcred = tcred;
5700 tdsrm->p = p;
5701 tdsrm->dvp = *tdvpp;
5702 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1);
5703 tdsrm->inprog = 0;
5704 tdsrm->done = 0;
5705 tdsrm->err = 0;
5706 ret = EIO;
5707 if (nfs_pnfsiothreads != 0) {
5708 ret = nfs_pnfsio(start_dsremove, tdsrm);
5709 NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret);
5710 }
5711 if (ret != 0) {
5712 ret = nfsrv_dsremove(tdsrm->dvp, fname, tcred, p);
5713 if (failpos == -1 && nfsds_failerr(ret))
5714 failpos = i;
5715 }
5716 }
5717 ret = nfsrv_dsremove(*tdvpp, fname, tcred, p);
5718 if (failpos == -1 && dsfilecnt > 1 && nfsds_failerr(ret))
5719 failpos = dsfilecnt - 1;
5720 timo = hz / 50; /* Wait for 20msec. */
5721 if (timo < 1)
5722 timo = 1;
5723 /* Wait for kernel task(s) to complete. */
5724 for (tdsrm = dsrm, i = 0; i < dsfilecnt - 1; i++, tdsrm++) {
5725 while (tdsrm->inprog != 0 && tdsrm->done == 0)
5726 tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo);
5727 if (failpos == -1 && nfsds_failerr(tdsrm->err))
5728 failpos = i;
5729 }
5730
5731 /*
5732 * If failpos has been set, that mirror has failed, so it needs
5733 * to be disabled.
5734 */
5735 if (failpos >= 0) {
5736 tdvpp = dvpp + failpos;
5737 nmp = VFSTONFS((*tdvpp)->v_mount);
5738 NFSLOCKMNT(nmp);
5739 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
5740 NFSMNTP_CANCELRPCS)) == 0) {
5741 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
5742 NFSUNLOCKMNT(nmp);
5743 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
5744 NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos,
5745 ds);
5746 if (ds != NULL)
5747 nfsrv_killrpcs(nmp);
5748 NFSLOCKMNT(nmp);
5749 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
5750 wakeup(nmp);
5751 }
5752 NFSUNLOCKMNT(nmp);
5753 }
5754
5755 /* Get rid all layouts for the file. */
5756 nfsrv_freefilelayouts(fhp);
5757
5758 NFSFREECRED(tcred);
5759 free(dsrm, M_TEMP);
5760 }
5761
5762 /*
5763 * Generate a file name based on the file handle and put it in *bufp.
5764 * Return the number of bytes generated.
5765 */
5766 static int
nfsrv_putfhname(fhandle_t * fhp,char * bufp)5767 nfsrv_putfhname(fhandle_t *fhp, char *bufp)
5768 {
5769 int i;
5770 uint8_t *cp;
5771 const uint8_t *hexdigits = "0123456789abcdef";
5772
5773 cp = (uint8_t *)fhp;
5774 for (i = 0; i < sizeof(*fhp); i++) {
5775 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf];
5776 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf];
5777 }
5778 bufp[2 * i] = '\0';
5779 return (2 * i);
5780 }
5781
5782 /*
5783 * Update the Metadata file's attributes from the DS file when a Read/Write
5784 * layout is returned.
5785 * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN
5786 * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file.
5787 */
5788 int
nfsrv_updatemdsattr(struct vnode * vp,struct nfsvattr * nap,NFSPROC_T * p)5789 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
5790 {
5791 struct ucred *tcred;
5792 int error;
5793
5794 /* Do this as root so that it won't fail with EACCES. */
5795 tcred = newnfs_getcred();
5796 error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN,
5797 NULL, NULL, NULL, nap, NULL, NULL, 0, NULL);
5798 NFSFREECRED(tcred);
5799 return (error);
5800 }
5801
5802 /*
5803 * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file.
5804 */
5805 static int
nfsrv_dssetacl(struct vnode * vp,struct acl * aclp,struct ucred * cred,NFSPROC_T * p)5806 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred,
5807 NFSPROC_T *p)
5808 {
5809 int error;
5810
5811 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL,
5812 NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL);
5813 return (error);
5814 }
5815
/*
 * Perform the operation "ioproc" for the MDS file vp on the DS file(s)
 * recorded in vp's "pnfsd.dsfile" extended attribute.
 *
 * vp      - MDS file vnode.
 * off,cnt - byte offset and count, passed to the Read/Write DS RPCs.
 * cred, p - credentials and thread used for the RPCs.
 * ioproc  - NFSPROC_READDS, NFSPROC_WRITEDS or NFSPROC_SETATTR select the
 *           matching DS RPC; every other value takes the Getattr path.
 * mpp, mpp2, cp - mbuf chain pointers / data pointer handed to the
 *           Read and Write DS RPCs.
 * nap     - attributes, passed to the Getattr and Setattr DS RPCs; for
 *           NFSPROC_GETATTR it is first filled in from "pnfsd.dsattr".
 * aclp, offp, content, eofp - only referenced by the "notnow" code.
 *
 * Returns 0 on success.  ENOENT is returned when vp is not an exported
 * regular file, no DS devices exist, or the DS information cannot be
 * obtained; otherwise the error from the DS RPC is returned.
 */
5816 static int
nfsrv_proxyds(struct vnode * vp,off_t off,int cnt,struct ucred * cred,struct thread * p,int ioproc,struct mbuf ** mpp,char * cp,struct mbuf ** mpp2,struct nfsvattr * nap,struct acl * aclp,off_t * offp,int content,bool * eofp)5817 nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
5818 struct thread *p, int ioproc, struct mbuf **mpp, char *cp,
5819 struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp,
5820 off_t *offp, int content, bool *eofp)
5821 {
5822 struct nfsmount **nmp, *failnmp;
5823 fhandle_t *fhp;
5824 struct vnode **dvp;
5825 struct nfsdevice *ds;
5826 struct pnfsdsattr dsattr;
5827 struct opnfsdsattr odsattr;
5828 char *buf;
5829 int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt;
5830 int stripecnt;
5831 uint64_t stripesiz;
5832
5833 NFSD_DEBUG(4, "in nfsrv_proxyds\n");
5834 /*
5835 * If not a regular file, not exported or not a pNFS server,
5836 * just return ENOENT.
5837 */
5838 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
5839 nfsrv_devidcnt == 0)
5840 return (ENOENT);
5841
/* buf is sized for the largest possible "pnfsd.dsfile" attribute. */
5842 buflen = sizeof(struct pnfsdsfile) * NFSDEV_MAXMIRRORS *
5843 NFSDEV_MAXSTRIPE;
5844 buf = malloc(buflen, M_TEMP, M_WAITOK);
5845 error = 0;
5846
5847 /*
5848 * For Getattr, get the Change attribute (va_filerev) and size (va_size)
5849 * from the MetaData file's extended attribute.
5850 */
5851 if (ioproc == NFSPROC_GETATTR) {
5852 error = vn_extattr_get(vp, IO_NODELOCKED,
5853 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf,
5854 p);
5855 if (error == 0) {
/* The attribute's size tells the older layout (odsattr) from the current one. */
5856 if (buflen == sizeof(odsattr)) {
5857 NFSBCOPY(buf, &odsattr, buflen);
5858 nap->na_filerev = odsattr.dsa_filerev;
5859 nap->na_size = odsattr.dsa_size;
5860 nap->na_atime = odsattr.dsa_atime;
5861 nap->na_mtime = odsattr.dsa_mtime;
5862 /*
5863 * Fake na_bytes by rounding up na_size.
5864 * Since we don't know the block size, just
5865 * use BLKDEV_IOSIZE.
5866 */
5867 nap->na_bytes = (odsattr.dsa_size +
5868 BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1);
5869 } else if (buflen == sizeof(dsattr)) {
5870 NFSBCOPY(buf, &dsattr, buflen);
5871 nap->na_filerev = dsattr.dsa_filerev;
5872 nap->na_size = dsattr.dsa_size;
5873 nap->na_atime = dsattr.dsa_atime;
5874 nap->na_mtime = dsattr.dsa_mtime;
5875 nap->na_bytes = dsattr.dsa_bytes;
5876 } else
5877 error = ENXIO;
5878 }
5879 if (error == 0) {
5880 /*
5881 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr()
5882 * returns 0, just return now. nfsrv_checkdsattr()
5883 * returns 0 if there is no Read/Write layout
5884 * plus either an Open/Write_access or Write
5885 * delegation issued to a client for the file.
5886 */
5887 if (nfsrv_pnfsgetdsattr == 0 ||
5888 nfsrv_checkdsattr(vp, p) == 0) {
5889 free(buf, M_TEMP);
5890 return (error);
5891 }
5892 }
5893
5894 /*
5895 * Clear ENOATTR so the code below will attempt to do a
5896 * nfsrv_getattrdsrpc() to get the attributes and (re)create
5897 * the extended attribute.
5898 */
5899 if (error == ENOATTR)
5900 error = 0;
5901 }
5902
5903 origmircnt = -1;
5904 trycnt = 0;
/* Each pass re-reads the DS information, since a failed mirror may have been deleted. */
5905 tryagain:
5906 nmp = NULL;
5907 dvp = NULL;
5908 fhp = NULL;
5909 if (error == 0) {
5910 buflen = sizeof(struct pnfsdsfile) * NFSDEV_MAXMIRRORS *
5911 NFSDEV_MAXSTRIPE;
5912 if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) ==
5913 LK_EXCLUSIVE)
5914 printf("nfsrv_proxyds: Readds vp exclusively locked\n");
/* A Write with vp only share locked upgrades the lock; the result is not checked. */
5915 if (ioproc == NFSPROC_WRITEDS && NFSVOPISLOCKED(vp) ==
5916 LK_SHARED)
5917 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
5918 error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen,
5919 &mirrorcnt, &stripecnt, &stripesiz, p, &dvp, &fhp, NULL,
5920 NULL, NULL, NULL, NULL, NULL, NULL);
5921 if (error == 0) {
/* Build the array of DS mounts that parallels dvp[]. */
5922 nmp = malloc(sizeof(*nmp) * mirrorcnt * stripecnt,
5923 M_TEMP, M_WAITOK);
5924 for (i = 0; i < mirrorcnt * stripecnt; i++)
5925 if (dvp[i] != NULL)
5926 nmp[i] = VFSTONFS(dvp[i]->v_mount);
5927 else
5928 nmp[i] = NULL;
5929 } else
5930 printf("pNFS: proxy getextattr sockaddr=%d\n", error);
5931 } else
5932 printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error);
5933 if (error == 0) {
5934 if (origmircnt == -1)
5935 origmircnt = mirrorcnt;
5936 /*
5937 * If failpos is set to a mirror#, then that mirror has
5938 * failed and will be disabled. For Read, Getattr and Seek, the
5939 * function only tries one mirror, so if that mirror has
5940 * failed, it will need to be retried. As such, increment
5941 * trycnt for these cases.
5942 * For Write, Setattr and Setacl, the function tries all
5943 * mirrors and will not return an error for the case where
5944 * one mirror has failed. For these cases, the functioning
5945 * mirror(s) will have been modified, so a retry isn't
5946 * necessary. These functions will set failpos for the
5947 * failed mirror#.
5948 */
/*
 * NOTE(review): failpos is not initialized in this function; it is
 * presumably always set by the nfsrv_*dsrpc() routine called below --
 * confirm.
 */
5949 if (ioproc == NFSPROC_READDS) {
5950 error = nfsrv_readdsrpc(fhp, off, cnt, cred, p, nmp,
5951 mirrorcnt, stripecnt, stripesiz, mpp, mpp2,
5952 &failpos);
5953 if (failpos >= 0 && mirrorcnt > 1) {
5954 /*
5955 * Setting failpos will cause the mirror
5956 * to be disabled and then a retry of this
5957 * read is required.
5958 */
5959 error = 0;
5960 trycnt++;
5961 }
5962 } else if (ioproc == NFSPROC_WRITEDS)
5963 error = nfsrv_writedsrpc(fhp, off, cnt, cred, p, vp,
5964 nmp, mirrorcnt, stripecnt, stripesiz, mpp, cp,
5965 &failpos);
5966 else if (ioproc == NFSPROC_SETATTR)
5967 error = nfsrv_setattrdsrpc(fhp, vp, cred, p, nmp,
5968 mirrorcnt, stripecnt, nap, &failpos);
5969 #ifdef notnow
/*
 * NOTE(review): the disabled code below refers to "fh", which is not
 * declared in this function (the file handle array here is "fhp").
 */
5970 else if (ioproc == NFSPROC_SETACL)
5971 error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0],
5972 mirrorcnt, aclp, &failpos);
5973 else if (ioproc == NFSPROC_SEEKDS) {
5974 error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred,
5975 p, nmp[0]);
5976 if (nfsds_failerr(error) && mirrorcnt > 1) {
5977 /*
5978 * Setting failpos will cause the mirror
5979 * to be disabled and then a retry of this
5980 * read is required.
5981 */
5982 failpos = 0;
5983 error = 0;
5984 trycnt++;
5985 }
5986 } else if (ioproc == NFSPROC_ALLOCATE)
5987 error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp,
5988 &nmp[0], mirrorcnt, &failpos);
5989 else if (ioproc == NFSPROC_DEALLOCATE)
5990 error = nfsrv_deallocatedsrpc(fh, off, *offp, cred, p,
5991 vp, &nmp[0], mirrorcnt, &failpos);
5992 #endif
/* Every remaining ioproc value is handled as a Getattr of the DS file. */
5993 else {
5994 error = nfsrv_getattrdsrpc(fhp, vp, cred, p,
5995 nmp, stripecnt, nap, &failpos);
5996 if (failpos >= 0 && mirrorcnt > 1) {
5997 /*
5998 * Setting failpos will cause the mirror
5999 * to be disabled and then a retry of this
6000 * getattr is required.
6001 */
6002 error = 0;
6003 trycnt++;
6004 }
6005 }
6006 ds = NULL;
/* Disable the DS at failpos unless its mount already has FORCEDISM or CANCELRPCS set. */
6007 if (failpos >= 0) {
6008 failnmp = nmp[failpos];
6009 NFSLOCKMNT(failnmp);
6010 if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM |
6011 NFSMNTP_CANCELRPCS)) == 0) {
6012 failnmp->nm_privflag |= NFSMNTP_CANCELRPCS;
6013 NFSUNLOCKMNT(failnmp);
6014 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER,
6015 failnmp, p);
6016 NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n",
6017 failpos, ds);
6018 if (ds != NULL)
6019 nfsrv_killrpcs(failnmp);
6020 NFSLOCKMNT(failnmp);
6021 failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
6022 wakeup(failnmp);
6023 }
6024 NFSUNLOCKMNT(failnmp);
6025 }
/* Unlock the DS directory vnodes and free the per-pass arrays. */
6026 for (i = 0; i < mirrorcnt * stripecnt; i++)
6027 if (dvp[i] != NULL)
6028 NFSVOPUNLOCK(dvp[i]);
6029 free(dvp, M_TEMP);
6030 free(nmp, M_TEMP);
6031 free(fhp, M_TEMP);
6032 NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error,
6033 trycnt);
6034 /* Try the Read/Getattr again if a mirror was deleted. */
6035 if (ds != NULL && trycnt > 0 && trycnt < origmircnt)
6036 goto tryagain;
6037 } else {
6038 /* Return ENOENT for any Extended Attribute error. */
6039 error = ENOENT;
6040 }
6041 free(buf, M_TEMP);
6042 NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error);
6043 return (error);
6044 }
6045
6046 /*
6047 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended
6048 * attribute.
6049 * newnmpp - If it points to a non-NULL nmp, that is the destination and needs
6050 * to be checked. If it points to a NULL nmp, then it returns
6051 * a suitable destination.
6052 * curnmp - If non-NULL, it is the source mount for the copy.
6053 */
6054 int
nfsrv_dsgetsockmnt(struct vnode * vp,int lktype,char * buf,int * buflenp,int * mirrorcntp,int * stripecntp,uint64_t * stripesizp,NFSPROC_T * p,struct vnode *** dvppp,fhandle_t ** fhpp,char ** devid,char * fnamep,struct vnode ** nvpp,struct nfsmount ** newnmpp,struct nfsmount * curnmp,int * ippos,int * dsdirp)6055 nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp,
6056 int *mirrorcntp, int *stripecntp, uint64_t *stripesizp, NFSPROC_T *p,
6057 struct vnode ***dvppp, fhandle_t **fhpp,
6058 char **devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp,
6059 struct nfsmount *curnmp, int *ippos, int *dsdirp)
6060 {
6061 struct vnode *dvp, *nvp = NULL, **tdvpp = NULL; /* Shut up gcc. */
6062 struct mount *mp;
6063 struct nfsmount *nmp, *newnmp;
6064 fhandle_t *tfhp = NULL; /* Shut up gcc. */
6065 struct sockaddr *sad;
6066 struct sockaddr_in *sin;
6067 struct nfsdevice *ds, *tds, *fndds;
6068 struct pnfsdsfile *pf;
6069 struct opnfsdsfile *opf;
6070 uint32_t dsdir;
6071 int error, fhiszero, fnd, gotmirror, gotone, i, j, k, l, m, mirrorcnt;
6072 char *tdevid;
6073 bool dvplocked;
6074
6075 ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp");
6076 dvplocked = false;
6077 if (mirrorcntp != NULL)
6078 *mirrorcntp = 1;
6079 if (stripecntp != NULL)
6080 *stripecntp = 1;
6081 if (stripesizp != NULL)
6082 *stripesizp = 0;
6083 if (nvpp != NULL)
6084 *nvpp = NULL;
6085 if (dvppp != NULL)
6086 *dvppp = NULL;
6087 if (fhpp != NULL)
6088 *fhpp = NULL;
6089 if (devid != NULL)
6090 *devid = NULL;
6091 tdevid = NULL;
6092 if (ippos != NULL)
6093 *ippos = -1;
6094 if (newnmpp != NULL)
6095 newnmp = *newnmpp;
6096 else
6097 newnmp = NULL;
6098 if (fnamep != NULL)
6099 fnamep[0] = '\0';
6100 mp = vp->v_mount;
6101 error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
6102 "pnfsd.dsfile", buflenp, buf, p);
6103 if (error == 0 && *buflenp > 0) {
6104 j = *buflenp / sizeof(*pf);
6105 if (*buflenp != sizeof(*pf) * j) {
6106 /* Try opnfsdsfile. */
6107 j = *buflenp / sizeof(*opf);
6108 if (j >= 1 && *buflenp == sizeof(*opf) * j) {
6109 char *tbuf;
6110
6111 tbuf = malloc(*buflenp, M_TEMP, M_WAITOK);
6112 memcpy(tbuf, buf, *buflenp);
6113 pf = (struct pnfsdsfile *)buf;
6114 opf = (struct opnfsdsfile *)tbuf;
6115 for (k = 0; k < j; k++, pf++, opf++) {
6116 memcpy(&pf->dsf_fh, opf, sizeof(*opf));
6117 pf->dsf_stripecnt = 1;
6118 pf->dsf_stripesiz = 0;
6119 }
6120 free(tbuf, M_TEMP);
6121 } else
6122 error = ENOATTR;
6123 } else if (j < 1)
6124 error = ENOATTR;
6125 } else if (error == 0)
6126 error = ENOATTR;
6127 if (error != 0)
6128 return (error);
6129
6130 pf = (struct pnfsdsfile *)buf;
6131 if (pf->dsf_stripesiz > 0) {
6132 mirrorcnt = j / pf->dsf_stripecnt;
6133 k = pf->dsf_stripecnt;
6134 } else {
6135 mirrorcnt = j;
6136 k = 1;
6137 }
6138 if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS ||
6139 k < 1 || k > NFSDEV_MAXSTRIPE || j != mirrorcnt * k)
6140 return (ENOATTR);
6141 if (stripecntp != NULL)
6142 *stripecntp = k;
6143 if (stripesizp != NULL)
6144 *stripesizp = pf->dsf_stripesiz;
6145
6146 /* Allocate a large enough array for dvppp, if required. */
6147 if (dvppp != NULL)
6148 tdvpp = *dvppp = malloc(sizeof(*tdvpp) * mirrorcnt * k, M_TEMP,
6149 M_WAITOK | M_ZERO);
6150 if (fhpp != NULL)
6151 tfhp = *fhpp = malloc(sizeof(*tfhp) * mirrorcnt * k, M_TEMP,
6152 M_WAITOK);
6153 if (devid != NULL)
6154 tdevid = *devid = malloc(NFSX_V4DEVICEID * mirrorcnt * k,
6155 M_TEMP, M_WAITOK);
6156
6157 /* If curnmp != NULL, check for a match in the mirror list. */
6158 if (curnmp != NULL) {
6159 fnd = 0;
6160 for (i = 0; i < j; i += k, pf += k) {
6161 sad = (struct sockaddr *)&pf->dsf_sin;
6162 if (nfsaddr2_match(sad, curnmp->nm_nam)) {
6163 if (ippos != NULL)
6164 *ippos = i;
6165 fnd = 1;
6166 break;
6167 }
6168 }
6169 if (fnd == 0)
6170 error = ENXIO;
6171 }
6172
6173 gotmirror = gotone = 0;
6174 l = 0; /* Index for tdvpp and tfhp. */
6175 pf = (struct pnfsdsfile *)buf;
6176 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d stripecnt=%d err=%d\n",
6177 mirrorcnt, k, error);
6178 for (i = 0; i < j && error == 0; i++, pf++) {
6179 fhiszero = 0;
6180 sad = (struct sockaddr *)&pf->dsf_sin;
6181 sin = &pf->dsf_sin;
6182 dsdir = pf->dsf_dir;
6183 if (dsdir >= nfsrv_dsdirsize) {
6184 printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir);
6185 error = ENOATTR;
6186 } else if (nvpp != NULL && newnmp != NULL &&
6187 nfsaddr2_match(sad, newnmp->nm_nam))
6188 error = EEXIST;
6189 if (error == 0) {
6190 if (ippos != NULL && curnmp == NULL &&
6191 sad->sa_family == AF_INET &&
6192 sin->sin_addr.s_addr == 0)
6193 *ippos = i;
6194 if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0)
6195 fhiszero = 1;
6196 /* Use the socket address to find the mount point. */
6197 fndds = NULL;
6198 NFSDDSLOCK();
6199 /* Find a match for the IP address. */
6200 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
6201 if (ds->nfsdev_nmp != NULL) {
6202 dvp = ds->nfsdev_dvp;
6203 nmp = VFSTONFS(dvp->v_mount);
6204 if (nmp != ds->nfsdev_nmp)
6205 printf("different2 nmp %p %p\n",
6206 nmp, ds->nfsdev_nmp);
6207 if (nfsaddr2_match(sad, nmp->nm_nam)) {
6208 fndds = ds;
6209 break;
6210 }
6211 }
6212 }
6213 if (fndds != NULL && newnmpp != NULL &&
6214 newnmp == NULL) {
6215 /* Search for a place to make a mirror copy. */
6216 TAILQ_FOREACH(tds, &nfsrv_devidhead,
6217 nfsdev_list) {
6218 if (tds->nfsdev_nmp != NULL &&
6219 fndds != tds &&
6220 ((tds->nfsdev_mdsisset == 0 &&
6221 fndds->nfsdev_mdsisset == 0) ||
6222 (tds->nfsdev_mdsisset != 0 &&
6223 fndds->nfsdev_mdsisset != 0 &&
6224 fsidcmp(&tds->nfsdev_mdsfsid,
6225 &mp->mnt_stat.f_fsid) == 0))) {
6226 *newnmpp = tds->nfsdev_nmp;
6227 break;
6228 }
6229 }
6230 if (tds != NULL) {
6231 /*
6232 * Move this entry to the end of the
6233 * list, so it won't be selected as
6234 * easily the next time.
6235 */
6236 TAILQ_REMOVE(&nfsrv_devidhead, tds,
6237 nfsdev_list);
6238 TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds,
6239 nfsdev_list);
6240 }
6241 }
6242 NFSDDSUNLOCK();
6243 if (fndds != NULL) {
6244 dvp = fndds->nfsdev_dsdir[dsdir];
6245 if (lktype != 0 || fhiszero != 0 ||
6246 (nvpp != NULL && *nvpp == NULL)) {
6247 dvplocked = true;
6248 if (fhiszero != 0)
6249 error = vn_lock(dvp,
6250 LK_EXCLUSIVE);
6251 else if (lktype != 0)
6252 error = vn_lock(dvp, lktype);
6253 else
6254 error = vn_lock(dvp, LK_SHARED);
6255 /*
6256 * If the file handle is all 0's, try to
6257 * do a Lookup against the DS to acquire
6258 * it.
6259 * If dvpp == NULL or the Lookup fails,
6260 * unlock dvp after the call.
6261 */
6262 if (error == 0 && (fhiszero != 0 ||
6263 (nvpp != NULL && *nvpp == NULL))) {
6264 error = nfsrv_pnfslookupds(vp,
6265 dvp, pf, &nvp, p);
6266 if (error == 0) {
6267 if (fhiszero != 0)
6268 nfsrv_pnfssetfh(
6269 vp, pf,
6270 devid,
6271 fnamep,
6272 nvp, p);
6273 if (nvpp != NULL &&
6274 *nvpp == NULL) {
6275 *nvpp = nvp;
6276 *dsdirp = dsdir;
6277 } else
6278 vput(nvp);
6279 }
6280 if (error != 0 || lktype == 0)
6281 NFSVOPUNLOCK(dvp);
6282 }
6283 }
6284 if (error == 0) {
6285 gotone++;
6286 NFSD_DEBUG(4, "gotone=%d\n", gotone);
6287 if (tdevid != NULL) {
6288 NFSBCOPY(fndds->nfsdev_deviceid,
6289 tdevid, NFSX_V4DEVICEID);
6290 tdevid += NFSX_V4DEVICEID;
6291 }
6292 if (dvppp != NULL)
6293 tdvpp[l] = dvp;
6294 if (fhpp != NULL)
6295 NFSBCOPY(&pf->dsf_fh, &tfhp[l],
6296 NFSX_MYFH);
6297 if (dvppp != NULL || fhpp != NULL) {
6298 l++;
6299 if (l % k == 0)
6300 gotmirror++;
6301 }
6302 if (fnamep != NULL && gotone == 1)
6303 strlcpy(fnamep,
6304 pf->dsf_filename,
6305 sizeof(pf->dsf_filename));
6306 } else
6307 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt "
6308 "err=%d\n", error);
6309 } else if (fnamep == NULL) {
6310 /*
6311 * fnamep is NULL for ReadDS, WriteDS,
6312 * SetattrDS and GetattrDS. For these cases,
6313 * do not use a partial stripe set as a
6314 * mirror.
6315 */
6316 for (m = l / k * k; m < l; m++) {
6317 if (dvplocked)
6318 NFSVOPUNLOCK(tdvpp[m]);
6319 tdvpp[m] = NULL;
6320 }
6321 l = l / k * k;
6322 }
6323 }
6324 }
6325 if (fnamep != NULL) {
6326 /*
6327 * If fnamep != NULL, a list of all DSs is wanted.
		 * For this, cheat and return the total count of DSs in
6329 * mirrorcnt.
6330 */
6331 gotmirror = gotone;
6332 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: fname=%s, gotmirror=%d\n",
6333 fnamep, gotmirror);
6334 }
6335 if (error == 0 && gotmirror == 0)
6336 error = ENOENT;
6337
6338 NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone,
6339 error);
6340 if (error == 0) {
6341 if (mirrorcntp != NULL)
6342 *mirrorcntp = gotmirror;
6343 } else {
6344 if (dvppp != NULL) {
6345 if (l > 0 && dvplocked) {
6346 /*
6347 * If the error didn't occur on the first one
6348 * and dvppp != NULL, the one(s) prior to the
6349 * failure will have locked dvp's that need to
6350 * be unlocked.
6351 */
6352 tdvpp = *dvppp;
6353 for (i = 0; i < l; i++)
6354 NFSVOPUNLOCK(*tdvpp++);
6355 }
6356 free(*dvppp, M_TEMP);
6357 *dvppp = NULL;
6358 }
6359 if (fhpp != NULL) {
6360 free(*fhpp, M_TEMP);
6361 *fhpp = NULL;
6362 }
6363 if (devid != NULL) {
6364 free(*devid, M_TEMP);
6365 *devid = NULL;
6366 }
6367 /*
6368 * If it found the vnode to be copied from before a failure,
6369 * it needs to be vput()'d.
6370 */
6371 if (nvpp != NULL && *nvpp != NULL) {
6372 vput(*nvpp);
6373 *nvpp = NULL;
6374 }
6375 }
6376 return (error);
6377 }
6378
6379 /*
6380 * Set the extended attribute for the Change attribute.
6381 */
6382 static int
nfsrv_setextattr(struct vnode * vp,struct nfsvattr * nap,NFSPROC_T * p)6383 nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
6384 {
6385 struct pnfsdsattr dsattr;
6386 int error;
6387
6388 ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
6389 dsattr.dsa_filerev = nap->na_filerev;
6390 dsattr.dsa_size = nap->na_size;
6391 dsattr.dsa_atime = nap->na_atime;
6392 dsattr.dsa_mtime = nap->na_mtime;
6393 dsattr.dsa_bytes = nap->na_bytes;
6394 error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
6395 "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p);
6396 if (error != 0)
6397 printf("pNFS: setextattr=%d\n", error);
6398 return (error);
6399 }
6400
6401 /*
6402 * Do a read RPC on a DS data file, using this structure for the arguments,
6403 * so that this function can be executed by a separate kernel process.
6404 */
6405 struct nfsrvreaddsdorpc {
6406 int done;
6407 int inprog;
6408 struct task tsk;
6409 fhandle_t fh;
6410 off_t off;
6411 int len;
6412 struct nfsmount *nmp;
6413 struct ucred *cred;
6414 NFSPROC_T *p;
6415 struct mbuf *m;
6416 struct mbuf *mend;
6417 int err;
6418 };
6419
6420 static int
nfsrv_readdsdorpc(fhandle_t * fhp,off_t off,int len,struct ucred * cred,NFSPROC_T * p,struct nfsmount * nmp,struct mbuf ** mpp,struct mbuf ** mpendp)6421 nfsrv_readdsdorpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
6422 NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp)
6423 {
6424 uint32_t *tl;
6425 struct nfsrv_descript *nd;
6426 nfsv4stateid_t st;
6427 struct mbuf *m, *m2;
6428 int error = 0, retlen, tlen, trimlen;
6429
6430 NFSD_DEBUG(4, "in nfsrv_readdsdorpc\n");
6431 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
6432 *mpp = NULL;
6433 /*
6434 * Use a stateid where other is an alternating 01010 pattern and
6435 * seqid is 0xffffffff. This value is not defined as special by
6436 * the RFC and is used by the FreeBSD NFS server to indicate an
6437 * MDS->DS proxy operation.
6438 */
6439 st.other[0] = 0x55555555;
6440 st.other[1] = 0x55555555;
6441 st.other[2] = 0x55555555;
6442 st.seqid = 0xffffffff;
6443 nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp),
6444 NULL, NULL, 0, 0, cred);
6445 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
6446 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6447 txdr_hyper(off, tl);
6448 *(tl + 2) = txdr_unsigned(len);
6449 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
6450 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6451 if (error != 0) {
6452 free(nd, M_TEMP);
6453 return (error);
6454 }
6455 if (nd->nd_repstat == 0) {
6456 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
6457 NFSM_STRSIZ(retlen, len);
6458 if (retlen > 0) {
6459 /* Trim off the pre-data XDR from the mbuf chain. */
6460 m = nd->nd_mrep;
6461 while (m != NULL && m != nd->nd_md) {
6462 if (m->m_next == nd->nd_md) {
6463 m->m_next = NULL;
6464 m_freem(nd->nd_mrep);
6465 nd->nd_mrep = m = nd->nd_md;
6466 } else
6467 m = m->m_next;
6468 }
6469 if (m == NULL) {
6470 printf("nfsrv_readdsdorpc: busted mbuf list\n");
6471 error = ENOENT;
6472 goto nfsmout;
6473 }
6474
6475 /*
6476 * Now, adjust first mbuf so that any XDR before the
6477 * read data is skipped over.
6478 */
6479 trimlen = nd->nd_dpos - mtod(m, char *);
6480 if (trimlen > 0) {
6481 m->m_len -= trimlen;
6482 NFSM_DATAP(m, trimlen);
6483 }
6484
6485 /*
6486 * Truncate the mbuf chain at retlen bytes of data,
6487 * plus XDR padding that brings the length up to a
6488 * multiple of 4.
6489 */
6490 tlen = NFSM_RNDUP(retlen);
6491 do {
6492 if (m->m_len >= tlen) {
6493 m->m_len = tlen;
6494 tlen = 0;
6495 m2 = m->m_next;
6496 m->m_next = NULL;
6497 m_freem(m2);
6498 break;
6499 }
6500 tlen -= m->m_len;
6501 m = m->m_next;
6502 } while (m != NULL);
6503 if (tlen > 0) {
6504 printf("nfsrv_readdsdorpc: busted mbuf list\n");
6505 error = ENOENT;
6506 goto nfsmout;
6507 }
6508 *mpp = nd->nd_mrep;
6509 *mpendp = m;
6510 nd->nd_mrep = NULL;
6511 }
6512 } else
6513 error = nd->nd_repstat;
6514 nfsmout:
6515 /* If nd->nd_mrep is already NULL, this is a no-op. */
6516 m_freem(nd->nd_mrep);
6517 free(nd, M_TEMP);
6518 NFSD_DEBUG(4, "nfsrv_readdsdorpc error=%d\n", error);
6519 return (error);
6520 }
6521
6522 /*
6523 * Start up the thread that will execute nfsrv_readdsdorpc().
6524 */
6525 static void
start_readdsdorpc(void * arg,int pending)6526 start_readdsdorpc(void *arg, int pending)
6527 {
6528 struct nfsrvreaddsdorpc *drpc;
6529
6530 drpc = (struct nfsrvreaddsdorpc *)arg;
6531 drpc->err = nfsrv_readdsdorpc(&drpc->fh, drpc->off, drpc->len,
6532 drpc->cred, drpc->p, drpc->nmp, &drpc->m, &drpc->mend);
6533 drpc->done = 1;
6534 NFSD_DEBUG(4, "start_readdsdorpc: err=%d\n", drpc->err);
6535 }
6536
6537 static int
nfsrv_readdsrpc(fhandle_t * fhp,off_t off,int len,struct ucred * cred,NFSPROC_T * p,struct nfsmount ** nmp,int mirrorcnt,int stripecnt,uint64_t stripesiz,struct mbuf ** mpp,struct mbuf ** mendp,int * failposp)6538 nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
6539 NFSPROC_T *p, struct nfsmount **nmp, int mirrorcnt, int stripecnt,
6540 uint64_t stripesiz, struct mbuf **mpp, struct mbuf **mendp, int *failposp)
6541 {
6542 struct nfsrvreaddsdorpc *drpc, *tdrpc;
6543 struct mbuf *m, *m2, *mend;
6544 fhandle_t *tfhp;
6545 struct nfsmount **tnmp;
6546 uint64_t scnt;
6547 int error, i, j, k, l, n, ret, timo;
6548
6549 NFSD_DEBUG(4, "in nfsrv_readdsrpc\n");
6550 drpc = NULL;
6551 m2 = *mpp = NULL;
6552 *failposp = -1;
6553 if (stripecnt > 1) {
6554 /* Set j to the upper bound of the # of stripes to read. */
6555 j = (len + len - 1) / stripesiz + 1;
6556 tdrpc = drpc = malloc(sizeof(*drpc) * j, M_TEMP, M_WAITOK);
6557 }
6558
6559 /* For each stripe except last one, read the stripe. */
6560 for (j = 0; ; j++, tdrpc++) {
6561 if (stripecnt > 1) {
6562 k = (off / stripesiz) % (uint64_t)stripecnt;
6563 scnt = stripesiz - (off % stripesiz);
6564 l = ((uint64_t)len < scnt) ? len : (int)scnt;
6565 } else {
6566 k = 0;
6567 l = len;
6568 }
6569 if (j == 0)
6570 n = k; /* Save first stripe# for later. */
6571 tfhp = fhp + k;
6572 tnmp = nmp + k;
6573 NFSD_DEBUG(4, "nfsrv_readdsrpc: mcopy k=%d l=%d\n", k, l);
6574
6575 /* Break out of the loop for the last stripe. */
6576 if (l == len)
6577 break;
6578
6579 /*
6580 * Do the read RPC for every DS, using a separate kernel
6581 * process for every DS, except the last one.
6582 */
6583 error = 0;
6584 tdrpc->done = 0;
6585 NFSBCOPY(tfhp, &tdrpc->fh, sizeof(*tfhp));
6586 tdrpc->off = off;
6587 tdrpc->len = l;
6588 tdrpc->nmp = *tnmp;
6589 tdrpc->cred = cred;
6590 tdrpc->p = p;
6591 tdrpc->inprog = 0;
6592 tdrpc->err = 0;
6593 ret = EIO;
6594 if (nfs_pnfsiothreads != 0) {
6595 ret = nfs_pnfsio(start_readdsdorpc, tdrpc);
6596 NFSD_DEBUG(4, "nfsrv_readdsrpc: "
6597 "nfs_pnfsio=%d\n", ret);
6598 }
6599 if (ret != 0) {
6600 ret = nfsrv_readdsdorpc(tfhp, off, l, cred, p,
6601 *tnmp, &tdrpc->m, &tdrpc->mend);
6602 if (nfsds_failerr(ret) && *failposp == -1)
6603 *failposp = k;
6604 else if (error == 0 && ret != 0)
6605 tdrpc->err = ret;
6606 tdrpc->inprog = 0;
6607 tdrpc->done = 1;
6608 }
6609 off += l;
6610 len -= l;
6611 }
6612 ret = nfsrv_readdsdorpc(tfhp, off, l, cred, p, *tnmp, &m, &mend);
6613 if (nfsds_failerr(ret) && *failposp == -1)
6614 *failposp = k;
6615 if (error == 0 && ret != 0)
6616 error = ret;
6617 NFSD_DEBUG(4, "nfsrv_readdsrpc: aft stripes=%d\n", error);
6618 tdrpc = drpc;
6619 timo = hz / 50; /* Wait for 20msec. */
6620 if (timo < 1)
6621 timo = 1;
6622 k = n; /* Keep track of stripe#. */
6623 for (i = 0; i < j - 1; i++, tdrpc++) {
6624 /* Wait for RPCs on separate threads to complete. */
6625 while (tdrpc->inprog != 0 && tdrpc->done == 0)
6626 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo);
6627 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
6628 *failposp = k;
6629 else if (error == 0 && tdrpc->err != 0)
6630 error = tdrpc->err;
6631 else {
6632 if (m2 != NULL)
6633 m2->m_next = tdrpc->m;
6634 else
6635 *mpp = tdrpc->m;
6636 m2 = tdrpc->mend;
6637 }
6638 k = (k + 1) % stripecnt;
6639 }
6640 if (m2 != NULL)
6641 m2->m_next = m;
6642 else
6643 *mpp = m;
6644 *mendp = mend;
6645
6646 free(drpc, M_TEMP);
6647 return (error);
6648 }
6649
6650 /*
6651 * Do a write RPC on a DS data file, using this structure for the arguments,
6652 * so that this function can be executed by a separate kernel process.
6653 */
6654 struct nfsrvwritedsdorpc {
6655 int done;
6656 int inprog;
6657 struct task tsk;
6658 fhandle_t fh;
6659 off_t off;
6660 int len;
6661 struct nfsmount *nmp;
6662 struct ucred *cred;
6663 NFSPROC_T *p;
6664 struct mbuf *m;
6665 int err;
6666 };
6667
6668 static int
nfsrv_writedsdorpc(struct nfsmount * nmp,fhandle_t * fhp,off_t off,int len,struct nfsvattr * nap,struct mbuf * m,struct ucred * cred,NFSPROC_T * p)6669 nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
6670 struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p)
6671 {
6672 uint32_t *tl;
6673 struct nfsrv_descript *nd;
6674 nfsattrbit_t attrbits;
6675 nfsv4stateid_t st;
6676 int commit, error, retlen;
6677
6678 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
6679 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp,
6680 sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
6681
6682 /*
6683 * Use a stateid where other is an alternating 01010 pattern and
6684 * seqid is 0xffffffff. This value is not defined as special by
6685 * the RFC and is used by the FreeBSD NFS server to indicate an
6686 * MDS->DS proxy operation.
6687 */
6688 st.other[0] = 0x55555555;
6689 st.other[1] = 0x55555555;
6690 st.other[2] = 0x55555555;
6691 st.seqid = 0xffffffff;
6692 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
6693 NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6694 txdr_hyper(off, tl);
6695 tl += 2;
6696 /*
6697 * Do all writes FileSync, since the server doesn't hold onto dirty
6698 * buffers. Since clients should be accessing the DS servers directly
6699 * using the pNFS layouts, this just needs to work correctly as a
6700 * fallback.
6701 */
6702 *tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
6703 *tl = txdr_unsigned(len);
6704 NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len);
6705
6706 /* Put data in mbuf chain. */
6707 nd->nd_mb->m_next = m;
6708
6709 /* Set nd_mb and nd_bpos to end of data. */
6710 while (m->m_next != NULL)
6711 m = m->m_next;
6712 nd->nd_mb = m;
6713 nfsm_set(nd, m->m_len);
6714 NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len);
6715
6716 /* Do a Getattr for the attributes that change upon writing. */
6717 NFSZERO_ATTRBIT(&attrbits);
6718 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
6719 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
6720 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
6721 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
6722 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
6723 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
6724 *tl = txdr_unsigned(NFSV4OP_GETATTR);
6725 (void) nfsrv_putattrbit(nd, &attrbits);
6726 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
6727 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6728 if (error != 0) {
6729 free(nd, M_TEMP);
6730 return (error);
6731 }
6732 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat);
6733 /* Get rid of weak cache consistency data for now. */
6734 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
6735 (ND_NFSV4 | ND_V4WCCATTR)) {
6736 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
6737 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL,
6738 NULL, NULL);
6739 NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
6740 if (error != 0)
6741 goto nfsmout;
6742 /*
6743 * Get rid of Op# and status for next op.
6744 */
6745 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6746 if (*++tl != 0)
6747 nd->nd_flag |= ND_NOMOREDATA;
6748 }
6749 if (nd->nd_repstat == 0) {
6750 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6751 retlen = fxdr_unsigned(int, *tl++);
6752 commit = fxdr_unsigned(int, *tl);
6753 if (commit != NFSWRITE_FILESYNC)
6754 error = NFSERR_IO;
6755 NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n",
6756 retlen, commit, error);
6757 } else
6758 error = nd->nd_repstat;
6759 /* We have no use for the Write Verifier since we use FileSync. */
6760
6761 /*
6762 * Get the Change, Size, Access Time and Modify Time attributes and set
6763 * on the Metadata file, so its attributes will be what the file's
6764 * would be if it had been written.
6765 */
6766 if (error == 0) {
6767 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6768 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
6769 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL,
6770 NULL, NULL);
6771 }
6772 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
6773 nfsmout:
6774 m_freem(nd->nd_mrep);
6775 free(nd, M_TEMP);
6776 NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error);
6777 return (error);
6778 }
6779
6780 /*
6781 * Start up the thread that will execute nfsrv_writedsdorpc().
6782 */
6783 static void
start_writedsdorpc(void * arg,int pending)6784 start_writedsdorpc(void *arg, int pending)
6785 {
6786 struct nfsrvwritedsdorpc *drpc;
6787
6788 drpc = (struct nfsrvwritedsdorpc *)arg;
6789 drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
6790 drpc->len, NULL, drpc->m, drpc->cred, drpc->p);
6791 drpc->done = 1;
6792 NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err);
6793 }
6794
6795 static int
nfsrv_writedsrpc(fhandle_t * fhp,off_t off,int len,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmp,int mirrorcnt,int stripecnt,uint64_t stripesiz,struct mbuf ** mpp,char * cp,int * failposp)6796 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
6797 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmp, int mirrorcnt,
6798 int stripecnt, uint64_t stripesiz, struct mbuf **mpp, char *cp,
6799 int *failposp)
6800 {
6801 struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL; /* Shut up gcc. */
6802 struct nfsvattr na;
6803 struct mbuf *m;
6804 fhandle_t *tfhp;
6805 struct nfsmount **tnmp;
6806 uint64_t scnt;
6807 int error, i, j, k, l, n, o, offs, ret, timo;
6808
6809 NFSD_DEBUG(4, "in nfsrv_writedsrpc\n");
6810 KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain"));
6811 drpc = NULL;
6812 *failposp = -1;
6813 if (mirrorcnt > 1 || stripecnt > 1) {
6814 /* Set j to the upper bound of the # of DSs to read. */
6815 if (stripecnt > 1)
6816 j = (len + len - 1) / stripesiz + 1;
6817 else
6818 j = 1;
6819 j *= mirrorcnt;
6820 tdrpc = drpc = malloc(sizeof(*drpc) * j, M_TEMP, M_WAITOK);
6821 }
6822
6823 /* Calculate offset in mbuf chain that data starts. */
6824 offs = cp - mtod(*mpp, char *);
6825 /* For each stripe, write to all the mirrors. */
6826 for (j = 0; ; j++) {
6827 if (stripecnt > 1) {
6828 k = (off / stripesiz) % (uint64_t)stripecnt;
6829 scnt = stripesiz - (off % stripesiz);
6830 l = ((uint64_t)len < scnt) ? len : (int)scnt;
6831 } else {
6832 k = 0;
6833 l = len;
6834 }
6835 if (j == 0)
6836 o = k; /* Save first stripe# for later. */
6837 tfhp = fhp + k;
6838 tnmp = nmp + k;
6839 NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d k=%d "
6840 "l=%d\n", offs, k, l);
6841
6842 /*
6843 * Do the write RPC for every DS, using a separate kernel
6844 * process for every DS, except the last one.
6845 */
6846 error = 0;
6847 n = mirrorcnt;
6848 if (l == len)
6849 n--;
6850 for (i = 0; i < n; i++, tdrpc++) {
6851 tdrpc->done = 0;
6852 NFSBCOPY(tfhp, &tdrpc->fh, sizeof(*tfhp));
6853 tdrpc->off = off;
6854 tdrpc->len = l;
6855 tdrpc->nmp = *tnmp;
6856 tdrpc->cred = cred;
6857 tdrpc->p = p;
6858 tdrpc->inprog = 0;
6859 tdrpc->err = 0;
6860 tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(l),
6861 M_WAITOK);
6862 ret = EIO;
6863 if (nfs_pnfsiothreads != 0) {
6864 ret = nfs_pnfsio(start_writedsdorpc, tdrpc);
6865 NFSD_DEBUG(4, "nfsrv_writedsrpc: "
6866 "nfs_pnfsio=%d\n", ret);
6867 }
6868 if (ret != 0) {
6869 ret = nfsrv_writedsdorpc(*tnmp, tfhp, off, l,
6870 NULL, tdrpc->m, cred, p);
6871 if (nfsds_failerr(ret) && *failposp == -1)
6872 *failposp = k;
6873 else if (error == 0 && ret != 0)
6874 tdrpc->err = ret;
6875 tdrpc->inprog = 0;
6876 tdrpc->done = 1;
6877 }
6878 tnmp += stripecnt;
6879 tfhp += stripecnt;
6880 }
6881 if (l == len)
6882 break;
6883 offs += l;
6884 off += l;
6885 len -= l;
6886 }
6887 m = m_copym(*mpp, offs, NFSM_RNDUP(l), M_WAITOK);
6888 ret = nfsrv_writedsdorpc(*tnmp, tfhp, off, l, &na, m, cred, p);
6889 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
6890 *failposp = k;
6891 else if (error == 0 && ret != 0)
6892 error = ret;
6893 if (error == 0)
6894 error = nfsrv_setextattr(vp, &na, p);
6895 NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error);
6896 tdrpc = drpc;
6897 timo = hz / 50; /* Wait for 20msec. */
6898 if (timo < 1)
6899 timo = 1;
6900 k = o;
6901 for (i = 0; i < j * mirrorcnt - 1; i++, tdrpc++) {
6902 /* Wait for RPCs on separate threads to complete. */
6903 while (tdrpc->inprog != 0 && tdrpc->done == 0)
6904 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo);
6905 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
6906 *failposp = k;
6907 else if (error == 0 && tdrpc->err != 0)
6908 error = tdrpc->err;
6909 k = (k + 1) % stripecnt;
6910 }
6911 free(drpc, M_TEMP);
6912 return (error);
6913 }
6914
6915 #ifdef notnow
6916 /*
6917 * Do a allocate RPC on a DS data file, using this structure for the arguments,
6918 * so that this function can be executed by a separate kernel process.
6919 */
6920 struct nfsrvallocatedsdorpc {
6921 int done;
6922 int inprog;
6923 struct task tsk;
6924 fhandle_t fh;
6925 off_t off;
6926 off_t len;
6927 struct nfsmount *nmp;
6928 struct ucred *cred;
6929 NFSPROC_T *p;
6930 int err;
6931 };
6932
6933 static int
nfsrv_allocatedsdorpc(struct nfsmount * nmp,fhandle_t * fhp,off_t off,off_t len,struct nfsvattr * nap,struct ucred * cred,NFSPROC_T * p)6934 nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
6935 off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p)
6936 {
6937 uint32_t *tl;
6938 struct nfsrv_descript *nd;
6939 nfsattrbit_t attrbits;
6940 nfsv4stateid_t st;
6941 int error;
6942
6943 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
6944 nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp,
6945 sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
6946
6947 /*
6948 * Use a stateid where other is an alternating 01010 pattern and
6949 * seqid is 0xffffffff. This value is not defined as special by
6950 * the RFC and is used by the FreeBSD NFS server to indicate an
6951 * MDS->DS proxy operation.
6952 */
6953 st.other[0] = 0x55555555;
6954 st.other[1] = 0x55555555;
6955 st.other[2] = 0x55555555;
6956 st.seqid = 0xffffffff;
6957 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
6958 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
6959 txdr_hyper(off, tl); tl += 2;
6960 txdr_hyper(len, tl); tl += 2;
6961 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len);
6962
6963 *tl = txdr_unsigned(NFSV4OP_GETATTR);
6964 NFSGETATTR_ATTRBIT(&attrbits);
6965 nfsrv_putattrbit(nd, &attrbits);
6966 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
6967 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6968 if (error != 0) {
6969 free(nd, M_TEMP);
6970 return (error);
6971 }
6972 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n",
6973 nd->nd_repstat);
6974 if (nd->nd_repstat == 0) {
6975 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6976 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
6977 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL,
6978 NULL, NULL);
6979 } else
6980 error = nd->nd_repstat;
6981 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error);
6982 nfsmout:
6983 m_freem(nd->nd_mrep);
6984 free(nd, M_TEMP);
6985 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error);
6986 return (error);
6987 }
6988
6989 /*
6990 * Start up the thread that will execute nfsrv_allocatedsdorpc().
6991 */
6992 static void
start_allocatedsdorpc(void * arg,int pending)6993 start_allocatedsdorpc(void *arg, int pending)
6994 {
6995 struct nfsrvallocatedsdorpc *drpc;
6996
6997 drpc = (struct nfsrvallocatedsdorpc *)arg;
6998 drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
6999 drpc->len, NULL, drpc->cred, drpc->p);
7000 drpc->done = 1;
7001 NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err);
7002 }
7003
7004 static int
nfsrv_allocatedsrpc(fhandle_t * fhp,off_t off,off_t len,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmpp,int mirrorcnt,int * failposp)7005 nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
7006 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
7007 int *failposp)
7008 {
7009 struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL;
7010 struct nfsvattr na;
7011 int error, i, ret, timo;
7012
7013 NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n");
7014 drpc = NULL;
7015 if (mirrorcnt > 1)
7016 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
7017 M_WAITOK);
7018
7019 /*
7020 * Do the allocate RPC for every DS, using a separate kernel process
7021 * for every DS except the last one.
7022 */
7023 error = 0;
7024 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
7025 tdrpc->done = 0;
7026 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
7027 tdrpc->off = off;
7028 tdrpc->len = len;
7029 tdrpc->nmp = *nmpp;
7030 tdrpc->cred = cred;
7031 tdrpc->p = p;
7032 tdrpc->inprog = 0;
7033 tdrpc->err = 0;
7034 ret = EIO;
7035 if (nfs_pnfsiothreads != 0) {
7036 ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc);
7037 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n",
7038 ret);
7039 }
7040 if (ret != 0) {
7041 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL,
7042 cred, p);
7043 if (nfsds_failerr(ret) && *failposp == -1)
7044 *failposp = i;
7045 else if (error == 0 && ret != 0)
7046 error = ret;
7047 }
7048 nmpp++;
7049 fhp++;
7050 }
7051 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p);
7052 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
7053 *failposp = mirrorcnt - 1;
7054 else if (error == 0 && ret != 0)
7055 error = ret;
7056 if (error == 0)
7057 error = nfsrv_setextattr(vp, &na, p);
7058 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error);
7059 tdrpc = drpc;
7060 timo = hz / 50; /* Wait for 20msec. */
7061 if (timo < 1)
7062 timo = 1;
7063 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
7064 /* Wait for RPCs on separate threads to complete. */
7065 while (tdrpc->inprog != 0 && tdrpc->done == 0)
7066 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo);
7067 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
7068 *failposp = i;
7069 else if (error == 0 && tdrpc->err != 0)
7070 error = tdrpc->err;
7071 }
7072 free(drpc, M_TEMP);
7073 return (error);
7074 }
7075
7076 /*
7077 * Do a deallocate RPC on a DS data file, using this structure for the
7078 * arguments, so that this function can be executed by a separate kernel
7079 * process.
7080 */
7081 struct nfsrvdeallocatedsdorpc {
7082 int done;
7083 int inprog;
7084 struct task tsk;
7085 fhandle_t fh;
7086 off_t off;
7087 off_t len;
7088 struct nfsmount *nmp;
7089 struct ucred *cred;
7090 NFSPROC_T *p;
7091 int err;
7092 };
7093
7094 static int
nfsrv_deallocatedsdorpc(struct nfsmount * nmp,fhandle_t * fhp,off_t off,off_t len,struct nfsvattr * nap,struct ucred * cred,NFSPROC_T * p)7095 nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
7096 off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p)
7097 {
7098 uint32_t *tl;
7099 struct nfsrv_descript *nd;
7100 nfsattrbit_t attrbits;
7101 nfsv4stateid_t st;
7102 int error;
7103
7104 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
7105 nfscl_reqstart(nd, NFSPROC_DEALLOCATE, nmp, (u_int8_t *)fhp,
7106 sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
7107
7108 /*
7109 * Use a stateid where other is an alternating 01010 pattern and
7110 * seqid is 0xffffffff. This value is not defined as special by
7111 * the RFC and is used by the FreeBSD NFS server to indicate an
7112 * MDS->DS proxy operation.
7113 */
7114 st.other[0] = 0x55555555;
7115 st.other[1] = 0x55555555;
7116 st.other[2] = 0x55555555;
7117 st.seqid = 0xffffffff;
7118 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
7119 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
7120 txdr_hyper(off, tl); tl += 2;
7121 txdr_hyper(len, tl); tl += 2;
7122 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: len=%jd\n", (intmax_t)len);
7123
7124 /* Do a Getattr for the attributes that change upon writing. */
7125 NFSZERO_ATTRBIT(&attrbits);
7126 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
7127 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
7128 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
7129 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
7130 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
7131 *tl = txdr_unsigned(NFSV4OP_GETATTR);
7132 nfsrv_putattrbit(nd, &attrbits);
7133 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
7134 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7135 if (error != 0) {
7136 free(nd, M_TEMP);
7137 return (error);
7138 }
7139 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft deallocaterpc=%d\n",
7140 nd->nd_repstat);
7141 /* Get rid of weak cache consistency data for now. */
7142 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
7143 (ND_NFSV4 | ND_V4WCCATTR)) {
7144 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
7145 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL,
7146 NULL, NULL);
7147 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: wcc attr=%d\n", error);
7148 if (error != 0)
7149 goto nfsmout;
7150 /*
7151 * Get rid of Op# and status for next op.
7152 */
7153 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7154 if (*++tl != 0)
7155 nd->nd_flag |= ND_NOMOREDATA;
7156 }
7157 if (nd->nd_repstat == 0) {
7158 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7159 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
7160 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL,
7161 NULL, NULL);
7162 } else
7163 error = nd->nd_repstat;
7164 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error);
7165 nfsmout:
7166 m_freem(nd->nd_mrep);
7167 free(nd, M_TEMP);
7168 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc error=%d\n", error);
7169 return (error);
7170 }
7171
7172 /*
7173 * Start up the thread that will execute nfsrv_deallocatedsdorpc().
7174 */
7175 static void
start_deallocatedsdorpc(void * arg,int pending)7176 start_deallocatedsdorpc(void *arg, int pending)
7177 {
7178 struct nfsrvdeallocatedsdorpc *drpc;
7179
7180 drpc = (struct nfsrvdeallocatedsdorpc *)arg;
7181 drpc->err = nfsrv_deallocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
7182 drpc->len, NULL, drpc->cred, drpc->p);
7183 drpc->done = 1;
7184 NFSD_DEBUG(4, "start_deallocatedsdorpc: err=%d\n", drpc->err);
7185 }
7186
7187 static int
nfsrv_deallocatedsrpc(fhandle_t * fhp,off_t off,off_t len,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmpp,int mirrorcnt,int * failposp)7188 nfsrv_deallocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
7189 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
7190 int *failposp)
7191 {
7192 struct nfsrvdeallocatedsdorpc *drpc, *tdrpc = NULL;
7193 struct nfsvattr na;
7194 int error, i, ret, timo;
7195
7196 NFSD_DEBUG(4, "in nfsrv_deallocatedsrpc\n");
7197 drpc = NULL;
7198 if (mirrorcnt > 1)
7199 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
7200 M_WAITOK);
7201
7202 /*
7203 * Do the deallocate RPC for every DS, using a separate kernel process
7204 * for every DS except the last one.
7205 */
7206 error = 0;
7207 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
7208 tdrpc->done = 0;
7209 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
7210 tdrpc->off = off;
7211 tdrpc->len = len;
7212 tdrpc->nmp = *nmpp;
7213 tdrpc->cred = cred;
7214 tdrpc->p = p;
7215 tdrpc->inprog = 0;
7216 tdrpc->err = 0;
7217 ret = EIO;
7218 if (nfs_pnfsiothreads != 0) {
7219 ret = nfs_pnfsio(start_deallocatedsdorpc, tdrpc);
7220 NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: nfs_pnfsio=%d\n",
7221 ret);
7222 }
7223 if (ret != 0) {
7224 ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len,
7225 NULL, cred, p);
7226 if (nfsds_failerr(ret) && *failposp == -1)
7227 *failposp = i;
7228 else if (error == 0 && ret != 0)
7229 error = ret;
7230 }
7231 nmpp++;
7232 fhp++;
7233 }
7234 ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p);
7235 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
7236 *failposp = mirrorcnt - 1;
7237 else if (error == 0 && ret != 0)
7238 error = ret;
7239 if (error == 0)
7240 error = nfsrv_setextattr(vp, &na, p);
7241 NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: aft setextat=%d\n", error);
7242 tdrpc = drpc;
7243 timo = hz / 50; /* Wait for 20msec. */
7244 if (timo < 1)
7245 timo = 1;
7246 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
7247 /* Wait for RPCs on separate threads to complete. */
7248 while (tdrpc->inprog != 0 && tdrpc->done == 0)
7249 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo);
7250 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
7251 *failposp = i;
7252 else if (error == 0 && tdrpc->err != 0)
7253 error = tdrpc->err;
7254 }
7255 free(drpc, M_TEMP);
7256 return (error);
7257 }
7258 #endif
7259
7260 static int
nfsrv_setattrdsdorpc(fhandle_t * fhp,struct vnode * vp,struct ucred * cred,NFSPROC_T * p,struct nfsmount * nmp,struct nfsvattr * nap,struct nfsvattr * dsnap)7261 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct vnode *vp, struct ucred *cred,
7262 NFSPROC_T *p, struct nfsmount *nmp, struct nfsvattr *nap,
7263 struct nfsvattr *dsnap)
7264 {
7265 uint32_t *tl;
7266 struct nfsrv_descript *nd;
7267 nfsv4stateid_t st;
7268 nfsattrbit_t attrbits;
7269 int error;
7270
7271 NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n");
7272 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
7273 /*
7274 * Use a stateid where other is an alternating 01010 pattern and
7275 * seqid is 0xffffffff. This value is not defined as special by
7276 * the RFC and is used by the FreeBSD NFS server to indicate an
7277 * MDS->DS proxy operation.
7278 */
7279 st.other[0] = 0x55555555;
7280 st.other[1] = 0x55555555;
7281 st.other[2] = 0x55555555;
7282 st.seqid = 0xffffffff;
7283 nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (uint8_t *)fhp, sizeof(*fhp),
7284 NULL, NULL, 0, 0, cred);
7285 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
7286 nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0);
7287
7288 /* Do a Getattr for the attributes that change due to writing. */
7289 NFSZERO_ATTRBIT(&attrbits);
7290 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
7291 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
7292 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
7293 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
7294 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
7295 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7296 *tl = txdr_unsigned(NFSV4OP_GETATTR);
7297 (void) nfsrv_putattrbit(nd, &attrbits);
7298 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
7299 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7300 if (error != 0) {
7301 free(nd, M_TEMP);
7302 return (error);
7303 }
7304 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n",
7305 nd->nd_repstat);
7306 /* Get rid of weak cache consistency data for now. */
7307 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
7308 (ND_NFSV4 | ND_V4WCCATTR)) {
7309 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
7310 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL,
7311 NULL, NULL, NULL);
7312 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error);
7313 if (error != 0)
7314 goto nfsmout;
7315 /*
7316 * Get rid of Op# and status for next op.
7317 */
7318 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7319 if (*++tl != 0)
7320 nd->nd_flag |= ND_NOMOREDATA;
7321 }
7322 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7323 if (error != 0)
7324 goto nfsmout;
7325 if (nd->nd_repstat != 0)
7326 error = nd->nd_repstat;
7327 /*
7328 * Get the Change, Size, Access Time and Modify Time attributes and set
7329 * on the Metadata file, so its attributes will be what the file's
7330 * would be if it had been written.
7331 */
7332 if (error == 0) {
7333 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7334 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
7335 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL,
7336 NULL, NULL, NULL);
7337 }
7338 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error);
7339 nfsmout:
7340 m_freem(nd->nd_mrep);
7341 free(nd, M_TEMP);
7342 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error);
7343 return (error);
7344 }
7345
7346 struct nfsrvsetattrdsdorpc {
7347 int done;
7348 int inprog;
7349 struct task tsk;
7350 fhandle_t fh;
7351 struct nfsmount *nmp;
7352 struct vnode *vp;
7353 struct ucred *cred;
7354 NFSPROC_T *p;
7355 struct nfsvattr na;
7356 struct nfsvattr dsna;
7357 int err;
7358 };
7359
7360 /*
7361 * Start up the thread that will execute nfsrv_setattrdsdorpc().
7362 */
7363 static void
start_setattrdsdorpc(void * arg,int pending)7364 start_setattrdsdorpc(void *arg, int pending)
7365 {
7366 struct nfsrvsetattrdsdorpc *drpc;
7367
7368 drpc = (struct nfsrvsetattrdsdorpc *)arg;
7369 drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->vp, drpc->cred,
7370 drpc->p, drpc->nmp, &drpc->na, &drpc->dsna);
7371 drpc->done = 1;
7372 }
7373
7374 static int
nfsrv_setattrdsrpc(fhandle_t * fhp,struct vnode * vp,struct ucred * cred,NFSPROC_T * p,struct nfsmount ** nmp,int mirrorcnt,int stripecnt,struct nfsvattr * nap,int * failposp)7375 nfsrv_setattrdsrpc(fhandle_t *fhp, struct vnode *vp, struct ucred *cred,
7376 NFSPROC_T *p, struct nfsmount **nmp, int mirrorcnt, int stripecnt,
7377 struct nfsvattr *nap, int *failposp)
7378 {
7379 struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL; /* Shut up gcc. */
7380 fhandle_t *tfhp;
7381 struct nfsmount **tnmp;
7382 struct nfsvattr na;
7383 int error, i, j, ret, timo;
7384
7385 NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n");
7386 drpc = NULL;
7387 *failposp = -1;
7388 error = 0;
7389 if (mirrorcnt > 1 || stripecnt > 1)
7390 tdrpc = drpc = malloc(sizeof(*drpc) * stripecnt * mirrorcnt,
7391 M_TEMP, M_WAITOK);
7392
7393 /* For each stripe, write to all the mirrors. */
7394 tfhp = fhp;
7395 tnmp = nmp;
7396 for (i = 0; i < stripecnt * mirrorcnt - 1; i++, tdrpc++, tfhp++,
7397 tnmp++) {
7398 j = i / stripecnt;
7399 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: stripe=%d mirror=%d\n",
7400 i, j);
7401 tdrpc->done = 0;
7402 NFSBCOPY(nap, &tdrpc->na, sizeof(*nap));
7403 NFSBCOPY(tfhp, &tdrpc->fh, sizeof(*tfhp));
7404 tdrpc->vp = vp;
7405 tdrpc->nmp = *tnmp;
7406 tdrpc->cred = cred;
7407 tdrpc->p = p;
7408 tdrpc->inprog = 0;
7409 tdrpc->err = 0;
7410 ret = EIO;
7411 if (nfs_pnfsiothreads != 0) {
7412 ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc);
7413 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: "
7414 "nfs_pnfsio=%d\n", ret);
7415 }
7416 if (ret != 0) {
7417 ret = nfsrv_setattrdsdorpc(tfhp, vp, cred, p,
7418 *tnmp, &tdrpc->na, &tdrpc->dsna);
7419 if (nfsds_failerr(ret) && *failposp == -1)
7420 *failposp = i;
7421 else if (error == 0 && ret != 0)
7422 tdrpc->err = ret;
7423 tdrpc->inprog = 0;
7424 tdrpc->done = 1;
7425 }
7426 }
7427 ret = nfsrv_setattrdsdorpc(tfhp, vp, cred, p, *tnmp, nap, &na);
7428 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
7429 *failposp = i;
7430 else if (error == 0 && ret != 0)
7431 error = ret;
7432 tdrpc = drpc;
7433 timo = hz / 50; /* Wait for 20msec. */
7434 if (timo < 1)
7435 timo = 1;
7436 for (i = 0; i < stripecnt * mirrorcnt - 1; i++, tdrpc++) {
7437 /* Wait for RPCs on separate threads to complete. */
7438 while (tdrpc->inprog != 0 && tdrpc->done == 0)
7439 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo);
7440 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
7441 *failposp = i;
7442 else if (error == 0 && tdrpc->err != 0)
7443 error = tdrpc->err;
7444 }
7445
7446 /* Find the reply attribute with the largest size and set that one. */
7447 if (error == 0 && (mirrorcnt > 1 || stripecnt > 1)) {
7448 tdrpc = drpc;
7449 for (i = 0; i < stripecnt * mirrorcnt - 1; i++, tdrpc++) {
7450 if (tdrpc->dsna.na_size > na.na_size)
7451 NFSBCOPY(&tdrpc->dsna, &na, sizeof(*nap));
7452 }
7453 }
7454 if (error == 0)
7455 error = nfsrv_setextattr(vp, &na, p);
7456 free(drpc, M_TEMP);
7457 return (error);
7458 }
7459
7460 #ifdef notnow
7461 /*
7462 * Do a Setattr of an NFSv4 ACL on the DS file.
7463 */
7464 static int
nfsrv_setacldsdorpc(fhandle_t * fhp,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount * nmp,struct acl * aclp)7465 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
7466 struct vnode *vp, struct nfsmount *nmp, struct acl *aclp)
7467 {
7468 struct nfsrv_descript *nd;
7469 nfsv4stateid_t st;
7470 nfsattrbit_t attrbits;
7471 int error;
7472
7473 NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n");
7474 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
7475 /*
7476 * Use a stateid where other is an alternating 01010 pattern and
7477 * seqid is 0xffffffff. This value is not defined as special by
7478 * the RFC and is used by the FreeBSD NFS server to indicate an
7479 * MDS->DS proxy operation.
7480 */
7481 st.other[0] = 0x55555555;
7482 st.other[1] = 0x55555555;
7483 st.other[2] = 0x55555555;
7484 st.seqid = 0xffffffff;
7485 nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp),
7486 NULL, NULL, 0, 0, cred);
7487 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
7488 NFSZERO_ATTRBIT(&attrbits);
7489 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
7490 /*
7491 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(),
7492 * so passing in the metadata "vp" will be ok, since it is of
7493 * the same type (VREG).
7494 */
7495 nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL,
7496 NULL, 0, 0, 0, 0, 0, NULL, false, false, false, 0, NULL, false);
7497 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
7498 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7499 if (error != 0) {
7500 free(nd, M_TEMP);
7501 return (error);
7502 }
7503 NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n",
7504 nd->nd_repstat);
7505 error = nd->nd_repstat;
7506 m_freem(nd->nd_mrep);
7507 free(nd, M_TEMP);
7508 return (error);
7509 }
7510
7511 struct nfsrvsetacldsdorpc {
7512 int done;
7513 int inprog;
7514 struct task tsk;
7515 fhandle_t fh;
7516 struct nfsmount *nmp;
7517 struct vnode *vp;
7518 struct ucred *cred;
7519 NFSPROC_T *p;
7520 struct acl *aclp;
7521 int err;
7522 };
7523
7524 /*
7525 * Start up the thread that will execute nfsrv_setacldsdorpc().
7526 */
7527 static void
start_setacldsdorpc(void * arg,int pending)7528 start_setacldsdorpc(void *arg, int pending)
7529 {
7530 struct nfsrvsetacldsdorpc *drpc;
7531
7532 drpc = (struct nfsrvsetacldsdorpc *)arg;
7533 drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p,
7534 drpc->vp, drpc->nmp, drpc->aclp);
7535 drpc->done = 1;
7536 }
7537
7538 static int
nfsrv_setacldsrpc(fhandle_t * fhp,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmpp,int mirrorcnt,struct acl * aclp,int * failposp)7539 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
7540 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp,
7541 int *failposp)
7542 {
7543 struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL;
7544 int error, i, ret, timo;
7545
7546 NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n");
7547 drpc = NULL;
7548 if (mirrorcnt > 1)
7549 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
7550 M_WAITOK);
7551
7552 /*
7553 * Do the setattr RPC for every DS, using a separate kernel process
7554 * for every DS except the last one.
7555 */
7556 error = 0;
7557 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
7558 tdrpc->done = 0;
7559 tdrpc->inprog = 0;
7560 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
7561 tdrpc->nmp = *nmpp;
7562 tdrpc->vp = vp;
7563 tdrpc->cred = cred;
7564 tdrpc->p = p;
7565 tdrpc->aclp = aclp;
7566 tdrpc->err = 0;
7567 ret = EIO;
7568 if (nfs_pnfsiothreads != 0) {
7569 ret = nfs_pnfsio(start_setacldsdorpc, tdrpc);
7570 NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n",
7571 ret);
7572 }
7573 if (ret != 0) {
7574 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp,
7575 aclp);
7576 if (nfsds_failerr(ret) && *failposp == -1)
7577 *failposp = i;
7578 else if (error == 0 && ret != 0)
7579 error = ret;
7580 }
7581 nmpp++;
7582 fhp++;
7583 }
7584 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp);
7585 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
7586 *failposp = mirrorcnt - 1;
7587 else if (error == 0 && ret != 0)
7588 error = ret;
7589 NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error);
7590 tdrpc = drpc;
7591 timo = hz / 50; /* Wait for 20msec. */
7592 if (timo < 1)
7593 timo = 1;
7594 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
7595 /* Wait for RPCs on separate threads to complete. */
7596 while (tdrpc->inprog != 0 && tdrpc->done == 0)
7597 tsleep(&tdrpc->tsk, PVFS, "srvacds", timo);
7598 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
7599 *failposp = i;
7600 else if (error == 0 && tdrpc->err != 0)
7601 error = tdrpc->err;
7602 }
7603 free(drpc, M_TEMP);
7604 return (error);
7605 }
7606 #endif
7607
7608 struct nfsrvgetattrdsdorpc {
7609 int done;
7610 int inprog;
7611 struct task tsk;
7612 fhandle_t fh;
7613 struct vnode *vp;
7614 struct nfsvattr na;
7615 struct nfsmount *nmp;
7616 struct ucred *cred;
7617 NFSPROC_T *p;
7618 int err;
7619 };
7620
7621 /*
7622 * Start up the thread that will execute nfsrv_getattrdsdorpc().
7623 */
7624 static void
start_getattrdsdorpc(void * arg,int pending)7625 start_getattrdsdorpc(void *arg, int pending)
7626 {
7627 struct nfsrvgetattrdsdorpc *drpc;
7628
7629 drpc = (struct nfsrvgetattrdsdorpc *)arg;
7630 drpc->err = nfsrv_getattrdsdorpc(&drpc->fh, drpc->vp, drpc->cred,
7631 drpc->p, drpc->nmp, &drpc->na);
7632 drpc->done = 1;
7633 NFSD_DEBUG(4, "start_getattrdsdorpc: err=%d\n", drpc->err);
7634 }
7635
7636 /*
7637 * For a striped configuration, a getattr RPC must be done on all stripes,
7638 * since there is no way of knowing which DS currently stores the last
7639 * bytes of the file.
7640 */
7641 static int
nfsrv_getattrdsrpc(fhandle_t * fhp,struct vnode * vp,struct ucred * cred,NFSPROC_T * p,struct nfsmount ** nmp,int stripecnt,struct nfsvattr * nap,int * failposp)7642 nfsrv_getattrdsrpc(fhandle_t *fhp, struct vnode *vp, struct ucred *cred,
7643 NFSPROC_T *p, struct nfsmount **nmp, int stripecnt, struct nfsvattr *nap,
7644 int *failposp)
7645 {
7646 struct nfsrvgetattrdsdorpc *drpc, *tdrpc = NULL; /* Shut up gcc. */
7647 fhandle_t *tfhp;
7648 struct nfsmount **tnmp;
7649 int error, i, ret, timo;
7650
7651 NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n");
7652 drpc = NULL;
7653 *failposp = -1;
7654 if (stripecnt > 1)
7655 tdrpc = drpc = malloc(sizeof(*drpc) * stripecnt, M_TEMP,
7656 M_WAITOK);
7657
7658 /* For each stripe except last one, do a Getattr.. */
7659 tfhp = fhp;
7660 tnmp = nmp;
7661 for (i = 0; i < stripecnt - 1; i++, tdrpc++, tfhp++, tnmp++) {
7662 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: stripe=%d\n", i);
7663 error = 0;
7664 tdrpc->done = 0;
7665 NFSBCOPY(tfhp, &tdrpc->fh, sizeof(*tfhp));
7666 tdrpc->nmp = *tnmp;
7667 tdrpc->vp = vp;
7668 tdrpc->cred = cred;
7669 tdrpc->p = p;
7670 tdrpc->inprog = 0;
7671 tdrpc->err = 0;
7672 ret = EIO;
7673 if (nfs_pnfsiothreads != 0) {
7674 ret = nfs_pnfsio(start_getattrdsdorpc, tdrpc);
7675 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: "
7676 "nfs_pnfsio=%d\n", ret);
7677 }
7678 if (ret != 0) {
7679 ret = nfsrv_getattrdsdorpc(tfhp, vp, cred, p, *tnmp,
7680 &tdrpc->na);
7681 if (nfsds_failerr(ret) && *failposp == -1)
7682 *failposp = i;
7683 else if (error == 0 && ret != 0)
7684 tdrpc->err = ret;
7685 tdrpc->inprog = 0;
7686 tdrpc->done = 1;
7687 }
7688 }
7689 ret = nfsrv_getattrdsdorpc(tfhp, vp, cred, p, *tnmp, nap);
7690 if (nfsds_failerr(ret) && *failposp == -1)
7691 *failposp = i;
7692 if (error == 0 && ret != 0)
7693 error = ret;
7694 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft stripes=%d\n", error);
7695 timo = hz / 50; /* Wait for 20msec. */
7696 if (timo < 1)
7697 timo = 1;
7698 tdrpc = drpc;
7699 for (i = 0; i < stripecnt - 1; i++, tdrpc++) {
7700 /* Wait for RPCs on separate threads to complete. */
7701 while (tdrpc->inprog != 0 && tdrpc->done == 0)
7702 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo);
7703 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
7704 *failposp = i;
7705 else if (error == 0 && tdrpc->err != 0)
7706 error = tdrpc->err;
7707 }
7708
7709 /* Find the attribute with the largest size and return that one. */
7710 if (stripecnt > 1) {
7711 tdrpc = drpc;
7712 for (i = 0; i < stripecnt - 1; i++, tdrpc++) {
7713 if (tdrpc->na.na_size > nap->na_size)
7714 NFSBCOPY(&tdrpc->na, nap, sizeof(*nap));
7715 }
7716 }
7717 /*
7718 * We can only save the updated values in the extended
7719 * attribute if the vp is exclusively locked.
7720 */
7721 if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
7722 error = nfsrv_setextattr(vp, nap, p);
7723 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n",
7724 error);
7725 }
7726
7727 free(drpc, M_TEMP);
7728 return (error);
7729 }
7730
7731 /*
7732 * Getattr call to the DS for the attributes that change due to writing.
7733 */
7734 static int
nfsrv_getattrdsdorpc(fhandle_t * fhp,struct vnode * vp,struct ucred * cred,NFSPROC_T * p,struct nfsmount * nmp,struct nfsvattr * nap)7735 nfsrv_getattrdsdorpc(fhandle_t *fhp, struct vnode *vp, struct ucred *cred,
7736 NFSPROC_T *p, struct nfsmount *nmp, struct nfsvattr *nap)
7737 {
7738 struct nfsrv_descript *nd;
7739 int error;
7740 nfsattrbit_t attrbits;
7741
7742 NFSD_DEBUG(4, "in nfsrv_getattrdsdorpc\n");
7743 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
7744 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp,
7745 sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
7746 NFSZERO_ATTRBIT(&attrbits);
7747 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
7748 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
7749 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
7750 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
7751 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
7752 (void) nfsrv_putattrbit(nd, &attrbits);
7753 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
7754 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7755 if (error != 0) {
7756 free(nd, M_TEMP);
7757 return (error);
7758 }
7759 NFSD_DEBUG(4, "nfsrv_getattrdsdorpc: aft getattrrpc=%d\n",
7760 nd->nd_repstat);
7761 if (nd->nd_repstat == 0)
7762 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
7763 NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
7764 NULL, NULL, NULL, NULL, NULL);
7765 else
7766 error = nd->nd_repstat;
7767 m_freem(nd->nd_mrep);
7768 free(nd, M_TEMP);
7769 NFSD_DEBUG(4, "nfsrv_getattrdsdorpc error=%d\n", error);
7770 return (error);
7771 }
7772
7773 #ifdef notnow
7774 /*
7775 * Seek call to a DS.
7776 */
7777 static int
nfsrv_seekdsrpc(fhandle_t * fhp,off_t * offp,int content,bool * eofp,struct ucred * cred,NFSPROC_T * p,struct nfsmount * nmp)7778 nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp,
7779 struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp)
7780 {
7781 uint32_t *tl;
7782 struct nfsrv_descript *nd;
7783 nfsv4stateid_t st;
7784 int error;
7785
7786 NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n");
7787 /*
7788 * Use a stateid where other is an alternating 01010 pattern and
7789 * seqid is 0xffffffff. This value is not defined as special by
7790 * the RFC and is used by the FreeBSD NFS server to indicate an
7791 * MDS->DS proxy operation.
7792 */
7793 st.other[0] = 0x55555555;
7794 st.other[1] = 0x55555555;
7795 st.other[2] = 0x55555555;
7796 st.seqid = 0xffffffff;
7797 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
7798 nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp,
7799 sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
7800 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
7801 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7802 txdr_hyper(*offp, tl); tl += 2;
7803 *tl = txdr_unsigned(content);
7804 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
7805 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7806 if (error != 0) {
7807 free(nd, M_TEMP);
7808 return (error);
7809 }
7810 NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat);
7811 if (nd->nd_repstat == 0) {
7812 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER);
7813 if (*tl++ == newnfs_true)
7814 *eofp = true;
7815 else
7816 *eofp = false;
7817 *offp = fxdr_hyper(tl);
7818 } else
7819 error = nd->nd_repstat;
7820 nfsmout:
7821 m_freem(nd->nd_mrep);
7822 free(nd, M_TEMP);
7823 NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error);
7824 return (error);
7825 }
7826 #endif
7827
7828 /*
7829 * Get the device id and file handle for a DS file.
7830 */
7831 int
nfsrv_dsgetdevandfh(struct vnode * vp,NFSPROC_T * p,int * mirrorcntp,uint64_t * stripesizp,int * stripecntp,fhandle_t ** fhpp,char ** devid)7832 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp,
7833 uint64_t *stripesizp, int *stripecntp, fhandle_t **fhpp, char **devid)
7834 {
7835 int buflen, error;
7836 char *buf;
7837
7838 buflen = sizeof(struct pnfsdsfile) * NFSDEV_MAXMIRRORS *
7839 NFSDEV_MAXSTRIPE;
7840 buf = malloc(buflen, M_TEMP, M_WAITOK);
7841 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, stripecntp,
7842 stripesizp, p, NULL, fhpp, devid, NULL, NULL, NULL, NULL, NULL,
7843 NULL);
7844 free(buf, M_TEMP);
7845 return (error);
7846 }
7847
7848 /*
7849 * Do a Lookup against the DS for the filename.
7850 */
7851 static int
nfsrv_pnfslookupds(struct vnode * vp,struct vnode * dvp,struct pnfsdsfile * pf,struct vnode ** nvpp,NFSPROC_T * p)7852 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf,
7853 struct vnode **nvpp, NFSPROC_T *p)
7854 {
7855 struct nameidata named;
7856 struct ucred *tcred;
7857 char *bufp;
7858 u_long *hashp;
7859 struct vnode *nvp;
7860 int error;
7861
7862 tcred = newnfs_getcred();
7863 named.ni_cnd.cn_nameiop = LOOKUP;
7864 named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY;
7865 named.ni_cnd.cn_cred = tcred;
7866 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF;
7867 nfsvno_setpathbuf(&named, &bufp, &hashp);
7868 named.ni_cnd.cn_nameptr = bufp;
7869 named.ni_cnd.cn_namelen = strlen(pf->dsf_filename);
7870 strlcpy(bufp, pf->dsf_filename, NAME_MAX);
7871 NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp);
7872 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
7873 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error);
7874 NFSFREECRED(tcred);
7875 nfsvno_relpathbuf(&named);
7876 if (error == 0)
7877 *nvpp = nvp;
7878 NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error);
7879 return (error);
7880 }
7881
7882 /*
7883 * Set the file handle to the correct one.
7884 */
7885 static void
nfsrv_pnfssetfh(struct vnode * vp,struct pnfsdsfile * pf,char ** devid,char * fnamep,struct vnode * nvp,NFSPROC_T * p)7886 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char **devid,
7887 char *fnamep, struct vnode *nvp, NFSPROC_T *p)
7888 {
7889 struct nfsnode *np;
7890 int ret = 0;
7891
7892 np = VTONFS(nvp);
7893 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH);
7894 /*
7895 * We can only do a vn_set_extattr() if the vnode is exclusively
7896 * locked and vn_start_write() has been done. If devid != NULL or
7897 * fnamep != NULL or the vnode is shared locked, vn_start_write()
7898 * may not have been done.
7899 * If not done now, it will be done on a future call.
7900 */
7901 if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) ==
7902 LK_EXCLUSIVE)
7903 ret = vn_extattr_set(vp, IO_NODELOCKED,
7904 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf),
7905 (char *)pf, p);
7906 NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret);
7907 }
7908
/*
 * Make the RPCs waiting on the DS mount point "nmp" fail.  This is called
 * when the DS has failed.
 */
void
nfsrv_killrpcs(struct nfsmount *nmp)
{

	/*
	 * newnfs_nmcancelreqs() makes any RPCs in progress on the mount
	 * point fail.  As a result, a process that is waiting for an RPC to
	 * complete while holding a vnode lock on the mounted-on vnode (such
	 * as "df" or a non-forced "umount") will fail, which unlocks the
	 * mounted-on vnode so that a forced dismount can succeed.
	 * The NFSMNTP_CANCELRPCS flag should be set when this function is
	 * called.
	 */
	newnfs_nmcancelreqs(nmp);
}
7932
7933 /*
7934 * Sum up the statfs info for each of the DSs, so that the client will
7935 * receive the total for all DSs.
7936 */
7937 static int
nfsrv_pnfsstatfs(struct statfs * sf,struct mount * mp)7938 nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp)
7939 {
7940 struct statfs *tsf;
7941 struct nfsdevice *ds;
7942 struct vnode **dvpp, **tdvpp, *dvp;
7943 uint64_t tot;
7944 int cnt, error = 0, i;
7945
7946 if (nfsrv_devidcnt <= 0)
7947 return (ENXIO);
7948 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK);
7949 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK);
7950
7951 /* Get an array of the dvps for the DSs. */
7952 tdvpp = dvpp;
7953 i = 0;
7954 NFSDDSLOCK();
7955 /* First, search for matches for same file system. */
7956 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7957 if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 &&
7958 fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) {
7959 if (++i > nfsrv_devidcnt)
7960 break;
7961 *tdvpp++ = ds->nfsdev_dvp;
7962 }
7963 }
7964 /*
7965 * If no matches for same file system, total all servers not assigned
7966 * to a file system.
7967 */
7968 if (i == 0) {
7969 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7970 if (ds->nfsdev_nmp != NULL &&
7971 ds->nfsdev_mdsisset == 0) {
7972 if (++i > nfsrv_devidcnt)
7973 break;
7974 *tdvpp++ = ds->nfsdev_dvp;
7975 }
7976 }
7977 }
7978 NFSDDSUNLOCK();
7979 cnt = i;
7980
7981 /* Do a VFS_STATFS() for each of the DSs and sum them up. */
7982 tdvpp = dvpp;
7983 for (i = 0; i < cnt && error == 0; i++) {
7984 dvp = *tdvpp++;
7985 error = VFS_STATFS(dvp->v_mount, tsf);
7986 if (error == 0) {
7987 if (sf->f_bsize == 0) {
7988 if (tsf->f_bsize > 0)
7989 sf->f_bsize = tsf->f_bsize;
7990 else
7991 sf->f_bsize = 8192;
7992 }
7993 if (tsf->f_blocks > 0) {
7994 if (sf->f_bsize != tsf->f_bsize) {
7995 tot = tsf->f_blocks * tsf->f_bsize;
7996 sf->f_blocks += (tot / sf->f_bsize);
7997 } else
7998 sf->f_blocks += tsf->f_blocks;
7999 }
8000 if (tsf->f_bfree > 0) {
8001 if (sf->f_bsize != tsf->f_bsize) {
8002 tot = tsf->f_bfree * tsf->f_bsize;
8003 sf->f_bfree += (tot / sf->f_bsize);
8004 } else
8005 sf->f_bfree += tsf->f_bfree;
8006 }
8007 if (tsf->f_bavail > 0) {
8008 if (sf->f_bsize != tsf->f_bsize) {
8009 tot = tsf->f_bavail * tsf->f_bsize;
8010 sf->f_bavail += (tot / sf->f_bsize);
8011 } else
8012 sf->f_bavail += tsf->f_bavail;
8013 }
8014 }
8015 }
8016 free(tsf, M_TEMP);
8017 free(dvpp, M_TEMP);
8018 return (error);
8019 }
8020
8021 /*
8022 * Set an acl.
8023 */
8024 int
nfsrv_setacl(struct vnode * vp,NFSACL_T * aclp,acl_type_t atype,struct ucred * cred,NFSPROC_T * p)8025 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, acl_type_t atype,
8026 struct ucred *cred, NFSPROC_T *p)
8027 {
8028 int error;
8029
8030 if (nfsrv_useacl == 0 || (atype == ACL_TYPE_NFS4 &&
8031 nfs_supportsnfsv4acls(vp) == 0) || (atype != ACL_TYPE_NFS4 &&
8032 nfs_supportsposixacls(vp) == 0)) {
8033 error = NFSERR_ATTRNOTSUPP;
8034 goto out;
8035 }
8036 /*
8037 * With NFSv4 ACLs, chmod(2) may need to add additional entries.
8038 * Make sure it has enough room for that - splitting every entry
8039 * into two and appending "canonical six" entries at the end.
8040 * Cribbed out of kern/vfs_acl.c - Rick M.
8041 */
8042 if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) {
8043 error = NFSERR_ATTRNOTSUPP;
8044 goto out;
8045 }
8046 if (aclp->acl_cnt == 0) {
8047 if (atype != ACL_TYPE_DEFAULT || vp->v_type != VDIR) {
8048 error = NFSERR_INVAL;
8049 goto out;
8050 }
8051 error = VOP_SETACL(vp, atype, NULL, cred, p);
8052 } else
8053 error = VOP_SETACL(vp, atype, aclp, cred, p);
8054 if (error == 0) {
8055 error = nfsrv_dssetacl(vp, aclp, cred, p);
8056 if (error == ENOENT)
8057 error = 0;
8058 }
8059
8060 out:
8061 NFSEXITCODE(error);
8062 return (error);
8063 }
8064
8065 /*
8066 * Seek vnode op call (actually it is a VOP_IOCTL()).
8067 * This function is called with the vnode locked, but unlocks and vrele()s
8068 * the vp before returning.
8069 */
8070 int
nfsvno_seek(struct nfsrv_descript * nd,struct vnode * vp,u_long cmd,off_t * offp,int content,bool * eofp,struct ucred * cred,NFSPROC_T * p)8071 nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd,
8072 off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p)
8073 {
8074 struct nfsvattr at;
8075 int error, ret;
8076
8077 ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp");
8078 #ifdef notnow
8079 /*
8080 * Attempt to seek on a DS file. A return of ENOENT implies
8081 * there is no DS file to seek on.
8082 */
8083 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL,
8084 NULL, NULL, NULL, NULL, offp, content, eofp);
8085 if (error != ENOENT) {
8086 vput(vp);
8087 return (error);
8088 }
8089 #endif
8090
8091 /*
8092 * Do the VOP_IOCTL() call. For the case where *offp == file_size,
8093 * VOP_IOCTL() will return ENXIO. However, the correct reply for
8094 * NFSv4.2 is *eofp == true and error == 0 for this case.
8095 */
8096 NFSVOPUNLOCK(vp);
8097 error = VOP_IOCTL(vp, cmd, offp, 0, cred, p);
8098 *eofp = false;
8099 if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) {
8100 /* Handle the cases where we might be at EOF. */
8101 ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL);
8102 if (ret == 0 && *offp == at.na_size) {
8103 *eofp = true;
8104 error = 0;
8105 }
8106 if (ret != 0 && error == 0)
8107 error = ret;
8108 }
8109 vrele(vp);
8110 NFSEXITCODE(error);
8111 return (error);
8112 }
8113
8114 /*
8115 * Allocate vnode op call.
8116 */
8117 int
nfsvno_allocate(struct vnode * vp,off_t off,off_t len,struct ucred * cred,NFSPROC_T * p)8118 nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
8119 NFSPROC_T *p)
8120 {
8121 int error;
8122 off_t olen;
8123
8124 ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp");
8125 #ifdef notnow
8126 /*
8127 * Attempt to allocate on a DS file. A return of ENOENT implies
8128 * there is no DS file to allocate on.
8129 */
8130 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL,
8131 NULL, NULL, NULL, NULL, &len, 0, NULL);
8132 if (error != ENOENT)
8133 return (error);
8134 #endif
8135
8136 /*
8137 * Do the actual VOP_ALLOCATE(), looping so long as
8138 * progress is being made, to achieve completion.
8139 */
8140 do {
8141 olen = len;
8142 error = VOP_ALLOCATE(vp, &off, &len, IO_SYNC, cred);
8143 if (error == 0 && len > 0 && olen > len)
8144 maybe_yield();
8145 } while (error == 0 && len > 0 && olen > len);
8146 if (error == 0 && len > 0)
8147 error = NFSERR_IO;
8148 NFSEXITCODE(error);
8149 return (error);
8150 }
8151
8152 /*
8153 * Deallocate vnode op call.
8154 */
8155 int
nfsvno_deallocate(struct vnode * vp,off_t off,off_t len,struct ucred * cred,NFSPROC_T * p)8156 nfsvno_deallocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
8157 NFSPROC_T *p)
8158 {
8159 int error;
8160 off_t olen;
8161
8162 ASSERT_VOP_ELOCKED(vp, "nfsvno_deallocate vp");
8163 #ifdef notnow
8164 /*
8165 * Attempt to deallocate on a DS file. A return of ENOENT implies
8166 * there is no DS file to deallocate on.
8167 */
8168 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_DEALLOCATE, NULL,
8169 NULL, NULL, NULL, NULL, &len, 0, NULL);
8170 if (error != ENOENT)
8171 return (error);
8172 #endif
8173
8174 /*
8175 * Do the actual VOP_DEALLOCATE(), looping so long as
8176 * progress is being made, to achieve completion.
8177 */
8178 do {
8179 olen = len;
8180 error = VOP_DEALLOCATE(vp, &off, &len, 0, IO_SYNC, cred);
8181 if (error == 0 && len > 0 && olen > len)
8182 maybe_yield();
8183 } while (error == 0 && len > 0 && olen > len);
8184 if (error == 0 && len > 0)
8185 error = NFSERR_IO;
8186 NFSEXITCODE(error);
8187 return (error);
8188 }
8189
8190 /*
8191 * Get Extended Atribute vnode op into an mbuf list.
8192 */
8193 int
nfsvno_getxattr(struct vnode * vp,char * name,uint32_t maxresp,struct ucred * cred,uint64_t flag,int maxextsiz,struct thread * p,struct mbuf ** mpp,struct mbuf ** mpendp,int * lenp)8194 nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp,
8195 struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p,
8196 struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
8197 {
8198 struct iovec *iv;
8199 struct uio io, *uiop = &io;
8200 struct mbuf *m, *m2;
8201 int alen, error, len, tlen;
8202 size_t siz;
8203
8204 /* First, find out the size of the extended attribute. */
8205 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
8206 &siz, cred, p);
8207 if (error != 0)
8208 return (NFSERR_NOXATTR);
8209 if (siz > maxresp - NFS_MAXXDR)
8210 return (NFSERR_XATTR2BIG);
8211 len = siz;
8212 tlen = NFSM_RNDUP(len);
8213 if (tlen > 0) {
8214 /*
8215 * If cnt > MCLBYTES and the reply will not be saved, use
8216 * ext_pgs mbufs for TLS.
8217 * For NFSv4.0, we do not know for sure if the reply will
8218 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
8219 * Always use ext_pgs mbufs if ND_EXTPG is set.
8220 */
8221 if ((flag & ND_EXTPG) != 0 || (tlen > MCLBYTES &&
8222 (flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS &&
8223 (flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4))
8224 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen,
8225 maxextsiz, &m, &m2, &iv);
8226 else
8227 uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2,
8228 &iv);
8229 uiop->uio_iov = iv;
8230 } else {
8231 uiop->uio_iovcnt = 0;
8232 uiop->uio_iov = iv = NULL;
8233 m = m2 = NULL;
8234 }
8235 uiop->uio_offset = 0;
8236 uiop->uio_resid = tlen;
8237 uiop->uio_rw = UIO_READ;
8238 uiop->uio_segflg = UIO_SYSSPACE;
8239 uiop->uio_td = p;
8240 #ifdef MAC
8241 error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER,
8242 name);
8243 if (error != 0)
8244 goto out;
8245 #endif
8246
8247 if (tlen > 0)
8248 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop,
8249 NULL, cred, p);
8250 if (error != 0)
8251 goto out;
8252 if (uiop->uio_resid > 0) {
8253 alen = tlen;
8254 len = tlen - uiop->uio_resid;
8255 tlen = NFSM_RNDUP(len);
8256 if (alen != tlen)
8257 printf("nfsvno_getxattr: weird size read\n");
8258 if (tlen == 0) {
8259 m_freem(m);
8260 m = m2 = NULL;
8261 } else if (alen != tlen || tlen != len)
8262 m2 = nfsrv_adj(m, alen - tlen, tlen - len);
8263 }
8264 *lenp = len;
8265 *mpp = m;
8266 *mpendp = m2;
8267
8268 out:
8269 if (error != 0) {
8270 if (m != NULL)
8271 m_freem(m);
8272 *lenp = 0;
8273 }
8274 free(iv, M_TEMP);
8275 NFSEXITCODE(error);
8276 return (error);
8277 }
8278
8279 /*
8280 * Set Extended attribute vnode op from an mbuf list.
8281 */
8282 int
nfsvno_setxattr(struct vnode * vp,char * name,int len,struct mbuf * m,char * cp,struct ucred * cred,struct thread * p)8283 nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m,
8284 char *cp, struct ucred *cred, struct thread *p)
8285 {
8286 struct iovec *iv;
8287 struct uio uio, *uiop = &uio;
8288 int cnt, error;
8289
8290 error = 0;
8291 #ifdef MAC
8292 error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER,
8293 name);
8294 #endif
8295 if (error != 0)
8296 goto out;
8297
8298 uiop->uio_rw = UIO_WRITE;
8299 uiop->uio_segflg = UIO_SYSSPACE;
8300 uiop->uio_td = p;
8301 uiop->uio_offset = 0;
8302 uiop->uio_resid = len;
8303 if (len > 0) {
8304 error = nfsrv_createiovecw(len, m, cp, &iv, &cnt);
8305 uiop->uio_iov = iv;
8306 uiop->uio_iovcnt = cnt;
8307 } else {
8308 uiop->uio_iov = iv = NULL;
8309 uiop->uio_iovcnt = 0;
8310 }
8311 if (error == 0) {
8312 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop,
8313 cred, p);
8314 if (error == 0) {
8315 if (vp->v_type == VREG && nfsrv_devidcnt != 0)
8316 nfsvno_updateds(vp, cred, p);
8317 error = VOP_FSYNC(vp, MNT_WAIT, p);
8318 }
8319 free(iv, M_TEMP);
8320 }
8321
8322 out:
8323 NFSEXITCODE(error);
8324 return (error);
8325 }
8326
8327 /*
8328 * For a pNFS server, the DS file's ctime and
8329 * va_filerev (TimeMetadata and Change) needs to
8330 * be updated. This is a hack, but works by
8331 * flipping the S_ISGID bit in va_mode and then
8332 * flipping it back.
8333 * It does result in two MDS->DS RPCs, but creating
8334 * a custom RPC just to do this seems overkill, since
8335 * Setxattr/Rmxattr will not be done that frequently.
8336 * If it fails part way through, that is not too
8337 * serious, since the DS file is never executed.
8338 */
8339 static void
nfsvno_updateds(struct vnode * vp,struct ucred * cred,NFSPROC_T * p)8340 nfsvno_updateds(struct vnode *vp, struct ucred *cred, NFSPROC_T *p)
8341 {
8342 struct nfsvattr nva;
8343 int ret;
8344 u_short tmode;
8345
8346 ret = VOP_GETATTR(vp, &nva.na_vattr, cred);
8347 if (ret == 0) {
8348 tmode = nva.na_mode;
8349 NFSVNO_ATTRINIT(&nva);
8350 tmode ^= S_ISGID;
8351 NFSVNO_SETATTRVAL(&nva, mode, tmode);
8352 ret = nfsrv_proxyds(vp, 0, 0, cred, p,
8353 NFSPROC_SETATTR, NULL, NULL, NULL, &nva,
8354 NULL, NULL, 0, NULL);
8355 if (ret == 0) {
8356 tmode ^= S_ISGID;
8357 NFSVNO_SETATTRVAL(&nva, mode, tmode);
8358 ret = nfsrv_proxyds(vp, 0, 0, cred, p,
8359 NFSPROC_SETATTR, NULL, NULL, NULL,
8360 &nva, NULL, NULL, 0, NULL);
8361 }
8362 }
8363 }
8364
8365 /*
8366 * Remove Extended attribute vnode op.
8367 */
8368 int
nfsvno_rmxattr(struct nfsrv_descript * nd,struct vnode * vp,char * name,struct ucred * cred,struct thread * p)8369 nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name,
8370 struct ucred *cred, struct thread *p)
8371 {
8372 int error;
8373
8374 /*
8375 * Get rid of any delegations. I am not sure why this is required,
8376 * but RFC-8276 says so.
8377 */
8378 error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p);
8379 if (error != 0)
8380 goto out;
8381 #ifdef MAC
8382 error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER,
8383 name);
8384 if (error != 0)
8385 goto out;
8386 #endif
8387
8388 error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p);
8389 if (error == EOPNOTSUPP)
8390 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
8391 cred, p);
8392 if (error == 0) {
8393 if (vp->v_type == VREG && nfsrv_devidcnt != 0)
8394 nfsvno_updateds(vp, cred, p);
8395 error = VOP_FSYNC(vp, MNT_WAIT, p);
8396 }
8397 out:
8398 NFSEXITCODE(error);
8399 return (error);
8400 }
8401
8402 /*
8403 * List Extended Atribute vnode op into an mbuf list.
8404 */
8405 int
nfsvno_listxattr(struct vnode * vp,uint64_t cookie,struct ucred * cred,struct thread * p,u_char ** bufp,uint32_t * lenp,bool * eofp)8406 nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred,
8407 struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp)
8408 {
8409 struct iovec iv;
8410 struct uio io;
8411 int error;
8412 size_t siz;
8413
8414 *bufp = NULL;
8415 /* First, find out the size of the extended attribute. */
8416 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred,
8417 p);
8418 if (error != 0)
8419 return (NFSERR_NOXATTR);
8420 if (siz <= cookie) {
8421 *lenp = 0;
8422 *eofp = true;
8423 goto out;
8424 }
8425 if (siz > cookie + *lenp) {
8426 siz = cookie + *lenp;
8427 *eofp = false;
8428 } else
8429 *eofp = true;
8430 /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). */
8431 if (siz > 10 * 1024 * 1024) {
8432 error = NFSERR_XATTR2BIG;
8433 goto out;
8434 }
8435 *bufp = malloc(siz, M_TEMP, M_WAITOK);
8436 iv.iov_base = *bufp;
8437 iv.iov_len = siz;
8438 io.uio_iovcnt = 1;
8439 io.uio_iov = &iv;
8440 io.uio_offset = 0;
8441 io.uio_resid = siz;
8442 io.uio_rw = UIO_READ;
8443 io.uio_segflg = UIO_SYSSPACE;
8444 io.uio_td = p;
8445 #ifdef MAC
8446 error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER);
8447 if (error != 0)
8448 goto out;
8449 #endif
8450
8451 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred,
8452 p);
8453 if (error != 0)
8454 goto out;
8455 if (io.uio_resid > 0)
8456 siz -= io.uio_resid;
8457 *lenp = siz;
8458
8459 out:
8460 if (error != 0) {
8461 free(*bufp, M_TEMP);
8462 *bufp = NULL;
8463 }
8464 NFSEXITCODE(error);
8465 return (error);
8466 }
8467
8468 /*
8469 * Trim trailing data off the mbuf list being built.
8470 */
8471 void
nfsm_trimtrailing(struct nfsrv_descript * nd,struct mbuf * mb,char * bpos,int bextpg,int bextpgsiz)8472 nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos,
8473 int bextpg, int bextpgsiz)
8474 {
8475 vm_page_t pg;
8476 int fullpgsiz, i;
8477
8478 if (mb->m_next != NULL) {
8479 m_freem(mb->m_next);
8480 mb->m_next = NULL;
8481 }
8482 if ((mb->m_flags & M_EXTPG) != 0) {
8483 KASSERT(bextpg >= 0 && bextpg < mb->m_epg_npgs,
8484 ("nfsm_trimtrailing: bextpg out of range"));
8485 KASSERT(bpos == (char *)
8486 PHYS_TO_DMAP(mb->m_epg_pa[bextpg]) + PAGE_SIZE - bextpgsiz,
8487 ("nfsm_trimtrailing: bextpgsiz bad!"));
8488
8489 /* First, get rid of any pages after this position. */
8490 for (i = mb->m_epg_npgs - 1; i > bextpg; i--) {
8491 pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]);
8492 vm_page_unwire_noq(pg);
8493 vm_page_free(pg);
8494 }
8495 mb->m_epg_npgs = bextpg + 1;
8496 if (bextpg == 0)
8497 fullpgsiz = PAGE_SIZE - mb->m_epg_1st_off;
8498 else
8499 fullpgsiz = PAGE_SIZE;
8500 mb->m_epg_last_len = fullpgsiz - bextpgsiz;
8501 mb->m_len = m_epg_pagelen(mb, 0, mb->m_epg_1st_off);
8502 for (i = 1; i < mb->m_epg_npgs; i++)
8503 mb->m_len += m_epg_pagelen(mb, i, 0);
8504 nd->nd_bextpgsiz = bextpgsiz;
8505 nd->nd_bextpg = bextpg;
8506 } else
8507 mb->m_len = bpos - mtod(mb, char *);
8508 nd->nd_mb = mb;
8509 nd->nd_bpos = bpos;
8510 }
8511
8512
8513 /*
8514 * Check to see if a put file handle operation should test for
8515 * NFSERR_WRONGSEC, although NFSv3 actually returns NFSERR_AUTHERR.
8516 * When Open is the next operation, NFSERR_WRONGSEC cannot be
8517 * replied for the Open cases that use a component. This can
8518 * be identified by the fact that the file handle's type is VDIR.
8519 */
8520 bool
nfsrv_checkwrongsec(struct nfsrv_descript * nd,int nextop,__enum_uint8 (vtype)vtyp)8521 nfsrv_checkwrongsec(struct nfsrv_descript *nd, int nextop, __enum_uint8(vtype) vtyp)
8522 {
8523
8524 if ((nd->nd_flag & ND_NFSV4) == 0)
8525 return (true);
8526
8527 if ((nd->nd_flag & ND_LASTOP) != 0)
8528 return (false);
8529
8530 if (nextop == NFSV4OP_PUTROOTFH || nextop == NFSV4OP_PUTFH ||
8531 nextop == NFSV4OP_PUTPUBFH || nextop == NFSV4OP_RESTOREFH ||
8532 nextop == NFSV4OP_LOOKUP || nextop == NFSV4OP_LOOKUPP ||
8533 nextop == NFSV4OP_SECINFO || nextop == NFSV4OP_SECINFONONAME)
8534 return (false);
8535 if (nextop == NFSV4OP_OPEN && vtyp == VDIR)
8536 return (false);
8537 return (true);
8538 }
8539
8540 /*
8541 * Check DSs marked no space.
8542 */
8543 void
nfsrv_checknospc(void)8544 nfsrv_checknospc(void)
8545 {
8546 struct statfs *tsf;
8547 struct nfsdevice *ds;
8548 struct vnode **dvpp, **tdvpp, *dvp;
8549 char *devid, *tdevid;
8550 int cnt, error = 0, i;
8551
8552 if (nfsrv_devidcnt <= 0)
8553 return;
8554 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK);
8555 devid = malloc(nfsrv_devidcnt * NFSX_V4DEVICEID, M_TEMP, M_WAITOK);
8556 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK);
8557
8558 /* Get an array of the dvps for the DSs. */
8559 tdvpp = dvpp;
8560 tdevid = devid;
8561 i = 0;
8562 NFSDDSLOCK();
8563 /* First, search for matches for same file system. */
8564 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8565 if (ds->nfsdev_nmp != NULL && ds->nfsdev_nospc) {
8566 if (++i > nfsrv_devidcnt)
8567 break;
8568 *tdvpp++ = ds->nfsdev_dvp;
8569 NFSBCOPY(ds->nfsdev_deviceid, tdevid, NFSX_V4DEVICEID);
8570 tdevid += NFSX_V4DEVICEID;
8571 }
8572 }
8573 NFSDDSUNLOCK();
8574
8575 /* Do a VFS_STATFS() for each of the DSs and clear no space. */
8576 cnt = i;
8577 tdvpp = dvpp;
8578 tdevid = devid;
8579 for (i = 0; i < cnt && error == 0; i++) {
8580 dvp = *tdvpp++;
8581 error = VFS_STATFS(dvp->v_mount, tsf);
8582 if (error == 0 && tsf->f_bavail > 0) {
8583 NFSD_DEBUG(1, "nfsrv_checknospc: reset nospc\n");
8584 nfsrv_marknospc(tdevid, false);
8585 }
8586 tdevid += NFSX_V4DEVICEID;
8587 }
8588 free(tsf, M_TEMP);
8589 free(dvpp, M_TEMP);
8590 free(devid, M_TEMP);
8591 }
8592
8593 /*
8594 * Return the correct ACL support value for a vnode.
8595 */
8596 int
nfs_supportsacls(struct vnode * vp)8597 nfs_supportsacls(struct vnode *vp)
8598 {
8599
8600 if (nfs_supportsnfsv4acls(vp) != 0)
8601 return (SUPPACL_NFSV4);
8602 else if (nfs_supportsposixacls(vp) != 0)
8603 return (SUPPACL_POSIX);
8604 return (SUPPACL_NONE);
8605 }
8606
8607 /*
8608 * Initialize everything that needs to be initialized for a vnet.
8609 */
8610 static void
nfsrv_vnetinit(const void * unused __unused)8611 nfsrv_vnetinit(const void *unused __unused)
8612 {
8613
8614 nfsd_mntinit();
8615 }
8616 VNET_SYSINIT(nfsrv_vnetinit, SI_SUB_VNET_DONE, SI_ORDER_ANY,
8617 nfsrv_vnetinit, NULL);
8618
8619 /*
8620 * Clean up everything that is in a vnet and needs to be
8621 * done when the jail is destroyed or the module unloaded.
8622 */
8623 static void
nfsrv_cleanup(const void * unused __unused)8624 nfsrv_cleanup(const void *unused __unused)
8625 {
8626 int i;
8627
8628 NFSD_LOCK();
8629 if (!VNET(nfsrv_mntinited)) {
8630 NFSD_UNLOCK();
8631 return;
8632 }
8633 VNET(nfsrv_mntinited) = false;
8634 NFSD_UNLOCK();
8635
8636 /* Clean out all NFSv4 state. */
8637 nfsrv_throwawayallstate(curthread);
8638
8639 /* Clean the NFS server reply cache */
8640 nfsrvd_cleancache();
8641
8642 /* Clean out v4root exports. */
8643 if (VNET(nfsv4root_mnt)->mnt_export != NULL) {
8644 vfs_free_addrlist(VNET(nfsv4root_mnt)->mnt_export);
8645 free(VNET(nfsv4root_mnt)->mnt_export, M_MOUNT);
8646 VNET(nfsv4root_mnt)->mnt_export = NULL;
8647 }
8648
8649 /* Free up the krpc server pool. */
8650 if (VNET(nfsrvd_pool) != NULL)
8651 svcpool_destroy(VNET(nfsrvd_pool));
8652
8653 /* and get rid of the locks */
8654 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
8655 mtx_destroy(&VNET(nfsrchash_table)[i].mtx);
8656 mtx_destroy(&VNET(nfsrcahash_table)[i].mtx);
8657 }
8658 mtx_destroy(&VNET(nfsv4root_mnt)->mnt_mtx);
8659 for (i = 0; i < nfsrv_sessionhashsize; i++)
8660 mtx_destroy(&VNET(nfssessionhash)[i].mtx);
8661 lockdestroy(&VNET(nfsv4root_mnt)->mnt_explock);
8662 free(VNET(nfsrvudphashtbl), M_NFSRVCACHE);
8663 free(VNET(nfsrchash_table), M_NFSRVCACHE);
8664 free(VNET(nfsrcahash_table), M_NFSRVCACHE);
8665 free(VNET(nfsclienthash), M_NFSDCLIENT);
8666 free(VNET(nfslockhash), M_NFSDLOCKFILE);
8667 free(VNET(nfssessionhash), M_NFSDSESSION);
8668 free(VNET(nfsv4root_mnt), M_TEMP);
8669 VNET(nfsv4root_mnt) = NULL;
8670 }
8671 VNET_SYSUNINIT(nfsrv_cleanup, SI_SUB_VNET_DONE, SI_ORDER_ANY,
8672 nfsrv_cleanup, NULL);
8673
8674 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
8675
8676 /*
8677 * Called once to initialize data structures...
8678 */
8679 static int
nfsd_modevent(module_t mod,int type,void * data)8680 nfsd_modevent(module_t mod, int type, void *data)
8681 {
8682 int error = 0, i;
8683 static int loaded = 0;
8684
8685 switch (type) {
8686 case MOD_LOAD:
8687 if (loaded)
8688 goto out;
8689 newnfs_portinit();
8690 mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF);
8691 mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF);
8692 mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF);
8693 mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF);
8694 #ifdef VV_DISABLEDELEG
8695 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
8696 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
8697 #endif
8698 nfsd_call_nfsd = nfssvc_nfsd;
8699 loaded = 1;
8700 break;
8701
8702 case MOD_UNLOAD:
8703 if (newnfs_numnfsd != 0) {
8704 error = EBUSY;
8705 break;
8706 }
8707
8708 #ifdef VV_DISABLEDELEG
8709 vn_deleg_ops.vndeleg_recall = NULL;
8710 vn_deleg_ops.vndeleg_disable = NULL;
8711 #endif
8712 nfsd_call_nfsd = NULL;
8713 mtx_destroy(&nfsrc_udpmtx);
8714 mtx_destroy(&nfs_v4root_mutex);
8715 mtx_destroy(&nfsrv_dontlistlock_mtx);
8716 mtx_destroy(&nfsrv_recalllock_mtx);
8717 if (nfslayouthash != NULL) {
8718 for (i = 0; i < nfsrv_layouthashsize; i++)
8719 mtx_destroy(&nfslayouthash[i].mtx);
8720 free(nfslayouthash, M_NFSDSESSION);
8721 }
8722 loaded = 0;
8723 break;
8724 default:
8725 error = EOPNOTSUPP;
8726 break;
8727 }
8728
8729 out:
8730 NFSEXITCODE(error);
8731 return (error);
8732 }
8733 static moduledata_t nfsd_mod = {
8734 "nfsd",
8735 nfsd_modevent,
8736 NULL,
8737 };
8738 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
8739
8740 /* So that loader and kldload(2) can find us, wherever we are.. */
8741 MODULE_VERSION(nfsd, 1);
8742 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
8743 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
8744 MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
8745 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);
8746