1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2009 Rick Macklem, University of Guelph
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 */
29
30 #include <sys/cdefs.h>
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 #include <sys/extattr.h>
34 #include <fs/nfs/nfsport.h>
35
36 int nfsrv_issuedelegs = 0;
37 int nfsrv_dolocallocks = 0;
38 struct nfsv4lock nfsv4rootfs_lock;
39 time_t nfsdev_time = 0;
40 int nfsrv_layouthashsize;
41 volatile int nfsrv_layoutcnt = 0;
42
43 NFSD_VNET_DEFINE(struct nfsrv_stablefirst, nfsrv_stablefirst);
44
45 NFSD_VNET_DECLARE(int, nfsrv_numnfsd);
46 NFSD_VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p);
47
48 extern uint32_t nfs_srvmaxio;
49 extern int nfsrv_lease;
50 extern struct timeval nfsboottime;
51 extern u_int32_t newnfs_true, newnfs_false;
52 extern struct mtx nfsrv_dslock_mtx;
53 extern struct mtx nfsrv_recalllock_mtx;
54 extern struct mtx nfsrv_dontlistlock_mtx;
55 extern int nfsd_debuglevel;
56 extern u_int nfsrv_dsdirsize;
57 extern struct nfsdevicehead nfsrv_devidhead;
58 extern int nfsrv_doflexfile;
59 extern int nfsrv_maxpnfsmirror;
60 NFSV4ROOTLOCKMUTEX;
61 NFSSTATESPINLOCK;
62 extern struct nfsdontlisthead nfsrv_dontlisthead;
63 extern volatile int nfsrv_devidcnt;
64 extern struct nfslayouthead nfsrv_recalllisthead;
65 extern char *nfsrv_zeropnfsdat;
66
67 SYSCTL_DECL(_vfs_nfsd);
68 int nfsrv_statehashsize = NFSSTATEHASHSIZE;
69 SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
70 &nfsrv_statehashsize, 0,
71 "Size of state hash table set via loader.conf");
72
73 int nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
74 SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
75 &nfsrv_clienthashsize, 0,
76 "Size of client hash table set via loader.conf");
77
78 int nfsrv_lockhashsize = NFSLOCKHASHSIZE;
79 SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
80 &nfsrv_lockhashsize, 0,
81 "Size of file handle hash table set via loader.conf");
82
83 int nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
84 SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
85 &nfsrv_sessionhashsize, 0,
86 "Size of session hash table set via loader.conf");
87
88 int nfsrv_layouthighwater = NFSLAYOUTHIGHWATER;
89 SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN,
90 &nfsrv_layouthighwater, 0,
91 "High water mark for number of layouts set via loader.conf");
92
93 static int nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
94 SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
95 &nfsrv_v4statelimit, 0,
96 "High water limit for NFSv4 opens+locks+delegations");
97
98 static int nfsrv_writedelegifpos = 0;
99 SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
100 &nfsrv_writedelegifpos, 0,
101 "Issue a write delegation for read opens if possible");
102
103 static int nfsrv_allowreadforwriteopen = 1;
104 SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW,
105 &nfsrv_allowreadforwriteopen, 0,
106 "Allow Reads to be done with Write Access StateIDs");
107
108 int nfsrv_pnfsatime = 0;
109 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW,
110 &nfsrv_pnfsatime, 0,
111 "For pNFS service, do Getattr ops to keep atime up-to-date");
112
113 int nfsrv_flexlinuxhack = 0;
114 SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW,
115 &nfsrv_flexlinuxhack, 0,
116 "For Linux clients, hack around Flex File Layout bug");
117
118 /*
119 * Hash lists for nfs V4.
120 */
121 NFSD_VNET_DEFINE(struct nfsclienthashhead *, nfsclienthash);
122 NFSD_VNET_DEFINE(struct nfslockhashhead *, nfslockhash);
123 NFSD_VNET_DEFINE(struct nfssessionhash *, nfssessionhash);
124
125 struct nfslayouthash *nfslayouthash;
126 volatile int nfsrv_dontlistlen = 0;
127
128 static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
129 static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
130 static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
131 static int nfsrv_nogsscallback = 0;
132 static volatile int nfsrv_writedelegcnt = 0;
133 static int nfsrv_faildscnt;
134
135 NFSD_VNET_DEFINE_STATIC(time_t, nfsrvboottime);
136
137 /* local functions */
138 static void nfsrv_dumpaclient(struct nfsclient *clp,
139 struct nfsd_dumpclients *dumpp);
140 static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
141 NFSPROC_T *p);
142 static void nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
143 NFSPROC_T *p);
144 static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
145 NFSPROC_T *p);
146 static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
147 int cansleep, NFSPROC_T *p);
148 static void nfsrv_freenfslock(struct nfslock *lop);
149 static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
150 static void nfsrv_freedeleg(struct nfsstate *);
151 static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
152 u_int32_t flags, struct nfsstate **stpp);
153 static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
154 struct nfsstate **stpp);
155 static int nfsrv_getlockfh(vnode_t vp, u_short flags,
156 struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
157 static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
158 struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
159 static void nfsrv_insertlock(struct nfslock *new_lop,
160 struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
161 static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
162 struct nfslock **other_lopp, struct nfslockfile *lfp);
163 static int nfsrv_getipnumber(u_char *cp);
164 static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
165 nfsv4stateid_t *stateidp, int specialid);
166 static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
167 u_int32_t flags);
168 static int nfsrv_docallback(struct nfsclient *clp, int procnum,
169 nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
170 struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p);
171 static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
172 uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
173 int *slotposp);
174 static u_int32_t nfsrv_nextclientindex(void);
175 static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
176 static void nfsrv_markstable(struct nfsclient *clp);
177 static void nfsrv_markreclaim(struct nfsclient *clp);
178 static int nfsrv_checkstable(struct nfsclient *clp);
179 static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct
180 vnode *vp, NFSPROC_T *p);
181 static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
182 NFSPROC_T *p, vnode_t vp);
183 static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
184 struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
185 static int nfsrv_notsamecredname(int op, struct nfsrv_descript *nd,
186 struct nfsclient *clp);
187 static time_t nfsrv_leaseexpiry(void);
188 static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
189 static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
190 struct nfsstate *stp, struct nfsrvcache *op);
191 static int nfsrv_nootherstate(struct nfsstate *stp);
192 static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
193 uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
194 static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
195 uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
196 static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
197 int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
198 NFSPROC_T *p);
199 static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
200 NFSPROC_T *p);
201 static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
202 uint64_t first, uint64_t end);
203 static void nfsrv_locklf(struct nfslockfile *lfp);
204 static void nfsrv_unlocklf(struct nfslockfile *lfp);
205 static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
206 static int nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
207 uint8_t *sessionid, bool locked, SVCXPRT **old_xprtp);
208 static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
209 int dont_replycache, struct nfsdsession **sepp, int *slotposp);
210 static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
211 static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
212 nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p);
213 static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp);
214 static void nfsrv_freelayoutlist(nfsquad_t clientid);
215 static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype,
216 int iomode);
217 static void nfsrv_freealllayouts(void);
218 static void nfsrv_freedevid(struct nfsdevice *ds);
219 static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
220 struct nfsdevice **dsp);
221 static void nfsrv_deleteds(struct nfsdevice *fndds);
222 static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost);
223 static void nfsrv_freealldevids(void);
224 static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp,
225 int maxcnt, NFSPROC_T *p);
226 static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp,
227 fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype,
228 NFSPROC_T *p);
229 static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
230 NFSPROC_T *, struct nfslayout **lypp);
231 static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt);
232 static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode,
233 fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
234 static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode,
235 int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
236 static int nfsrv_dontlayout(fhandle_t *fhp);
237 static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
238 vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
239 vnode_t *tvpp);
240 static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp);
241 static int nfsrv_checkmachcred(int op, struct nfsrv_descript *nd,
242 struct nfsclient *clp);
243 static void nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp,
244 struct nfsrv_descript *nd, int delegate, int writedeleg, int readonly,
245 u_quad_t filerev, uint64_t rdonly, struct nfsstate **new_delegp,
246 struct nfsstate *new_stp, struct nfslockfile *lfp, uint32_t *rflagsp,
247 nfsv4stateid_t *delegstateidp);
248 static void nfsrv_clientlock(bool mlocked);
249 static void nfsrv_clientunlock(bool mlocked);
250
251 /*
252 * Lock the client structure, either with the mutex or the exclusive nfsd lock.
253 */
254 static void
nfsrv_clientlock(bool mlocked)255 nfsrv_clientlock(bool mlocked)
256 {
257 int igotlock;
258
259 if (mlocked) {
260 NFSLOCKSTATE();
261 } else {
262 NFSLOCKV4ROOTMUTEX();
263 nfsv4_relref(&nfsv4rootfs_lock);
264 do {
265 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
266 NFSV4ROOTLOCKMUTEXPTR, NULL);
267 } while (!igotlock);
268 NFSUNLOCKV4ROOTMUTEX();
269 }
270 }
271
272 /*
273 * Unlock the client structure.
274 */
275 static void
nfsrv_clientunlock(bool mlocked)276 nfsrv_clientunlock(bool mlocked)
277 {
278
279 if (mlocked) {
280 NFSUNLOCKSTATE();
281 } else {
282 NFSLOCKV4ROOTMUTEX();
283 nfsv4_unlock(&nfsv4rootfs_lock, 1);
284 NFSUNLOCKV4ROOTMUTEX();
285 }
286 }
287
288 /*
289 * Scan the client list for a match and either return the current one,
290 * create a new entry or return an error.
291 * If returning a non-error, the clp structure must either be linked into
292 * the client list or free'd.
293 */
294 int
nfsrv_setclient(struct nfsrv_descript * nd,struct nfsclient ** new_clpp,nfsquad_t * clientidp,nfsquad_t * confirmp,NFSPROC_T * p)295 nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
296 nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
297 {
298 struct nfsclient *clp = NULL, *new_clp = *new_clpp;
299 int i, error = 0, ret;
300 struct nfsstate *stp, *tstp;
301 #ifdef INET
302 struct sockaddr_in *sin, *rin;
303 #endif
304 #ifdef INET6
305 struct sockaddr_in6 *sin6, *rin6;
306 #endif
307 struct nfsdsession *sep, *nsep;
308 SVCXPRT *old_xprt;
309 struct nfssessionhead old_sess;
310 int zapit = 0, gotit, hasstate = 0;
311 bool mlocked;
312 static u_int64_t confirm_index = 0;
313
314 /*
315 * Check for state resource limit exceeded.
316 */
317 if (nfsrv_openpluslock > nfsrv_v4statelimit) {
318 error = NFSERR_RESOURCE;
319 goto out;
320 }
321
322 if (nfsrv_issuedelegs == 0 ||
323 ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
324 /*
325 * Don't do callbacks when delegations are disabled or
326 * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
327 * If establishing a callback connection is attempted
328 * when a firewall is blocking the callback path, the
329 * server may wait too long for the connect attempt to
330 * succeed during the Open. Some clients, such as Linux,
331 * may timeout and give up on the Open before the server
332 * replies. Also, since AUTH_GSS callbacks are not
333 * yet interoperability tested, they might cause the
334 * server to crap out, if they get past the Init call to
335 * the client.
336 */
337 new_clp->lc_program = 0;
338
339 mlocked = true;
340 if (nfsrv_dolocallocks != 0)
341 mlocked = false;
342 /* Lock out other nfsd threads */
343 nfsrv_clientlock(mlocked);
344
345 /*
346 * Search for a match in the client list.
347 */
348 gotit = i = 0;
349 while (i < nfsrv_clienthashsize && !gotit) {
350 LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) {
351 if (new_clp->lc_idlen == clp->lc_idlen &&
352 !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
353 gotit = 1;
354 break;
355 }
356 }
357 if (gotit == 0)
358 i++;
359 }
360 old_xprt = NULL;
361 if (!gotit ||
362 (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
363 if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
364 /*
365 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
366 * client is trying to update a confirmed clientid.
367 */
368 nfsrv_clientunlock(mlocked);
369 confirmp->lval[1] = 0;
370 error = NFSERR_NOENT;
371 goto out;
372 }
373 /*
374 * Get rid of the old one.
375 */
376 if (i != nfsrv_clienthashsize) {
377 LIST_REMOVE(clp, lc_hash);
378 if (mlocked)
379 nfsrv_cleanclient(clp, p, true, &old_xprt);
380 else
381 nfsrv_cleanclient(clp, p, false, NULL);
382 nfsrv_freedeleglist(&clp->lc_deleg);
383 nfsrv_freedeleglist(&clp->lc_olddeleg);
384 zapit = 1;
385 }
386 /*
387 * Add it after assigning a client id to it.
388 */
389 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
390 if ((nd->nd_flag & ND_NFSV41) != 0) {
391 confirmp->lval[0] = ++confirm_index;
392 new_clp->lc_confirm.lval[0] = confirmp->lval[0] - 1;
393 } else
394 confirmp->qval = new_clp->lc_confirm.qval =
395 ++confirm_index;
396 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
397 NFSD_VNET(nfsrvboottime);
398 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
399 nfsrv_nextclientindex();
400 new_clp->lc_stateindex = 0;
401 new_clp->lc_statemaxindex = 0;
402 new_clp->lc_prevsess = 0;
403 new_clp->lc_cbref = 0;
404 new_clp->lc_expiry = nfsrv_leaseexpiry();
405 LIST_INIT(&new_clp->lc_open);
406 LIST_INIT(&new_clp->lc_deleg);
407 LIST_INIT(&new_clp->lc_olddeleg);
408 LIST_INIT(&new_clp->lc_session);
409 for (i = 0; i < nfsrv_statehashsize; i++)
410 LIST_INIT(&new_clp->lc_stateid[i]);
411 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
412 lc_hash);
413 NFSD_VNET(nfsstatsv1_p)->srvclients++;
414 nfsrv_openpluslock++;
415 nfsrv_clients++;
416 nfsrv_clientunlock(mlocked);
417 if (zapit != 0) {
418 if (old_xprt != NULL)
419 SVC_RELEASE(old_xprt);
420 nfsrv_zapclient(clp, p);
421 }
422 *new_clpp = NULL;
423 goto out;
424 }
425
426 /*
427 * Now, handle the cases where the id is already issued.
428 */
429 if (nfsrv_notsamecredname(NFSV4OP_EXCHANGEID, nd, clp)) {
430 /*
431 * Check to see if there is expired state that should go away.
432 */
433 if (clp->lc_expiry < NFSD_MONOSEC &&
434 (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
435 if (mlocked)
436 nfsrv_cleanclient(clp, p, true, &old_xprt);
437 else
438 nfsrv_cleanclient(clp, p, false, NULL);
439 nfsrv_freedeleglist(&clp->lc_deleg);
440 }
441
442 /*
443 * If there is outstanding state, then reply NFSERR_CLIDINUSE per
444 * RFC3530 Sec. 8.1.2 last para.
445 */
446 if (!LIST_EMPTY(&clp->lc_deleg)) {
447 hasstate = 1;
448 } else if (LIST_EMPTY(&clp->lc_open)) {
449 hasstate = 0;
450 } else {
451 hasstate = 0;
452 /* Look for an Open on the OpenOwner */
453 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
454 if (!LIST_EMPTY(&stp->ls_open)) {
455 hasstate = 1;
456 break;
457 }
458 }
459 }
460 if (hasstate) {
461 /*
462 * If the uid doesn't match, return NFSERR_CLIDINUSE after
463 * filling out the correct ipaddr and portnum.
464 */
465 switch (clp->lc_req.nr_nam->sa_family) {
466 #ifdef INET
467 case AF_INET:
468 sin = (struct sockaddr_in *)new_clp->lc_req.nr_nam;
469 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
470 sin->sin_addr.s_addr = rin->sin_addr.s_addr;
471 sin->sin_port = rin->sin_port;
472 break;
473 #endif
474 #ifdef INET6
475 case AF_INET6:
476 sin6 = (struct sockaddr_in6 *)new_clp->lc_req.nr_nam;
477 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
478 sin6->sin6_addr = rin6->sin6_addr;
479 sin6->sin6_port = rin6->sin6_port;
480 break;
481 #endif
482 }
483 nfsrv_clientunlock(mlocked);
484 if (old_xprt != NULL)
485 SVC_RELEASE(old_xprt);
486 error = NFSERR_CLIDINUSE;
487 goto out;
488 }
489 }
490
491 if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
492 /*
493 * If the verifier has changed, the client has rebooted
494 * and a new client id is issued. The old state info
495 * can be thrown away once the SetClientID_Confirm or
496 * Create_Session that confirms the clientid occurs.
497 */
498 LIST_REMOVE(clp, lc_hash);
499
500 LIST_NEWHEAD(&old_sess, &clp->lc_session, sess_list);
501
502 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
503 if ((nd->nd_flag & ND_NFSV41) != 0) {
504 confirmp->lval[0] = ++confirm_index;
505 new_clp->lc_confirm.lval[0] = confirmp->lval[0] - 1;
506 } else
507 confirmp->qval = new_clp->lc_confirm.qval =
508 ++confirm_index;
509 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
510 NFSD_VNET(nfsrvboottime);
511 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
512 nfsrv_nextclientindex();
513 new_clp->lc_stateindex = 0;
514 new_clp->lc_statemaxindex = 0;
515 new_clp->lc_prevsess = 0;
516 new_clp->lc_cbref = 0;
517 new_clp->lc_expiry = nfsrv_leaseexpiry();
518
519 /*
520 * Save the state until confirmed.
521 */
522 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
523 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
524 tstp->ls_clp = new_clp;
525 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
526 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
527 tstp->ls_clp = new_clp;
528 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
529 ls_list);
530 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
531 tstp->ls_clp = new_clp;
532 for (i = 0; i < nfsrv_statehashsize; i++) {
533 LIST_NEWHEAD(&new_clp->lc_stateid[i],
534 &clp->lc_stateid[i], ls_hash);
535 LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
536 tstp->ls_clp = new_clp;
537 }
538 LIST_INIT(&new_clp->lc_session);
539 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
540 lc_hash);
541 NFSD_VNET(nfsstatsv1_p)->srvclients++;
542 nfsrv_openpluslock++;
543 nfsrv_clients++;
544 if (!mlocked) {
545 nfsrv_clientunlock(mlocked);
546 NFSLOCKSTATE();
547 }
548
549 /*
550 * Must wait until any outstanding callback on the old clp
551 * completes.
552 */
553 while (clp->lc_cbref) {
554 clp->lc_flags |= LCL_WAKEUPWANTED;
555 (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
556 "nfsd clp", 10 * hz);
557 }
558 NFSUNLOCKSTATE();
559 if (old_xprt != NULL)
560 SVC_RELEASE(old_xprt);
561 /* Get rid of all sessions on this clientid. */
562 LIST_FOREACH_SAFE(sep, &old_sess, sess_list, nsep) {
563 ret = nfsrv_freesession(NULL, sep, NULL, false, NULL);
564 if (ret != 0)
565 printf("nfsrv_setclient: verifier changed free"
566 " session failed=%d\n", ret);
567 }
568
569 nfsrv_zapclient(clp, p);
570 *new_clpp = NULL;
571 goto out;
572 }
573
574 /* For NFSv4.1, mark that we found a confirmed clientid. */
575 if ((nd->nd_flag & ND_NFSV41) != 0) {
576 clientidp->lval[0] = clp->lc_clientid.lval[0];
577 clientidp->lval[1] = clp->lc_clientid.lval[1];
578 confirmp->lval[0] = 0; /* Ignored by client */
579 confirmp->lval[1] = 1;
580 } else {
581 /*
582 * id and verifier match, so update the net address info
583 * and get rid of any existing callback authentication
584 * handle, so a new one will be acquired.
585 */
586 LIST_REMOVE(clp, lc_hash);
587 new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
588 new_clp->lc_expiry = nfsrv_leaseexpiry();
589 confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
590 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
591 clp->lc_clientid.lval[0];
592 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
593 clp->lc_clientid.lval[1];
594 new_clp->lc_delegtime = clp->lc_delegtime;
595 new_clp->lc_stateindex = clp->lc_stateindex;
596 new_clp->lc_statemaxindex = clp->lc_statemaxindex;
597 new_clp->lc_cbref = 0;
598 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
599 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
600 tstp->ls_clp = new_clp;
601 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
602 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
603 tstp->ls_clp = new_clp;
604 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
605 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
606 tstp->ls_clp = new_clp;
607 for (i = 0; i < nfsrv_statehashsize; i++) {
608 LIST_NEWHEAD(&new_clp->lc_stateid[i],
609 &clp->lc_stateid[i], ls_hash);
610 LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
611 tstp->ls_clp = new_clp;
612 }
613 LIST_INIT(&new_clp->lc_session);
614 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
615 lc_hash);
616 NFSD_VNET(nfsstatsv1_p)->srvclients++;
617 nfsrv_openpluslock++;
618 nfsrv_clients++;
619 }
620 if (!mlocked)
621 nfsrv_clientunlock(mlocked);
622
623 if ((nd->nd_flag & ND_NFSV41) == 0) {
624 /*
625 * Must wait until any outstanding callback on the old clp
626 * completes.
627 */
628 if (!mlocked)
629 NFSLOCKSTATE();
630 while (clp->lc_cbref) {
631 clp->lc_flags |= LCL_WAKEUPWANTED;
632 (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
633 "nfsdclp", 10 * hz);
634 }
635 NFSUNLOCKSTATE();
636 if (old_xprt != NULL)
637 SVC_RELEASE(old_xprt);
638 nfsrv_zapclient(clp, p);
639 *new_clpp = NULL;
640 } else {
641 if (mlocked)
642 NFSUNLOCKSTATE();
643 if (old_xprt != NULL)
644 SVC_RELEASE(old_xprt);
645 }
646
647 out:
648 NFSEXITCODE2(error, nd);
649 return (error);
650 }
651
652 /*
653 * Check to see if the client id exists and optionally confirm it.
654 */
655 int
nfsrv_getclient(nfsquad_t clientid,int opflags,struct nfsclient ** clpp,struct nfsdsession * nsep,nfsquad_t confirm,uint32_t cbprogram,struct nfsrv_descript * nd,NFSPROC_T * p)656 nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
657 struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
658 struct nfsrv_descript *nd, NFSPROC_T *p)
659 {
660 struct nfsclient *clp;
661 struct nfsstate *stp;
662 int i;
663 struct nfsclienthashhead *hp;
664 int error = 0, doneok, igotlock;
665 struct nfssessionhash *shp;
666 struct nfsdsession *sep;
667 uint64_t sessid[2];
668 CLIENT *client;
669 SVCXPRT *old_xprt;
670 bool mlocked, sess_replay;
671 static uint64_t next_sess = 0;
672
673 if (clpp)
674 *clpp = NULL;
675 if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
676 opflags != CLOPS_RENEW) && NFSD_VNET(nfsrvboottime) !=
677 clientid.lval[0]) {
678 error = NFSERR_STALECLIENTID;
679 goto out;
680 }
681
682 /*
683 * If called with opflags == CLOPS_RENEW, the State Lock is
684 * already held. Otherwise, we need to get either that or,
685 * for the case of Confirm, lock out the nfsd threads.
686 */
687 client = NULL;
688 old_xprt = NULL;
689 mlocked = true;
690 if (nfsrv_dolocallocks != 0)
691 mlocked = false;
692 if (opflags & CLOPS_CONFIRM) {
693 if (nsep != NULL &&
694 (nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
695 client = (struct __rpc_client *)
696 clnt_bck_create(nd->nd_xprt->xp_socket,
697 cbprogram, NFSV4_CBVERS);
698 if (mlocked) {
699 nfsrv_clientlock(mlocked);
700 } else {
701 NFSLOCKV4ROOTMUTEX();
702 nfsv4_relref(&nfsv4rootfs_lock);
703 do {
704 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1,
705 NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
706 } while (!igotlock);
707 }
708 /*
709 * Create a new sessionid here, since we need to do it where
710 * there is a mutex held to serialize update of next_sess.
711 */
712 if ((nd->nd_flag & ND_NFSV41) != 0) {
713 sessid[0] = ++next_sess;
714 sessid[1] = clientid.qval;
715 }
716 if (!mlocked)
717 NFSUNLOCKV4ROOTMUTEX();
718 } else if (opflags != CLOPS_RENEW) {
719 NFSLOCKSTATE();
720 }
721
722 /* For NFSv4.1, the clp is acquired from the associated session. */
723 if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
724 opflags == CLOPS_RENEW) {
725 clp = NULL;
726 if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
727 shp = NFSSESSIONHASH(nd->nd_sessionid);
728 NFSLOCKSESSION(shp);
729 sep = nfsrv_findsession(nd->nd_sessionid);
730 if (sep != NULL)
731 clp = sep->sess_clp;
732 NFSUNLOCKSESSION(shp);
733 }
734 } else {
735 hp = NFSCLIENTHASH(clientid);
736 LIST_FOREACH(clp, hp, lc_hash) {
737 if (clp->lc_clientid.lval[1] == clientid.lval[1])
738 break;
739 }
740 }
741 if (clp == NULL) {
742 if (opflags & CLOPS_CONFIRM)
743 error = NFSERR_STALECLIENTID;
744 else
745 error = NFSERR_EXPIRED;
746 } else if (clp->lc_flags & LCL_ADMINREVOKED) {
747 /*
748 * If marked admin revoked, just return the error.
749 */
750 error = NFSERR_ADMINREVOKED;
751 }
752 if (error) {
753 if (opflags & CLOPS_CONFIRM) {
754 nfsrv_clientunlock(mlocked);
755 if (client != NULL)
756 CLNT_RELEASE(client);
757 } else if (opflags != CLOPS_RENEW) {
758 NFSUNLOCKSTATE();
759 }
760 goto out;
761 }
762
763 /*
764 * Perform any operations specified by the opflags.
765 */
766 if (opflags & CLOPS_CONFIRM) {
767 sess_replay = false;
768 if ((nd->nd_flag & ND_NFSV41) != 0) {
769 /*
770 * For the case where lc_confirm.lval[0] == confirm.lval[0],
771 * use the new session, but with the previous sessionid.
772 * This is not exactly what the RFC describes, but should
773 * result in the same reply as the previous CreateSession.
774 */
775 if (clp->lc_confirm.lval[0] + 1 == confirm.lval[0]) {
776 clp->lc_confirm.lval[0] = confirm.lval[0];
777 clp->lc_prevsess = sessid[0];
778 } else if (clp->lc_confirm.lval[0] == confirm.lval[0]) {
779 if (clp->lc_prevsess == 0)
780 error = NFSERR_SEQMISORDERED;
781 else
782 sessid[0] = clp->lc_prevsess;
783 sess_replay = true;
784 } else
785 error = NFSERR_SEQMISORDERED;
786 } else if ((nd->nd_flag & ND_NFSV41) == 0 &&
787 clp->lc_confirm.qval != confirm.qval)
788 error = NFSERR_STALECLIENTID;
789 if (error == 0 && nfsrv_notsamecredname(NFSV4OP_CREATESESSION,
790 nd, clp))
791 error = NFSERR_CLIDINUSE;
792
793 if (!error) {
794 if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
795 LCL_NEEDSCONFIRM) {
796 /*
797 * Hang onto the delegations (as old delegations)
798 * for an Open with CLAIM_DELEGATE_PREV unless in
799 * grace, but get rid of the rest of the state.
800 */
801 if (mlocked)
802 nfsrv_cleanclient(clp, p, true, &old_xprt);
803 else
804 nfsrv_cleanclient(clp, p, false, NULL);
805 nfsrv_freedeleglist(&clp->lc_olddeleg);
806 if (nfsrv_checkgrace(nd, clp, 0)) {
807 /* In grace, so just delete delegations */
808 nfsrv_freedeleglist(&clp->lc_deleg);
809 } else {
810 LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
811 stp->ls_flags |= NFSLCK_OLDDELEG;
812 clp->lc_delegtime = NFSD_MONOSEC +
813 nfsrv_lease + NFSRV_LEASEDELTA;
814 LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
815 ls_list);
816 }
817 if ((nd->nd_flag & ND_NFSV41) != 0)
818 clp->lc_program = cbprogram;
819 }
820 clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
821 if (clp->lc_program)
822 clp->lc_flags |= LCL_NEEDSCBNULL;
823 /* For NFSv4.1, link the session onto the client. */
824 if (nsep != NULL) {
825 /* Hold a reference on the xprt for a backchannel. */
826 if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
827 != 0 && !sess_replay) {
828 if (clp->lc_req.nr_client == NULL) {
829 clp->lc_req.nr_client = client;
830 client = NULL;
831 }
832 if (clp->lc_req.nr_client != NULL) {
833 SVC_ACQUIRE(nd->nd_xprt);
834 CLNT_ACQUIRE(clp->lc_req.nr_client);
835 nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
836 /* Disable idle timeout. */
837 nd->nd_xprt->xp_idletimeout = 0;
838 nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
839 } else
840 nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
841 }
842 NFSBCOPY(sessid, nsep->sess_sessionid,
843 NFSX_V4SESSIONID);
844 NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
845 NFSX_V4SESSIONID);
846 if (!sess_replay) {
847 shp = NFSSESSIONHASH(nsep->sess_sessionid);
848 if (!mlocked)
849 NFSLOCKSTATE();
850 NFSLOCKSESSION(shp);
851 LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
852 LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
853 nsep->sess_clp = clp;
854 NFSUNLOCKSESSION(shp);
855 if (!mlocked)
856 NFSUNLOCKSTATE();
857 }
858 }
859 }
860 } else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
861 error = NFSERR_EXPIRED;
862 }
863
864 /*
865 * If called by the Renew Op, we must check the principal.
866 */
867 if (!error && (opflags & CLOPS_RENEWOP)) {
868 if (nfsrv_notsamecredname(0, nd, clp)) {
869 doneok = 0;
870 for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
871 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
872 if ((stp->ls_flags & NFSLCK_OPEN) &&
873 stp->ls_uid == nd->nd_cred->cr_uid) {
874 doneok = 1;
875 break;
876 }
877 }
878 }
879 if (!doneok)
880 error = NFSERR_ACCES;
881 }
882 if (!error && (clp->lc_flags & LCL_CBDOWN))
883 error = NFSERR_CBPATHDOWN;
884 }
885 if ((!error || error == NFSERR_CBPATHDOWN) &&
886 (opflags & CLOPS_RENEW)) {
887 clp->lc_expiry = nfsrv_leaseexpiry();
888 }
889 if (opflags & CLOPS_CONFIRM) {
890 nfsrv_clientunlock(mlocked);
891 if (client != NULL)
892 CLNT_RELEASE(client);
893 if (old_xprt != NULL)
894 SVC_RELEASE(old_xprt);
895 } else if (opflags != CLOPS_RENEW) {
896 NFSUNLOCKSTATE();
897 }
898 if (clpp)
899 *clpp = clp;
900
901 out:
902 NFSEXITCODE2(error, nd);
903 return (error);
904 }
905
906 /*
907 * Perform the NFSv4.1 destroy clientid.
908 */
909 int
nfsrv_destroyclient(struct nfsrv_descript * nd,nfsquad_t clientid,NFSPROC_T * p)910 nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
911 {
912 struct nfsclient *clp;
913 struct nfsclienthashhead *hp;
914 SVCXPRT *old_xprt;
915 int error = 0, i;
916 bool mlocked;
917
918 if (NFSD_VNET(nfsrvboottime) != clientid.lval[0]) {
919 error = NFSERR_STALECLIENTID;
920 goto out;
921 }
922
923 mlocked = true;
924 if (nfsrv_dolocallocks != 0)
925 mlocked = false;
926 /* Lock out other nfsd threads */
927 nfsrv_clientlock(mlocked);
928
929 hp = NFSCLIENTHASH(clientid);
930 LIST_FOREACH(clp, hp, lc_hash) {
931 if (clp->lc_clientid.lval[1] == clientid.lval[1])
932 break;
933 }
934 if (clp == NULL) {
935 nfsrv_clientunlock(mlocked);
936 /* Just return ok, since it is gone. */
937 goto out;
938 }
939
940 /* Check for the SP4_MACH_CRED case. */
941 error = nfsrv_checkmachcred(NFSV4OP_DESTROYCLIENTID, nd, clp);
942 if (error != 0) {
943 nfsrv_clientunlock(mlocked);
944 goto out;
945 }
946
947 /*
948 * Free up all layouts on the clientid. Should the client return the
949 * layouts?
950 */
951 nfsrv_freelayoutlist(clientid);
952
953 /* Scan for state on the clientid. */
954 for (i = 0; i < nfsrv_statehashsize; i++)
955 if (!LIST_EMPTY(&clp->lc_stateid[i])) {
956 nfsrv_clientunlock(mlocked);
957 error = NFSERR_CLIENTIDBUSY;
958 goto out;
959 }
960 if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
961 nfsrv_clientunlock(mlocked);
962 error = NFSERR_CLIENTIDBUSY;
963 goto out;
964 }
965
966 /* Destroy the clientid and return ok. */
967 old_xprt = NULL;
968 if (mlocked)
969 nfsrv_cleanclient(clp, p, true, &old_xprt);
970 else
971 nfsrv_cleanclient(clp, p, false, NULL);
972 nfsrv_freedeleglist(&clp->lc_deleg);
973 nfsrv_freedeleglist(&clp->lc_olddeleg);
974 LIST_REMOVE(clp, lc_hash);
975 nfsrv_clientunlock(mlocked);
976 if (old_xprt != NULL)
977 SVC_RELEASE(old_xprt);
978 nfsrv_zapclient(clp, p);
979 out:
980 NFSEXITCODE2(error, nd);
981 return (error);
982 }
983
984 /*
985 * Called from the new nfssvc syscall to admin revoke a clientid.
986 * Returns 0 for success, error otherwise.
987 */
988 int
nfsrv_adminrevoke(struct nfsd_clid * revokep,NFSPROC_T * p)989 nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
990 {
991 struct nfsclient *clp = NULL;
992 int i, error = 0;
993 int gotit, igotlock;
994
995 /*
996 * First, lock out the nfsd so that state won't change while the
997 * revocation record is being written to the stable storage restart
998 * file.
999 */
1000 NFSLOCKV4ROOTMUTEX();
1001 do {
1002 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
1003 NFSV4ROOTLOCKMUTEXPTR, NULL);
1004 } while (!igotlock);
1005 NFSUNLOCKV4ROOTMUTEX();
1006
1007 /*
1008 * Search for a match in the client list.
1009 */
1010 gotit = i = 0;
1011 while (i < nfsrv_clienthashsize && !gotit) {
1012 LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) {
1013 if (revokep->nclid_idlen == clp->lc_idlen &&
1014 !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
1015 gotit = 1;
1016 break;
1017 }
1018 }
1019 i++;
1020 }
1021 if (!gotit) {
1022 NFSLOCKV4ROOTMUTEX();
1023 nfsv4_unlock(&nfsv4rootfs_lock, 0);
1024 NFSUNLOCKV4ROOTMUTEX();
1025 error = EPERM;
1026 goto out;
1027 }
1028
1029 /*
1030 * Now, write out the revocation record
1031 */
1032 nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
1033 nfsrv_backupstable();
1034
1035 /*
1036 * and clear out the state, marking the clientid revoked.
1037 */
1038 clp->lc_flags &= ~LCL_CALLBACKSON;
1039 clp->lc_flags |= LCL_ADMINREVOKED;
1040 nfsrv_cleanclient(clp, p, false, NULL);
1041 nfsrv_freedeleglist(&clp->lc_deleg);
1042 nfsrv_freedeleglist(&clp->lc_olddeleg);
1043 NFSLOCKV4ROOTMUTEX();
1044 nfsv4_unlock(&nfsv4rootfs_lock, 0);
1045 NFSUNLOCKV4ROOTMUTEX();
1046
1047 out:
1048 NFSEXITCODE(error);
1049 return (error);
1050 }
1051
1052 /*
1053 * Dump out stats for all clients. Called from nfssvc(2), that is used
1054 * nfsstatsv1.
1055 */
1056 void
nfsrv_dumpclients(struct nfsd_dumpclients * dumpp,int maxcnt)1057 nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
1058 {
1059 struct nfsclient *clp;
1060 int i = 0, cnt = 0;
1061
1062 /*
1063 * First, get a reference on the nfsv4rootfs_lock so that an
1064 * exclusive lock cannot be acquired while dumping the clients.
1065 */
1066 NFSLOCKV4ROOTMUTEX();
1067 nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
1068 NFSUNLOCKV4ROOTMUTEX();
1069 NFSLOCKSTATE();
1070 /*
1071 * Rattle through the client lists until done.
1072 */
1073 while (i < nfsrv_clienthashsize && cnt < maxcnt) {
1074 clp = LIST_FIRST(&NFSD_VNET(nfsclienthash)[i]);
1075 while (clp != LIST_END(&NFSD_VNET(nfsclienthash)[i]) && cnt <
1076 maxcnt) {
1077 nfsrv_dumpaclient(clp, &dumpp[cnt]);
1078 cnt++;
1079 clp = LIST_NEXT(clp, lc_hash);
1080 }
1081 i++;
1082 }
1083 if (cnt < maxcnt)
1084 dumpp[cnt].ndcl_clid.nclid_idlen = 0;
1085 NFSUNLOCKSTATE();
1086 NFSLOCKV4ROOTMUTEX();
1087 nfsv4_relref(&nfsv4rootfs_lock);
1088 NFSUNLOCKV4ROOTMUTEX();
1089 }
1090
1091 /*
1092 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
1093 */
1094 static void
nfsrv_dumpaclient(struct nfsclient * clp,struct nfsd_dumpclients * dumpp)1095 nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
1096 {
1097 struct nfsstate *stp, *openstp, *lckownstp;
1098 struct nfslock *lop;
1099 sa_family_t af;
1100 #ifdef INET
1101 struct sockaddr_in *rin;
1102 #endif
1103 #ifdef INET6
1104 struct sockaddr_in6 *rin6;
1105 #endif
1106
1107 dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
1108 dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
1109 dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
1110 dumpp->ndcl_flags = clp->lc_flags;
1111 dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
1112 NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
1113 af = clp->lc_req.nr_nam->sa_family;
1114 dumpp->ndcl_addrfam = af;
1115 switch (af) {
1116 #ifdef INET
1117 case AF_INET:
1118 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
1119 dumpp->ndcl_cbaddr.sin_addr = rin->sin_addr;
1120 break;
1121 #endif
1122 #ifdef INET6
1123 case AF_INET6:
1124 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
1125 dumpp->ndcl_cbaddr.sin6_addr = rin6->sin6_addr;
1126 break;
1127 #endif
1128 }
1129
1130 /*
1131 * Now, scan the state lists and total up the opens and locks.
1132 */
1133 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
1134 dumpp->ndcl_nopenowners++;
1135 LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
1136 dumpp->ndcl_nopens++;
1137 LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
1138 dumpp->ndcl_nlockowners++;
1139 LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
1140 dumpp->ndcl_nlocks++;
1141 }
1142 }
1143 }
1144 }
1145
1146 /*
1147 * and the delegation lists.
1148 */
1149 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
1150 dumpp->ndcl_ndelegs++;
1151 }
1152 LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
1153 dumpp->ndcl_nolddelegs++;
1154 }
1155 }
1156
1157 /*
1158 * Dump out lock stats for a file.
1159 */
1160 void
nfsrv_dumplocks(vnode_t vp,struct nfsd_dumplocks * ldumpp,int maxcnt,NFSPROC_T * p)1161 nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
1162 NFSPROC_T *p)
1163 {
1164 struct nfsstate *stp;
1165 struct nfslock *lop;
1166 int cnt = 0;
1167 struct nfslockfile *lfp;
1168 sa_family_t af;
1169 #ifdef INET
1170 struct sockaddr_in *rin;
1171 #endif
1172 #ifdef INET6
1173 struct sockaddr_in6 *rin6;
1174 #endif
1175 int ret;
1176 fhandle_t nfh;
1177
1178 ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
1179 /*
1180 * First, get a reference on the nfsv4rootfs_lock so that an
1181 * exclusive lock on it cannot be acquired while dumping the locks.
1182 */
1183 NFSLOCKV4ROOTMUTEX();
1184 nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
1185 NFSUNLOCKV4ROOTMUTEX();
1186 NFSLOCKSTATE();
1187 if (!ret)
1188 ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
1189 if (ret) {
1190 ldumpp[0].ndlck_clid.nclid_idlen = 0;
1191 NFSUNLOCKSTATE();
1192 NFSLOCKV4ROOTMUTEX();
1193 nfsv4_relref(&nfsv4rootfs_lock);
1194 NFSUNLOCKV4ROOTMUTEX();
1195 return;
1196 }
1197
1198 /*
1199 * For each open share on file, dump it out.
1200 */
1201 stp = LIST_FIRST(&lfp->lf_open);
1202 while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
1203 ldumpp[cnt].ndlck_flags = stp->ls_flags;
1204 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1205 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1206 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1207 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1208 ldumpp[cnt].ndlck_owner.nclid_idlen =
1209 stp->ls_openowner->ls_ownerlen;
1210 NFSBCOPY(stp->ls_openowner->ls_owner,
1211 ldumpp[cnt].ndlck_owner.nclid_id,
1212 stp->ls_openowner->ls_ownerlen);
1213 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1214 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1215 stp->ls_clp->lc_idlen);
1216 af = stp->ls_clp->lc_req.nr_nam->sa_family;
1217 ldumpp[cnt].ndlck_addrfam = af;
1218 switch (af) {
1219 #ifdef INET
1220 case AF_INET:
1221 rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1222 ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1223 break;
1224 #endif
1225 #ifdef INET6
1226 case AF_INET6:
1227 rin6 = (struct sockaddr_in6 *)
1228 stp->ls_clp->lc_req.nr_nam;
1229 ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1230 break;
1231 #endif
1232 }
1233 stp = LIST_NEXT(stp, ls_file);
1234 cnt++;
1235 }
1236
1237 /*
1238 * and all locks.
1239 */
1240 lop = LIST_FIRST(&lfp->lf_lock);
1241 while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
1242 stp = lop->lo_stp;
1243 ldumpp[cnt].ndlck_flags = lop->lo_flags;
1244 ldumpp[cnt].ndlck_first = lop->lo_first;
1245 ldumpp[cnt].ndlck_end = lop->lo_end;
1246 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1247 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1248 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1249 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1250 ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
1251 NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
1252 stp->ls_ownerlen);
1253 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1254 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1255 stp->ls_clp->lc_idlen);
1256 af = stp->ls_clp->lc_req.nr_nam->sa_family;
1257 ldumpp[cnt].ndlck_addrfam = af;
1258 switch (af) {
1259 #ifdef INET
1260 case AF_INET:
1261 rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1262 ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1263 break;
1264 #endif
1265 #ifdef INET6
1266 case AF_INET6:
1267 rin6 = (struct sockaddr_in6 *)
1268 stp->ls_clp->lc_req.nr_nam;
1269 ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1270 break;
1271 #endif
1272 }
1273 lop = LIST_NEXT(lop, lo_lckfile);
1274 cnt++;
1275 }
1276
1277 /*
1278 * and the delegations.
1279 */
1280 stp = LIST_FIRST(&lfp->lf_deleg);
1281 while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
1282 ldumpp[cnt].ndlck_flags = stp->ls_flags;
1283 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1284 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1285 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1286 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1287 ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
1288 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1289 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1290 stp->ls_clp->lc_idlen);
1291 af = stp->ls_clp->lc_req.nr_nam->sa_family;
1292 ldumpp[cnt].ndlck_addrfam = af;
1293 switch (af) {
1294 #ifdef INET
1295 case AF_INET:
1296 rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1297 ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1298 break;
1299 #endif
1300 #ifdef INET6
1301 case AF_INET6:
1302 rin6 = (struct sockaddr_in6 *)
1303 stp->ls_clp->lc_req.nr_nam;
1304 ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1305 break;
1306 #endif
1307 }
1308 stp = LIST_NEXT(stp, ls_file);
1309 cnt++;
1310 }
1311
1312 /*
1313 * If list isn't full, mark end of list by setting the client name
1314 * to zero length.
1315 */
1316 if (cnt < maxcnt)
1317 ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
1318 NFSUNLOCKSTATE();
1319 NFSLOCKV4ROOTMUTEX();
1320 nfsv4_relref(&nfsv4rootfs_lock);
1321 NFSUNLOCKV4ROOTMUTEX();
1322 }
1323
1324 /*
1325 * Server timer routine. It can scan any linked list, so long
1326 * as it holds the spin/mutex lock and there is no exclusive lock on
1327 * nfsv4rootfs_lock.
1328 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
1329 * to do this from a callout, since the spin locks work. For
1330 * Darwin, I'm not sure what will work correctly yet.)
1331 * Should be called once per second.
1332 */
1333 void
nfsrv_servertimer(void * arg __unused)1334 nfsrv_servertimer(void *arg __unused)
1335 {
1336 struct nfsclient *clp, *nclp;
1337 struct nfsstate *stp, *nstp;
1338 int got_ref, i;
1339
1340 /*
1341 * Make sure nfsboottime is set. This is used by V3 as well
1342 * as V4. Note that nfsboottime is not nfsrvboottime, which is
1343 * only used by the V4 server for leases.
1344 */
1345 if (nfsboottime.tv_sec == 0)
1346 NFSSETBOOTTIME(nfsboottime);
1347
1348 /*
1349 * If server hasn't started yet, just return.
1350 */
1351 NFSLOCKSTATE();
1352 if (NFSD_VNET(nfsrv_stablefirst).nsf_eograce == 0) {
1353 NFSUNLOCKSTATE();
1354 return;
1355 }
1356 if (!(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE)) {
1357 if (!(NFSD_VNET(nfsrv_stablefirst).nsf_flags &
1358 NFSNSF_GRACEOVER) &&
1359 NFSD_MONOSEC > NFSD_VNET(nfsrv_stablefirst).nsf_eograce)
1360 NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
1361 (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
1362 NFSUNLOCKSTATE();
1363 return;
1364 }
1365
1366 /*
1367 * Try and get a reference count on the nfsv4rootfs_lock so that
1368 * no nfsd thread can acquire an exclusive lock on it before this
1369 * call is done. If it is already exclusively locked, just return.
1370 */
1371 NFSLOCKV4ROOTMUTEX();
1372 got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
1373 NFSUNLOCKV4ROOTMUTEX();
1374 if (got_ref == 0) {
1375 NFSUNLOCKSTATE();
1376 return;
1377 }
1378
1379 /*
1380 * For each client...
1381 */
1382 for (i = 0; i < nfsrv_clienthashsize; i++) {
1383 clp = LIST_FIRST(&NFSD_VNET(nfsclienthash)[i]);
1384 while (clp != LIST_END(&NFSD_VNET(nfsclienthash)[i])) {
1385 nclp = LIST_NEXT(clp, lc_hash);
1386 if (!(clp->lc_flags & LCL_EXPIREIT)) {
1387 if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
1388 && ((LIST_EMPTY(&clp->lc_deleg)
1389 && LIST_EMPTY(&clp->lc_open)) ||
1390 nfsrv_clients > nfsrv_clienthighwater)) ||
1391 (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
1392 (clp->lc_expiry < NFSD_MONOSEC &&
1393 (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
1394 /*
1395 * Lease has expired several nfsrv_lease times ago:
1396 * PLUS
1397 * - no state is associated with it
1398 * OR
1399 * - above high water mark for number of clients
1400 * (nfsrv_clienthighwater should be large enough
1401 * that this only occurs when clients fail to
1402 * use the same nfs_client_id4.id. Maybe somewhat
1403 * higher that the maximum number of clients that
1404 * will mount this server?)
1405 * OR
1406 * Lease has expired a very long time ago
1407 * OR
1408 * Lease has expired PLUS the number of opens + locks
1409 * has exceeded 90% of capacity
1410 *
1411 * --> Mark for expiry. The actual expiry will be done
1412 * by an nfsd sometime soon.
1413 */
1414 clp->lc_flags |= LCL_EXPIREIT;
1415 NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
1416 (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
1417 } else {
1418 /*
1419 * If there are no opens, increment no open tick cnt
1420 * If time exceeds NFSNOOPEN, mark it to be thrown away
1421 * otherwise, if there is an open, reset no open time
1422 * Hopefully, this will avoid excessive re-creation
1423 * of open owners and subsequent open confirms.
1424 */
1425 stp = LIST_FIRST(&clp->lc_open);
1426 while (stp != LIST_END(&clp->lc_open)) {
1427 nstp = LIST_NEXT(stp, ls_list);
1428 if (LIST_EMPTY(&stp->ls_open)) {
1429 stp->ls_noopens++;
1430 if (stp->ls_noopens > NFSNOOPEN ||
1431 (nfsrv_openpluslock * 2) >
1432 nfsrv_v4statelimit)
1433 NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
1434 NFSNSF_NOOPENS;
1435 } else {
1436 stp->ls_noopens = 0;
1437 }
1438 stp = nstp;
1439 }
1440 }
1441 }
1442 clp = nclp;
1443 }
1444 }
1445 NFSUNLOCKSTATE();
1446 NFSLOCKV4ROOTMUTEX();
1447 nfsv4_relref(&nfsv4rootfs_lock);
1448 NFSUNLOCKV4ROOTMUTEX();
1449 }
1450
1451 /*
1452 * The following set of functions free up the various data structures.
1453 */
1454 /*
1455 * Clear out all open/lock state related to this nfsclient.
1456 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
1457 * there are no other active nfsd threads.
1458 */
1459 void
nfsrv_cleanclient(struct nfsclient * clp,NFSPROC_T * p,bool locked,SVCXPRT ** old_xprtp)1460 nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p, bool locked,
1461 SVCXPRT **old_xprtp)
1462 {
1463 struct nfsstate *stp, *nstp;
1464 struct nfsdsession *sep, *nsep;
1465
1466 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
1467 if (locked)
1468 nfsrv_freeopenowner(stp, 0, p);
1469 else
1470 nfsrv_freeopenowner(stp, 1, p);
1471 }
1472 if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
1473 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
1474 (void)nfsrv_freesession(NULL, sep, NULL, locked,
1475 old_xprtp);
1476 }
1477
1478 /*
1479 * Free a client that has been cleaned. It should also already have been
1480 * removed from the lists.
1481 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
1482 * softclock interrupts are enabled.)
1483 */
1484 void
nfsrv_zapclient(struct nfsclient * clp,NFSPROC_T * p)1485 nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
1486 {
1487
1488 #ifdef notyet
1489 if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
1490 (LCL_GSS | LCL_CALLBACKSON) &&
1491 (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
1492 clp->lc_handlelen > 0) {
1493 clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
1494 clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
1495 (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
1496 NULL, 0, NULL, NULL, NULL, 0, p);
1497 }
1498 #endif
1499 newnfs_disconnect(NULL, &clp->lc_req);
1500 free(clp->lc_req.nr_nam, M_SONAME);
1501 NFSFREEMUTEX(&clp->lc_req.nr_mtx);
1502 free(clp->lc_stateid, M_NFSDCLIENT);
1503 free(clp, M_NFSDCLIENT);
1504 NFSLOCKSTATE();
1505 NFSD_VNET(nfsstatsv1_p)->srvclients--;
1506 nfsrv_openpluslock--;
1507 nfsrv_clients--;
1508 NFSUNLOCKSTATE();
1509 }
1510
1511 /*
1512 * Free a list of delegation state structures.
1513 * (This function will also free all nfslockfile structures that no
1514 * longer have associated state.)
1515 */
1516 void
nfsrv_freedeleglist(struct nfsstatehead * sthp)1517 nfsrv_freedeleglist(struct nfsstatehead *sthp)
1518 {
1519 struct nfsstate *stp, *nstp;
1520
1521 LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1522 nfsrv_freedeleg(stp);
1523 }
1524 LIST_INIT(sthp);
1525 }
1526
1527 /*
1528 * Free up a delegation.
1529 */
1530 static void
nfsrv_freedeleg(struct nfsstate * stp)1531 nfsrv_freedeleg(struct nfsstate *stp)
1532 {
1533 struct nfslockfile *lfp;
1534
1535 LIST_REMOVE(stp, ls_hash);
1536 LIST_REMOVE(stp, ls_list);
1537 LIST_REMOVE(stp, ls_file);
1538 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
1539 nfsrv_writedelegcnt--;
1540 lfp = stp->ls_lfp;
1541 if (LIST_EMPTY(&lfp->lf_open) &&
1542 LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1543 LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1544 lfp->lf_usecount == 0 &&
1545 nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1546 nfsrv_freenfslockfile(lfp);
1547 free(stp, M_NFSDSTATE);
1548 NFSD_VNET(nfsstatsv1_p)->srvdelegates--;
1549 nfsrv_openpluslock--;
1550 nfsrv_delegatecnt--;
1551 }
1552
1553 /*
1554 * This function frees an open owner and all associated opens.
1555 */
1556 static void
nfsrv_freeopenowner(struct nfsstate * stp,int cansleep,NFSPROC_T * p)1557 nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1558 {
1559 struct nfsstate *nstp, *tstp;
1560
1561 LIST_REMOVE(stp, ls_list);
1562 /*
1563 * Now, free all associated opens.
1564 */
1565 nstp = LIST_FIRST(&stp->ls_open);
1566 while (nstp != LIST_END(&stp->ls_open)) {
1567 tstp = nstp;
1568 nstp = LIST_NEXT(nstp, ls_list);
1569 nfsrv_freeopen(tstp, NULL, cansleep, p);
1570 }
1571 if (stp->ls_op)
1572 nfsrvd_derefcache(stp->ls_op);
1573 free(stp, M_NFSDSTATE);
1574 NFSD_VNET(nfsstatsv1_p)->srvopenowners--;
1575 nfsrv_openpluslock--;
1576 }
1577
1578 /*
1579 * This function frees an open (nfsstate open structure) with all associated
1580 * lock_owners and locks. It also frees the nfslockfile structure iff there
1581 * are no other opens on the file.
1582 * Returns 1 if it free'd the nfslockfile, 0 otherwise.
1583 */
1584 static void
nfsrv_freeopen(struct nfsstate * stp,vnode_t vp,int cansleep,NFSPROC_T * p)1585 nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
1586 {
1587 struct nfsstate *nstp, *tstp;
1588 struct nfslockfile *lfp;
1589
1590 LIST_REMOVE(stp, ls_hash);
1591 LIST_REMOVE(stp, ls_list);
1592 LIST_REMOVE(stp, ls_file);
1593
1594 lfp = stp->ls_lfp;
1595 /*
1596 * Now, free all lockowners associated with this open.
1597 * Note that, if vp != NULL, nfsrv_freelockowner() will
1598 * not call nfsrv_freeallnfslocks(), so it needs to be called, below.
1599 */
1600 LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
1601 nfsrv_freelockowner(tstp, vp, cansleep, p);
1602
1603 if (vp != NULL) {
1604 KASSERT(cansleep != 0, ("nfsrv_freeopen: cansleep == 0"));
1605 mtx_assert(NFSSTATEMUTEXPTR, MA_OWNED);
1606 /*
1607 * Only called with vp != NULL for Close when
1608 * vfs.nfsd.enable_locallocks != 0.
1609 * Lock the lfp so that it will not go away and do the
1610 * nfsrv_freeallnfslocks() call that was not done by
1611 * nfsrv_freelockowner().
1612 */
1613 nfsrv_locklf(lfp);
1614 NFSUNLOCKSTATE();
1615 NFSVOPUNLOCK(vp);
1616 nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1617 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
1618 NFSLOCKSTATE();
1619 nfsrv_unlocklf(lfp);
1620 }
1621
1622 /*
1623 * The nfslockfile is freed here if there are no locks
1624 * associated with the open.
1625 * If there are locks associated with the open, the
1626 * nfslockfile structure can be freed via nfsrv_freelockowner().
1627 */
1628 if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
1629 LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
1630 LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1631 lfp->lf_usecount == 0 &&
1632 nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1633 nfsrv_freenfslockfile(lfp);
1634 free(stp, M_NFSDSTATE);
1635 NFSD_VNET(nfsstatsv1_p)->srvopens--;
1636 nfsrv_openpluslock--;
1637 }
1638
1639 /*
1640 * Frees a lockowner and all associated locks.
1641 */
1642 static void
nfsrv_freelockowner(struct nfsstate * stp,vnode_t vp,int cansleep,NFSPROC_T * p)1643 nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1644 NFSPROC_T *p)
1645 {
1646
1647 LIST_REMOVE(stp, ls_hash);
1648 LIST_REMOVE(stp, ls_list);
1649 if (vp == NULL)
1650 nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1651 if (stp->ls_op)
1652 nfsrvd_derefcache(stp->ls_op);
1653 free(stp, M_NFSDSTATE);
1654 NFSD_VNET(nfsstatsv1_p)->srvlockowners--;
1655 nfsrv_openpluslock--;
1656 }
1657
1658 /*
1659 * Free all the nfs locks on a lockowner.
1660 */
1661 static void
nfsrv_freeallnfslocks(struct nfsstate * stp,vnode_t vp,int cansleep,NFSPROC_T * p)1662 nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1663 NFSPROC_T *p)
1664 {
1665 struct nfslock *lop, *nlop;
1666 struct nfsrollback *rlp, *nrlp;
1667 struct nfslockfile *lfp = NULL;
1668 int gottvp = 0;
1669 vnode_t tvp = NULL;
1670 uint64_t first, end;
1671
1672 if (vp != NULL)
1673 ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
1674 lop = LIST_FIRST(&stp->ls_lock);
1675 while (lop != LIST_END(&stp->ls_lock)) {
1676 nlop = LIST_NEXT(lop, lo_lckowner);
1677 /*
1678 * Since all locks should be for the same file, lfp should
1679 * not change.
1680 */
1681 if (lfp == NULL)
1682 lfp = lop->lo_lfp;
1683 else if (lfp != lop->lo_lfp)
1684 panic("allnfslocks");
1685 /*
1686 * If vp is NULL and cansleep != 0, a vnode must be acquired
1687 * from the file handle. This only occurs when called from
1688 * nfsrv_cleanclient().
1689 */
1690 if (gottvp == 0) {
1691 if (nfsrv_dolocallocks == 0)
1692 tvp = NULL;
1693 else if (vp == NULL && cansleep != 0) {
1694 tvp = nfsvno_getvp(&lfp->lf_fh);
1695 if (tvp != NULL)
1696 NFSVOPUNLOCK(tvp);
1697 } else
1698 tvp = vp;
1699 gottvp = 1;
1700 }
1701
1702 if (tvp != NULL) {
1703 if (cansleep == 0)
1704 panic("allnfs2");
1705 first = lop->lo_first;
1706 end = lop->lo_end;
1707 nfsrv_freenfslock(lop);
1708 nfsrv_localunlock(tvp, lfp, first, end, p);
1709 LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1710 nrlp)
1711 free(rlp, M_NFSDROLLBACK);
1712 LIST_INIT(&lfp->lf_rollback);
1713 } else
1714 nfsrv_freenfslock(lop);
1715 lop = nlop;
1716 }
1717 if (vp == NULL && tvp != NULL)
1718 vrele(tvp);
1719 }
1720
1721 /*
1722 * Free an nfslock structure.
1723 */
1724 static void
nfsrv_freenfslock(struct nfslock * lop)1725 nfsrv_freenfslock(struct nfslock *lop)
1726 {
1727
1728 if (lop->lo_lckfile.le_prev != NULL) {
1729 LIST_REMOVE(lop, lo_lckfile);
1730 NFSD_VNET(nfsstatsv1_p)->srvlocks--;
1731 nfsrv_openpluslock--;
1732 }
1733 LIST_REMOVE(lop, lo_lckowner);
1734 free(lop, M_NFSDLOCK);
1735 }
1736
1737 /*
1738 * This function frees an nfslockfile structure.
1739 */
1740 static void
nfsrv_freenfslockfile(struct nfslockfile * lfp)1741 nfsrv_freenfslockfile(struct nfslockfile *lfp)
1742 {
1743
1744 LIST_REMOVE(lfp, lf_hash);
1745 free(lfp, M_NFSDLOCKFILE);
1746 }
1747
1748 /*
1749 * This function looks up an nfsstate structure via stateid.
1750 */
1751 static int
nfsrv_getstate(struct nfsclient * clp,nfsv4stateid_t * stateidp,__unused u_int32_t flags,struct nfsstate ** stpp)1752 nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1753 struct nfsstate **stpp)
1754 {
1755 struct nfsstate *stp;
1756 struct nfsstatehead *hp;
1757 int error = 0;
1758
1759 *stpp = NULL;
1760 hp = NFSSTATEHASH(clp, *stateidp);
1761 LIST_FOREACH(stp, hp, ls_hash) {
1762 if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1763 NFSX_STATEIDOTHER))
1764 break;
1765 }
1766
1767 /*
1768 * If no state id in list, return NFSERR_BADSTATEID.
1769 */
1770 if (stp == LIST_END(hp)) {
1771 error = NFSERR_BADSTATEID;
1772 goto out;
1773 }
1774 *stpp = stp;
1775
1776 out:
1777 NFSEXITCODE(error);
1778 return (error);
1779 }
1780
1781 /*
1782 * This function gets an nfsstate structure via owner string.
1783 */
1784 static void
nfsrv_getowner(struct nfsstatehead * hp,struct nfsstate * new_stp,struct nfsstate ** stpp)1785 nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1786 struct nfsstate **stpp)
1787 {
1788 struct nfsstate *stp;
1789
1790 *stpp = NULL;
1791 LIST_FOREACH(stp, hp, ls_list) {
1792 if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1793 !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1794 *stpp = stp;
1795 return;
1796 }
1797 }
1798 }
1799
1800 /*
1801 * Lock control function called to update lock status.
1802 * Returns 0 upon success, -1 if there is no lock and the flags indicate
1803 * that one isn't to be created and an NFSERR_xxx for other errors.
1804 * The structures new_stp and new_lop are passed in as pointers that should
1805 * be set to NULL if the structure is used and shouldn't be free'd.
1806 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
1807 * never used and can safely be allocated on the stack. For all other
1808 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
1809 * in case they are used.
1810 */
1811 int
nfsrv_lockctrl(vnode_t vp,struct nfsstate ** new_stpp,struct nfslock ** new_lopp,struct nfslockconflict * cfp,nfsquad_t clientid,nfsv4stateid_t * stateidp,__unused struct nfsexstuff * exp,struct nfsrv_descript * nd,NFSPROC_T * p)1812 nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
1813 struct nfslock **new_lopp, struct nfslockconflict *cfp,
1814 nfsquad_t clientid, nfsv4stateid_t *stateidp,
1815 __unused struct nfsexstuff *exp,
1816 struct nfsrv_descript *nd, NFSPROC_T *p)
1817 {
1818 struct nfslock *lop;
1819 struct nfsstate *new_stp = *new_stpp;
1820 struct nfslock *new_lop = *new_lopp;
1821 struct nfsstate *tstp, *mystp, *nstp;
1822 int specialid = 0;
1823 struct nfslockfile *lfp;
1824 struct nfslock *other_lop = NULL;
1825 struct nfsstate *stp, *lckstp = NULL;
1826 struct nfsclient *clp = NULL;
1827 u_int32_t bits;
1828 int error = 0, haslock = 0, ret, reterr;
1829 int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
1830 fhandle_t nfh;
1831 uint64_t first, end;
1832 uint32_t lock_flags;
1833
1834 if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1835 /*
1836 * Note the special cases of "all 1s" or "all 0s" stateids and
1837 * let reads with all 1s go ahead.
1838 */
1839 if (new_stp->ls_stateid.seqid == 0x0 &&
1840 new_stp->ls_stateid.other[0] == 0x0 &&
1841 new_stp->ls_stateid.other[1] == 0x0 &&
1842 new_stp->ls_stateid.other[2] == 0x0)
1843 specialid = 1;
1844 else if (new_stp->ls_stateid.seqid == 0xffffffff &&
1845 new_stp->ls_stateid.other[0] == 0xffffffff &&
1846 new_stp->ls_stateid.other[1] == 0xffffffff &&
1847 new_stp->ls_stateid.other[2] == 0xffffffff)
1848 specialid = 2;
1849 }
1850
1851 /*
1852 * Check for restart conditions (client and server).
1853 */
1854 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1855 &new_stp->ls_stateid, specialid);
1856 if (error)
1857 goto out;
1858
1859 /*
1860 * Check for state resource limit exceeded.
1861 */
1862 if ((new_stp->ls_flags & NFSLCK_LOCK) &&
1863 nfsrv_openpluslock > nfsrv_v4statelimit) {
1864 error = NFSERR_RESOURCE;
1865 goto out;
1866 }
1867
1868 /*
1869 * For the lock case, get another nfslock structure,
1870 * just in case we need it.
1871 * Malloc now, before we start sifting through the linked lists,
1872 * in case we have to wait for memory.
1873 */
1874 tryagain:
1875 if (new_stp->ls_flags & NFSLCK_LOCK)
1876 other_lop = malloc(sizeof (struct nfslock),
1877 M_NFSDLOCK, M_WAITOK);
1878 filestruct_locked = 0;
1879 reterr = 0;
1880 lfp = NULL;
1881
1882 /*
1883 * Get the lockfile structure for CFH now, so we can do a sanity
1884 * check against the stateid, before incrementing the seqid#, since
1885 * we want to return NFSERR_BADSTATEID on failure and the seqid#
1886 * shouldn't be incremented for this case.
1887 * If nfsrv_getlockfile() returns -1, it means "not found", which
1888 * will be handled later.
1889 * If we are doing Lock/LockU and local locking is enabled, sleep
1890 * lock the nfslockfile structure.
1891 */
1892 getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
1893 NFSLOCKSTATE();
1894 if (getlckret == 0) {
1895 if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
1896 nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
1897 getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1898 &lfp, &nfh, 1);
1899 if (getlckret == 0)
1900 filestruct_locked = 1;
1901 } else
1902 getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1903 &lfp, &nfh, 0);
1904 }
1905 if (getlckret != 0 && getlckret != -1)
1906 reterr = getlckret;
1907
1908 if (filestruct_locked != 0) {
1909 LIST_INIT(&lfp->lf_rollback);
1910 if ((new_stp->ls_flags & NFSLCK_LOCK)) {
1911 /*
1912 * For local locking, do the advisory locking now, so
1913 * that any conflict can be detected. A failure later
1914 * can be rolled back locally. If an error is returned,
1915 * struct nfslockfile has been unlocked and any local
1916 * locking rolled back.
1917 */
1918 NFSUNLOCKSTATE();
1919 if (vnode_unlocked == 0) {
1920 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
1921 vnode_unlocked = 1;
1922 NFSVOPUNLOCK(vp);
1923 }
1924 reterr = nfsrv_locallock(vp, lfp,
1925 (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
1926 new_lop->lo_first, new_lop->lo_end, cfp, p);
1927 NFSLOCKSTATE();
1928 }
1929 }
1930
1931 if (specialid == 0) {
1932 if (new_stp->ls_flags & NFSLCK_TEST) {
1933 /*
1934 * RFC 3530 does not list LockT as an op that renews a
1935 * lease, but the consensus seems to be that it is ok
1936 * for a server to do so.
1937 */
1938 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1939 (nfsquad_t)((u_quad_t)0), 0, nd, p);
1940
1941 /*
1942 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
1943 * error returns for LockT, just go ahead and test for a lock,
1944 * since there are no locks for this client, but other locks
1945 * can conflict. (ie. same client will always be false)
1946 */
1947 if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
1948 error = 0;
1949 lckstp = new_stp;
1950 } else {
1951 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1952 (nfsquad_t)((u_quad_t)0), 0, nd, p);
1953 if (error == 0)
1954 /*
1955 * Look up the stateid
1956 */
1957 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
1958 new_stp->ls_flags, &stp);
1959 /*
1960 * do some sanity checks for an unconfirmed open or a
1961 * stateid that refers to the wrong file, for an open stateid
1962 */
1963 if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
1964 ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
1965 (getlckret == 0 && stp->ls_lfp != lfp))){
1966 /*
1967 * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID
1968 * The only exception is using SETATTR with SIZE.
1969 * */
1970 if ((new_stp->ls_flags &
1971 (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR)
1972 error = NFSERR_BADSTATEID;
1973 }
1974
1975 if (error == 0 &&
1976 (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
1977 getlckret == 0 && stp->ls_lfp != lfp)
1978 error = NFSERR_BADSTATEID;
1979
1980 /*
1981 * If the lockowner stateid doesn't refer to the same file,
1982 * I believe that is considered ok, since some clients will
1983 * only create a single lockowner and use that for all locks
1984 * on all files.
1985 * For now, log it as a diagnostic, instead of considering it
1986 * a BadStateid.
1987 */
1988 if (error == 0 && (stp->ls_flags &
1989 (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
1990 getlckret == 0 && stp->ls_lfp != lfp) {
1991 #ifdef DIAGNOSTIC
1992 printf("Got a lock statid for different file open\n");
1993 #endif
1994 /*
1995 error = NFSERR_BADSTATEID;
1996 */
1997 }
1998
1999 if (error == 0) {
2000 if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
2001 /*
2002 * If haslock set, we've already checked the seqid.
2003 */
2004 if (!haslock) {
2005 if (stp->ls_flags & NFSLCK_OPEN)
2006 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
2007 stp->ls_openowner, new_stp->ls_op);
2008 else
2009 error = NFSERR_BADSTATEID;
2010 }
2011 if (!error)
2012 nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
2013 if (lckstp) {
2014 /*
2015 * For NFSv4.1 and NFSv4.2 allow an
2016 * open_to_lock_owner when the lock_owner already
2017 * exists. Just clear NFSLCK_OPENTOLOCK so that
2018 * a new lock_owner will not be created.
2019 * RFC7530 states that the error for NFSv4.0
2020 * is NFS4ERR_BAD_SEQID.
2021 */
2022 if ((nd->nd_flag & ND_NFSV41) != 0)
2023 new_stp->ls_flags &= ~NFSLCK_OPENTOLOCK;
2024 else
2025 error = NFSERR_BADSEQID;
2026 } else
2027 lckstp = new_stp;
2028 } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
2029 /*
2030 * If haslock set, ditto above.
2031 */
2032 if (!haslock) {
2033 if (stp->ls_flags & NFSLCK_OPEN)
2034 error = NFSERR_BADSTATEID;
2035 else
2036 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
2037 stp, new_stp->ls_op);
2038 }
2039 lckstp = stp;
2040 } else {
2041 lckstp = stp;
2042 }
2043 }
2044 /*
2045 * If the seqid part of the stateid isn't the same, return
2046 * NFSERR_OLDSTATEID for cases other than I/O Ops.
2047 * For I/O Ops, only return NFSERR_OLDSTATEID if
2048 * nfsrv_returnoldstateid is set. (The consensus on the email
2049 * list was that most clients would prefer to not receive
2050 * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
2051 * is what will happen, so I use the nfsrv_returnoldstateid to
2052 * allow for either server configuration.)
2053 */
2054 if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
2055 (((nd->nd_flag & ND_NFSV41) == 0 &&
2056 (!(new_stp->ls_flags & NFSLCK_CHECK) ||
2057 nfsrv_returnoldstateid)) ||
2058 ((nd->nd_flag & ND_NFSV41) != 0 &&
2059 new_stp->ls_stateid.seqid != 0)))
2060 error = NFSERR_OLDSTATEID;
2061 }
2062 }
2063
2064 /*
2065 * Now we can check for grace.
2066 */
2067 if (!error)
2068 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2069 if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2070 nfsrv_checkstable(clp))
2071 error = NFSERR_NOGRACE;
2072 /*
2073 * If we successfully Reclaimed state, note that.
2074 */
2075 if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
2076 nfsrv_markstable(clp);
2077
2078 /*
2079 * At this point, either error == NFSERR_BADSTATEID or the
2080 * seqid# has been updated, so we can return any error.
2081 * If error == 0, there may be an error in:
2082 * nd_repstat - Set by the calling function.
2083 * reterr - Set above, if getting the nfslockfile structure
2084 * or acquiring the local lock failed.
2085 * (If both of these are set, nd_repstat should probably be
2086 * returned, since that error was detected before this
2087 * function call.)
2088 */
2089 if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
2090 if (error == 0) {
2091 if (nd->nd_repstat != 0)
2092 error = nd->nd_repstat;
2093 else
2094 error = reterr;
2095 }
2096 if (filestruct_locked != 0) {
2097 /* Roll back local locks. */
2098 NFSUNLOCKSTATE();
2099 if (vnode_unlocked == 0) {
2100 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
2101 vnode_unlocked = 1;
2102 NFSVOPUNLOCK(vp);
2103 }
2104 nfsrv_locallock_rollback(vp, lfp, p);
2105 NFSLOCKSTATE();
2106 nfsrv_unlocklf(lfp);
2107 }
2108 NFSUNLOCKSTATE();
2109 goto out;
2110 }
2111
2112 /*
2113 * Check the nfsrv_getlockfile return.
2114 * Returned -1 if no structure found.
2115 */
2116 if (getlckret == -1) {
2117 error = NFSERR_EXPIRED;
2118 /*
2119 * Called from lockt, so no lock is OK.
2120 */
2121 if (new_stp->ls_flags & NFSLCK_TEST) {
2122 error = 0;
2123 } else if (new_stp->ls_flags &
2124 (NFSLCK_CHECK | NFSLCK_SETATTR)) {
2125 /*
2126 * Called to check for a lock, OK if the stateid is all
2127 * 1s or all 0s, but there should be an nfsstate
2128 * otherwise.
2129 * (ie. If there is no open, I'll assume no share
2130 * deny bits.)
2131 */
2132 if (specialid)
2133 error = 0;
2134 else
2135 error = NFSERR_BADSTATEID;
2136 }
2137 NFSUNLOCKSTATE();
2138 goto out;
2139 }
2140
2141 /*
2142 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
2143 * For NFSLCK_CHECK, allow a read if write access is granted,
2144 * but check for a deny. For NFSLCK_LOCK, require correct access,
2145 * which implies a conflicting deny can't exist.
2146 */
2147 if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
2148 /*
2149 * Four kinds of state id:
2150 * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
2151 * - stateid for an open
2152 * - stateid for a delegation
2153 * - stateid for a lock owner
2154 */
2155 if (!specialid) {
2156 if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
2157 delegation = 1;
2158 mystp = stp;
2159 nfsrv_delaydelegtimeout(stp);
2160 } else if (stp->ls_flags & NFSLCK_OPEN) {
2161 mystp = stp;
2162 } else {
2163 mystp = stp->ls_openstp;
2164 }
2165 /*
2166 * If locking or checking, require correct access
2167 * bit set.
2168 */
2169 if (((new_stp->ls_flags & NFSLCK_LOCK) &&
2170 !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
2171 mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
2172 ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
2173 (NFSLCK_CHECK | NFSLCK_READACCESS) &&
2174 !(mystp->ls_flags & NFSLCK_READACCESS) &&
2175 nfsrv_allowreadforwriteopen == 0) ||
2176 ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
2177 (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
2178 !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
2179 if (filestruct_locked != 0) {
2180 /* Roll back local locks. */
2181 NFSUNLOCKSTATE();
2182 if (vnode_unlocked == 0) {
2183 ASSERT_VOP_ELOCKED(vp,
2184 "nfsrv_lockctrl3");
2185 vnode_unlocked = 1;
2186 NFSVOPUNLOCK(vp);
2187 }
2188 nfsrv_locallock_rollback(vp, lfp, p);
2189 NFSLOCKSTATE();
2190 nfsrv_unlocklf(lfp);
2191 }
2192 NFSUNLOCKSTATE();
2193 error = NFSERR_OPENMODE;
2194 goto out;
2195 }
2196 } else
2197 mystp = NULL;
2198 if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
2199 /*
2200 * Check for a conflicting deny bit.
2201 */
2202 LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
2203 if (tstp != mystp) {
2204 bits = tstp->ls_flags;
2205 bits >>= NFSLCK_SHIFT;
2206 if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
2207 KASSERT(vnode_unlocked == 0,
2208 ("nfsrv_lockctrl: vnode unlocked1"));
2209 ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
2210 vp, p);
2211 if (ret == 1) {
2212 /*
2213 * nfsrv_clientconflict unlocks state
2214 * when it returns non-zero.
2215 */
2216 lckstp = NULL;
2217 goto tryagain;
2218 }
2219 if (ret == 0)
2220 NFSUNLOCKSTATE();
2221 if (ret == 2)
2222 error = NFSERR_PERM;
2223 else
2224 error = NFSERR_OPENMODE;
2225 goto out;
2226 }
2227 }
2228 }
2229
2230 /* We're outta here */
2231 NFSUNLOCKSTATE();
2232 goto out;
2233 }
2234 }
2235
2236 /*
2237 * For setattr, just get rid of all the Delegations for other clients.
2238 */
2239 if (new_stp->ls_flags & NFSLCK_SETATTR) {
2240 KASSERT(vnode_unlocked == 0,
2241 ("nfsrv_lockctrl: vnode unlocked2"));
2242 ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
2243 if (ret) {
2244 /*
2245 * nfsrv_cleandeleg() unlocks state when it
2246 * returns non-zero.
2247 */
2248 if (ret == -1) {
2249 lckstp = NULL;
2250 goto tryagain;
2251 }
2252 error = ret;
2253 goto out;
2254 }
2255 if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
2256 (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
2257 LIST_EMPTY(&lfp->lf_deleg))) {
2258 NFSUNLOCKSTATE();
2259 goto out;
2260 }
2261 }
2262
2263 /*
2264 * Check for a conflicting delegation. If one is found, call
2265 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2266 * been set yet, it will get the lock. Otherwise, it will recall
2267 * the delegation. Then, we try try again...
2268 * I currently believe the conflict algorithm to be:
2269 * For Lock Ops (Lock/LockT/LockU)
2270 * - there is a conflict iff a different client has a write delegation
2271 * For Reading (Read Op)
2272 * - there is a conflict iff a different client has a write delegation
2273 * (the specialids are always a different client)
2274 * For Writing (Write/Setattr of size)
2275 * - there is a conflict if a different client has any delegation
2276 * - there is a conflict if the same client has a read delegation
2277 * (I don't understand why this isn't allowed, but that seems to be
2278 * the current consensus?)
2279 */
2280 tstp = LIST_FIRST(&lfp->lf_deleg);
2281 while (tstp != LIST_END(&lfp->lf_deleg)) {
2282 nstp = LIST_NEXT(tstp, ls_file);
2283 if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
2284 ((new_stp->ls_flags & NFSLCK_CHECK) &&
2285 (new_lop->lo_flags & NFSLCK_READ))) &&
2286 clp != tstp->ls_clp &&
2287 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
2288 ((new_stp->ls_flags & NFSLCK_CHECK) &&
2289 (new_lop->lo_flags & NFSLCK_WRITE) &&
2290 (clp != tstp->ls_clp ||
2291 (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
2292 ret = 0;
2293 if (filestruct_locked != 0) {
2294 /* Roll back local locks. */
2295 NFSUNLOCKSTATE();
2296 if (vnode_unlocked == 0) {
2297 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
2298 NFSVOPUNLOCK(vp);
2299 }
2300 nfsrv_locallock_rollback(vp, lfp, p);
2301 NFSLOCKSTATE();
2302 nfsrv_unlocklf(lfp);
2303 NFSUNLOCKSTATE();
2304 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2305 vnode_unlocked = 0;
2306 if (VN_IS_DOOMED(vp))
2307 ret = NFSERR_SERVERFAULT;
2308 NFSLOCKSTATE();
2309 }
2310 if (ret == 0)
2311 ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
2312 if (ret) {
2313 /*
2314 * nfsrv_delegconflict unlocks state when it
2315 * returns non-zero, which it always does.
2316 */
2317 if (other_lop) {
2318 free(other_lop, M_NFSDLOCK);
2319 other_lop = NULL;
2320 }
2321 if (ret == -1) {
2322 lckstp = NULL;
2323 goto tryagain;
2324 }
2325 error = ret;
2326 goto out;
2327 }
2328 /* Never gets here. */
2329 }
2330 tstp = nstp;
2331 }
2332
2333 /*
2334 * Handle the unlock case by calling nfsrv_updatelock().
2335 * (Should I have done some access checking above for unlock? For now,
2336 * just let it happen.)
2337 */
2338 if (new_stp->ls_flags & NFSLCK_UNLOCK) {
2339 first = new_lop->lo_first;
2340 end = new_lop->lo_end;
2341 nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
2342 stateidp->seqid = ++(stp->ls_stateid.seqid);
2343 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2344 stateidp->seqid = stp->ls_stateid.seqid = 1;
2345 stateidp->other[0] = stp->ls_stateid.other[0];
2346 stateidp->other[1] = stp->ls_stateid.other[1];
2347 stateidp->other[2] = stp->ls_stateid.other[2];
2348 if (filestruct_locked != 0) {
2349 NFSUNLOCKSTATE();
2350 if (vnode_unlocked == 0) {
2351 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
2352 vnode_unlocked = 1;
2353 NFSVOPUNLOCK(vp);
2354 }
2355 /* Update the local locks. */
2356 nfsrv_localunlock(vp, lfp, first, end, p);
2357 NFSLOCKSTATE();
2358 nfsrv_unlocklf(lfp);
2359 }
2360 NFSUNLOCKSTATE();
2361 goto out;
2362 }
2363
2364 /*
2365 * Search for a conflicting lock. A lock conflicts if:
2366 * - the lock range overlaps and
2367 * - at least one lock is a write lock and
2368 * - it is not owned by the same lock owner
2369 */
2370 if (!delegation) {
2371 LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
2372 if (new_lop->lo_end > lop->lo_first &&
2373 new_lop->lo_first < lop->lo_end &&
2374 (new_lop->lo_flags == NFSLCK_WRITE ||
2375 lop->lo_flags == NFSLCK_WRITE) &&
2376 lckstp != lop->lo_stp &&
2377 (clp != lop->lo_stp->ls_clp ||
2378 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
2379 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
2380 lckstp->ls_ownerlen))) {
2381 if (other_lop) {
2382 free(other_lop, M_NFSDLOCK);
2383 other_lop = NULL;
2384 }
2385 if (vnode_unlocked != 0)
2386 ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2387 NULL, p);
2388 else
2389 ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2390 vp, p);
2391 if (ret == 1) {
2392 if (filestruct_locked != 0) {
2393 if (vnode_unlocked == 0) {
2394 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
2395 NFSVOPUNLOCK(vp);
2396 }
2397 /* Roll back local locks. */
2398 nfsrv_locallock_rollback(vp, lfp, p);
2399 NFSLOCKSTATE();
2400 nfsrv_unlocklf(lfp);
2401 NFSUNLOCKSTATE();
2402 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2403 vnode_unlocked = 0;
2404 if (VN_IS_DOOMED(vp)) {
2405 error = NFSERR_SERVERFAULT;
2406 goto out;
2407 }
2408 }
2409 /*
2410 * nfsrv_clientconflict() unlocks state when it
2411 * returns non-zero.
2412 */
2413 lckstp = NULL;
2414 goto tryagain;
2415 }
2416 /*
2417 * Found a conflicting lock, so record the conflict and
2418 * return the error.
2419 */
2420 if (cfp != NULL && ret == 0) {
2421 cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
2422 cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
2423 cfp->cl_first = lop->lo_first;
2424 cfp->cl_end = lop->lo_end;
2425 cfp->cl_flags = lop->lo_flags;
2426 cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
2427 NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
2428 cfp->cl_ownerlen);
2429 }
2430 if (ret == 2)
2431 error = NFSERR_PERM;
2432 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2433 error = NFSERR_RECLAIMCONFLICT;
2434 else if (new_stp->ls_flags & NFSLCK_CHECK)
2435 error = NFSERR_LOCKED;
2436 else
2437 error = NFSERR_DENIED;
2438 if (filestruct_locked != 0 && ret == 0) {
2439 /* Roll back local locks. */
2440 NFSUNLOCKSTATE();
2441 if (vnode_unlocked == 0) {
2442 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
2443 vnode_unlocked = 1;
2444 NFSVOPUNLOCK(vp);
2445 }
2446 nfsrv_locallock_rollback(vp, lfp, p);
2447 NFSLOCKSTATE();
2448 nfsrv_unlocklf(lfp);
2449 }
2450 if (ret == 0)
2451 NFSUNLOCKSTATE();
2452 goto out;
2453 }
2454 }
2455 }
2456
2457 /*
2458 * We only get here if there was no lock that conflicted.
2459 */
2460 if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
2461 NFSUNLOCKSTATE();
2462 goto out;
2463 }
2464
2465 /*
2466 * We only get here when we are creating or modifying a lock.
2467 * There are two variants:
2468 * - exist_lock_owner where lock_owner exists
2469 * - open_to_lock_owner with new lock_owner
2470 */
2471 first = new_lop->lo_first;
2472 end = new_lop->lo_end;
2473 lock_flags = new_lop->lo_flags;
2474 if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
2475 nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
2476 stateidp->seqid = ++(lckstp->ls_stateid.seqid);
2477 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2478 stateidp->seqid = lckstp->ls_stateid.seqid = 1;
2479 stateidp->other[0] = lckstp->ls_stateid.other[0];
2480 stateidp->other[1] = lckstp->ls_stateid.other[1];
2481 stateidp->other[2] = lckstp->ls_stateid.other[2];
2482 } else {
2483 /*
2484 * The new open_to_lock_owner case.
2485 * Link the new nfsstate into the lists.
2486 */
2487 new_stp->ls_seq = new_stp->ls_opentolockseq;
2488 nfsrvd_refcache(new_stp->ls_op);
2489 stateidp->seqid = new_stp->ls_stateid.seqid = 1;
2490 stateidp->other[0] = new_stp->ls_stateid.other[0] =
2491 clp->lc_clientid.lval[0];
2492 stateidp->other[1] = new_stp->ls_stateid.other[1] =
2493 clp->lc_clientid.lval[1];
2494 stateidp->other[2] = new_stp->ls_stateid.other[2] =
2495 nfsrv_nextstateindex(clp);
2496 new_stp->ls_clp = clp;
2497 LIST_INIT(&new_stp->ls_lock);
2498 new_stp->ls_openstp = stp;
2499 new_stp->ls_lfp = lfp;
2500 nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
2501 lfp);
2502 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
2503 new_stp, ls_hash);
2504 LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
2505 *new_lopp = NULL;
2506 *new_stpp = NULL;
2507 NFSD_VNET(nfsstatsv1_p)->srvlockowners++;
2508 nfsrv_openpluslock++;
2509 }
2510 if (filestruct_locked != 0) {
2511 NFSUNLOCKSTATE();
2512 nfsrv_locallock_commit(lfp, lock_flags, first, end);
2513 NFSLOCKSTATE();
2514 nfsrv_unlocklf(lfp);
2515 }
2516 NFSUNLOCKSTATE();
2517
2518 out:
2519 if (haslock) {
2520 NFSLOCKV4ROOTMUTEX();
2521 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2522 NFSUNLOCKV4ROOTMUTEX();
2523 }
2524 if (vnode_unlocked != 0) {
2525 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2526 if (error == 0 && VN_IS_DOOMED(vp))
2527 error = NFSERR_SERVERFAULT;
2528 }
2529 if (other_lop)
2530 free(other_lop, M_NFSDLOCK);
2531 NFSEXITCODE2(error, nd);
2532 return (error);
2533 }
2534
2535 /*
2536 * Check for state errors for Open.
2537 * repstat is passed back out as an error if more critical errors
2538 * are not detected.
2539 */
2540 int
nfsrv_opencheck(nfsquad_t clientid,nfsv4stateid_t * stateidp,struct nfsstate * new_stp,vnode_t vp,struct nfsrv_descript * nd,NFSPROC_T * p,int repstat)2541 nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
2542 struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
2543 NFSPROC_T *p, int repstat)
2544 {
2545 struct nfsstate *stp, *nstp;
2546 struct nfsclient *clp;
2547 struct nfsstate *ownerstp;
2548 struct nfslockfile *lfp, *new_lfp;
2549 int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
2550
2551 if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2552 readonly = 1;
2553 /*
2554 * Check for restart conditions (client and server).
2555 */
2556 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2557 &new_stp->ls_stateid, 0);
2558 if (error)
2559 goto out;
2560
2561 /*
2562 * Check for state resource limit exceeded.
2563 * Technically this should be SMP protected, but the worst
2564 * case error is "out by one or two" on the count when it
2565 * returns NFSERR_RESOURCE and the limit is just a rather
2566 * arbitrary high water mark, so no harm is done.
2567 */
2568 if (nfsrv_openpluslock > nfsrv_v4statelimit) {
2569 error = NFSERR_RESOURCE;
2570 goto out;
2571 }
2572
2573 tryagain:
2574 new_lfp = malloc(sizeof (struct nfslockfile),
2575 M_NFSDLOCKFILE, M_WAITOK);
2576 if (vp)
2577 getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2578 NULL, p);
2579 NFSLOCKSTATE();
2580 /*
2581 * Get the nfsclient structure.
2582 */
2583 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2584 (nfsquad_t)((u_quad_t)0), 0, nd, p);
2585
2586 /*
2587 * Look up the open owner. See if it needs confirmation and
2588 * check the seq#, as required.
2589 */
2590 if (!error)
2591 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2592
2593 if (!error && ownerstp) {
2594 error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
2595 new_stp->ls_op);
2596 /*
2597 * If the OpenOwner hasn't been confirmed, assume the
2598 * old one was a replay and this one is ok.
2599 * See: RFC3530 Sec. 14.2.18.
2600 */
2601 if (error == NFSERR_BADSEQID &&
2602 (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
2603 error = 0;
2604 }
2605
2606 /*
2607 * Check for grace.
2608 */
2609 if (!error)
2610 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2611 if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2612 nfsrv_checkstable(clp))
2613 error = NFSERR_NOGRACE;
2614
2615 /*
2616 * If none of the above errors occurred, let repstat be
2617 * returned.
2618 */
2619 if (repstat && !error)
2620 error = repstat;
2621 if (error) {
2622 NFSUNLOCKSTATE();
2623 if (haslock) {
2624 NFSLOCKV4ROOTMUTEX();
2625 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2626 NFSUNLOCKV4ROOTMUTEX();
2627 }
2628 free(new_lfp, M_NFSDLOCKFILE);
2629 goto out;
2630 }
2631
2632 /*
2633 * If vp == NULL, the file doesn't exist yet, so return ok.
2634 * (This always happens on the first pass, so haslock must be 0.)
2635 */
2636 if (vp == NULL) {
2637 NFSUNLOCKSTATE();
2638 free(new_lfp, M_NFSDLOCKFILE);
2639 goto out;
2640 }
2641
2642 /*
2643 * Get the structure for the underlying file.
2644 */
2645 if (getfhret)
2646 error = getfhret;
2647 else
2648 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2649 NULL, 0);
2650 if (new_lfp)
2651 free(new_lfp, M_NFSDLOCKFILE);
2652 if (error) {
2653 NFSUNLOCKSTATE();
2654 if (haslock) {
2655 NFSLOCKV4ROOTMUTEX();
2656 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2657 NFSUNLOCKV4ROOTMUTEX();
2658 }
2659 goto out;
2660 }
2661
2662 /*
2663 * Search for a conflicting open/share.
2664 */
2665 if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2666 /*
2667 * For Delegate_Cur, search for the matching Delegation,
2668 * which indicates no conflict.
2669 * An old delegation should have been recovered by the
2670 * client doing a Claim_DELEGATE_Prev, so I won't let
2671 * it match and return NFSERR_EXPIRED. Should I let it
2672 * match?
2673 */
2674 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2675 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2676 (((nd->nd_flag & ND_NFSV41) != 0 &&
2677 stateidp->seqid == 0) ||
2678 stateidp->seqid == stp->ls_stateid.seqid) &&
2679 !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2680 NFSX_STATEIDOTHER))
2681 break;
2682 }
2683 if (stp == LIST_END(&lfp->lf_deleg) ||
2684 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2685 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2686 NFSUNLOCKSTATE();
2687 if (haslock) {
2688 NFSLOCKV4ROOTMUTEX();
2689 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2690 NFSUNLOCKV4ROOTMUTEX();
2691 }
2692 error = NFSERR_EXPIRED;
2693 goto out;
2694 }
2695 }
2696
2697 /*
2698 * Check for access/deny bit conflicts. I check for the same
2699 * owner as well, in case the client didn't bother.
2700 */
2701 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2702 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
2703 (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2704 ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2705 ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2706 ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
2707 ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2708 if (ret == 1) {
2709 /*
2710 * nfsrv_clientconflict() unlocks
2711 * state when it returns non-zero.
2712 */
2713 goto tryagain;
2714 }
2715 if (ret == 2)
2716 error = NFSERR_PERM;
2717 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2718 error = NFSERR_RECLAIMCONFLICT;
2719 else
2720 error = NFSERR_SHAREDENIED;
2721 if (ret == 0)
2722 NFSUNLOCKSTATE();
2723 if (haslock) {
2724 NFSLOCKV4ROOTMUTEX();
2725 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2726 NFSUNLOCKV4ROOTMUTEX();
2727 }
2728 goto out;
2729 }
2730 }
2731
2732 /*
2733 * Check for a conflicting delegation. If one is found, call
2734 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2735 * been set yet, it will get the lock. Otherwise, it will recall
2736 * the delegation. Then, we try try again...
2737 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2738 * isn't a conflict.)
2739 * I currently believe the conflict algorithm to be:
2740 * For Open with Read Access and Deny None
2741 * - there is a conflict iff a different client has a write delegation
2742 * For Open with other Write Access or any Deny except None
2743 * - there is a conflict if a different client has any delegation
2744 * - there is a conflict if the same client has a read delegation
2745 * (The current consensus is that this last case should be
2746 * considered a conflict since the client with a read delegation
2747 * could have done an Open with ReadAccess and WriteDeny
2748 * locally and then not have checked for the WriteDeny.)
2749 * The exception is a NFSv4.1/4.2 client that has requested
2750 * an atomic upgrade to a write delegation.
2751 * Don't check for a Reclaim, since that will be dealt with
2752 * by nfsrv_openctrl().
2753 */
2754 if (!(new_stp->ls_flags &
2755 (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
2756 stp = LIST_FIRST(&lfp->lf_deleg);
2757 while (stp != LIST_END(&lfp->lf_deleg)) {
2758 nstp = LIST_NEXT(stp, ls_file);
2759 if ((readonly && stp->ls_clp != clp &&
2760 (stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ||
2761 (!readonly && (stp->ls_clp != clp ||
2762 ((stp->ls_flags & NFSLCK_DELEGREAD) != 0 &&
2763 (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)))) {
2764 ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2765 if (ret) {
2766 /*
2767 * nfsrv_delegconflict() unlocks state
2768 * when it returns non-zero.
2769 */
2770 if (ret == -1)
2771 goto tryagain;
2772 error = ret;
2773 goto out;
2774 }
2775 }
2776 stp = nstp;
2777 }
2778 }
2779 NFSUNLOCKSTATE();
2780 if (haslock) {
2781 NFSLOCKV4ROOTMUTEX();
2782 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2783 NFSUNLOCKV4ROOTMUTEX();
2784 }
2785
2786 out:
2787 NFSEXITCODE2(error, nd);
2788 return (error);
2789 }
2790
2791 /*
2792 * Open control function to create/update open state for an open.
2793 */
2794 int
nfsrv_openctrl(struct nfsrv_descript * nd,vnode_t vp,struct nfsstate ** new_stpp,nfsquad_t clientid,nfsv4stateid_t * stateidp,nfsv4stateid_t * delegstateidp,u_int32_t * rflagsp,struct nfsexstuff * exp,NFSPROC_T * p,u_quad_t filerev)2795 nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
2796 struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
2797 nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
2798 NFSPROC_T *p, u_quad_t filerev)
2799 {
2800 struct nfsstate *new_stp = *new_stpp;
2801 struct nfsstate *stp, *nstp;
2802 struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
2803 struct nfslockfile *lfp, *new_lfp;
2804 struct nfsclient *clp;
2805 int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
2806 int readonly = 0, cbret = 1, getfhret = 0;
2807 int gotstate = 0, len = 0;
2808 u_char *clidp = NULL;
2809
2810 if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2811 readonly = 1;
2812 /*
2813 * Check for restart conditions (client and server).
2814 * (Paranoia, should have been detected by nfsrv_opencheck().)
2815 * If an error does show up, return NFSERR_EXPIRED, since the
2816 * the seqid# has already been incremented.
2817 */
2818 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2819 &new_stp->ls_stateid, 0);
2820 if (error) {
2821 printf("Nfsd: openctrl unexpected restart err=%d\n",
2822 error);
2823 error = NFSERR_EXPIRED;
2824 goto out;
2825 }
2826
2827 clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
2828 tryagain:
2829 new_lfp = malloc(sizeof (struct nfslockfile),
2830 M_NFSDLOCKFILE, M_WAITOK);
2831 new_open = malloc(sizeof (struct nfsstate),
2832 M_NFSDSTATE, M_WAITOK);
2833 new_deleg = malloc(sizeof (struct nfsstate),
2834 M_NFSDSTATE, M_WAITOK);
2835 getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2836 NULL, p);
2837 NFSLOCKSTATE();
2838 /*
2839 * Get the client structure. Since the linked lists could be changed
2840 * by other nfsd processes if this process does a tsleep(), one of
2841 * two things must be done.
2842 * 1 - don't tsleep()
2843 * or
2844 * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
2845 * before using the lists, since this lock stops the other
2846 * nfsd. This should only be used for rare cases, since it
2847 * essentially single threads the nfsd.
2848 * At this time, it is only done for cases where the stable
2849 * storage file must be written prior to completion of state
2850 * expiration.
2851 */
2852 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2853 (nfsquad_t)((u_quad_t)0), 0, nd, p);
2854 if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
2855 clp->lc_program) {
2856 /*
2857 * This happens on the first open for a client
2858 * that supports callbacks.
2859 */
2860 NFSUNLOCKSTATE();
2861 /*
2862 * Although nfsrv_docallback() will sleep, clp won't
2863 * go away, since they are only removed when the
2864 * nfsv4_lock() has blocked the nfsd threads. The
2865 * fields in clp can change, but having multiple
2866 * threads do this Null callback RPC should be
2867 * harmless.
2868 */
2869 cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
2870 NULL, 0, NULL, NULL, NULL, 0, p);
2871 NFSLOCKSTATE();
2872 clp->lc_flags &= ~LCL_NEEDSCBNULL;
2873 if (!cbret)
2874 clp->lc_flags |= LCL_CALLBACKSON;
2875 }
2876
2877 /*
2878 * Look up the open owner. See if it needs confirmation and
2879 * check the seq#, as required.
2880 */
2881 if (!error)
2882 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2883
2884 if (error) {
2885 NFSUNLOCKSTATE();
2886 printf("Nfsd: openctrl unexpected state err=%d\n",
2887 error);
2888 free(new_lfp, M_NFSDLOCKFILE);
2889 free(new_open, M_NFSDSTATE);
2890 free(new_deleg, M_NFSDSTATE);
2891 if (haslock) {
2892 NFSLOCKV4ROOTMUTEX();
2893 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2894 NFSUNLOCKV4ROOTMUTEX();
2895 }
2896 error = NFSERR_EXPIRED;
2897 goto out;
2898 }
2899
2900 if (new_stp->ls_flags & NFSLCK_RECLAIM)
2901 nfsrv_markstable(clp);
2902
2903 /*
2904 * Get the structure for the underlying file.
2905 */
2906 if (getfhret)
2907 error = getfhret;
2908 else
2909 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2910 NULL, 0);
2911 if (new_lfp)
2912 free(new_lfp, M_NFSDLOCKFILE);
2913 if (error) {
2914 NFSUNLOCKSTATE();
2915 printf("Nfsd openctrl unexpected getlockfile err=%d\n",
2916 error);
2917 free(new_open, M_NFSDSTATE);
2918 free(new_deleg, M_NFSDSTATE);
2919 if (haslock) {
2920 NFSLOCKV4ROOTMUTEX();
2921 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2922 NFSUNLOCKV4ROOTMUTEX();
2923 }
2924 goto out;
2925 }
2926
2927 /*
2928 * Search for a conflicting open/share.
2929 */
2930 if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2931 /*
2932 * For Delegate_Cur, search for the matching Delegation,
2933 * which indicates no conflict.
2934 * An old delegation should have been recovered by the
2935 * client doing a Claim_DELEGATE_Prev, so I won't let
2936 * it match and return NFSERR_EXPIRED. Should I let it
2937 * match?
2938 */
2939 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2940 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2941 (((nd->nd_flag & ND_NFSV41) != 0 &&
2942 stateidp->seqid == 0) ||
2943 stateidp->seqid == stp->ls_stateid.seqid) &&
2944 !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2945 NFSX_STATEIDOTHER))
2946 break;
2947 }
2948 if (stp == LIST_END(&lfp->lf_deleg) ||
2949 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2950 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2951 NFSUNLOCKSTATE();
2952 printf("Nfsd openctrl unexpected expiry\n");
2953 free(new_open, M_NFSDSTATE);
2954 free(new_deleg, M_NFSDSTATE);
2955 if (haslock) {
2956 NFSLOCKV4ROOTMUTEX();
2957 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2958 NFSUNLOCKV4ROOTMUTEX();
2959 }
2960 error = NFSERR_EXPIRED;
2961 goto out;
2962 }
2963
2964 /*
2965 * Don't issue a Delegation, since one already exists and
2966 * delay delegation timeout, as required.
2967 */
2968 delegate = 0;
2969 nfsrv_delaydelegtimeout(stp);
2970 }
2971
2972 /*
2973 * Check for access/deny bit conflicts. I also check for the
2974 * same owner, since the client might not have bothered to check.
2975 * Also, note an open for the same file and owner, if found,
2976 * which is all we do here for Delegate_Cur, since conflict
2977 * checking is already done.
2978 */
2979 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2980 if (ownerstp && stp->ls_openowner == ownerstp)
2981 openstp = stp;
2982 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
2983 /*
2984 * If another client has the file open, the only
2985 * delegation that can be issued is a Read delegation
2986 * and only if it is a Read open with Deny none.
2987 */
2988 if (clp != stp->ls_clp) {
2989 if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
2990 NFSLCK_READACCESS)
2991 writedeleg = 0;
2992 else
2993 delegate = 0;
2994 }
2995 if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2996 ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2997 ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2998 ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
2999 ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
3000 if (ret == 1) {
3001 /*
3002 * nfsrv_clientconflict() unlocks state
3003 * when it returns non-zero.
3004 */
3005 free(new_open, M_NFSDSTATE);
3006 free(new_deleg, M_NFSDSTATE);
3007 openstp = NULL;
3008 goto tryagain;
3009 }
3010 if (ret == 2)
3011 error = NFSERR_PERM;
3012 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
3013 error = NFSERR_RECLAIMCONFLICT;
3014 else
3015 error = NFSERR_SHAREDENIED;
3016 if (ret == 0)
3017 NFSUNLOCKSTATE();
3018 if (haslock) {
3019 NFSLOCKV4ROOTMUTEX();
3020 nfsv4_unlock(&nfsv4rootfs_lock, 1);
3021 NFSUNLOCKV4ROOTMUTEX();
3022 }
3023 free(new_open, M_NFSDSTATE);
3024 free(new_deleg, M_NFSDSTATE);
3025 printf("nfsd openctrl unexpected client cnfl\n");
3026 goto out;
3027 }
3028 }
3029 }
3030
3031 /*
3032 * Check for a conflicting delegation. If one is found, call
3033 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
3034 * been set yet, it will get the lock. Otherwise, it will recall
3035 * the delegation. Then, we try try again...
3036 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
3037 * isn't a conflict.)
3038 * I currently believe the conflict algorithm to be:
3039 * For Open with Read Access and Deny None
3040 * - there is a conflict iff a different client has a write delegation
3041 * For Open with other Write Access or any Deny except None
3042 * - there is a conflict if a different client has any delegation
3043 * - there is a conflict if the same client has a read delegation
3044 * (The current consensus is that this last case should be
3045 * considered a conflict since the client with a read delegation
3046 * could have done an Open with ReadAccess and WriteDeny
3047 * locally and then not have checked for the WriteDeny.)
3048 * The exception is a NFSv4.1/4.2 client that has requested
3049 * an atomic upgrade to a write delegation.
3050 */
3051 if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
3052 stp = LIST_FIRST(&lfp->lf_deleg);
3053 while (stp != LIST_END(&lfp->lf_deleg)) {
3054 nstp = LIST_NEXT(stp, ls_file);
3055 if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
3056 writedeleg = 0;
3057 else if (stp->ls_clp != clp ||
3058 (stp->ls_flags & NFSLCK_DELEGWRITE) != 0 ||
3059 (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)
3060 delegate = 0;
3061 if ((readonly && stp->ls_clp != clp &&
3062 (stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ||
3063 (!readonly && (stp->ls_clp != clp ||
3064 ((stp->ls_flags & NFSLCK_DELEGREAD) != 0 &&
3065 (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)))) {
3066 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3067 delegate = 2;
3068 } else {
3069 ret = nfsrv_delegconflict(stp, &haslock, p, vp);
3070 if (ret) {
3071 /*
3072 * nfsrv_delegconflict() unlocks state
3073 * when it returns non-zero.
3074 */
3075 printf("Nfsd openctrl unexpected deleg cnfl\n");
3076 free(new_open, M_NFSDSTATE);
3077 free(new_deleg, M_NFSDSTATE);
3078 if (ret == -1) {
3079 openstp = NULL;
3080 goto tryagain;
3081 }
3082 error = ret;
3083 goto out;
3084 }
3085 }
3086 }
3087 stp = nstp;
3088 }
3089 }
3090
3091 /*
3092 * We only get here if there was no open that conflicted.
3093 * If an open for the owner exists, or in the access/deny bits.
3094 * Otherwise it is a new open. If the open_owner hasn't been
3095 * confirmed, replace the open with the new one needing confirmation,
3096 * otherwise add the open.
3097 */
3098 if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
3099 /*
3100 * Handle NFSLCK_DELEGPREV by searching the old delegations for
3101 * a match. If found, just move the old delegation to the current
3102 * delegation list and issue open. If not found, return
3103 * NFSERR_EXPIRED.
3104 */
3105 LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
3106 if (stp->ls_lfp == lfp) {
3107 /* Found it */
3108 if (stp->ls_clp != clp)
3109 panic("olddeleg clp");
3110 LIST_REMOVE(stp, ls_list);
3111 LIST_REMOVE(stp, ls_hash);
3112 stp->ls_flags &= ~NFSLCK_OLDDELEG;
3113 stp->ls_stateid.seqid = delegstateidp->seqid = 1;
3114 stp->ls_stateid.other[0] = delegstateidp->other[0] =
3115 clp->lc_clientid.lval[0];
3116 stp->ls_stateid.other[1] = delegstateidp->other[1] =
3117 clp->lc_clientid.lval[1];
3118 stp->ls_stateid.other[2] = delegstateidp->other[2] =
3119 nfsrv_nextstateindex(clp);
3120 stp->ls_compref = nd->nd_compref;
3121 LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
3122 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3123 stp->ls_stateid), stp, ls_hash);
3124 if (stp->ls_flags & NFSLCK_DELEGWRITE)
3125 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3126 else
3127 *rflagsp |= NFSV4OPEN_READDELEGATE;
3128 clp->lc_delegtime = NFSD_MONOSEC +
3129 nfsrv_lease + NFSRV_LEASEDELTA;
3130
3131 /*
3132 * Now, do the associated open.
3133 */
3134 new_open->ls_stateid.seqid = 1;
3135 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3136 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3137 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3138 new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
3139 NFSLCK_OPEN;
3140 if (stp->ls_flags & NFSLCK_DELEGWRITE)
3141 new_open->ls_flags |= (NFSLCK_READACCESS |
3142 NFSLCK_WRITEACCESS);
3143 else
3144 new_open->ls_flags |= NFSLCK_READACCESS;
3145 new_open->ls_uid = new_stp->ls_uid;
3146 new_open->ls_lfp = lfp;
3147 new_open->ls_clp = clp;
3148 LIST_INIT(&new_open->ls_open);
3149 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3150 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3151 new_open, ls_hash);
3152 /*
3153 * and handle the open owner
3154 */
3155 if (ownerstp) {
3156 new_open->ls_openowner = ownerstp;
3157 LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
3158 } else {
3159 new_open->ls_openowner = new_stp;
3160 new_stp->ls_flags = 0;
3161 nfsrvd_refcache(new_stp->ls_op);
3162 new_stp->ls_noopens = 0;
3163 LIST_INIT(&new_stp->ls_open);
3164 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3165 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3166 *new_stpp = NULL;
3167 NFSD_VNET(nfsstatsv1_p)->srvopenowners++;
3168 nfsrv_openpluslock++;
3169 }
3170 openstp = new_open;
3171 new_open = NULL;
3172 NFSD_VNET(nfsstatsv1_p)->srvopens++;
3173 nfsrv_openpluslock++;
3174 break;
3175 }
3176 }
3177 if (stp == LIST_END(&clp->lc_olddeleg))
3178 error = NFSERR_EXPIRED;
3179 } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
3180 /*
3181 * Scan to see that no delegation for this client and file
3182 * doesn't already exist.
3183 * There also shouldn't yet be an Open for this file and
3184 * openowner.
3185 */
3186 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
3187 if (stp->ls_clp == clp)
3188 break;
3189 }
3190 if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
3191 /*
3192 * This is the Claim_Previous case with a delegation
3193 * type != Delegate_None.
3194 */
3195 /*
3196 * First, add the delegation. (Although we must issue the
3197 * delegation, we can also ask for an immediate return.)
3198 */
3199 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3200 new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
3201 clp->lc_clientid.lval[0];
3202 new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
3203 clp->lc_clientid.lval[1];
3204 new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
3205 nfsrv_nextstateindex(clp);
3206 if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
3207 new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3208 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3209 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3210 nfsrv_writedelegcnt++;
3211 } else {
3212 new_deleg->ls_flags = (NFSLCK_DELEGREAD |
3213 NFSLCK_READACCESS);
3214 *rflagsp |= NFSV4OPEN_READDELEGATE;
3215 }
3216 new_deleg->ls_uid = new_stp->ls_uid;
3217 new_deleg->ls_lfp = lfp;
3218 new_deleg->ls_clp = clp;
3219 new_deleg->ls_filerev = filerev;
3220 new_deleg->ls_compref = nd->nd_compref;
3221 new_deleg->ls_lastrecall = 0;
3222 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3223 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3224 new_deleg->ls_stateid), new_deleg, ls_hash);
3225 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3226 new_deleg = NULL;
3227 if (delegate == 2 || nfsrv_issuedelegs == 0 ||
3228 (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3229 LCL_CALLBACKSON ||
3230 NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
3231 !NFSVNO_DELEGOK(vp))
3232 *rflagsp |= NFSV4OPEN_RECALL;
3233 NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
3234 nfsrv_openpluslock++;
3235 nfsrv_delegatecnt++;
3236
3237 /*
3238 * Now, do the associated open.
3239 */
3240 new_open->ls_stateid.seqid = 1;
3241 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3242 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3243 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3244 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
3245 NFSLCK_OPEN;
3246 if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
3247 new_open->ls_flags |= (NFSLCK_READACCESS |
3248 NFSLCK_WRITEACCESS);
3249 else
3250 new_open->ls_flags |= NFSLCK_READACCESS;
3251 new_open->ls_uid = new_stp->ls_uid;
3252 new_open->ls_lfp = lfp;
3253 new_open->ls_clp = clp;
3254 LIST_INIT(&new_open->ls_open);
3255 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3256 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3257 new_open, ls_hash);
3258 /*
3259 * and handle the open owner
3260 */
3261 if (ownerstp) {
3262 new_open->ls_openowner = ownerstp;
3263 LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3264 } else {
3265 new_open->ls_openowner = new_stp;
3266 new_stp->ls_flags = 0;
3267 nfsrvd_refcache(new_stp->ls_op);
3268 new_stp->ls_noopens = 0;
3269 LIST_INIT(&new_stp->ls_open);
3270 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3271 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3272 *new_stpp = NULL;
3273 NFSD_VNET(nfsstatsv1_p)->srvopenowners++;
3274 nfsrv_openpluslock++;
3275 }
3276 openstp = new_open;
3277 new_open = NULL;
3278 NFSD_VNET(nfsstatsv1_p)->srvopens++;
3279 nfsrv_openpluslock++;
3280 } else {
3281 error = NFSERR_RECLAIMCONFLICT;
3282 }
3283 } else if (ownerstp) {
3284 if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
3285 /* Replace the open */
3286 if (ownerstp->ls_op)
3287 nfsrvd_derefcache(ownerstp->ls_op);
3288 ownerstp->ls_op = new_stp->ls_op;
3289 nfsrvd_refcache(ownerstp->ls_op);
3290 ownerstp->ls_seq = new_stp->ls_seq;
3291 *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3292 stp = LIST_FIRST(&ownerstp->ls_open);
3293 stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3294 NFSLCK_OPEN;
3295 stp->ls_stateid.seqid = 1;
3296 stp->ls_uid = new_stp->ls_uid;
3297 if (lfp != stp->ls_lfp) {
3298 LIST_REMOVE(stp, ls_file);
3299 LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
3300 stp->ls_lfp = lfp;
3301 }
3302 openstp = stp;
3303 } else if (openstp) {
3304 openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
3305 openstp->ls_stateid.seqid++;
3306 if ((nd->nd_flag & ND_NFSV41) != 0 &&
3307 openstp->ls_stateid.seqid == 0)
3308 openstp->ls_stateid.seqid = 1;
3309
3310 /*
3311 * This is where we can choose to issue a delegation.
3312 */
3313 nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
3314 readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
3315 new_stp, lfp, rflagsp, delegstateidp);
3316 } else {
3317 new_open->ls_stateid.seqid = 1;
3318 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3319 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3320 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3321 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
3322 NFSLCK_OPEN;
3323 new_open->ls_uid = new_stp->ls_uid;
3324 new_open->ls_openowner = ownerstp;
3325 new_open->ls_lfp = lfp;
3326 new_open->ls_clp = clp;
3327 LIST_INIT(&new_open->ls_open);
3328 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3329 LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3330 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3331 new_open, ls_hash);
3332 openstp = new_open;
3333 new_open = NULL;
3334 NFSD_VNET(nfsstatsv1_p)->srvopens++;
3335 nfsrv_openpluslock++;
3336
3337 /*
3338 * This is where we can choose to issue a delegation.
3339 */
3340 nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
3341 readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
3342 new_stp, lfp, rflagsp, delegstateidp);
3343 }
3344 } else {
3345 /*
3346 * New owner case. Start the open_owner sequence with a
3347 * Needs confirmation (unless a reclaim) and hang the
3348 * new open off it.
3349 */
3350 new_open->ls_stateid.seqid = 1;
3351 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3352 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3353 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3354 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3355 NFSLCK_OPEN;
3356 new_open->ls_uid = new_stp->ls_uid;
3357 LIST_INIT(&new_open->ls_open);
3358 new_open->ls_openowner = new_stp;
3359 new_open->ls_lfp = lfp;
3360 new_open->ls_clp = clp;
3361 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3362 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3363 new_stp->ls_flags = 0;
3364 } else if ((nd->nd_flag & ND_NFSV41) != 0) {
3365 /*
3366 * This is where we can choose to issue a delegation.
3367 */
3368 nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
3369 readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
3370 new_stp, lfp, rflagsp, delegstateidp);
3371 /* NFSv4.1 never needs confirmation. */
3372 new_stp->ls_flags = 0;
3373
3374 /*
3375 * Since NFSv4.1 never does an OpenConfirm, the first
3376 * open state will be acquired here.
3377 */
3378 if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3379 clp->lc_flags |= LCL_STAMPEDSTABLE;
3380 len = clp->lc_idlen;
3381 NFSBCOPY(clp->lc_id, clidp, len);
3382 gotstate = 1;
3383 }
3384 } else {
3385 *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3386 new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
3387 }
3388 nfsrvd_refcache(new_stp->ls_op);
3389 new_stp->ls_noopens = 0;
3390 LIST_INIT(&new_stp->ls_open);
3391 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3392 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3393 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3394 new_open, ls_hash);
3395 openstp = new_open;
3396 new_open = NULL;
3397 *new_stpp = NULL;
3398 NFSD_VNET(nfsstatsv1_p)->srvopens++;
3399 nfsrv_openpluslock++;
3400 NFSD_VNET(nfsstatsv1_p)->srvopenowners++;
3401 nfsrv_openpluslock++;
3402 }
3403 if (!error) {
3404 stateidp->seqid = openstp->ls_stateid.seqid;
3405 stateidp->other[0] = openstp->ls_stateid.other[0];
3406 stateidp->other[1] = openstp->ls_stateid.other[1];
3407 stateidp->other[2] = openstp->ls_stateid.other[2];
3408 }
3409 NFSUNLOCKSTATE();
3410 if (haslock) {
3411 NFSLOCKV4ROOTMUTEX();
3412 nfsv4_unlock(&nfsv4rootfs_lock, 1);
3413 NFSUNLOCKV4ROOTMUTEX();
3414 }
3415 if (new_open)
3416 free(new_open, M_NFSDSTATE);
3417 if (new_deleg)
3418 free(new_deleg, M_NFSDSTATE);
3419
3420 /*
3421 * If the NFSv4.1 client just acquired its first open, write a timestamp
3422 * to the stable storage file.
3423 */
3424 if (gotstate != 0) {
3425 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3426 nfsrv_backupstable();
3427 }
3428
3429 out:
3430 free(clidp, M_TEMP);
3431 NFSEXITCODE2(error, nd);
3432 return (error);
3433 }
3434
3435 /*
3436 * Open update. Does the confirm, downgrade and close.
3437 */
3438 int
nfsrv_openupdate(vnode_t vp,struct nfsstate * new_stp,nfsquad_t clientid,nfsv4stateid_t * stateidp,struct nfsrv_descript * nd,NFSPROC_T * p,int * retwriteaccessp)3439 nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
3440 nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p,
3441 int *retwriteaccessp)
3442 {
3443 struct nfsstate *stp;
3444 struct nfsclient *clp;
3445 u_int32_t bits;
3446 int error = 0, gotstate = 0, len = 0;
3447 u_char *clidp = NULL;
3448
3449 /*
3450 * Check for restart conditions (client and server).
3451 */
3452 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3453 &new_stp->ls_stateid, 0);
3454 if (error)
3455 goto out;
3456
3457 clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
3458 NFSLOCKSTATE();
3459 /*
3460 * Get the open structure via clientid and stateid.
3461 */
3462 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3463 (nfsquad_t)((u_quad_t)0), 0, nd, p);
3464 if (!error)
3465 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
3466 new_stp->ls_flags, &stp);
3467
3468 /*
3469 * Sanity check the open.
3470 */
3471 if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
3472 (!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
3473 (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
3474 ((new_stp->ls_flags & NFSLCK_CONFIRM) &&
3475 (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
3476 error = NFSERR_BADSTATEID;
3477
3478 if (!error)
3479 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
3480 stp->ls_openowner, new_stp->ls_op);
3481 if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
3482 (((nd->nd_flag & ND_NFSV41) == 0 &&
3483 !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
3484 ((nd->nd_flag & ND_NFSV41) != 0 &&
3485 new_stp->ls_stateid.seqid != 0)))
3486 error = NFSERR_OLDSTATEID;
3487 if (!error && vp->v_type != VREG) {
3488 if (vp->v_type == VDIR)
3489 error = NFSERR_ISDIR;
3490 else
3491 error = NFSERR_INVAL;
3492 }
3493
3494 if (error) {
3495 /*
3496 * If a client tries to confirm an Open with a bad
3497 * seqid# and there are no byte range locks or other Opens
3498 * on the openowner, just throw it away, so the next use of the
3499 * openowner will start a fresh seq#.
3500 */
3501 if (error == NFSERR_BADSEQID &&
3502 (new_stp->ls_flags & NFSLCK_CONFIRM) &&
3503 nfsrv_nootherstate(stp))
3504 nfsrv_freeopenowner(stp->ls_openowner, 0, p);
3505 NFSUNLOCKSTATE();
3506 goto out;
3507 }
3508
3509 /*
3510 * Set the return stateid.
3511 */
3512 stateidp->seqid = stp->ls_stateid.seqid + 1;
3513 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
3514 stateidp->seqid = 1;
3515 stateidp->other[0] = stp->ls_stateid.other[0];
3516 stateidp->other[1] = stp->ls_stateid.other[1];
3517 stateidp->other[2] = stp->ls_stateid.other[2];
3518 /*
3519 * Now, handle the three cases.
3520 */
3521 if (new_stp->ls_flags & NFSLCK_CONFIRM) {
3522 /*
3523 * If the open doesn't need confirmation, it seems to me that
3524 * there is a client error, but I'll just log it and keep going?
3525 */
3526 if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
3527 printf("Nfsv4d: stray open confirm\n");
3528 stp->ls_openowner->ls_flags = 0;
3529 stp->ls_stateid.seqid++;
3530 if ((nd->nd_flag & ND_NFSV41) != 0 &&
3531 stp->ls_stateid.seqid == 0)
3532 stp->ls_stateid.seqid = 1;
3533 if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3534 clp->lc_flags |= LCL_STAMPEDSTABLE;
3535 len = clp->lc_idlen;
3536 NFSBCOPY(clp->lc_id, clidp, len);
3537 gotstate = 1;
3538 }
3539 } else if (new_stp->ls_flags & NFSLCK_CLOSE) {
3540 if (retwriteaccessp != NULL) {
3541 if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0)
3542 *retwriteaccessp = 1;
3543 else
3544 *retwriteaccessp = 0;
3545 }
3546 if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
3547 ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
3548 nfsrv_freeopen(stp, vp, 1, p);
3549 } else {
3550 nfsrv_freeopen(stp, NULL, 0, p);
3551 }
3552 } else {
3553 /*
3554 * Update the share bits, making sure that the new set are a
3555 * subset of the old ones.
3556 */
3557 bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
3558 if (~(stp->ls_flags) & bits) {
3559 NFSUNLOCKSTATE();
3560 error = NFSERR_INVAL;
3561 goto out;
3562 }
3563 stp->ls_flags = (bits | NFSLCK_OPEN);
3564 stp->ls_stateid.seqid++;
3565 if ((nd->nd_flag & ND_NFSV41) != 0 &&
3566 stp->ls_stateid.seqid == 0)
3567 stp->ls_stateid.seqid = 1;
3568 }
3569 NFSUNLOCKSTATE();
3570
3571 /*
3572 * If the client just confirmed its first open, write a timestamp
3573 * to the stable storage file.
3574 */
3575 if (gotstate != 0) {
3576 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3577 nfsrv_backupstable();
3578 }
3579
3580 out:
3581 free(clidp, M_TEMP);
3582 NFSEXITCODE2(error, nd);
3583 return (error);
3584 }
3585
3586 /*
3587 * Delegation update. Does the purge and return.
3588 */
3589 int
nfsrv_delegupdate(struct nfsrv_descript * nd,nfsquad_t clientid,nfsv4stateid_t * stateidp,vnode_t vp,int op,struct ucred * cred,NFSPROC_T * p,int * retwriteaccessp)3590 nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
3591 nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
3592 NFSPROC_T *p, int *retwriteaccessp)
3593 {
3594 struct nfsstate *stp;
3595 struct nfsclient *clp;
3596 int error = 0;
3597 fhandle_t fh;
3598
3599 /*
3600 * Do a sanity check against the file handle for DelegReturn.
3601 */
3602 if (vp) {
3603 error = nfsvno_getfh(vp, &fh, p);
3604 if (error)
3605 goto out;
3606 }
3607 /*
3608 * Check for restart conditions (client and server).
3609 */
3610 if (op == NFSV4OP_DELEGRETURN)
3611 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3612 stateidp, 0);
3613 else
3614 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3615 stateidp, 0);
3616
3617 NFSLOCKSTATE();
3618 /*
3619 * Get the open structure via clientid and stateid.
3620 */
3621 if (!error)
3622 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3623 (nfsquad_t)((u_quad_t)0), 0, nd, p);
3624 if (error) {
3625 if (error == NFSERR_CBPATHDOWN)
3626 error = 0;
3627 if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3628 error = NFSERR_STALESTATEID;
3629 }
3630 if (!error && op == NFSV4OP_DELEGRETURN) {
3631 error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3632 if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
3633 ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
3634 error = NFSERR_OLDSTATEID;
3635 }
3636 /*
3637 * NFSERR_EXPIRED means that the state has gone away,
3638 * so Delegations have been purged. Just return ok.
3639 */
3640 if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3641 NFSUNLOCKSTATE();
3642 error = 0;
3643 goto out;
3644 }
3645 if (error) {
3646 NFSUNLOCKSTATE();
3647 goto out;
3648 }
3649
3650 if (op == NFSV4OP_DELEGRETURN) {
3651 if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3652 sizeof (fhandle_t))) {
3653 NFSUNLOCKSTATE();
3654 error = NFSERR_BADSTATEID;
3655 goto out;
3656 }
3657 if (retwriteaccessp != NULL) {
3658 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
3659 *retwriteaccessp = 1;
3660 else
3661 *retwriteaccessp = 0;
3662 }
3663 nfsrv_freedeleg(stp);
3664 } else {
3665 nfsrv_freedeleglist(&clp->lc_olddeleg);
3666 }
3667 NFSUNLOCKSTATE();
3668 error = 0;
3669
3670 out:
3671 NFSEXITCODE(error);
3672 return (error);
3673 }
3674
3675 /*
3676 * Release lock owner.
3677 */
3678 int
nfsrv_releaselckown(struct nfsstate * new_stp,nfsquad_t clientid,NFSPROC_T * p)3679 nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3680 NFSPROC_T *p)
3681 {
3682 struct nfsstate *stp, *nstp, *openstp, *ownstp;
3683 struct nfsclient *clp;
3684 int error = 0;
3685
3686 /*
3687 * Check for restart conditions (client and server).
3688 */
3689 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3690 &new_stp->ls_stateid, 0);
3691 if (error)
3692 goto out;
3693
3694 NFSLOCKSTATE();
3695 /*
3696 * Get the lock owner by name.
3697 */
3698 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3699 (nfsquad_t)((u_quad_t)0), 0, NULL, p);
3700 if (error) {
3701 NFSUNLOCKSTATE();
3702 goto out;
3703 }
3704 LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3705 LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3706 stp = LIST_FIRST(&openstp->ls_open);
3707 while (stp != LIST_END(&openstp->ls_open)) {
3708 nstp = LIST_NEXT(stp, ls_list);
3709 /*
3710 * If the owner matches, check for locks and
3711 * then free or return an error.
3712 */
3713 if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3714 !NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3715 stp->ls_ownerlen)){
3716 if (LIST_EMPTY(&stp->ls_lock)) {
3717 nfsrv_freelockowner(stp, NULL, 0, p);
3718 } else {
3719 NFSUNLOCKSTATE();
3720 error = NFSERR_LOCKSHELD;
3721 goto out;
3722 }
3723 }
3724 stp = nstp;
3725 }
3726 }
3727 }
3728 NFSUNLOCKSTATE();
3729
3730 out:
3731 NFSEXITCODE(error);
3732 return (error);
3733 }
3734
3735 /*
3736 * Get the file handle for a lock structure.
3737 */
3738 static int
nfsrv_getlockfh(vnode_t vp,u_short flags,struct nfslockfile * new_lfp,fhandle_t * nfhp,NFSPROC_T * p)3739 nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
3740 fhandle_t *nfhp, NFSPROC_T *p)
3741 {
3742 fhandle_t *fhp = NULL;
3743 int error;
3744
3745 /*
3746 * For lock, use the new nfslock structure, otherwise just
3747 * a fhandle_t on the stack.
3748 */
3749 if (flags & NFSLCK_OPEN) {
3750 KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
3751 fhp = &new_lfp->lf_fh;
3752 } else if (nfhp) {
3753 fhp = nfhp;
3754 } else {
3755 panic("nfsrv_getlockfh");
3756 }
3757 error = nfsvno_getfh(vp, fhp, p);
3758 NFSEXITCODE(error);
3759 return (error);
3760 }
3761
3762 /*
3763 * Get an nfs lock structure. Allocate one, as required, and return a
3764 * pointer to it.
3765 * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3766 */
3767 static int
nfsrv_getlockfile(u_short flags,struct nfslockfile ** new_lfpp,struct nfslockfile ** lfpp,fhandle_t * nfhp,int lockit)3768 nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3769 struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3770 {
3771 struct nfslockfile *lfp;
3772 fhandle_t *fhp = NULL, *tfhp;
3773 struct nfslockhashhead *hp;
3774 struct nfslockfile *new_lfp = NULL;
3775
3776 /*
3777 * For lock, use the new nfslock structure, otherwise just
3778 * a fhandle_t on the stack.
3779 */
3780 if (flags & NFSLCK_OPEN) {
3781 new_lfp = *new_lfpp;
3782 fhp = &new_lfp->lf_fh;
3783 } else if (nfhp) {
3784 fhp = nfhp;
3785 } else {
3786 panic("nfsrv_getlockfile");
3787 }
3788
3789 hp = NFSLOCKHASH(fhp);
3790 LIST_FOREACH(lfp, hp, lf_hash) {
3791 tfhp = &lfp->lf_fh;
3792 if (NFSVNO_CMPFH(fhp, tfhp)) {
3793 if (lockit)
3794 nfsrv_locklf(lfp);
3795 *lfpp = lfp;
3796 return (0);
3797 }
3798 }
3799 if (!(flags & NFSLCK_OPEN))
3800 return (-1);
3801
3802 /*
3803 * No match, so chain the new one into the list.
3804 */
3805 LIST_INIT(&new_lfp->lf_open);
3806 LIST_INIT(&new_lfp->lf_lock);
3807 LIST_INIT(&new_lfp->lf_deleg);
3808 LIST_INIT(&new_lfp->lf_locallock);
3809 LIST_INIT(&new_lfp->lf_rollback);
3810 new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3811 new_lfp->lf_locallock_lck.nfslock_lock = 0;
3812 new_lfp->lf_usecount = 0;
3813 LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3814 *lfpp = new_lfp;
3815 *new_lfpp = NULL;
3816 return (0);
3817 }
3818
3819 /*
3820 * This function adds a nfslock lock structure to the list for the associated
3821 * nfsstate and nfslockfile structures. It will be inserted after the
3822 * entry pointed at by insert_lop.
3823 */
3824 static void
nfsrv_insertlock(struct nfslock * new_lop,struct nfslock * insert_lop,struct nfsstate * stp,struct nfslockfile * lfp)3825 nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3826 struct nfsstate *stp, struct nfslockfile *lfp)
3827 {
3828 struct nfslock *lop, *nlop;
3829
3830 new_lop->lo_stp = stp;
3831 new_lop->lo_lfp = lfp;
3832
3833 if (stp != NULL) {
3834 /* Insert in increasing lo_first order */
3835 lop = LIST_FIRST(&lfp->lf_lock);
3836 if (lop == LIST_END(&lfp->lf_lock) ||
3837 new_lop->lo_first <= lop->lo_first) {
3838 LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3839 } else {
3840 nlop = LIST_NEXT(lop, lo_lckfile);
3841 while (nlop != LIST_END(&lfp->lf_lock) &&
3842 nlop->lo_first < new_lop->lo_first) {
3843 lop = nlop;
3844 nlop = LIST_NEXT(lop, lo_lckfile);
3845 }
3846 LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3847 }
3848 } else {
3849 new_lop->lo_lckfile.le_prev = NULL; /* list not used */
3850 }
3851
3852 /*
3853 * Insert after insert_lop, which is overloaded as stp or lfp for
3854 * an empty list.
3855 */
3856 if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3857 LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3858 else if ((struct nfsstate *)insert_lop == stp)
3859 LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3860 else
3861 LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3862 if (stp != NULL) {
3863 NFSD_VNET(nfsstatsv1_p)->srvlocks++;
3864 nfsrv_openpluslock++;
3865 }
3866 }
3867
/*
 * This function updates the locking for a lock owner and given file. It
 * maintains a list of lock ranges ordered on increasing file offset that
 * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
 * For a lock, new_lop is always added to the list; for an unlock
 * (NFSLCK_UNLOCK set in its lo_flags) it is only used to trim/absorb the
 * existing ranges and is not inserted, except when it is reused as the
 * second half of a split range (see below).
 *
 * stp - when non-NULL, the list that is updated is stp->ls_lock;
 *	when NULL, lfp->lf_locallock is updated instead.
 * new_lopp - the lock/unlock range to apply. *new_lopp is set to NULL
 *	whenever the structure it referred to has been linked into the list.
 * other_lopp - a spare structure used when an existing range has to be
 *	split in two. *other_lopp is set to NULL when a split happens.
 *	*other_lopp may be NULL on entry only for an unlock, in which case
 *	new_lop itself is reused for the split (panics otherwise).
 * lfp - the file the range applies to; list entries whose lo_lfp differs
 *	are skipped.
 */
static void
nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
    struct nfslock **other_lopp, struct nfslockfile *lfp)
{
	struct nfslock *new_lop = *new_lopp;
	struct nfslock *lop, *tlop, *ilop;
	struct nfslock *other_lop = *other_lopp;
	int unlock = 0, myfile = 0;
	u_int64_t tmp;

	/*
	 * Work down the list until the lock is merged.
	 */
	if (new_lop->lo_flags & NFSLCK_UNLOCK)
		unlock = 1;
	/*
	 * ilop is the entry new_lop will be inserted after. It starts out as
	 * the list owner itself (cast to a struct nfslock *) and is handed to
	 * nfsrv_insertlock() in that form if no list entry is chosen.
	 */
	if (stp != NULL) {
		ilop = (struct nfslock *)stp;
		lop = LIST_FIRST(&stp->ls_lock);
	} else {
		ilop = (struct nfslock *)lfp;
		lop = LIST_FIRST(&lfp->lf_locallock);
	}
	while (lop != NULL) {
		/*
		 * Only check locks for this file that aren't before the start of
		 * new lock's range.
		 */
		if (lop->lo_lfp == lfp) {
			/* Remember that at least one entry for this file was seen. */
			myfile = 1;
			if (lop->lo_end >= new_lop->lo_first) {
				if (new_lop->lo_end < lop->lo_first) {
					/*
					 * If the new lock ends before the start of the
					 * current lock's range, no merge, just insert
					 * the new lock.
					 */
					break;
				}
				if (new_lop->lo_flags == lop->lo_flags ||
				    (new_lop->lo_first <= lop->lo_first &&
				    new_lop->lo_end >= lop->lo_end)) {
					/*
					 * This lock can be absorbed by the new lock/unlock.
					 * This happens when it covers the entire range
					 * of the old lock or is contiguous
					 * with the old lock and is of the same type or an
					 * unlock.
					 * The new range is widened to the union of both
					 * and the old entry is freed.
					 */
					if (lop->lo_first < new_lop->lo_first)
						new_lop->lo_first = lop->lo_first;
					if (lop->lo_end > new_lop->lo_end)
						new_lop->lo_end = lop->lo_end;
					/* Step to the next entry before freeing this one. */
					tlop = lop;
					lop = LIST_NEXT(lop, lo_lckowner);
					nfsrv_freenfslock(tlop);
					continue;
				}

				/*
				 * All these cases are for contiguous locks that are not the
				 * same type, so they can't be merged.
				 */
				if (new_lop->lo_first <= lop->lo_first) {
					/*
					 * This case is where the new lock overlaps with the
					 * first part of the old lock. Move the start of the
					 * old lock to just past the end of the new lock. The
					 * new lock will be inserted in front of the old, since
					 * ilop hasn't been updated. (We are done now.)
					 */
					lop->lo_first = new_lop->lo_end;
					break;
				}
				if (new_lop->lo_end >= lop->lo_end) {
					/*
					 * This case is where the new lock overlaps with the
					 * end of the old lock's range. Move the old lock's
					 * end to just before the new lock's first and insert
					 * the new lock after the old lock.
					 * Might not be done yet, since the new lock could
					 * overlap further locks with higher ranges.
					 */
					lop->lo_end = new_lop->lo_first;
					ilop = lop;
					lop = LIST_NEXT(lop, lo_lckowner);
					continue;
				}
				/*
				 * The final case is where the new lock's range is in the
				 * middle of the current lock's and splits the current lock
				 * up. Use *other_lopp to handle the second part of the
				 * split old lock range. (We are done now.)
				 * For unlock, we use new_lop as other_lop and tmp, since
				 * other_lop and new_lop are the same for this case.
				 * We noted the unlock case above, so we don't need
				 * new_lop->lo_flags any longer.
				 */
				/* Save lo_first, since new_lop may be overwritten below. */
				tmp = new_lop->lo_first;
				if (other_lop == NULL) {
					if (!unlock)
						panic("nfsd srv update unlock");
					other_lop = new_lop;
					*new_lopp = NULL;
				}
				/* other_lop becomes the tail of the old range. */
				other_lop->lo_first = new_lop->lo_end;
				other_lop->lo_end = lop->lo_end;
				other_lop->lo_flags = lop->lo_flags;
				other_lop->lo_stp = stp;
				other_lop->lo_lfp = lfp;
				/* ...and lop is truncated to be the head of the old range. */
				lop->lo_end = tmp;
				nfsrv_insertlock(other_lop, lop, stp, lfp);
				*other_lopp = NULL;
				ilop = lop;
				break;
			}
		}
		ilop = lop;
		lop = LIST_NEXT(lop, lo_lckowner);
		/*
		 * Stop once the entries for this file have been passed (or the
		 * list has ended after at least one such entry was seen).
		 */
		if (myfile && (lop == NULL || lop->lo_lfp != lfp))
			break;
	}

	/*
	 * Insert the new lock in the list at the appropriate place.
	 * (An unlock is never inserted.)
	 */
	if (!unlock) {
		nfsrv_insertlock(new_lop, ilop, stp, lfp);
		*new_lopp = NULL;
	}
}
4005
4006 /*
4007 * This function handles sequencing of locks, etc.
4008 * It returns an error that indicates what the caller should do.
4009 */
4010 static int
nfsrv_checkseqid(struct nfsrv_descript * nd,u_int32_t seqid,struct nfsstate * stp,struct nfsrvcache * op)4011 nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
4012 struct nfsstate *stp, struct nfsrvcache *op)
4013 {
4014 int error = 0;
4015
4016 if ((nd->nd_flag & ND_NFSV41) != 0)
4017 /* NFSv4.1 ignores the open_seqid and lock_seqid. */
4018 goto out;
4019 if (op != nd->nd_rp)
4020 panic("nfsrvstate checkseqid");
4021 if (!(op->rc_flag & RC_INPROG))
4022 panic("nfsrvstate not inprog");
4023 if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
4024 printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
4025 panic("nfsrvstate op refcnt");
4026 }
4027
4028 /* If ND_ERELOOKUP is set, the seqid has already been handled. */
4029 if ((nd->nd_flag & ND_ERELOOKUP) != 0)
4030 goto out;
4031
4032 if ((stp->ls_seq + 1) == seqid) {
4033 if (stp->ls_op)
4034 nfsrvd_derefcache(stp->ls_op);
4035 stp->ls_op = op;
4036 nfsrvd_refcache(op);
4037 stp->ls_seq = seqid;
4038 goto out;
4039 } else if (stp->ls_seq == seqid && stp->ls_op &&
4040 op->rc_xid == stp->ls_op->rc_xid &&
4041 op->rc_refcnt == 0 &&
4042 op->rc_reqlen == stp->ls_op->rc_reqlen &&
4043 op->rc_cksum == stp->ls_op->rc_cksum) {
4044 if (stp->ls_op->rc_flag & RC_INPROG) {
4045 error = NFSERR_DONTREPLY;
4046 goto out;
4047 }
4048 nd->nd_rp = stp->ls_op;
4049 nd->nd_rp->rc_flag |= RC_INPROG;
4050 nfsrvd_delcache(op);
4051 error = NFSERR_REPLYFROMCACHE;
4052 goto out;
4053 }
4054 error = NFSERR_BADSEQID;
4055
4056 out:
4057 NFSEXITCODE2(error, nd);
4058 return (error);
4059 }
4060
4061 /*
4062 * Get the client ip address for callbacks. If the strings can't be parsed,
4063 * just set lc_program to 0 to indicate no callbacks are possible.
4064 * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
4065 * the address to the client's transport address. This won't be used
4066 * for callbacks, but can be printed out by nfsstats for info.)
4067 * Return error if the xdr can't be parsed, 0 otherwise.
4068 */
4069 int
nfsrv_getclientipaddr(struct nfsrv_descript * nd,struct nfsclient * clp)4070 nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
4071 {
4072 u_int32_t *tl;
4073 u_char *cp, *cp2;
4074 int i, j, maxalen = 0, minalen = 0;
4075 sa_family_t af;
4076 #ifdef INET
4077 struct sockaddr_in *rin = NULL, *sin;
4078 #endif
4079 #ifdef INET6
4080 struct sockaddr_in6 *rin6 = NULL, *sin6;
4081 #endif
4082 u_char *addr;
4083 int error = 0, cantparse = 0;
4084 union {
4085 in_addr_t ival;
4086 u_char cval[4];
4087 } ip;
4088 union {
4089 in_port_t sval;
4090 u_char cval[2];
4091 } port;
4092
4093 /* 8 is the maximum length of the port# string. */
4094 addr = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK);
4095 clp->lc_req.nr_client = NULL;
4096 clp->lc_req.nr_lock = 0;
4097 af = AF_UNSPEC;
4098 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4099 i = fxdr_unsigned(int, *tl);
4100 if (i >= 3 && i <= 4) {
4101 error = nfsrv_mtostr(nd, addr, i);
4102 if (error)
4103 goto nfsmout;
4104 #ifdef INET
4105 if (!strcmp(addr, "tcp")) {
4106 clp->lc_flags |= LCL_TCPCALLBACK;
4107 clp->lc_req.nr_sotype = SOCK_STREAM;
4108 clp->lc_req.nr_soproto = IPPROTO_TCP;
4109 af = AF_INET;
4110 } else if (!strcmp(addr, "udp")) {
4111 clp->lc_req.nr_sotype = SOCK_DGRAM;
4112 clp->lc_req.nr_soproto = IPPROTO_UDP;
4113 af = AF_INET;
4114 }
4115 #endif
4116 #ifdef INET6
4117 if (af == AF_UNSPEC) {
4118 if (!strcmp(addr, "tcp6")) {
4119 clp->lc_flags |= LCL_TCPCALLBACK;
4120 clp->lc_req.nr_sotype = SOCK_STREAM;
4121 clp->lc_req.nr_soproto = IPPROTO_TCP;
4122 af = AF_INET6;
4123 } else if (!strcmp(addr, "udp6")) {
4124 clp->lc_req.nr_sotype = SOCK_DGRAM;
4125 clp->lc_req.nr_soproto = IPPROTO_UDP;
4126 af = AF_INET6;
4127 }
4128 }
4129 #endif
4130 if (af == AF_UNSPEC) {
4131 cantparse = 1;
4132 }
4133 } else {
4134 cantparse = 1;
4135 if (i > 0) {
4136 error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
4137 if (error)
4138 goto nfsmout;
4139 }
4140 }
4141 /*
4142 * The caller has allocated clp->lc_req.nr_nam to be large enough
4143 * for either AF_INET or AF_INET6 and zeroed out the contents.
4144 * maxalen is set to the maximum length of the host IP address string
4145 * plus 8 for the maximum length of the port#.
4146 * minalen is set to the minimum length of the host IP address string
4147 * plus 4 for the minimum length of the port#.
4148 * These lengths do not include NULL termination,
4149 * so INET[6]_ADDRSTRLEN - 1 is used in the calculations.
4150 */
4151 switch (af) {
4152 #ifdef INET
4153 case AF_INET:
4154 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
4155 rin->sin_family = AF_INET;
4156 rin->sin_len = sizeof(struct sockaddr_in);
4157 maxalen = INET_ADDRSTRLEN - 1 + 8;
4158 minalen = 7 + 4;
4159 break;
4160 #endif
4161 #ifdef INET6
4162 case AF_INET6:
4163 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
4164 rin6->sin6_family = AF_INET6;
4165 rin6->sin6_len = sizeof(struct sockaddr_in6);
4166 maxalen = INET6_ADDRSTRLEN - 1 + 8;
4167 minalen = 3 + 4;
4168 break;
4169 #endif
4170 }
4171 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4172 i = fxdr_unsigned(int, *tl);
4173 if (i < 0) {
4174 error = NFSERR_BADXDR;
4175 goto nfsmout;
4176 } else if (i == 0) {
4177 cantparse = 1;
4178 } else if (!cantparse && i <= maxalen && i >= minalen) {
4179 error = nfsrv_mtostr(nd, addr, i);
4180 if (error)
4181 goto nfsmout;
4182
4183 /*
4184 * Parse out the address fields. We expect 6 decimal numbers
4185 * separated by '.'s for AF_INET and two decimal numbers
4186 * preceeded by '.'s for AF_INET6.
4187 */
4188 cp = NULL;
4189 switch (af) {
4190 #ifdef INET6
4191 /*
4192 * For AF_INET6, first parse the host address.
4193 */
4194 case AF_INET6:
4195 cp = strchr(addr, '.');
4196 if (cp != NULL) {
4197 *cp++ = '\0';
4198 if (inet_pton(af, addr, &rin6->sin6_addr) == 1)
4199 i = 4;
4200 else {
4201 cp = NULL;
4202 cantparse = 1;
4203 }
4204 }
4205 break;
4206 #endif
4207 #ifdef INET
4208 case AF_INET:
4209 cp = addr;
4210 i = 0;
4211 break;
4212 #endif
4213 }
4214 while (cp != NULL && *cp && i < 6) {
4215 cp2 = cp;
4216 while (*cp2 && *cp2 != '.')
4217 cp2++;
4218 if (*cp2)
4219 *cp2++ = '\0';
4220 else if (i != 5) {
4221 cantparse = 1;
4222 break;
4223 }
4224 j = nfsrv_getipnumber(cp);
4225 if (j >= 0) {
4226 if (i < 4)
4227 ip.cval[3 - i] = j;
4228 else
4229 port.cval[5 - i] = j;
4230 } else {
4231 cantparse = 1;
4232 break;
4233 }
4234 cp = cp2;
4235 i++;
4236 }
4237 if (!cantparse) {
4238 /*
4239 * The host address INADDR_ANY is (mis)used to indicate
4240 * "there is no valid callback address".
4241 */
4242 switch (af) {
4243 #ifdef INET6
4244 case AF_INET6:
4245 if (!IN6_ARE_ADDR_EQUAL(&rin6->sin6_addr,
4246 &in6addr_any))
4247 rin6->sin6_port = htons(port.sval);
4248 else
4249 cantparse = 1;
4250 break;
4251 #endif
4252 #ifdef INET
4253 case AF_INET:
4254 if (ip.ival != INADDR_ANY) {
4255 rin->sin_addr.s_addr = htonl(ip.ival);
4256 rin->sin_port = htons(port.sval);
4257 } else {
4258 cantparse = 1;
4259 }
4260 break;
4261 #endif
4262 }
4263 }
4264 } else {
4265 cantparse = 1;
4266 if (i > 0) {
4267 error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
4268 if (error)
4269 goto nfsmout;
4270 }
4271 }
4272 if (cantparse) {
4273 switch (nd->nd_nam->sa_family) {
4274 #ifdef INET
4275 case AF_INET:
4276 sin = (struct sockaddr_in *)nd->nd_nam;
4277 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
4278 rin->sin_family = AF_INET;
4279 rin->sin_len = sizeof(struct sockaddr_in);
4280 rin->sin_addr.s_addr = sin->sin_addr.s_addr;
4281 rin->sin_port = 0x0;
4282 break;
4283 #endif
4284 #ifdef INET6
4285 case AF_INET6:
4286 sin6 = (struct sockaddr_in6 *)nd->nd_nam;
4287 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
4288 rin6->sin6_family = AF_INET6;
4289 rin6->sin6_len = sizeof(struct sockaddr_in6);
4290 rin6->sin6_addr = sin6->sin6_addr;
4291 rin6->sin6_port = 0x0;
4292 break;
4293 #endif
4294 }
4295 clp->lc_program = 0;
4296 }
4297 nfsmout:
4298 free(addr, M_TEMP);
4299 NFSEXITCODE2(error, nd);
4300 return (error);
4301 }
4302
/*
 * Turn a string of one to three decimal digits into a number in the
 * range 0-255.
 * Return -1 upon error, which includes an empty string, a string with
 * more than three characters, any non-digit character and a value
 * larger than 255.
 */
static int
nfsrv_getipnumber(u_char *cp)
{
	int val = 0, ndigits = 0;

	/*
	 * An empty field is not a number. (Without this check an address
	 * such as "1..2.3.4.5" would have its empty field taken as 0.)
	 */
	if (*cp == '\0')
		return (-1);
	while (*cp != '\0') {
		if (ndigits > 2 || *cp < '0' || *cp > '9')
			return (-1);
		val = val * 10 + (*cp - '0');
		cp++;
		ndigits++;
	}
	if (val > 255)
		return (-1);
	return (val);
}
4324
4325 /*
4326 * This function checks for restart conditions.
4327 */
4328 static int
nfsrv_checkrestart(nfsquad_t clientid,u_int32_t flags,nfsv4stateid_t * stateidp,int specialid)4329 nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
4330 nfsv4stateid_t *stateidp, int specialid)
4331 {
4332 int ret = 0;
4333
4334 /*
4335 * First check for a server restart. Open, LockT, ReleaseLockOwner
4336 * and DelegPurge have a clientid, the rest a stateid.
4337 */
4338 if (flags &
4339 (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
4340 if (clientid.lval[0] != NFSD_VNET(nfsrvboottime)) {
4341 ret = NFSERR_STALECLIENTID;
4342 goto out;
4343 }
4344 } else if (stateidp->other[0] != NFSD_VNET(nfsrvboottime) &&
4345 specialid == 0) {
4346 ret = NFSERR_STALESTATEID;
4347 goto out;
4348 }
4349
4350 /*
4351 * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
4352 * not use a lock/open owner seqid#, so the check can be done now.
4353 * (The others will be checked, as required, later.)
4354 */
4355 if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
4356 goto out;
4357
4358 NFSLOCKSTATE();
4359 ret = nfsrv_checkgrace(NULL, NULL, flags);
4360 NFSUNLOCKSTATE();
4361
4362 out:
4363 NFSEXITCODE(ret);
4364 return (ret);
4365 }
4366
4367 /*
4368 * Check for grace.
4369 */
4370 static int
nfsrv_checkgrace(struct nfsrv_descript * nd,struct nfsclient * clp,u_int32_t flags)4371 nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
4372 u_int32_t flags)
4373 {
4374 int error = 0, notreclaimed;
4375 struct nfsrv_stable *sp;
4376
4377 if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags & (NFSNSF_UPDATEDONE |
4378 NFSNSF_GRACEOVER)) == 0) {
4379 /*
4380 * First, check to see if all of the clients have done a
4381 * ReclaimComplete. If so, grace can end now.
4382 */
4383 notreclaimed = 0;
4384 LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head,
4385 nst_list) {
4386 if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
4387 notreclaimed = 1;
4388 break;
4389 }
4390 }
4391 if (notreclaimed == 0)
4392 NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
4393 (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4394 }
4395
4396 if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_GRACEOVER) != 0) {
4397 if (flags & NFSLCK_RECLAIM) {
4398 error = NFSERR_NOGRACE;
4399 goto out;
4400 }
4401 } else {
4402 if (!(flags & NFSLCK_RECLAIM)) {
4403 error = NFSERR_GRACE;
4404 goto out;
4405 }
4406 if (nd != NULL && clp != NULL &&
4407 (nd->nd_flag & ND_NFSV41) != 0 &&
4408 (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
4409 error = NFSERR_NOGRACE;
4410 goto out;
4411 }
4412
4413 /*
4414 * If grace is almost over and we are still getting Reclaims,
4415 * extend grace a bit.
4416 */
4417 if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
4418 NFSD_VNET(nfsrv_stablefirst).nsf_eograce)
4419 NFSD_VNET(nfsrv_stablefirst).nsf_eograce =
4420 NFSD_MONOSEC + NFSRV_LEASEDELTA;
4421 }
4422
4423 out:
4424 NFSEXITCODE(error);
4425 return (error);
4426 }
4427
/*
 * Do a server callback.
 * The "trunc" argument is slightly overloaded and refers to different
 * boolean arguments for CBRECALL and CBLAYOUTRECALL.
 *
 * clp - the client to call back. It must not have LCL_NEEDSCONFIRM set
 *	(panics otherwise). lc_cbref is held for the duration of the call.
 * procnum - NFSV4OP_CBGETATTR, NFSV4OP_CBRECALL, NFSV4OP_CBLAYOUTRECALL or
 *	NFSV4PROC_CBNULL; anything else fails with NFSERR_SERVERFAULT.
 * stateidp - stateid put in the CBRECALL and CBLAYOUTRECALL arguments.
 * trunc - boolean put in the CBRECALL and CBLAYOUTRECALL arguments.
 * fhp - file handle put in the CBGETATTR, CBRECALL and CBLAYOUTRECALL
 *	arguments.
 * nap - for CBGETATTR, passed to nfsv4_loadattr() to receive the
 *	attributes from the reply.
 * attrbitp - attribute bits requested by CBGETATTR.
 * laytype - layout type put in the CBLAYOUTRECALL arguments.
 * p - passed through to nfsv4_loadattr().
 *
 * Returns 0 upon success, the error from setting up or sending the RPC
 * (in which case the client is flagged LCL_CBDOWN), or the nd_repstat
 * value from the callback reply.
 */
static int
nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
    int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp,
    int laytype, NFSPROC_T *p)
{
	struct mbuf *m;
	u_int32_t *tl;
	struct nfsrv_descript *nd;
	struct ucred *cred;
	int error = 0, slotpos;
	u_int32_t callback;
	struct nfsdsession *sep = NULL;
	uint64_t tval;
	bool dotls;

	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	cred = newnfs_getcred();
	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
		NFSUNLOCKSTATE();
		panic("docallb");
	}
	clp->lc_cbref++;

	/*
	 * Fill the callback program# and version into the request
	 * structure for newnfs_connect() to use.
	 */
	clp->lc_req.nr_prog = clp->lc_program;
#ifdef notnow
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		clp->lc_req.nr_vers = NFSV41_CBVERS;
	else
#endif
		clp->lc_req.nr_vers = NFSV4_CBVERS;

	/*
	 * First, fill in some of the fields of nd and cr.
	 */
	nd->nd_flag = ND_NFSV4;
	if (clp->lc_flags & LCL_GSS)
		nd->nd_flag |= ND_KERBV;
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		nd->nd_flag |= ND_NFSV41;
	if ((clp->lc_flags & LCL_NFSV42) != 0)
		nd->nd_flag |= ND_NFSV42;
	nd->nd_repstat = 0;
	cred->cr_uid = clp->lc_uid;
	cred->cr_gid = clp->lc_gid;
	callback = clp->lc_callback;
	NFSUNLOCKSTATE();
	cred->cr_ngroups = 1;

	/*
	 * Get the first mbuf for the request.
	 */
	MGET(m, M_WAITOK, MT_DATA);
	m->m_len = 0;
	nd->nd_mreq = nd->nd_mb = m;
	nd->nd_bpos = mtod(m, caddr_t);

	/*
	 * and build the callback request.
	 * Each failure below frees the request mbuf chain before jumping to
	 * errout.
	 */
	if (procnum == NFSV4OP_CBGETATTR) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
		    "CB Getattr", &sep, &slotpos);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		/* Arguments: file handle, then the requested attribute bits. */
		(void)nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
		(void)nfsrv_putattrbit(nd, attrbitp);
	} else if (procnum == NFSV4OP_CBRECALL) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
		    "CB Recall", &sep, &slotpos);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		/* Arguments: stateid, the trunc boolean, then the file handle. */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
		    NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		if (trunc)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
		(void)nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
	} else if (procnum == NFSV4OP_CBLAYOUTRECALL) {
		NFSD_DEBUG(4, "docallback layout recall\n");
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback,
		    NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep, &slotpos);
		NFSD_DEBUG(4, "aft cbcallargs=%d\n", error);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		/*
		 * Arguments: layout type, iomode (any), the trunc boolean and
		 * NFSV4LAYOUTRET_FILE, followed by the file handle, two
		 * 64bit values (0 and UINT64_MAX) and the stateid.
		 */
		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(laytype);
		*tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY);
		if (trunc)
			*tl++ = newnfs_true;
		else
			*tl++ = newnfs_false;
		*tl = txdr_unsigned(NFSV4LAYOUTRET_FILE);
		(void)nfsm_fhtom(NULL, nd, (uint8_t *)fhp, NFSX_MYFH, 0);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID);
		tval = 0;
		txdr_hyper(tval, tl); tl += 2;
		tval = UINT64_MAX;
		txdr_hyper(tval, tl); tl += 2;
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		NFSD_DEBUG(4, "aft args\n");
	} else if (procnum == NFSV4PROC_CBNULL) {
		nd->nd_procnum = NFSV4PROC_CBNULL;
		/* No arguments; an NFSv4.1 client still needs its session. */
		if ((clp->lc_flags & LCL_NFSV41) != 0) {
			error = nfsv4_getcbsession(clp, &sep);
			if (error != 0) {
				m_freem(nd->nd_mreq);
				goto errout;
			}
		}
	} else {
		error = NFSERR_SERVERFAULT;
		m_freem(nd->nd_mreq);
		goto errout;
	}

	/*
	 * Call newnfs_connect(), as required, and then newnfs_request().
	 */
	dotls = false;
	if ((clp->lc_flags & LCL_TLSCB) != 0)
		dotls = true;
	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
	if (clp->lc_req.nr_client == NULL) {
		if ((clp->lc_flags & LCL_NFSV41) != 0) {
			/*
			 * No connection is made here for an NFSv4.1 client,
			 * so fail the callback and release the slot (except
			 * for CBNULL) and the session reference.
			 */
			error = ECONNREFUSED;
			if (procnum != NFSV4PROC_CBNULL)
				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
				    true);
			nfsrv_freesession(NULL, sep, NULL, false, NULL);
		} else if (nd->nd_procnum == NFSV4PROC_CBNULL)
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 1, dotls, &clp->lc_req.nr_client);
		else
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 3, dotls, &clp->lc_req.nr_client);
	}
	newnfs_sndunlock(&clp->lc_req.nr_lock);
	NFSD_DEBUG(4, "aft sndunlock=%d\n", error);
	if (!error) {
		if ((nd->nd_flag & ND_NFSV41) != 0) {
			KASSERT(sep != NULL, ("sep NULL"));
			if (sep->sess_cbsess.nfsess_xprt != NULL)
				error = newnfs_request(nd, NULL, clp,
				    &clp->lc_req, NULL, NULL, cred,
				    clp->lc_program, clp->lc_req.nr_vers, NULL,
				    1, NULL, &sep->sess_cbsess);
			else {
				/*
				 * This should probably never occur, but if a
				 * client somehow does an RPC without a
				 * SequenceID Op that causes a callback just
				 * after the nfsd threads have been terminated
				 * and restarted we could conceivably get here
				 * without a backchannel xprt.
				 */
				printf("nfsrv_docallback: no xprt\n");
				error = ECONNREFUSED;
			}
			NFSD_DEBUG(4, "aft newnfs_request=%d\n", error);
			if (error != 0 && procnum != NFSV4PROC_CBNULL) {
				/*
				 * It is likely that the callback was never
				 * processed by the client and, as such,
				 * the sequence# for the session slot needs
				 * to be backed up by one to avoid a
				 * NFSERR_SEQMISORDERED error reply.
				 * For the unlikely case where the callback
				 * was processed by the client, this will
				 * make the next callback on the slot
				 * appear to be a retry.
				 * Since callbacks never specify that the
				 * reply be cached, this "apparent retry"
				 * should not be a problem.
				 */
				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
				    true);
			}
			nfsrv_freesession(NULL, sep, NULL, false, NULL);
		} else
			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
			    NULL, NULL, cred, clp->lc_program,
			    clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
	}
errout:
	NFSFREECRED(cred);

	/*
	 * If error is set here, the Callback path isn't working
	 * properly, so twiddle the appropriate LCL_ flags.
	 * (nd_repstat != 0 indicates the Callback path is working,
	 * but the callback failed on the client.)
	 */
	if (error) {
		/*
		 * Mark the callback pathway down, which disables issuing
		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
		 */
		NFSLOCKSTATE();
		clp->lc_flags |= LCL_CBDOWN;
		NFSUNLOCKSTATE();
	} else {
		/*
		 * Callback worked. If the callback path was down, disable
		 * callbacks, so no more delegations will be issued. (This
		 * is done on the assumption that the callback pathway is
		 * flaky.)
		 */
		NFSLOCKSTATE();
		if (clp->lc_flags & LCL_CBDOWN)
			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
		NFSUNLOCKSTATE();
		if (nd->nd_repstat) {
			/* The client replied, but with an error status. */
			error = nd->nd_repstat;
			NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n",
			    procnum, error);
		} else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
			/* Parse the attributes in the reply into *nap. */
			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
			    p, NULL);
		m_freem(nd->nd_mrep);
	}
	/*
	 * Drop the callback reference and wake up any thread that asked to
	 * be told when it reaches zero.
	 */
	NFSLOCKSTATE();
	clp->lc_cbref--;
	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
		clp->lc_flags &= ~LCL_WAKEUPWANTED;
		wakeup(clp);
	}
	NFSUNLOCKSTATE();

	free(nd, M_TEMP);
	NFSEXITCODE(error);
	return (error);
}
4686
4687 /*
4688 * Set up the compound RPC for the callback.
4689 */
4690 static int
nfsrv_cbcallargs(struct nfsrv_descript * nd,struct nfsclient * clp,uint32_t callback,int op,const char * optag,struct nfsdsession ** sepp,int * slotposp)4691 nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
4692 uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
4693 int *slotposp)
4694 {
4695 uint32_t *tl;
4696 int error, len;
4697
4698 len = strlen(optag);
4699 (void)nfsm_strtom(nd, optag, len);
4700 NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4701 if ((nd->nd_flag & ND_NFSV41) != 0) {
4702 if ((nd->nd_flag & ND_NFSV42) != 0)
4703 *tl++ = txdr_unsigned(NFSV42_MINORVERSION);
4704 else
4705 *tl++ = txdr_unsigned(NFSV41_MINORVERSION);
4706 *tl++ = txdr_unsigned(callback);
4707 *tl++ = txdr_unsigned(2);
4708 *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
4709 error = nfsv4_setcbsequence(nd, clp, 1, sepp, slotposp);
4710 if (error != 0)
4711 return (error);
4712 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4713 *tl = txdr_unsigned(op);
4714 } else {
4715 *tl++ = txdr_unsigned(NFSV4_MINORVERSION);
4716 *tl++ = txdr_unsigned(callback);
4717 *tl++ = txdr_unsigned(1);
4718 *tl = txdr_unsigned(op);
4719 }
4720 return (0);
4721 }
4722
/*
 * Return the next index# for a clientid. Mostly just increment and return
 * the next one, but... if the 32bit unsigned does actually wrap around,
 * the server should be rebooted. The wrap around is only reported on the
 * console; the wrapped value (0) is still returned.
 * At an average rate of one new client per second, it will wrap around in
 * approximately 136 years. (I think the server will have been shut
 * down or rebooted before then.)
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t client_index = 0;

	if (++client_index == 0)
		printf("%s: out of clientids\n", __func__);
	return (client_index);
}
4743
4744 /*
4745 * Return the next index# for a stateid. Mostly just increment and return
4746 * the next one, but... if the 32bit unsigned does actually wrap around
4747 * (will a BSD server stay up that long?), find
4748 * new start and end values.
4749 */
4750 static u_int32_t
nfsrv_nextstateindex(struct nfsclient * clp)4751 nfsrv_nextstateindex(struct nfsclient *clp)
4752 {
4753 struct nfsstate *stp;
4754 int i;
4755 u_int32_t canuse, min_index, max_index;
4756
4757 if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
4758 clp->lc_stateindex++;
4759 if (clp->lc_stateindex != clp->lc_statemaxindex)
4760 return (clp->lc_stateindex);
4761 }
4762
4763 /*
4764 * Yuck, we've hit the end.
4765 * Look for a new min and max.
4766 */
4767 min_index = 0;
4768 max_index = 0xffffffff;
4769 for (i = 0; i < nfsrv_statehashsize; i++) {
4770 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4771 if (stp->ls_stateid.other[2] > 0x80000000) {
4772 if (stp->ls_stateid.other[2] < max_index)
4773 max_index = stp->ls_stateid.other[2];
4774 } else {
4775 if (stp->ls_stateid.other[2] > min_index)
4776 min_index = stp->ls_stateid.other[2];
4777 }
4778 }
4779 }
4780
4781 /*
4782 * Yikes, highly unlikely, but I'll handle it anyhow.
4783 */
4784 if (min_index == 0x80000000 && max_index == 0x80000001) {
4785 canuse = 0;
4786 /*
4787 * Loop around until we find an unused entry. Return that
4788 * and set LCL_INDEXNOTOK, so the search will continue next time.
4789 * (This is one of those rare cases where a goto is the
4790 * cleanest way to code the loop.)
4791 */
4792 tryagain:
4793 for (i = 0; i < nfsrv_statehashsize; i++) {
4794 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4795 if (stp->ls_stateid.other[2] == canuse) {
4796 canuse++;
4797 goto tryagain;
4798 }
4799 }
4800 }
4801 clp->lc_flags |= LCL_INDEXNOTOK;
4802 return (canuse);
4803 }
4804
4805 /*
4806 * Ok to start again from min + 1.
4807 */
4808 clp->lc_stateindex = min_index + 1;
4809 clp->lc_statemaxindex = max_index;
4810 clp->lc_flags &= ~LCL_INDEXNOTOK;
4811 return (clp->lc_stateindex);
4812 }
4813
4814 /*
4815 * The following functions handle the stable storage file that deals with
4816 * the edge conditions described in RFC3530 Sec. 8.6.3.
4817 * The file is as follows:
4818 * - a single record at the beginning that has the lease time of the
4819 * previous server instance (before the last reboot) and the nfsrvboottime
4820 * values for the previous server boots.
4821 * These previous boot times are used to ensure that the current
4822 * nfsrvboottime does not, somehow, get set to a previous one.
4823 * (This is important so that Stale ClientIDs and StateIDs can
4824 * be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
4826 * - followed by some number of appended records with:
4827 * - client id string
4828 * - flag that indicates it is a record revoking state via lease
4829 * expiration or similar
4830 * OR has successfully acquired state.
4831 * These structures vary in length, with the client string at the end, up
4832 * to NFSV4_OPAQUELIMIT in size.
4833 *
4834 * At the end of the grace period, the file is truncated, the first
4835 * record is rewritten with updated information and any acquired state
4836 * records for successful reclaims of state are written.
4837 *
4838 * Subsequent records are appended when the first state is issued to
4839 * a client and when state is revoked for a client.
4840 *
4841 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological order.
4843 * If, for some reason, the file can't be read, the grace period is
4844 * immediately terminated and all reclaims get NFSERR_NOGRACE.
4845 */
4846
4847 /*
4848 * Read in the stable storage file. Called by nfssvc() before the nfsd
4849 * processes start servicing requests.
4850 */
4851 void
nfsrv_setupstable(NFSPROC_T * p)4852 nfsrv_setupstable(NFSPROC_T *p)
4853 {
4854 struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst);
4855 struct nfsrv_stable *sp, *nsp;
4856 struct nfst_rec *tsp;
4857 int error, i, tryagain;
4858 off_t off = 0;
4859 ssize_t aresid, len;
4860
4861 /*
4862 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
4863 * a reboot, so state has not been lost.
4864 */
4865 if (sf->nsf_flags & NFSNSF_UPDATEDONE)
4866 return;
4867 /*
4868 * Set Grace over just until the file reads successfully.
4869 */
4870 NFSD_VNET(nfsrvboottime) = time_second;
4871 LIST_INIT(&sf->nsf_head);
4872 sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4873 sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
4874 if (sf->nsf_fp == NULL)
4875 return;
4876 error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4877 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
4878 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4879 if (error || aresid || sf->nsf_numboots == 0 ||
4880 sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
4881 return;
4882
4883 /*
4884 * Now, read in the boottimes.
4885 */
4886 sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
4887 sizeof(time_t), M_TEMP, M_WAITOK);
4888 off = sizeof (struct nfsf_rec);
4889 error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4890 (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
4891 UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4892 if (error || aresid) {
4893 free(sf->nsf_bootvals, M_TEMP);
4894 sf->nsf_bootvals = NULL;
4895 return;
4896 }
4897
4898 /*
4899 * Make sure this nfsrvboottime is different from all recorded
4900 * previous ones.
4901 */
4902 do {
4903 tryagain = 0;
4904 for (i = 0; i < sf->nsf_numboots; i++) {
4905 if (NFSD_VNET(nfsrvboottime) == sf->nsf_bootvals[i]) {
4906 NFSD_VNET(nfsrvboottime)++;
4907 tryagain = 1;
4908 break;
4909 }
4910 }
4911 } while (tryagain);
4912
4913 sf->nsf_flags |= NFSNSF_OK;
4914 off += (sf->nsf_numboots * sizeof (time_t));
4915
4916 /*
4917 * Read through the file, building a list of records for grace
4918 * checking.
4919 * Each record is between sizeof (struct nfst_rec) and
4920 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
4921 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
4922 */
4923 tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4924 NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
4925 do {
4926 error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4927 (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
4928 off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4929 len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
4930 if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
4931 len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
4932 /*
4933 * Yuck, the file has been corrupted, so just return
4934 * after clearing out any restart state, so the grace period
4935 * is over.
4936 */
4937 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4938 LIST_REMOVE(sp, nst_list);
4939 free(sp, M_TEMP);
4940 }
4941 free(tsp, M_TEMP);
4942 sf->nsf_flags &= ~NFSNSF_OK;
4943 free(sf->nsf_bootvals, M_TEMP);
4944 sf->nsf_bootvals = NULL;
4945 return;
4946 }
4947 if (len > 0) {
4948 off += sizeof (struct nfst_rec) + tsp->len - 1;
4949 /*
4950 * Search the list for a matching client.
4951 */
4952 LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
4953 if (tsp->len == sp->nst_len &&
4954 !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
4955 break;
4956 }
4957 if (sp == LIST_END(&sf->nsf_head)) {
4958 sp = (struct nfsrv_stable *)malloc(tsp->len +
4959 sizeof (struct nfsrv_stable) - 1, M_TEMP,
4960 M_WAITOK);
4961 NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
4962 sizeof (struct nfst_rec) + tsp->len - 1);
4963 LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
4964 } else {
4965 if (tsp->flag == NFSNST_REVOKE)
4966 sp->nst_flag |= NFSNST_REVOKE;
4967 else
4968 /*
4969 * A subsequent timestamp indicates the client
4970 * did a setclientid/confirm and any previous
4971 * revoke is no longer relevant.
4972 */
4973 sp->nst_flag &= ~NFSNST_REVOKE;
4974 }
4975 }
4976 } while (len > 0);
4977 free(tsp, M_TEMP);
4978 sf->nsf_flags = NFSNSF_OK;
4979 sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
4980 NFSRV_LEASEDELTA;
4981 }
4982
4983 /*
4984 * Update the stable storage file, now that the grace period is over.
4985 */
4986 void
nfsrv_updatestable(NFSPROC_T * p)4987 nfsrv_updatestable(NFSPROC_T *p)
4988 {
4989 struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst);
4990 struct nfsrv_stable *sp, *nsp;
4991 int i;
4992 struct nfsvattr nva;
4993 vnode_t vp;
4994 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
4995 mount_t mp = NULL;
4996 #endif
4997 int error;
4998
4999 if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
5000 return;
5001 sf->nsf_flags |= NFSNSF_UPDATEDONE;
5002 /*
5003 * Ok, we need to rewrite the stable storage file.
5004 * - truncate to 0 length
5005 * - write the new first structure
5006 * - loop through the data structures, writing out any that
5007 * have timestamps older than the old boot
5008 */
5009 if (sf->nsf_bootvals) {
5010 sf->nsf_numboots++;
5011 for (i = sf->nsf_numboots - 2; i >= 0; i--)
5012 sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
5013 } else {
5014 sf->nsf_numboots = 1;
5015 sf->nsf_bootvals = (time_t *)malloc(sizeof(time_t),
5016 M_TEMP, M_WAITOK);
5017 }
5018 sf->nsf_bootvals[0] = NFSD_VNET(nfsrvboottime);
5019 sf->nsf_lease = nfsrv_lease;
5020 NFSVNO_ATTRINIT(&nva);
5021 NFSVNO_SETATTRVAL(&nva, size, 0);
5022 vp = NFSFPVNODE(sf->nsf_fp);
5023 vn_start_write(vp, &mp, V_WAIT);
5024 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5025 error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
5026 NULL);
5027 NFSVOPUNLOCK(vp);
5028 } else
5029 error = EPERM;
5030 vn_finished_write(mp);
5031 if (!error)
5032 error = NFSD_RDWR(UIO_WRITE, vp,
5033 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
5034 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
5035 if (!error)
5036 error = NFSD_RDWR(UIO_WRITE, vp,
5037 (caddr_t)sf->nsf_bootvals,
5038 sf->nsf_numboots * sizeof (time_t),
5039 (off_t)(sizeof (struct nfsf_rec)),
5040 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
5041 free(sf->nsf_bootvals, M_TEMP);
5042 sf->nsf_bootvals = NULL;
5043 if (error) {
5044 sf->nsf_flags &= ~NFSNSF_OK;
5045 printf("EEK! Can't write NfsV4 stable storage file\n");
5046 return;
5047 }
5048 sf->nsf_flags |= NFSNSF_OK;
5049
5050 /*
5051 * Loop through the list and write out timestamp records for
5052 * any clients that successfully reclaimed state.
5053 */
5054 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
5055 if (sp->nst_flag & NFSNST_GOTSTATE) {
5056 nfsrv_writestable(sp->nst_client, sp->nst_len,
5057 NFSNST_NEWSTATE, p);
5058 sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
5059 }
5060 LIST_REMOVE(sp, nst_list);
5061 free(sp, M_TEMP);
5062 }
5063 nfsrv_backupstable();
5064 }
5065
5066 /*
5067 * Append a record to the stable storage file.
5068 */
5069 void
nfsrv_writestable(u_char * client,int len,int flag,NFSPROC_T * p)5070 nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
5071 {
5072 struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst);
5073 struct nfst_rec *sp;
5074 int error;
5075
5076 if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
5077 return;
5078 sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
5079 len - 1, M_TEMP, M_WAITOK);
5080 sp->len = len;
5081 NFSBCOPY(client, sp->client, len);
5082 sp->flag = flag;
5083 error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
5084 (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
5085 UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
5086 free(sp, M_TEMP);
5087 if (error) {
5088 sf->nsf_flags &= ~NFSNSF_OK;
5089 printf("EEK! Can't write NfsV4 stable storage file\n");
5090 }
5091 }
5092
5093 /*
5094 * This function is called during the grace period to mark a client
5095 * that successfully reclaimed state.
5096 */
5097 static void
nfsrv_markstable(struct nfsclient * clp)5098 nfsrv_markstable(struct nfsclient *clp)
5099 {
5100 struct nfsrv_stable *sp;
5101
5102 /*
5103 * First find the client structure.
5104 */
5105 LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) {
5106 if (sp->nst_len == clp->lc_idlen &&
5107 !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
5108 break;
5109 }
5110 if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head))
5111 return;
5112
5113 /*
5114 * Now, just mark it and set the nfsclient back pointer.
5115 */
5116 sp->nst_flag |= NFSNST_GOTSTATE;
5117 sp->nst_clp = clp;
5118 }
5119
5120 /*
5121 * This function is called when a NFSv4.1 client does a ReclaimComplete.
5122 * Very similar to nfsrv_markstable(), except for the flag being set.
5123 */
5124 static void
nfsrv_markreclaim(struct nfsclient * clp)5125 nfsrv_markreclaim(struct nfsclient *clp)
5126 {
5127 struct nfsrv_stable *sp;
5128
5129 /*
5130 * First find the client structure.
5131 */
5132 LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) {
5133 if (sp->nst_len == clp->lc_idlen &&
5134 !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
5135 break;
5136 }
5137 if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head))
5138 return;
5139
5140 /*
5141 * Now, just set the flag.
5142 */
5143 sp->nst_flag |= NFSNST_RECLAIMED;
5144
5145 /*
5146 * Free up any old delegations.
5147 */
5148 nfsrv_freedeleglist(&clp->lc_olddeleg);
5149 }
5150
5151 /*
5152 * This function is called for a reclaim, to see if it gets grace.
5153 * It returns 0 if a reclaim is allowed, 1 otherwise.
5154 */
5155 static int
nfsrv_checkstable(struct nfsclient * clp)5156 nfsrv_checkstable(struct nfsclient *clp)
5157 {
5158 struct nfsrv_stable *sp;
5159
5160 /*
5161 * First, find the entry for the client.
5162 */
5163 LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) {
5164 if (sp->nst_len == clp->lc_idlen &&
5165 !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
5166 break;
5167 }
5168
5169 /*
5170 * If not in the list, state was revoked or no state was issued
5171 * since the previous reboot, a reclaim is denied.
5172 */
5173 if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head) ||
5174 (sp->nst_flag & NFSNST_REVOKE) ||
5175 !(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_OK))
5176 return (1);
5177 return (0);
5178 }
5179
5180 /*
5181 * Test for and try to clear out a conflicting client. This is called by
5182 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
5183 * a found.
5184 * The trick here is that it can't revoke a conflicting client with an
5185 * expired lease unless it holds the v4root lock, so...
5186 * If no v4root lock, get the lock and return 1 to indicate "try again".
5187 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
5188 * the revocation worked and the conflicting client is "bye, bye", so it
5189 * can be tried again.
5190 * Return 2 to indicate that the vnode is VIRF_DOOMED after NFSVOPLOCK().
5191 * Unlocks State before a non-zero value is returned.
5192 */
5193 static int
nfsrv_clientconflict(struct nfsclient * clp,int * haslockp,vnode_t vp,NFSPROC_T * p)5194 nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
5195 NFSPROC_T *p)
5196 {
5197 int gotlock, lktype = 0;
5198
5199 /*
5200 * If lease hasn't expired, we can't fix it.
5201 */
5202 if (clp->lc_expiry >= NFSD_MONOSEC ||
5203 !(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE))
5204 return (0);
5205 if (*haslockp == 0) {
5206 NFSUNLOCKSTATE();
5207 if (vp != NULL) {
5208 lktype = NFSVOPISLOCKED(vp);
5209 NFSVOPUNLOCK(vp);
5210 }
5211 NFSLOCKV4ROOTMUTEX();
5212 nfsv4_relref(&nfsv4rootfs_lock);
5213 do {
5214 gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
5215 NFSV4ROOTLOCKMUTEXPTR, NULL);
5216 } while (!gotlock);
5217 NFSUNLOCKV4ROOTMUTEX();
5218 *haslockp = 1;
5219 if (vp != NULL) {
5220 NFSVOPLOCK(vp, lktype | LK_RETRY);
5221 if (VN_IS_DOOMED(vp))
5222 return (2);
5223 }
5224 return (1);
5225 }
5226 NFSUNLOCKSTATE();
5227
5228 /*
5229 * Ok, we can expire the conflicting client.
5230 */
5231 nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
5232 nfsrv_backupstable();
5233 nfsrv_cleanclient(clp, p, false, NULL);
5234 nfsrv_freedeleglist(&clp->lc_deleg);
5235 nfsrv_freedeleglist(&clp->lc_olddeleg);
5236 LIST_REMOVE(clp, lc_hash);
5237 nfsrv_zapclient(clp, p);
5238 return (1);
5239 }
5240
5241 /*
5242 * Resolve a delegation conflict.
5243 * Returns 0 to indicate the conflict was resolved without sleeping.
5244 * Return -1 to indicate that the caller should check for conflicts again.
5245 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
5246 *
5247 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
5248 * for a return of 0, since there was no sleep and it could be required
5249 * later. It is released for a return of NFSERR_DELAY, since the caller
5250 * will return that error. It is released when a sleep was done waiting
5251 * for the delegation to be returned or expire (so that other nfsds can
5252 * handle ops). Then, it must be acquired for the write to stable storage.
5253 * (This function is somewhat similar to nfsrv_clientconflict(), but
5254 * the semantics differ in a couple of subtle ways. The return of 0
5255 * indicates the conflict was resolved without sleeping here, not
5256 * that the conflict can't be resolved and the handling of nfsv4root_lock
5257 * differs, as noted above.)
5258 * Unlocks State before returning a non-zero value.
5259 */
5260 static int
nfsrv_delegconflict(struct nfsstate * stp,int * haslockp,NFSPROC_T * p,vnode_t vp)5261 nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
5262 vnode_t vp)
5263 {
5264 struct nfsclient *clp = stp->ls_clp;
5265 int gotlock, error, lktype = 0, retrycnt, zapped_clp;
5266 nfsv4stateid_t tstateid;
5267 fhandle_t tfh;
5268
5269 /*
5270 * If the conflict is with an old delegation...
5271 */
5272 if (stp->ls_flags & NFSLCK_OLDDELEG) {
5273 /*
5274 * You can delete it, if it has expired.
5275 */
5276 if (clp->lc_delegtime < NFSD_MONOSEC) {
5277 nfsrv_freedeleg(stp);
5278 NFSUNLOCKSTATE();
5279 error = -1;
5280 goto out;
5281 }
5282 NFSUNLOCKSTATE();
5283 /*
5284 * During this delay, the old delegation could expire or it
5285 * could be recovered by the client via an Open with
5286 * CLAIM_DELEGATE_PREV.
5287 * Release the nfsv4root_lock, if held.
5288 */
5289 if (*haslockp) {
5290 *haslockp = 0;
5291 NFSLOCKV4ROOTMUTEX();
5292 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5293 NFSUNLOCKV4ROOTMUTEX();
5294 }
5295 error = NFSERR_DELAY;
5296 goto out;
5297 }
5298
5299 /*
5300 * It's a current delegation, so:
5301 * - check to see if the delegation has expired
5302 * - if so, get the v4root lock and then expire it
5303 */
5304 if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0 || (stp->ls_lastrecall <
5305 NFSD_MONOSEC && clp->lc_expiry >= NFSD_MONOSEC &&
5306 stp->ls_delegtime >= NFSD_MONOSEC)) {
5307 /*
5308 * - do a recall callback, since not yet done
5309 * For now, never allow truncate to be set. To use
5310 * truncate safely, it must be guaranteed that the
5311 * Remove, Rename or Setattr with size of 0 will
5312 * succeed and that would require major changes to
5313 * the VFS/Vnode OPs.
5314 * Set the expiry time large enough so that it won't expire
5315 * until after the callback, then set it correctly, once
5316 * the callback is done. (The delegation will now time
5317 * out whether or not the Recall worked ok. The timeout
5318 * will be extended when ops are done on the delegation
5319 * stateid, up to the timelimit.)
5320 */
5321 if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) {
5322 stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
5323 NFSRV_LEASEDELTA;
5324 stp->ls_delegtimelimit = NFSD_MONOSEC + (6 *
5325 nfsrv_lease) + NFSRV_LEASEDELTA;
5326 stp->ls_flags |= NFSLCK_DELEGRECALL;
5327 }
5328 stp->ls_lastrecall = time_uptime + 1;
5329
5330 /*
5331 * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
5332 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
5333 * in order to try and avoid a race that could happen
5334 * when a CBRecall request passed the Open reply with
5335 * the delegation in it when transitting the network.
5336 * Since nfsrv_docallback will sleep, don't use stp after
5337 * the call.
5338 */
5339 NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
5340 sizeof (tstateid));
5341 NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
5342 sizeof (tfh));
5343 NFSUNLOCKSTATE();
5344 if (*haslockp) {
5345 *haslockp = 0;
5346 NFSLOCKV4ROOTMUTEX();
5347 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5348 NFSUNLOCKV4ROOTMUTEX();
5349 }
5350 retrycnt = 0;
5351 do {
5352 error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
5353 &tstateid, 0, &tfh, NULL, NULL, 0, p);
5354 retrycnt++;
5355 } while ((error == NFSERR_BADSTATEID ||
5356 error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
5357 error = NFSERR_DELAY;
5358 goto out;
5359 }
5360
5361 if (clp->lc_expiry >= NFSD_MONOSEC &&
5362 stp->ls_delegtime >= NFSD_MONOSEC) {
5363 NFSUNLOCKSTATE();
5364 /*
5365 * A recall has been done, but it has not yet expired.
5366 * So, RETURN_DELAY.
5367 */
5368 if (*haslockp) {
5369 *haslockp = 0;
5370 NFSLOCKV4ROOTMUTEX();
5371 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5372 NFSUNLOCKV4ROOTMUTEX();
5373 }
5374 error = NFSERR_DELAY;
5375 goto out;
5376 }
5377
5378 /*
5379 * If we don't yet have the lock, just get it and then return,
5380 * since we need that before deleting expired state, such as
5381 * this delegation.
5382 * When getting the lock, unlock the vnode, so other nfsds that
5383 * are in progress, won't get stuck waiting for the vnode lock.
5384 */
5385 if (*haslockp == 0) {
5386 NFSUNLOCKSTATE();
5387 if (vp != NULL) {
5388 lktype = NFSVOPISLOCKED(vp);
5389 NFSVOPUNLOCK(vp);
5390 }
5391 NFSLOCKV4ROOTMUTEX();
5392 nfsv4_relref(&nfsv4rootfs_lock);
5393 do {
5394 gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
5395 NFSV4ROOTLOCKMUTEXPTR, NULL);
5396 } while (!gotlock);
5397 NFSUNLOCKV4ROOTMUTEX();
5398 *haslockp = 1;
5399 if (vp != NULL) {
5400 NFSVOPLOCK(vp, lktype | LK_RETRY);
5401 if (VN_IS_DOOMED(vp)) {
5402 *haslockp = 0;
5403 NFSLOCKV4ROOTMUTEX();
5404 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5405 NFSUNLOCKV4ROOTMUTEX();
5406 error = NFSERR_PERM;
5407 goto out;
5408 }
5409 }
5410 error = -1;
5411 goto out;
5412 }
5413
5414 NFSUNLOCKSTATE();
5415 /*
5416 * Ok, we can delete the expired delegation.
5417 * First, write the Revoke record to stable storage and then
5418 * clear out the conflict.
5419 * Since all other nfsd threads are now blocked, we can safely
5420 * sleep without the state changing.
5421 */
5422 nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
5423 nfsrv_backupstable();
5424 if (clp->lc_expiry < NFSD_MONOSEC) {
5425 nfsrv_cleanclient(clp, p, false, NULL);
5426 nfsrv_freedeleglist(&clp->lc_deleg);
5427 nfsrv_freedeleglist(&clp->lc_olddeleg);
5428 LIST_REMOVE(clp, lc_hash);
5429 zapped_clp = 1;
5430 } else {
5431 nfsrv_freedeleg(stp);
5432 zapped_clp = 0;
5433 }
5434 if (zapped_clp)
5435 nfsrv_zapclient(clp, p);
5436 error = -1;
5437
5438 out:
5439 NFSEXITCODE(error);
5440 return (error);
5441 }
5442
5443 /*
5444 * Check for a remove allowed, if remove is set to 1 and get rid of
5445 * delegations.
5446 */
5447 int
nfsrv_checkremove(vnode_t vp,int remove,struct nfsrv_descript * nd,nfsquad_t clientid,NFSPROC_T * p)5448 nfsrv_checkremove(vnode_t vp, int remove, struct nfsrv_descript *nd,
5449 nfsquad_t clientid, NFSPROC_T *p)
5450 {
5451 struct nfsclient *clp;
5452 struct nfsstate *stp;
5453 struct nfslockfile *lfp;
5454 int error, haslock = 0;
5455 fhandle_t nfh;
5456
5457 clp = NULL;
5458 /*
5459 * First, get the lock file structure.
5460 * (A return of -1 means no associated state, so remove ok.)
5461 */
5462 error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5463 tryagain:
5464 NFSLOCKSTATE();
5465 if (error == 0 && clientid.qval != 0)
5466 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
5467 (nfsquad_t)((u_quad_t)0), 0, nd, p);
5468 if (!error)
5469 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5470 if (error) {
5471 NFSUNLOCKSTATE();
5472 if (haslock) {
5473 NFSLOCKV4ROOTMUTEX();
5474 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5475 NFSUNLOCKV4ROOTMUTEX();
5476 }
5477 if (error == -1)
5478 error = 0;
5479 goto out;
5480 }
5481
5482 /*
5483 * Now, we must Recall any delegations.
5484 */
5485 error = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
5486 if (error) {
5487 /*
5488 * nfsrv_cleandeleg() unlocks state for non-zero
5489 * return.
5490 */
5491 if (error == -1)
5492 goto tryagain;
5493 if (haslock) {
5494 NFSLOCKV4ROOTMUTEX();
5495 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5496 NFSUNLOCKV4ROOTMUTEX();
5497 }
5498 goto out;
5499 }
5500
5501 /*
5502 * Now, look for a conflicting open share.
5503 */
5504 if (remove) {
5505 /*
5506 * If the entry in the directory was the last reference to the
5507 * corresponding filesystem object, the object can be destroyed
5508 * */
5509 if(lfp->lf_usecount>1)
5510 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
5511 if (stp->ls_flags & NFSLCK_WRITEDENY) {
5512 error = NFSERR_FILEOPEN;
5513 break;
5514 }
5515 }
5516 }
5517
5518 NFSUNLOCKSTATE();
5519 if (haslock) {
5520 NFSLOCKV4ROOTMUTEX();
5521 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5522 NFSUNLOCKV4ROOTMUTEX();
5523 }
5524
5525 out:
5526 NFSEXITCODE(error);
5527 return (error);
5528 }
5529
5530 /*
5531 * Clear out all delegations for the file referred to by lfp.
5532 * May return NFSERR_DELAY, if there will be a delay waiting for
5533 * delegations to expire.
5534 * Returns -1 to indicate it slept while recalling a delegation.
5535 * This function has the side effect of deleting the nfslockfile structure,
5536 * if it no longer has associated state and didn't have to sleep.
5537 * Unlocks State before a non-zero value is returned.
5538 */
5539 static int
nfsrv_cleandeleg(vnode_t vp,struct nfslockfile * lfp,struct nfsclient * clp,int * haslockp,NFSPROC_T * p)5540 nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
5541 struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
5542 {
5543 struct nfsstate *stp, *nstp;
5544 int ret = 0;
5545
5546 stp = LIST_FIRST(&lfp->lf_deleg);
5547 while (stp != LIST_END(&lfp->lf_deleg)) {
5548 nstp = LIST_NEXT(stp, ls_file);
5549 if (stp->ls_clp != clp) {
5550 ret = nfsrv_delegconflict(stp, haslockp, p, vp);
5551 if (ret) {
5552 /*
5553 * nfsrv_delegconflict() unlocks state
5554 * when it returns non-zero.
5555 */
5556 goto out;
5557 }
5558 }
5559 stp = nstp;
5560 }
5561 out:
5562 NFSEXITCODE(ret);
5563 return (ret);
5564 }
5565
5566 /*
5567 * There are certain operations that, when being done outside of NFSv4,
5568 * require that any NFSv4 delegation for the file be recalled.
5569 * This function is to be called for those cases:
5570 * VOP_RENAME() - When a delegation is being recalled for any reason,
5571 * the client may have to do Opens against the server, using the file's
5572 * final component name. If the file has been renamed on the server,
5573 * that component name will be incorrect and the Open will fail.
5574 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
5575 * been removed on the server, if there is a delegation issued to
5576 * that client for the file. I say "theoretically" since clients
5577 * normally do an Access Op before the Open and that Access Op will
5578 * fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
5579 * they will detect the file's removal in the same manner. (There is
5580 * one case where RFC3530 allows a client to do an Open without first
5581 * doing an Access Op, which is passage of a check against the ACE
5582 * returned with a Write delegation, but current practice is to ignore
5583 * the ACE and always do an Access Op.)
5584 * Since the functions can only be called with an unlocked vnode, this
5585 * can't be done at this time.
5586 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
5587 * locks locally in the client, which are not visible to the server. To
5588 * deal with this, issuing of delegations for a vnode must be disabled
5589 * and all delegations for the vnode recalled. This is done via the
5590 * second function, using the VV_DISABLEDELEG vflag on the vnode.
5591 */
5592 void
nfsd_recalldelegation(vnode_t vp,NFSPROC_T * p)5593 nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
5594 {
5595 time_t starttime;
5596 int error;
5597
5598 /*
5599 * First, check to see if the server is currently running and it has
5600 * been called for a regular file when issuing delegations.
5601 */
5602 if (NFSD_VNET(nfsrv_numnfsd) == 0 || vp->v_type != VREG ||
5603 nfsrv_issuedelegs == 0)
5604 return;
5605
5606 KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
5607 /*
5608 * First, get a reference on the nfsv4rootfs_lock so that an
5609 * exclusive lock cannot be acquired by another thread.
5610 */
5611 NFSLOCKV4ROOTMUTEX();
5612 nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
5613 NFSUNLOCKV4ROOTMUTEX();
5614
5615 /*
5616 * Now, call nfsrv_checkremove() in a loop while it returns
5617 * NFSERR_DELAY. Return upon any other error or when timed out.
5618 */
5619 starttime = NFSD_MONOSEC;
5620 do {
5621 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5622 error = nfsrv_checkremove(vp, 0, NULL,
5623 (nfsquad_t)((u_quad_t)0), p);
5624 NFSVOPUNLOCK(vp);
5625 } else
5626 error = EPERM;
5627 if (error == NFSERR_DELAY) {
5628 if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
5629 break;
5630 /* Sleep for a short period of time */
5631 (void) nfs_catnap(PZERO, 0, "nfsremove");
5632 }
5633 } while (error == NFSERR_DELAY);
5634 NFSLOCKV4ROOTMUTEX();
5635 nfsv4_relref(&nfsv4rootfs_lock);
5636 NFSUNLOCKV4ROOTMUTEX();
5637 }
5638
5639 void
nfsd_disabledelegation(vnode_t vp,NFSPROC_T * p)5640 nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
5641 {
5642
5643 #ifdef VV_DISABLEDELEG
5644 /*
5645 * First, flag issuance of delegations disabled.
5646 */
5647 atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
5648 #endif
5649
5650 /*
5651 * Then call nfsd_recalldelegation() to get rid of all extant
5652 * delegations.
5653 */
5654 nfsd_recalldelegation(vp, p);
5655 }
5656
5657 /*
5658 * Check for conflicting locks, etc. and then get rid of delegations.
5659 * (At one point I thought that I should get rid of delegations for any
5660 * Setattr, since it could potentially disallow the I/O op (read or write)
5661 * allowed by the delegation. However, Setattr Ops that aren't changing
5662 * the size get a stateid of all 0s, so you can't tell if it is a delegation
5663 * for the same client or a different one, so I decided to only get rid
5664 * of delegations for other clients when the size is being changed.)
5665 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
5666 * as Write backs, even if there is no delegation, so it really isn't any
5667 * different?)
5668 */
5669 int
nfsrv_checksetattr(vnode_t vp,struct nfsrv_descript * nd,nfsv4stateid_t * stateidp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,struct nfsexstuff * exp,NFSPROC_T * p)5670 nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
5671 nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
5672 struct nfsexstuff *exp, NFSPROC_T *p)
5673 {
5674 struct nfsstate st, *stp = &st;
5675 struct nfslock lo, *lop = &lo;
5676 int error = 0;
5677 nfsquad_t clientid;
5678
5679 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
5680 stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
5681 lop->lo_first = nvap->na_size;
5682 } else {
5683 stp->ls_flags = 0;
5684 lop->lo_first = 0;
5685 }
5686 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
5687 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
5688 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
5689 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
5690 stp->ls_flags |= NFSLCK_SETATTR;
5691 if (stp->ls_flags == 0)
5692 goto out;
5693 lop->lo_end = NFS64BITSSET;
5694 lop->lo_flags = NFSLCK_WRITE;
5695 stp->ls_ownerlen = 0;
5696 stp->ls_op = NULL;
5697 stp->ls_uid = nd->nd_cred->cr_uid;
5698 stp->ls_stateid.seqid = stateidp->seqid;
5699 clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
5700 clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
5701 stp->ls_stateid.other[2] = stateidp->other[2];
5702 error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
5703 stateidp, exp, nd, p);
5704
5705 out:
5706 NFSEXITCODE2(error, nd);
5707 return (error);
5708 }
5709
5710 /*
5711 * Check for a write delegation and do a CBGETATTR if there is one, updating
5712 * the attributes, as required.
5713 * Should I return an error if I can't get the attributes? (For now, I'll
5714 * just return ok.
5715 */
5716 int
nfsrv_checkgetattr(struct nfsrv_descript * nd,vnode_t vp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,NFSPROC_T * p)5717 nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
5718 struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
5719 {
5720 struct nfsstate *stp;
5721 struct nfslockfile *lfp;
5722 struct nfsclient *clp;
5723 struct nfsvattr nva;
5724 fhandle_t nfh;
5725 int error = 0;
5726 nfsattrbit_t cbbits;
5727 u_quad_t delegfilerev;
5728
5729 NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
5730 if (!NFSNONZERO_ATTRBIT(&cbbits))
5731 goto out;
5732 if (nfsrv_writedelegcnt == 0)
5733 goto out;
5734
5735 /*
5736 * Get the lock file structure.
5737 * (A return of -1 means no associated state, so return ok.)
5738 */
5739 error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5740 NFSLOCKSTATE();
5741 if (!error)
5742 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5743 if (error) {
5744 NFSUNLOCKSTATE();
5745 if (error == -1)
5746 error = 0;
5747 goto out;
5748 }
5749
5750 /*
5751 * Now, look for a write delegation.
5752 */
5753 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
5754 if (stp->ls_flags & NFSLCK_DELEGWRITE)
5755 break;
5756 }
5757 if (stp == LIST_END(&lfp->lf_deleg)) {
5758 NFSUNLOCKSTATE();
5759 goto out;
5760 }
5761 clp = stp->ls_clp;
5762
5763 /* If the clientid is not confirmed, ignore the delegation. */
5764 if (clp->lc_flags & LCL_NEEDSCONFIRM) {
5765 NFSUNLOCKSTATE();
5766 goto out;
5767 }
5768
5769 delegfilerev = stp->ls_filerev;
5770 /*
5771 * If the Write delegation was issued as a part of this Compound RPC
5772 * or if we have an Implied Clientid (used in a previous Op in this
5773 * compound) and it is the client the delegation was issued to,
5774 * just return ok.
5775 * I also assume that it is from the same client iff the network
5776 * host IP address is the same as the callback address. (Not
5777 * exactly correct by the RFC, but avoids a lot of Getattr
5778 * callbacks.)
5779 */
5780 if (nd->nd_compref == stp->ls_compref ||
5781 ((nd->nd_flag & ND_IMPLIEDCLID) &&
5782 clp->lc_clientid.qval == nd->nd_clientid.qval) ||
5783 nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
5784 NFSUNLOCKSTATE();
5785 goto out;
5786 }
5787
5788 /*
5789 * We are now done with the delegation state structure,
5790 * so the statelock can be released and we can now tsleep().
5791 */
5792
5793 /*
5794 * Now, we must do the CB Getattr callback, to see if Change or Size
5795 * has changed.
5796 */
5797 if (clp->lc_expiry >= NFSD_MONOSEC) {
5798 NFSUNLOCKSTATE();
5799 NFSVNO_ATTRINIT(&nva);
5800 nva.na_filerev = NFS64BITSSET;
5801 error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
5802 0, &nfh, &nva, &cbbits, 0, p);
5803 if (!error) {
5804 if ((nva.na_filerev != NFS64BITSSET &&
5805 nva.na_filerev > delegfilerev) ||
5806 (NFSVNO_ISSETSIZE(&nva) &&
5807 nva.na_size != nvap->na_size)) {
5808 error = nfsvno_updfilerev(vp, nvap, nd, p);
5809 if (NFSVNO_ISSETSIZE(&nva))
5810 nvap->na_size = nva.na_size;
5811 }
5812 } else
5813 error = 0; /* Ignore callback errors for now. */
5814 } else {
5815 NFSUNLOCKSTATE();
5816 }
5817
5818 out:
5819 NFSEXITCODE2(error, nd);
5820 return (error);
5821 }
5822
5823 /*
5824 * This function looks for openowners that haven't had any opens for
5825 * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
5826 * is set.
5827 */
5828 void
nfsrv_throwawayopens(NFSPROC_T * p)5829 nfsrv_throwawayopens(NFSPROC_T *p)
5830 {
5831 struct nfsclient *clp, *nclp;
5832 struct nfsstate *stp, *nstp;
5833 int i;
5834
5835 NFSLOCKSTATE();
5836 NFSD_VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_NOOPENS;
5837 /*
5838 * For each client...
5839 */
5840 for (i = 0; i < nfsrv_clienthashsize; i++) {
5841 LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash,
5842 nclp) {
5843 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
5844 if (LIST_EMPTY(&stp->ls_open) &&
5845 (stp->ls_noopens > NFSNOOPEN ||
5846 (nfsrv_openpluslock * 2) >
5847 nfsrv_v4statelimit))
5848 nfsrv_freeopenowner(stp, 0, p);
5849 }
5850 }
5851 }
5852 NFSUNLOCKSTATE();
5853 }
5854
5855 /*
5856 * This function checks to see if the credentials are the same.
5857 * The check for same credentials is needed for state management operations
5858 * for NFSv4.0 or NFSv4.1/4.2 when SP4_MACH_CRED is configured via
5859 * ExchangeID.
5860 * Returns 1 for not same, 0 otherwise.
5861 */
5862 static int
nfsrv_notsamecredname(int op,struct nfsrv_descript * nd,struct nfsclient * clp)5863 nfsrv_notsamecredname(int op, struct nfsrv_descript *nd, struct nfsclient *clp)
5864 {
5865
5866 /* Check for the SP4_MACH_CRED case. */
5867 if (op != 0 && nfsrv_checkmachcred(op, nd, clp) != 0)
5868 return (1);
5869
5870 /* For NFSv4.1/4.2, SP4_NONE always allows this. */
5871 if ((nd->nd_flag & ND_NFSV41) != 0)
5872 return (0);
5873
5874 if (nd->nd_flag & ND_GSS) {
5875 if (!(clp->lc_flags & LCL_GSS))
5876 return (1);
5877 if (clp->lc_flags & LCL_NAME) {
5878 if (nd->nd_princlen != clp->lc_namelen ||
5879 NFSBCMP(nd->nd_principal, clp->lc_name,
5880 clp->lc_namelen))
5881 return (1);
5882 else
5883 return (0);
5884 }
5885 if (nd->nd_cred->cr_uid == clp->lc_uid)
5886 return (0);
5887 else
5888 return (1);
5889 } else if (clp->lc_flags & LCL_GSS)
5890 return (1);
5891 /*
5892 * For AUTH_SYS, allow the same uid or root. (This is underspecified
5893 * in RFC3530, which talks about principals, but doesn't say anything
5894 * about uids for AUTH_SYS.)
5895 */
5896 if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
5897 return (0);
5898 else
5899 return (1);
5900 }
5901
5902 /*
5903 * Calculate the lease expiry time.
5904 */
5905 static time_t
nfsrv_leaseexpiry(void)5906 nfsrv_leaseexpiry(void)
5907 {
5908
5909 if (NFSD_VNET(nfsrv_stablefirst).nsf_eograce > NFSD_MONOSEC)
5910 return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
5911 return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
5912 }
5913
5914 /*
5915 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
5916 */
5917 static void
nfsrv_delaydelegtimeout(struct nfsstate * stp)5918 nfsrv_delaydelegtimeout(struct nfsstate *stp)
5919 {
5920
5921 if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
5922 return;
5923
5924 if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
5925 stp->ls_delegtime < stp->ls_delegtimelimit) {
5926 stp->ls_delegtime += nfsrv_lease;
5927 if (stp->ls_delegtime > stp->ls_delegtimelimit)
5928 stp->ls_delegtime = stp->ls_delegtimelimit;
5929 }
5930 }
5931
5932 /*
5933 * This function checks to see if there is any other state associated
5934 * with the openowner for this Open.
5935 * It returns 1 if there is no other state, 0 otherwise.
5936 */
5937 static int
nfsrv_nootherstate(struct nfsstate * stp)5938 nfsrv_nootherstate(struct nfsstate *stp)
5939 {
5940 struct nfsstate *tstp;
5941
5942 LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
5943 if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
5944 return (0);
5945 }
5946 return (1);
5947 }
5948
5949 /*
5950 * Create a list of lock deltas (changes to local byte range locking
5951 * that can be rolled back using the list) and apply the changes via
5952 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
5953 * the rollback or update function will be called after this.
5954 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
5955 * call fails. If it returns an error, it will unlock the list.
5956 */
5957 static int
nfsrv_locallock(vnode_t vp,struct nfslockfile * lfp,int flags,uint64_t first,uint64_t end,struct nfslockconflict * cfp,NFSPROC_T * p)5958 nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
5959 uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5960 {
5961 struct nfslock *lop, *nlop;
5962 int error = 0;
5963
5964 /* Loop through the list of locks. */
5965 lop = LIST_FIRST(&lfp->lf_locallock);
5966 while (first < end && lop != NULL) {
5967 nlop = LIST_NEXT(lop, lo_lckowner);
5968 if (first >= lop->lo_end) {
5969 /* not there yet */
5970 lop = nlop;
5971 } else if (first < lop->lo_first) {
5972 /* new one starts before entry in list */
5973 if (end <= lop->lo_first) {
5974 /* no overlap between old and new */
5975 error = nfsrv_dolocal(vp, lfp, flags,
5976 NFSLCK_UNLOCK, first, end, cfp, p);
5977 if (error != 0)
5978 break;
5979 first = end;
5980 } else {
5981 /* handle fragment overlapped with new one */
5982 error = nfsrv_dolocal(vp, lfp, flags,
5983 NFSLCK_UNLOCK, first, lop->lo_first, cfp,
5984 p);
5985 if (error != 0)
5986 break;
5987 first = lop->lo_first;
5988 }
5989 } else {
5990 /* new one overlaps this entry in list */
5991 if (end <= lop->lo_end) {
5992 /* overlaps all of new one */
5993 error = nfsrv_dolocal(vp, lfp, flags,
5994 lop->lo_flags, first, end, cfp, p);
5995 if (error != 0)
5996 break;
5997 first = end;
5998 } else {
5999 /* handle fragment overlapped with new one */
6000 error = nfsrv_dolocal(vp, lfp, flags,
6001 lop->lo_flags, first, lop->lo_end, cfp, p);
6002 if (error != 0)
6003 break;
6004 first = lop->lo_end;
6005 lop = nlop;
6006 }
6007 }
6008 }
6009 if (first < end && error == 0)
6010 /* handle fragment past end of list */
6011 error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
6012 end, cfp, p);
6013
6014 NFSEXITCODE(error);
6015 return (error);
6016 }
6017
6018 /*
6019 * Local lock unlock. Unlock all byte ranges that are no longer locked
6020 * by NFSv4. To do this, unlock any subranges of first-->end that
6021 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
6022 * list. This list has all locks for the file held by other
6023 * <clientid, lockowner> tuples. The list is ordered by increasing
6024 * lo_first value, but may have entries that overlap each other, for
6025 * the case of read locks.
6026 */
6027 static void
nfsrv_localunlock(vnode_t vp,struct nfslockfile * lfp,uint64_t init_first,uint64_t init_end,NFSPROC_T * p)6028 nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
6029 uint64_t init_end, NFSPROC_T *p)
6030 {
6031 struct nfslock *lop;
6032 uint64_t first, end, prevfirst __unused;
6033
6034 first = init_first;
6035 end = init_end;
6036 while (first < init_end) {
6037 /* Loop through all nfs locks, adjusting first and end */
6038 prevfirst = 0;
6039 LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
6040 KASSERT(prevfirst <= lop->lo_first,
6041 ("nfsv4 locks out of order"));
6042 KASSERT(lop->lo_first < lop->lo_end,
6043 ("nfsv4 bogus lock"));
6044 prevfirst = lop->lo_first;
6045 if (first >= lop->lo_first &&
6046 first < lop->lo_end)
6047 /*
6048 * Overlaps with initial part, so trim
6049 * off that initial part by moving first past
6050 * it.
6051 */
6052 first = lop->lo_end;
6053 else if (end > lop->lo_first &&
6054 lop->lo_first > first) {
6055 /*
6056 * This lock defines the end of the
6057 * segment to unlock, so set end to the
6058 * start of it and break out of the loop.
6059 */
6060 end = lop->lo_first;
6061 break;
6062 }
6063 if (first >= end)
6064 /*
6065 * There is no segment left to do, so
6066 * break out of this loop and then exit
6067 * the outer while() since first will be set
6068 * to end, which must equal init_end here.
6069 */
6070 break;
6071 }
6072 if (first < end) {
6073 /* Unlock this segment */
6074 (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
6075 NFSLCK_READ, first, end, NULL, p);
6076 nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
6077 first, end);
6078 }
6079 /*
6080 * Now move past this segment and look for any further
6081 * segment in the range, if there is one.
6082 */
6083 first = end;
6084 end = init_end;
6085 }
6086 }
6087
6088 /*
6089 * Do the local lock operation and update the rollback list, as required.
6090 * Perform the rollback and return the error if nfsvno_advlock() fails.
6091 */
6092 static int
nfsrv_dolocal(vnode_t vp,struct nfslockfile * lfp,int flags,int oldflags,uint64_t first,uint64_t end,struct nfslockconflict * cfp,NFSPROC_T * p)6093 nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
6094 uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
6095 {
6096 struct nfsrollback *rlp;
6097 int error = 0, ltype, oldltype;
6098
6099 if (flags & NFSLCK_WRITE)
6100 ltype = F_WRLCK;
6101 else if (flags & NFSLCK_READ)
6102 ltype = F_RDLCK;
6103 else
6104 ltype = F_UNLCK;
6105 if (oldflags & NFSLCK_WRITE)
6106 oldltype = F_WRLCK;
6107 else if (oldflags & NFSLCK_READ)
6108 oldltype = F_RDLCK;
6109 else
6110 oldltype = F_UNLCK;
6111 if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
6112 /* nothing to do */
6113 goto out;
6114 error = nfsvno_advlock(vp, ltype, first, end, p);
6115 if (error != 0) {
6116 if (cfp != NULL) {
6117 cfp->cl_clientid.lval[0] = 0;
6118 cfp->cl_clientid.lval[1] = 0;
6119 cfp->cl_first = 0;
6120 cfp->cl_end = NFS64BITSSET;
6121 cfp->cl_flags = NFSLCK_WRITE;
6122 cfp->cl_ownerlen = 5;
6123 NFSBCOPY("LOCAL", cfp->cl_owner, 5);
6124 }
6125 nfsrv_locallock_rollback(vp, lfp, p);
6126 } else if (ltype != F_UNLCK) {
6127 rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
6128 M_WAITOK);
6129 rlp->rlck_first = first;
6130 rlp->rlck_end = end;
6131 rlp->rlck_type = oldltype;
6132 LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
6133 }
6134
6135 out:
6136 NFSEXITCODE(error);
6137 return (error);
6138 }
6139
6140 /*
6141 * Roll back local lock changes and free up the rollback list.
6142 */
6143 static void
nfsrv_locallock_rollback(vnode_t vp,struct nfslockfile * lfp,NFSPROC_T * p)6144 nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
6145 {
6146 struct nfsrollback *rlp, *nrlp;
6147
6148 LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
6149 (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
6150 rlp->rlck_end, p);
6151 free(rlp, M_NFSDROLLBACK);
6152 }
6153 LIST_INIT(&lfp->lf_rollback);
6154 }
6155
6156 /*
6157 * Update local lock list and delete rollback list (ie now committed to the
6158 * local locks). Most of the work is done by the internal function.
6159 */
6160 static void
nfsrv_locallock_commit(struct nfslockfile * lfp,int flags,uint64_t first,uint64_t end)6161 nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
6162 uint64_t end)
6163 {
6164 struct nfsrollback *rlp, *nrlp;
6165 struct nfslock *new_lop, *other_lop;
6166
6167 new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
6168 if (flags & (NFSLCK_READ | NFSLCK_WRITE))
6169 other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
6170 M_WAITOK);
6171 else
6172 other_lop = NULL;
6173 new_lop->lo_flags = flags;
6174 new_lop->lo_first = first;
6175 new_lop->lo_end = end;
6176 nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
6177 if (new_lop != NULL)
6178 free(new_lop, M_NFSDLOCK);
6179 if (other_lop != NULL)
6180 free(other_lop, M_NFSDLOCK);
6181
6182 /* and get rid of the rollback list */
6183 LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
6184 free(rlp, M_NFSDROLLBACK);
6185 LIST_INIT(&lfp->lf_rollback);
6186 }
6187
6188 /*
6189 * Lock the struct nfslockfile for local lock updating.
6190 */
6191 static void
nfsrv_locklf(struct nfslockfile * lfp)6192 nfsrv_locklf(struct nfslockfile *lfp)
6193 {
6194 int gotlock;
6195
6196 /* lf_usecount ensures *lfp won't be free'd */
6197 lfp->lf_usecount++;
6198 do {
6199 gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
6200 NFSSTATEMUTEXPTR, NULL);
6201 } while (gotlock == 0);
6202 lfp->lf_usecount--;
6203 }
6204
6205 /*
6206 * Unlock the struct nfslockfile after local lock updating.
6207 */
6208 static void
nfsrv_unlocklf(struct nfslockfile * lfp)6209 nfsrv_unlocklf(struct nfslockfile *lfp)
6210 {
6211
6212 nfsv4_unlock(&lfp->lf_locallock_lck, 0);
6213 }
6214
6215 /*
6216 * Clear out all state for the NFSv4 server.
6217 * Must be called by a thread that can sleep when no nfsds are running.
6218 */
6219 void
nfsrv_throwawayallstate(NFSPROC_T * p)6220 nfsrv_throwawayallstate(NFSPROC_T *p)
6221 {
6222 struct nfsclient *clp, *nclp;
6223 struct nfslockfile *lfp, *nlfp;
6224 int i;
6225
6226 /*
6227 * For each client, clean out the state and then free the structure.
6228 */
6229 for (i = 0; i < nfsrv_clienthashsize; i++) {
6230 LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash,
6231 nclp) {
6232 nfsrv_cleanclient(clp, p, false, NULL);
6233 nfsrv_freedeleglist(&clp->lc_deleg);
6234 nfsrv_freedeleglist(&clp->lc_olddeleg);
6235 free(clp->lc_stateid, M_NFSDCLIENT);
6236 free(clp, M_NFSDCLIENT);
6237 }
6238 }
6239
6240 /*
6241 * Also, free up any remaining lock file structures.
6242 */
6243 for (i = 0; i < nfsrv_lockhashsize; i++) {
6244 LIST_FOREACH_SAFE(lfp, &NFSD_VNET(nfslockhash)[i], lf_hash,
6245 nlfp) {
6246 printf("nfsd unload: fnd a lock file struct\n");
6247 nfsrv_freenfslockfile(lfp);
6248 }
6249 }
6250
6251 /* And get rid of the deviceid structures and layouts. */
6252 nfsrv_freealllayoutsanddevids();
6253 }
6254
6255 /*
6256 * Check the sequence# for the session and slot provided as an argument.
6257 * Also, renew the lease if the session will return NFS_OK.
6258 */
6259 int
nfsrv_checksequence(struct nfsrv_descript * nd,uint32_t sequenceid,uint32_t * highest_slotidp,uint32_t * target_highest_slotidp,int cache_this,uint32_t * sflagsp,NFSPROC_T * p)6260 nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
6261 uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
6262 uint32_t *sflagsp, NFSPROC_T *p)
6263 {
6264 struct nfsdsession *sep;
6265 struct nfssessionhash *shp;
6266 int error;
6267
6268 shp = NFSSESSIONHASH(nd->nd_sessionid);
6269 NFSLOCKSESSION(shp);
6270 sep = nfsrv_findsession(nd->nd_sessionid);
6271 if (sep == NULL) {
6272 NFSUNLOCKSESSION(shp);
6273 return (NFSERR_BADSESSION);
6274 }
6275 error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
6276 sep->sess_slots, NULL, NFSV4_SLOTS - 1);
6277 if (error != 0) {
6278 NFSUNLOCKSESSION(shp);
6279 return (error);
6280 }
6281 if (cache_this != 0)
6282 nd->nd_flag |= ND_SAVEREPLY;
6283 /* Renew the lease. */
6284 sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
6285 nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
6286 nd->nd_flag |= ND_IMPLIEDCLID;
6287
6288 /* Handle the SP4_MECH_CRED case for NFSv4.1/4.2. */
6289 if ((sep->sess_clp->lc_flags & LCL_MACHCRED) != 0 &&
6290 (nd->nd_flag & (ND_GSSINTEGRITY | ND_GSSPRIVACY)) != 0 &&
6291 nd->nd_princlen == sep->sess_clp->lc_namelen &&
6292 !NFSBCMP(sep->sess_clp->lc_name, nd->nd_principal,
6293 nd->nd_princlen)) {
6294 nd->nd_flag |= ND_MACHCRED;
6295 NFSSET_OPBIT(&nd->nd_allowops, &sep->sess_clp->lc_allowops);
6296 }
6297
6298 /* Save maximum request and reply sizes. */
6299 nd->nd_maxreq = sep->sess_maxreq;
6300 nd->nd_maxresp = sep->sess_maxresp;
6301
6302 *sflagsp = 0;
6303 if (sep->sess_clp->lc_req.nr_client == NULL ||
6304 (sep->sess_clp->lc_flags & LCL_CBDOWN) != 0)
6305 *sflagsp |= NFSV4SEQ_CBPATHDOWN;
6306 NFSUNLOCKSESSION(shp);
6307 if (error == NFSERR_EXPIRED) {
6308 *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
6309 error = 0;
6310 } else if (error == NFSERR_ADMINREVOKED) {
6311 *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
6312 error = 0;
6313 }
6314 *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
6315 return (0);
6316 }
6317
6318 /*
6319 * Check/set reclaim complete for this session/clientid.
6320 */
6321 int
nfsrv_checkreclaimcomplete(struct nfsrv_descript * nd,int onefs)6322 nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs)
6323 {
6324 struct nfsdsession *sep;
6325 struct nfssessionhash *shp;
6326 int error = 0;
6327
6328 shp = NFSSESSIONHASH(nd->nd_sessionid);
6329 NFSLOCKSTATE();
6330 NFSLOCKSESSION(shp);
6331 sep = nfsrv_findsession(nd->nd_sessionid);
6332 if (sep == NULL) {
6333 NFSUNLOCKSESSION(shp);
6334 NFSUNLOCKSTATE();
6335 return (NFSERR_BADSESSION);
6336 }
6337
6338 if (onefs != 0)
6339 sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS;
6340 /* Check to see if reclaim complete has already happened. */
6341 else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
6342 error = NFSERR_COMPLETEALREADY;
6343 else {
6344 sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
6345 nfsrv_markreclaim(sep->sess_clp);
6346 }
6347 NFSUNLOCKSESSION(shp);
6348 NFSUNLOCKSTATE();
6349 return (error);
6350 }
6351
6352 /*
6353 * Cache the reply in a session slot.
6354 */
6355 void
nfsrv_cache_session(struct nfsrv_descript * nd,struct mbuf ** m)6356 nfsrv_cache_session(struct nfsrv_descript *nd, struct mbuf **m)
6357 {
6358 struct nfsdsession *sep;
6359 struct nfssessionhash *shp;
6360 char *buf, *cp;
6361 #ifdef INET
6362 struct sockaddr_in *sin;
6363 #endif
6364 #ifdef INET6
6365 struct sockaddr_in6 *sin6;
6366 #endif
6367
6368 shp = NFSSESSIONHASH(nd->nd_sessionid);
6369 NFSLOCKSESSION(shp);
6370 sep = nfsrv_findsession(nd->nd_sessionid);
6371 if (sep == NULL) {
6372 NFSUNLOCKSESSION(shp);
6373 if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags &
6374 NFSNSF_GRACEOVER) != 0) {
6375 buf = malloc(INET6_ADDRSTRLEN, M_TEMP, M_WAITOK);
6376 switch (nd->nd_nam->sa_family) {
6377 #ifdef INET
6378 case AF_INET:
6379 sin = (struct sockaddr_in *)nd->nd_nam;
6380 cp = inet_ntop(sin->sin_family,
6381 &sin->sin_addr.s_addr, buf,
6382 INET6_ADDRSTRLEN);
6383 break;
6384 #endif
6385 #ifdef INET6
6386 case AF_INET6:
6387 sin6 = (struct sockaddr_in6 *)nd->nd_nam;
6388 cp = inet_ntop(sin6->sin6_family,
6389 &sin6->sin6_addr, buf, INET6_ADDRSTRLEN);
6390 break;
6391 #endif
6392 default:
6393 cp = NULL;
6394 }
6395 if (cp != NULL)
6396 printf("nfsrv_cache_session: no session "
6397 "IPaddr=%s, check NFS clients for unique "
6398 "/etc/hostid's\n", cp);
6399 else
6400 printf("nfsrv_cache_session: no session, "
6401 "check NFS clients for unique "
6402 "/etc/hostid's\n");
6403 free(buf, M_TEMP);
6404 }
6405 m_freem(*m);
6406 return;
6407 }
6408 nfsv4_seqsess_cacherep(nd->nd_slotid, sep->sess_slots, nd->nd_repstat,
6409 m);
6410 NFSUNLOCKSESSION(shp);
6411 }
6412
6413 /*
6414 * Search for a session that matches the sessionid.
6415 */
6416 static struct nfsdsession *
nfsrv_findsession(uint8_t * sessionid)6417 nfsrv_findsession(uint8_t *sessionid)
6418 {
6419 struct nfsdsession *sep;
6420 struct nfssessionhash *shp;
6421
6422 shp = NFSSESSIONHASH(sessionid);
6423 LIST_FOREACH(sep, &shp->list, sess_hash) {
6424 if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
6425 break;
6426 }
6427 return (sep);
6428 }
6429
6430 /*
6431 * Destroy a session.
6432 */
6433 int
nfsrv_destroysession(struct nfsrv_descript * nd,uint8_t * sessionid)6434 nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
6435 {
6436 int error, igotlock, samesess;
6437
6438 samesess = 0;
6439 if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) &&
6440 (nd->nd_flag & ND_HASSEQUENCE) != 0) {
6441 samesess = 1;
6442 if ((nd->nd_flag & ND_LASTOP) == 0)
6443 return (NFSERR_BADSESSION);
6444 }
6445
6446 /* Lock out other nfsd threads */
6447 NFSLOCKV4ROOTMUTEX();
6448 nfsv4_relref(&nfsv4rootfs_lock);
6449 do {
6450 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
6451 NFSV4ROOTLOCKMUTEXPTR, NULL);
6452 } while (igotlock == 0);
6453 NFSUNLOCKV4ROOTMUTEX();
6454
6455 error = nfsrv_freesession(nd, NULL, sessionid, false, NULL);
6456 if (error == 0 && samesess != 0)
6457 nd->nd_flag &= ~ND_HASSEQUENCE;
6458
6459 NFSLOCKV4ROOTMUTEX();
6460 nfsv4_unlock(&nfsv4rootfs_lock, 1);
6461 NFSUNLOCKV4ROOTMUTEX();
6462 return (error);
6463 }
6464
6465 /*
6466 * Bind a connection to a session.
6467 * For now, only certain variants are supported, since the current session
6468 * structure can only handle a single backchannel entry, which will be
6469 * applied to all connections if it is set.
6470 */
6471 int
nfsrv_bindconnsess(struct nfsrv_descript * nd,uint8_t * sessionid,int * foreaftp)6472 nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp)
6473 {
6474 struct nfssessionhash *shp;
6475 struct nfsdsession *sep;
6476 struct nfsclient *clp;
6477 SVCXPRT *savxprt;
6478 int error;
6479
6480 error = 0;
6481 savxprt = NULL;
6482 shp = NFSSESSIONHASH(sessionid);
6483 NFSLOCKSTATE();
6484 NFSLOCKSESSION(shp);
6485 sep = nfsrv_findsession(sessionid);
6486 if (sep != NULL) {
6487 clp = sep->sess_clp;
6488 error = nfsrv_checkmachcred(NFSV4OP_BINDCONNTOSESS, nd, clp);
6489 if (error != 0)
6490 goto out;
6491 if (*foreaftp == NFSCDFC4_BACK ||
6492 *foreaftp == NFSCDFC4_BACK_OR_BOTH ||
6493 *foreaftp == NFSCDFC4_FORE_OR_BOTH) {
6494 /* Try to set up a backchannel. */
6495 if (clp->lc_req.nr_client == NULL) {
6496 NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire "
6497 "backchannel\n");
6498 clp->lc_req.nr_client = (struct __rpc_client *)
6499 clnt_bck_create(nd->nd_xprt->xp_socket,
6500 sep->sess_cbprogram, NFSV4_CBVERS);
6501 }
6502 if (clp->lc_req.nr_client != NULL) {
6503 NFSD_DEBUG(2, "nfsrv_bindconnsess: set up "
6504 "backchannel\n");
6505 savxprt = sep->sess_cbsess.nfsess_xprt;
6506 SVC_ACQUIRE(nd->nd_xprt);
6507 CLNT_ACQUIRE(clp->lc_req.nr_client);
6508 nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
6509 /* Disable idle timeout. */
6510 nd->nd_xprt->xp_idletimeout = 0;
6511 sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
6512 sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN;
6513 clp->lc_flags |= LCL_DONEBINDCONN |
6514 LCL_NEEDSCBNULL;
6515 clp->lc_flags &= ~LCL_CBDOWN;
6516 if (*foreaftp == NFSCDFS4_BACK)
6517 *foreaftp = NFSCDFS4_BACK;
6518 else
6519 *foreaftp = NFSCDFS4_BOTH;
6520 } else if (*foreaftp != NFSCDFC4_BACK) {
6521 NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set "
6522 "up backchannel\n");
6523 sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
6524 clp->lc_flags |= LCL_DONEBINDCONN;
6525 *foreaftp = NFSCDFS4_FORE;
6526 } else {
6527 error = NFSERR_NOTSUPP;
6528 printf("nfsrv_bindconnsess: Can't add "
6529 "backchannel\n");
6530 }
6531 } else {
6532 NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n");
6533 clp->lc_flags |= LCL_DONEBINDCONN;
6534 *foreaftp = NFSCDFS4_FORE;
6535 }
6536 } else
6537 error = NFSERR_BADSESSION;
6538 out:
6539 NFSUNLOCKSESSION(shp);
6540 NFSUNLOCKSTATE();
6541 if (savxprt != NULL)
6542 SVC_RELEASE(savxprt);
6543 return (error);
6544 }
6545
6546 /*
6547 * Free up a session structure.
6548 */
6549 static int
nfsrv_freesession(struct nfsrv_descript * nd,struct nfsdsession * sep,uint8_t * sessionid,bool locked,SVCXPRT ** old_xprtp)6550 nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
6551 uint8_t *sessionid, bool locked, SVCXPRT **old_xprtp)
6552 {
6553 struct nfssessionhash *shp;
6554 int i;
6555
6556 if (!locked)
6557 NFSLOCKSTATE();
6558 if (sep == NULL) {
6559 shp = NFSSESSIONHASH(sessionid);
6560 NFSLOCKSESSION(shp);
6561 sep = nfsrv_findsession(sessionid);
6562 } else {
6563 shp = NFSSESSIONHASH(sep->sess_sessionid);
6564 NFSLOCKSESSION(shp);
6565 }
6566 if (sep != NULL) {
6567 /* Check for the SP4_MACH_CRED case. */
6568 if (nd != NULL && nfsrv_checkmachcred(NFSV4OP_DESTROYSESSION,
6569 nd, sep->sess_clp) != 0) {
6570 NFSUNLOCKSESSION(shp);
6571 if (!locked)
6572 NFSUNLOCKSTATE();
6573 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
6574 }
6575
6576 sep->sess_refcnt--;
6577 if (sep->sess_refcnt > 0) {
6578 NFSUNLOCKSESSION(shp);
6579 if (!locked)
6580 NFSUNLOCKSTATE();
6581 return (NFSERR_BACKCHANBUSY);
6582 }
6583 LIST_REMOVE(sep, sess_hash);
6584 LIST_REMOVE(sep, sess_list);
6585 }
6586 NFSUNLOCKSESSION(shp);
6587 if (!locked)
6588 NFSUNLOCKSTATE();
6589 if (sep == NULL)
6590 return (NFSERR_BADSESSION);
6591 for (i = 0; i < NFSV4_SLOTS; i++)
6592 if (sep->sess_slots[i].nfssl_reply != NULL)
6593 m_freem(sep->sess_slots[i].nfssl_reply);
6594 if (!locked) {
6595 if (sep->sess_cbsess.nfsess_xprt != NULL)
6596 SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
6597 if (old_xprtp != NULL)
6598 *old_xprtp = NULL;
6599 } else if (old_xprtp != NULL)
6600 *old_xprtp = sep->sess_cbsess.nfsess_xprt;
6601 free(sep, M_NFSDSESSION);
6602 return (0);
6603 }
6604
6605 /*
6606 * Free a stateid.
6607 * RFC5661 says that it should fail when there are associated opens, locks
6608 * or delegations. Since stateids represent opens, I don't see how you can
6609 * free an open stateid (it will be free'd when closed), so this function
6610 * only works for lock stateids (freeing the lock_owner) or delegations.
6611 */
6612 int
nfsrv_freestateid(struct nfsrv_descript * nd,nfsv4stateid_t * stateidp,NFSPROC_T * p)6613 nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6614 NFSPROC_T *p)
6615 {
6616 struct nfsclient *clp;
6617 struct nfsstate *stp;
6618 int error;
6619
6620 NFSLOCKSTATE();
6621 /*
6622 * Look up the stateid
6623 */
6624 error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
6625 NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6626 if (error == 0) {
6627 /* First, check for a delegation. */
6628 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
6629 if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
6630 NFSX_STATEIDOTHER))
6631 break;
6632 }
6633 if (stp != NULL) {
6634 nfsrv_freedeleg(stp);
6635 NFSUNLOCKSTATE();
6636 return (error);
6637 }
6638 }
6639 /* Not a delegation, try for a lock_owner. */
6640 if (error == 0)
6641 error = nfsrv_getstate(clp, stateidp, 0, &stp);
6642 if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
6643 NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
6644 /* Not a lock_owner stateid. */
6645 error = NFSERR_LOCKSHELD;
6646 if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
6647 error = NFSERR_LOCKSHELD;
6648 if (error == 0)
6649 nfsrv_freelockowner(stp, NULL, 0, p);
6650 NFSUNLOCKSTATE();
6651 return (error);
6652 }
6653
6654 /*
6655 * Test a stateid.
6656 */
6657 int
nfsrv_teststateid(struct nfsrv_descript * nd,nfsv4stateid_t * stateidp,NFSPROC_T * p)6658 nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6659 NFSPROC_T *p)
6660 {
6661 struct nfsclient *clp;
6662 struct nfsstate *stp;
6663 int error;
6664
6665 NFSLOCKSTATE();
6666 /*
6667 * Look up the stateid
6668 */
6669 error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
6670 NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6671 if (error == 0)
6672 error = nfsrv_getstate(clp, stateidp, 0, &stp);
6673 if (error == 0 && stateidp->seqid != 0 &&
6674 SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid))
6675 error = NFSERR_OLDSTATEID;
6676 NFSUNLOCKSTATE();
6677 return (error);
6678 }
6679
6680 /*
6681 * Generate the xdr for an NFSv4.1 CBSequence Operation.
6682 */
6683 static int
nfsv4_setcbsequence(struct nfsrv_descript * nd,struct nfsclient * clp,int dont_replycache,struct nfsdsession ** sepp,int * slotposp)6684 nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
6685 int dont_replycache, struct nfsdsession **sepp, int *slotposp)
6686 {
6687 struct nfsdsession *sep;
6688 uint32_t *tl, slotseq = 0;
6689 int maxslot;
6690 uint8_t sessionid[NFSX_V4SESSIONID];
6691 int error;
6692
6693 error = nfsv4_getcbsession(clp, sepp);
6694 if (error != 0)
6695 return (error);
6696 sep = *sepp;
6697 nfsv4_sequencelookup(NULL, &sep->sess_cbsess, slotposp, &maxslot,
6698 &slotseq, sessionid, true);
6699 KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
6700
6701 /* Build the Sequence arguments. */
6702 NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
6703 bcopy(sessionid, tl, NFSX_V4SESSIONID);
6704 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
6705 nd->nd_slotseq = tl;
6706 nd->nd_slotid = *slotposp;
6707 nd->nd_flag |= ND_HASSLOTID;
6708 *tl++ = txdr_unsigned(slotseq);
6709 *tl++ = txdr_unsigned(*slotposp);
6710 *tl++ = txdr_unsigned(maxslot);
6711 if (dont_replycache == 0)
6712 *tl++ = newnfs_true;
6713 else
6714 *tl++ = newnfs_false;
6715 *tl = 0; /* No referring call list, for now. */
6716 nd->nd_flag |= ND_HASSEQUENCE;
6717 return (0);
6718 }
6719
6720 /*
6721 * Get a session for the callback.
6722 */
6723 static int
nfsv4_getcbsession(struct nfsclient * clp,struct nfsdsession ** sepp)6724 nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
6725 {
6726 struct nfsdsession *sep;
6727
6728 NFSLOCKSTATE();
6729 LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6730 if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
6731 break;
6732 }
6733 if (sep == NULL) {
6734 NFSUNLOCKSTATE();
6735 return (NFSERR_BADSESSION);
6736 }
6737 sep->sess_refcnt++;
6738 *sepp = sep;
6739 NFSUNLOCKSTATE();
6740 return (0);
6741 }
6742
6743 /*
6744 * Free up all backchannel xprts. This needs to be done when the nfsd threads
6745 * exit, since those transports will all be going away.
6746 * This is only called after all the nfsd threads are done performing RPCs,
6747 * so locking shouldn't be an issue.
6748 */
6749 void
nfsrv_freeallbackchannel_xprts(void)6750 nfsrv_freeallbackchannel_xprts(void)
6751 {
6752 struct nfsdsession *sep;
6753 struct nfsclient *clp;
6754 SVCXPRT *xprt;
6755 int i;
6756
6757 for (i = 0; i < nfsrv_clienthashsize; i++) {
6758 LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) {
6759 LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6760 xprt = sep->sess_cbsess.nfsess_xprt;
6761 sep->sess_cbsess.nfsess_xprt = NULL;
6762 if (xprt != NULL)
6763 SVC_RELEASE(xprt);
6764 }
6765 }
6766 }
6767 }
6768
6769 /*
6770 * Do a layout commit. Actually just call nfsrv_updatemdsattr().
6771 * I have no idea if the rest of these arguments will ever be useful?
6772 */
6773 int
nfsrv_layoutcommit(struct nfsrv_descript * nd,vnode_t vp,int layouttype,int hasnewoff,uint64_t newoff,uint64_t offset,uint64_t len,int hasnewmtime,struct timespec * newmtimep,int reclaim,nfsv4stateid_t * stateidp,int maxcnt,char * layp,int * hasnewsizep,uint64_t * newsizep,struct ucred * cred,NFSPROC_T * p)6774 nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype,
6775 int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len,
6776 int hasnewmtime, struct timespec *newmtimep, int reclaim,
6777 nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep,
6778 uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p)
6779 {
6780 struct nfsvattr na;
6781 int error;
6782
6783 error = nfsrv_updatemdsattr(vp, &na, p);
6784 if (error == 0) {
6785 *hasnewsizep = 1;
6786 *newsizep = na.na_size;
6787 }
6788 return (error);
6789 }
6790
6791 /*
6792 * Try and get a layout.
6793 */
6794 int
nfsrv_layoutget(struct nfsrv_descript * nd,vnode_t vp,struct nfsexstuff * exp,int layouttype,int * iomode,uint64_t * offset,uint64_t * len,uint64_t minlen,nfsv4stateid_t * stateidp,int maxcnt,int * retonclose,int * layoutlenp,char * layp,struct ucred * cred,NFSPROC_T * p)6795 nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp,
6796 int layouttype, int *iomode, uint64_t *offset, uint64_t *len,
6797 uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose,
6798 int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p)
6799 {
6800 struct nfslayouthash *lhyp;
6801 struct nfslayout *lyp;
6802 char *devid;
6803 fhandle_t fh, *dsfhp;
6804 int error, mirrorcnt;
6805
6806 if (nfsrv_devidcnt == 0)
6807 return (NFSERR_UNKNLAYOUTTYPE);
6808
6809 if (*offset != 0)
6810 printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset,
6811 (uintmax_t)*len);
6812 error = nfsvno_getfh(vp, &fh, p);
6813 NFSD_DEBUG(4, "layoutget getfh=%d\n", error);
6814 if (error != 0)
6815 return (error);
6816
6817 /*
6818 * For now, all layouts are for entire files.
6819 * Only issue Read/Write layouts if requested for a non-readonly fs.
6820 */
6821 if (NFSVNO_EXRDONLY(exp)) {
6822 if (*iomode == NFSLAYOUTIOMODE_RW)
6823 return (NFSERR_LAYOUTTRYLATER);
6824 *iomode = NFSLAYOUTIOMODE_READ;
6825 }
6826 if (*iomode != NFSLAYOUTIOMODE_RW)
6827 *iomode = NFSLAYOUTIOMODE_READ;
6828
6829 /*
6830 * Check to see if a write layout can be issued for this file.
6831 * This is used during mirror recovery to avoid RW layouts being
6832 * issued for a file while it is being copied to the recovered
6833 * mirror.
6834 */
6835 if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0)
6836 return (NFSERR_LAYOUTTRYLATER);
6837
6838 *retonclose = 0;
6839 *offset = 0;
6840 *len = UINT64_MAX;
6841
6842 /* First, see if a layout already exists and return if found. */
6843 lhyp = NFSLAYOUTHASH(&fh);
6844 NFSLOCKLAYOUT(lhyp);
6845 error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp);
6846 NFSD_DEBUG(4, "layoutget findlay=%d\n", error);
6847 /*
6848 * Not sure if the seqid must be the same, so I won't check it.
6849 */
6850 if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] ||
6851 stateidp->other[1] != lyp->lay_stateid.other[1] ||
6852 stateidp->other[2] != lyp->lay_stateid.other[2])) {
6853 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
6854 NFSUNLOCKLAYOUT(lhyp);
6855 NFSD_DEBUG(1, "ret bad stateid\n");
6856 return (NFSERR_BADSTATEID);
6857 }
6858 /*
6859 * I believe we get here because there is a race between
6860 * the client processing the CBLAYOUTRECALL and the layout
6861 * being deleted here on the server.
6862 * The client has now done a LayoutGet with a non-layout
6863 * stateid, as it would when there is no layout.
6864 * As such, free this layout and set error == NFSERR_BADSTATEID
6865 * so the code below will create a new layout structure as
6866 * would happen if no layout was found.
6867 * "lyp" will be set before being used below, but set it NULL
6868 * as a safety belt.
6869 */
6870 nfsrv_freelayout(&lhyp->list, lyp);
6871 lyp = NULL;
6872 error = NFSERR_BADSTATEID;
6873 }
6874 if (error == 0) {
6875 if (lyp->lay_layoutlen > maxcnt) {
6876 NFSUNLOCKLAYOUT(lhyp);
6877 NFSD_DEBUG(1, "ret layout too small\n");
6878 return (NFSERR_TOOSMALL);
6879 }
6880 if (*iomode == NFSLAYOUTIOMODE_RW) {
6881 if ((lyp->lay_flags & NFSLAY_NOSPC) != 0) {
6882 NFSUNLOCKLAYOUT(lhyp);
6883 NFSD_DEBUG(1, "ret layout nospace\n");
6884 return (NFSERR_NOSPC);
6885 }
6886 lyp->lay_flags |= NFSLAY_RW;
6887 } else
6888 lyp->lay_flags |= NFSLAY_READ;
6889 NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
6890 *layoutlenp = lyp->lay_layoutlen;
6891 if (++lyp->lay_stateid.seqid == 0)
6892 lyp->lay_stateid.seqid = 1;
6893 stateidp->seqid = lyp->lay_stateid.seqid;
6894 NFSUNLOCKLAYOUT(lhyp);
6895 NFSD_DEBUG(4, "ret fnd layout\n");
6896 return (0);
6897 }
6898 NFSUNLOCKLAYOUT(lhyp);
6899
6900 /* Find the device id and file handle. */
6901 dsfhp = malloc(sizeof(fhandle_t) * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
6902 devid = malloc(NFSX_V4DEVICEID * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
6903 error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, dsfhp, devid);
6904 NFSD_DEBUG(4, "layoutget devandfh=%d\n", error);
6905 if (error == 0) {
6906 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
6907 if (NFSX_V4FILELAYOUT > maxcnt)
6908 error = NFSERR_TOOSMALL;
6909 else
6910 lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp,
6911 devid, vp->v_mount->mnt_stat.f_fsid);
6912 } else {
6913 if (NFSX_V4FLEXLAYOUT(mirrorcnt) > maxcnt)
6914 error = NFSERR_TOOSMALL;
6915 else
6916 lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt,
6917 &fh, dsfhp, devid,
6918 vp->v_mount->mnt_stat.f_fsid);
6919 }
6920 }
6921 free(dsfhp, M_TEMP);
6922 free(devid, M_TEMP);
6923 if (error != 0)
6924 return (error);
6925
6926 /*
6927 * Now, add this layout to the list.
6928 */
6929 error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p);
6930 NFSD_DEBUG(4, "layoutget addl=%d\n", error);
6931 /*
6932 * The lyp will be set to NULL by nfsrv_addlayout() if it
6933 * linked the new structure into the lists.
6934 */
6935 free(lyp, M_NFSDSTATE);
6936 return (error);
6937 }
6938
6939 /*
6940 * Generate a File Layout.
6941 */
6942 static struct nfslayout *
nfsrv_filelayout(struct nfsrv_descript * nd,int iomode,fhandle_t * fhp,fhandle_t * dsfhp,char * devid,fsid_t fs)6943 nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
6944 fhandle_t *dsfhp, char *devid, fsid_t fs)
6945 {
6946 uint32_t *tl;
6947 struct nfslayout *lyp;
6948 uint64_t pattern_offset;
6949
6950 lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE,
6951 M_WAITOK | M_ZERO);
6952 lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES;
6953 if (iomode == NFSLAYOUTIOMODE_RW)
6954 lyp->lay_flags = NFSLAY_RW;
6955 else
6956 lyp->lay_flags = NFSLAY_READ;
6957 NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
6958 lyp->lay_clientid.qval = nd->nd_clientid.qval;
6959 lyp->lay_fsid = fs;
6960 NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
6961
6962 /* Fill in the xdr for the files layout. */
6963 tl = (uint32_t *)lyp->lay_xdr;
6964 NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */
6965 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6966
6967 /* Set the stripe size to the maximum I/O size. */
6968 *tl++ = txdr_unsigned(nfs_srvmaxio & NFSFLAYUTIL_STRIPE_MASK);
6969 *tl++ = 0; /* 1st stripe index. */
6970 pattern_offset = 0;
6971 txdr_hyper(pattern_offset, tl); tl += 2; /* Pattern offset. */
6972 *tl++ = txdr_unsigned(1); /* 1 file handle. */
6973 *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
6974 NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
6975 lyp->lay_layoutlen = NFSX_V4FILELAYOUT;
6976 return (lyp);
6977 }
6978
6979 #define FLEX_OWNERID "999"
6980 #define FLEX_UID0 "0"
6981 /*
6982 * Generate a Flex File Layout.
6983 * The FLEX_OWNERID can be any string of 3 decimal digits. Although this
6984 * string goes on the wire, it isn't supposed to be used by the client,
6985 * since this server uses tight coupling.
6986 * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use
6987 * a string of "0". This works around the Linux Flex File Layout driver bug
6988 * which uses the synthetic uid/gid strings for the "tightly coupled" case.
6989 */
6990 static struct nfslayout *
nfsrv_flexlayout(struct nfsrv_descript * nd,int iomode,int mirrorcnt,fhandle_t * fhp,fhandle_t * dsfhp,char * devid,fsid_t fs)6991 nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt,
6992 fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs)
6993 {
6994 uint32_t *tl;
6995 struct nfslayout *lyp;
6996 uint64_t lenval;
6997 int i;
6998
6999 lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FLEXLAYOUT(mirrorcnt),
7000 M_NFSDSTATE, M_WAITOK | M_ZERO);
7001 lyp->lay_type = NFSLAYOUT_FLEXFILE;
7002 if (iomode == NFSLAYOUTIOMODE_RW)
7003 lyp->lay_flags = NFSLAY_RW;
7004 else
7005 lyp->lay_flags = NFSLAY_READ;
7006 NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
7007 lyp->lay_clientid.qval = nd->nd_clientid.qval;
7008 lyp->lay_fsid = fs;
7009 lyp->lay_mirrorcnt = mirrorcnt;
7010 NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
7011
7012 /* Fill in the xdr for the files layout. */
7013 tl = (uint32_t *)lyp->lay_xdr;
7014 lenval = 0;
7015 txdr_hyper(lenval, tl); tl += 2; /* Stripe unit. */
7016 *tl++ = txdr_unsigned(mirrorcnt); /* # of mirrors. */
7017 for (i = 0; i < mirrorcnt; i++) {
7018 *tl++ = txdr_unsigned(1); /* One stripe. */
7019 NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */
7020 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7021 devid += NFSX_V4DEVICEID;
7022 *tl++ = txdr_unsigned(1); /* Efficiency. */
7023 *tl++ = 0; /* Proxy Stateid. */
7024 *tl++ = 0x55555555;
7025 *tl++ = 0x55555555;
7026 *tl++ = 0x55555555;
7027 *tl++ = txdr_unsigned(1); /* 1 file handle. */
7028 *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
7029 NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
7030 tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED);
7031 dsfhp++;
7032 if (nfsrv_flexlinuxhack != 0) {
7033 *tl++ = txdr_unsigned(strlen(FLEX_UID0));
7034 *tl = 0; /* 0 pad string. */
7035 NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
7036 *tl++ = txdr_unsigned(strlen(FLEX_UID0));
7037 *tl = 0; /* 0 pad string. */
7038 NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
7039 } else {
7040 *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
7041 NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
7042 *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
7043 NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
7044 }
7045 }
7046 *tl++ = txdr_unsigned(0); /* ff_flags. */
7047 *tl = txdr_unsigned(60); /* Status interval hint. */
7048 lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt);
7049 return (lyp);
7050 }
7051
7052 /*
7053 * Parse and process Flex File errors returned via LayoutReturn.
7054 */
7055 static void
nfsrv_flexlayouterr(struct nfsrv_descript * nd,uint32_t * layp,int maxcnt,NFSPROC_T * p)7056 nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt,
7057 NFSPROC_T *p)
7058 {
7059 uint32_t *tl;
7060 int cnt, errcnt, i, j, opnum, stat;
7061 char devid[NFSX_V4DEVICEID];
7062
7063 tl = layp;
7064 maxcnt -= NFSX_UNSIGNED;
7065 if (maxcnt > 0)
7066 cnt = fxdr_unsigned(int, *tl++);
7067 else
7068 cnt = 0;
7069 NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt);
7070 for (i = 0; i < cnt; i++) {
7071 maxcnt -= NFSX_STATEID + 2 * NFSX_HYPER +
7072 NFSX_UNSIGNED;
7073 if (maxcnt <= 0)
7074 break;
7075 /* Skip offset, length and stateid for now. */
7076 tl += (4 + NFSX_STATEID / NFSX_UNSIGNED);
7077 errcnt = fxdr_unsigned(int, *tl++);
7078 NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt);
7079 for (j = 0; j < errcnt; j++) {
7080 maxcnt -= NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED;
7081 if (maxcnt < 0)
7082 break;
7083 NFSBCOPY(tl, devid, NFSX_V4DEVICEID);
7084 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7085 stat = fxdr_unsigned(int, *tl++);
7086 opnum = fxdr_unsigned(int, *tl++);
7087 NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum,
7088 stat);
7089 /*
7090 * Except for NFSERR_ACCES, NFSERR_STALE and
7091 * NFSERR_NOSPC errors, disable the mirror.
7092 */
7093 if (stat != NFSERR_ACCES && stat != NFSERR_STALE &&
7094 stat != NFSERR_NOSPC)
7095 nfsrv_delds(devid, p);
7096
7097 /* For NFSERR_NOSPC, mark all devids and layouts. */
7098 if (stat == NFSERR_NOSPC)
7099 nfsrv_marknospc(devid, true);
7100 }
7101 }
7102 }
7103
7104 /*
7105 * This function removes all flex file layouts which has a mirror with
7106 * a device id that matches the argument.
7107 * Called when the DS represented by the device id has failed.
7108 */
7109 void
nfsrv_flexmirrordel(char * devid,NFSPROC_T * p)7110 nfsrv_flexmirrordel(char *devid, NFSPROC_T *p)
7111 {
7112 uint32_t *tl;
7113 struct nfslayout *lyp, *nlyp;
7114 struct nfslayouthash *lhyp;
7115 struct nfslayouthead loclyp;
7116 int i, j;
7117
7118 NFSD_DEBUG(4, "flexmirrordel\n");
7119 /* Move all layouts found onto a local list. */
7120 TAILQ_INIT(&loclyp);
7121 for (i = 0; i < nfsrv_layouthashsize; i++) {
7122 lhyp = &nfslayouthash[i];
7123 NFSLOCKLAYOUT(lhyp);
7124 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7125 if (lyp->lay_type == NFSLAYOUT_FLEXFILE &&
7126 lyp->lay_mirrorcnt > 1) {
7127 NFSD_DEBUG(4, "possible match\n");
7128 tl = lyp->lay_xdr;
7129 tl += 3;
7130 for (j = 0; j < lyp->lay_mirrorcnt; j++) {
7131 tl++;
7132 if (NFSBCMP(devid, tl, NFSX_V4DEVICEID)
7133 == 0) {
7134 /* Found one. */
7135 NFSD_DEBUG(4, "fnd one\n");
7136 TAILQ_REMOVE(&lhyp->list, lyp,
7137 lay_list);
7138 TAILQ_INSERT_HEAD(&loclyp, lyp,
7139 lay_list);
7140 break;
7141 }
7142 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED +
7143 NFSM_RNDUP(NFSX_V4PNFSFH) /
7144 NFSX_UNSIGNED + 11 * NFSX_UNSIGNED);
7145 }
7146 }
7147 }
7148 NFSUNLOCKLAYOUT(lhyp);
7149 }
7150
7151 /* Now, try to do a Layout recall for each one found. */
7152 TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) {
7153 NFSD_DEBUG(4, "do layout recall\n");
7154 /*
7155 * The layout stateid.seqid needs to be incremented
7156 * before doing a LAYOUT_RECALL callback.
7157 */
7158 if (++lyp->lay_stateid.seqid == 0)
7159 lyp->lay_stateid.seqid = 1;
7160 nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
7161 &lyp->lay_fh, lyp, 1, lyp->lay_type, p);
7162 nfsrv_freelayout(&loclyp, lyp);
7163 }
7164 }
7165
7166 /*
7167 * Do a recall callback to the client for this layout.
7168 */
7169 static int
nfsrv_recalllayout(nfsquad_t clid,nfsv4stateid_t * stateidp,fhandle_t * fhp,struct nfslayout * lyp,int changed,int laytype,NFSPROC_T * p)7170 nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp,
7171 struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p)
7172 {
7173 struct nfsclient *clp;
7174 int error;
7175
7176 NFSD_DEBUG(4, "nfsrv_recalllayout\n");
7177 error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0),
7178 0, NULL, p);
7179 NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error);
7180 if (error != 0) {
7181 printf("nfsrv_recalllayout: getclient err=%d\n", error);
7182 return (error);
7183 }
7184 if ((clp->lc_flags & LCL_NFSV41) != 0) {
7185 error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL,
7186 stateidp, changed, fhp, NULL, NULL, laytype, p);
7187 /* If lyp != NULL, handle an error return here. */
7188 if (error != 0 && lyp != NULL) {
7189 NFSDRECALLLOCK();
7190 /*
7191 * Mark it returned, since no layout recall
7192 * has been done.
7193 * All errors seem to be non-recoverable, although
7194 * NFSERR_NOMATCHLAYOUT is a normal event.
7195 */
7196 if ((lyp->lay_flags & NFSLAY_RECALL) != 0) {
7197 lyp->lay_flags |= NFSLAY_RETURNED;
7198 wakeup(lyp);
7199 }
7200 NFSDRECALLUNLOCK();
7201 if (error != NFSERR_NOMATCHLAYOUT)
7202 printf("nfsrv_recalllayout: err=%d\n", error);
7203 }
7204 } else
7205 printf("nfsrv_recalllayout: clp not NFSv4.1\n");
7206 return (error);
7207 }
7208
7209 /*
7210 * Find a layout to recall when we exceed our high water mark.
7211 */
7212 void
nfsrv_recalloldlayout(NFSPROC_T * p)7213 nfsrv_recalloldlayout(NFSPROC_T *p)
7214 {
7215 struct nfslayouthash *lhyp;
7216 struct nfslayout *lyp;
7217 nfsquad_t clientid;
7218 nfsv4stateid_t stateid;
7219 fhandle_t fh;
7220 int error, laytype = 0, ret;
7221
7222 lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize];
7223 NFSLOCKLAYOUT(lhyp);
7224 TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) {
7225 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
7226 lyp->lay_flags |= NFSLAY_CALLB;
7227 /*
7228 * The layout stateid.seqid needs to be incremented
7229 * before doing a LAYOUT_RECALL callback.
7230 */
7231 if (++lyp->lay_stateid.seqid == 0)
7232 lyp->lay_stateid.seqid = 1;
7233 clientid = lyp->lay_clientid;
7234 stateid = lyp->lay_stateid;
7235 NFSBCOPY(&lyp->lay_fh, &fh, sizeof(fh));
7236 laytype = lyp->lay_type;
7237 break;
7238 }
7239 }
7240 NFSUNLOCKLAYOUT(lhyp);
7241 if (lyp != NULL) {
7242 error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, 0,
7243 laytype, p);
7244 if (error != 0 && error != NFSERR_NOMATCHLAYOUT)
7245 NFSD_DEBUG(4, "recallold=%d\n", error);
7246 if (error != 0) {
7247 NFSLOCKLAYOUT(lhyp);
7248 /*
7249 * Since the hash list was unlocked, we need to
7250 * find it again.
7251 */
7252 ret = nfsrv_findlayout(&clientid, &fh, laytype, p,
7253 &lyp);
7254 if (ret == 0 &&
7255 (lyp->lay_flags & NFSLAY_CALLB) != 0 &&
7256 lyp->lay_stateid.other[0] == stateid.other[0] &&
7257 lyp->lay_stateid.other[1] == stateid.other[1] &&
7258 lyp->lay_stateid.other[2] == stateid.other[2]) {
7259 /*
7260 * The client no longer knows this layout, so
7261 * it can be free'd now.
7262 */
7263 if (error == NFSERR_NOMATCHLAYOUT)
7264 nfsrv_freelayout(&lhyp->list, lyp);
7265 else {
7266 /*
7267 * Leave it to be tried later by
7268 * clearing NFSLAY_CALLB and moving
7269 * it to the head of the list, so it
7270 * won't be tried again for a while.
7271 */
7272 lyp->lay_flags &= ~NFSLAY_CALLB;
7273 TAILQ_REMOVE(&lhyp->list, lyp,
7274 lay_list);
7275 TAILQ_INSERT_HEAD(&lhyp->list, lyp,
7276 lay_list);
7277 }
7278 }
7279 NFSUNLOCKLAYOUT(lhyp);
7280 }
7281 }
7282 }
7283
7284 /*
7285 * Try and return layout(s).
7286 */
7287 int
nfsrv_layoutreturn(struct nfsrv_descript * nd,vnode_t vp,int layouttype,int iomode,uint64_t offset,uint64_t len,int reclaim,int kind,nfsv4stateid_t * stateidp,int maxcnt,uint32_t * layp,int * fndp,struct ucred * cred,NFSPROC_T * p)7288 nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp,
7289 int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim,
7290 int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp,
7291 struct ucred *cred, NFSPROC_T *p)
7292 {
7293 struct nfsvattr na;
7294 struct nfslayouthash *lhyp;
7295 struct nfslayout *lyp;
7296 fhandle_t fh;
7297 int error = 0;
7298
7299 *fndp = 0;
7300 if (kind == NFSV4LAYOUTRET_FILE) {
7301 error = nfsvno_getfh(vp, &fh, p);
7302 if (error == 0) {
7303 error = nfsrv_updatemdsattr(vp, &na, p);
7304 if (error != 0)
7305 printf("nfsrv_layoutreturn: updatemdsattr"
7306 " failed=%d\n", error);
7307 }
7308 if (error == 0) {
7309 if (reclaim == newnfs_true) {
7310 error = nfsrv_checkgrace(NULL, NULL,
7311 NFSLCK_RECLAIM);
7312 if (error != NFSERR_NOGRACE)
7313 error = 0;
7314 return (error);
7315 }
7316 lhyp = NFSLAYOUTHASH(&fh);
7317 NFSDRECALLLOCK();
7318 NFSLOCKLAYOUT(lhyp);
7319 error = nfsrv_findlayout(&nd->nd_clientid, &fh,
7320 layouttype, p, &lyp);
7321 NFSD_DEBUG(4, "layoutret findlay=%d\n", error);
7322 if (error == 0 &&
7323 stateidp->other[0] == lyp->lay_stateid.other[0] &&
7324 stateidp->other[1] == lyp->lay_stateid.other[1] &&
7325 stateidp->other[2] == lyp->lay_stateid.other[2]) {
7326 NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d"
7327 " %x %x %x laystateid %d %x %x %x"
7328 " off=%ju len=%ju flgs=0x%x\n",
7329 stateidp->seqid, stateidp->other[0],
7330 stateidp->other[1], stateidp->other[2],
7331 lyp->lay_stateid.seqid,
7332 lyp->lay_stateid.other[0],
7333 lyp->lay_stateid.other[1],
7334 lyp->lay_stateid.other[2],
7335 (uintmax_t)offset, (uintmax_t)len,
7336 lyp->lay_flags);
7337 if (++lyp->lay_stateid.seqid == 0)
7338 lyp->lay_stateid.seqid = 1;
7339 stateidp->seqid = lyp->lay_stateid.seqid;
7340 if (offset == 0 && len == UINT64_MAX) {
7341 if ((iomode & NFSLAYOUTIOMODE_READ) !=
7342 0)
7343 lyp->lay_flags &= ~NFSLAY_READ;
7344 if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
7345 lyp->lay_flags &= ~NFSLAY_RW;
7346 if ((lyp->lay_flags & (NFSLAY_READ |
7347 NFSLAY_RW)) == 0)
7348 nfsrv_freelayout(&lhyp->list,
7349 lyp);
7350 else
7351 *fndp = 1;
7352 } else
7353 *fndp = 1;
7354 }
7355 NFSUNLOCKLAYOUT(lhyp);
7356 /* Search the nfsrv_recalllist for a match. */
7357 TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
7358 if (NFSBCMP(&lyp->lay_fh, &fh,
7359 sizeof(fh)) == 0 &&
7360 lyp->lay_clientid.qval ==
7361 nd->nd_clientid.qval &&
7362 stateidp->other[0] ==
7363 lyp->lay_stateid.other[0] &&
7364 stateidp->other[1] ==
7365 lyp->lay_stateid.other[1] &&
7366 stateidp->other[2] ==
7367 lyp->lay_stateid.other[2]) {
7368 lyp->lay_flags |= NFSLAY_RETURNED;
7369 wakeup(lyp);
7370 error = 0;
7371 }
7372 }
7373 NFSDRECALLUNLOCK();
7374 }
7375 if (layouttype == NFSLAYOUT_FLEXFILE && layp != NULL)
7376 nfsrv_flexlayouterr(nd, layp, maxcnt, p);
7377 } else if (kind == NFSV4LAYOUTRET_FSID)
7378 nfsrv_freelayouts(&nd->nd_clientid,
7379 &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode);
7380 else if (kind == NFSV4LAYOUTRET_ALL)
7381 nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode);
7382 else
7383 error = NFSERR_INVAL;
7384 if (error == -1)
7385 error = 0;
7386 return (error);
7387 }
7388
7389 /*
7390 * Look for an existing layout.
7391 */
7392 static int
nfsrv_findlayout(nfsquad_t * clientidp,fhandle_t * fhp,int laytype,NFSPROC_T * p,struct nfslayout ** lypp)7393 nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
7394 NFSPROC_T *p, struct nfslayout **lypp)
7395 {
7396 struct nfslayouthash *lhyp;
7397 struct nfslayout *lyp;
7398 int ret;
7399
7400 *lypp = NULL;
7401 ret = 0;
7402 lhyp = NFSLAYOUTHASH(fhp);
7403 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
7404 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
7405 lyp->lay_clientid.qval == clientidp->qval &&
7406 lyp->lay_type == laytype)
7407 break;
7408 }
7409 if (lyp != NULL)
7410 *lypp = lyp;
7411 else
7412 ret = -1;
7413 return (ret);
7414 }
7415
7416 /*
7417 * Add the new layout, as required.
7418 */
7419 static int
nfsrv_addlayout(struct nfsrv_descript * nd,struct nfslayout ** lypp,nfsv4stateid_t * stateidp,char * layp,int * layoutlenp,NFSPROC_T * p)7420 nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
7421 nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p)
7422 {
7423 struct nfsclient *clp;
7424 struct nfslayouthash *lhyp;
7425 struct nfslayout *lyp, *nlyp;
7426 fhandle_t *fhp;
7427 int error;
7428
7429 KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0,
7430 ("nfsrv_layoutget: no nd_clientid\n"));
7431 lyp = *lypp;
7432 fhp = &lyp->lay_fh;
7433 NFSLOCKSTATE();
7434 error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
7435 NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
7436 if (error != 0) {
7437 NFSUNLOCKSTATE();
7438 return (error);
7439 }
7440 lyp->lay_stateid.seqid = stateidp->seqid = 1;
7441 lyp->lay_stateid.other[0] = stateidp->other[0] =
7442 clp->lc_clientid.lval[0];
7443 lyp->lay_stateid.other[1] = stateidp->other[1] =
7444 clp->lc_clientid.lval[1];
7445 lyp->lay_stateid.other[2] = stateidp->other[2] =
7446 nfsrv_nextstateindex(clp);
7447 NFSUNLOCKSTATE();
7448
7449 lhyp = NFSLAYOUTHASH(fhp);
7450 NFSLOCKLAYOUT(lhyp);
7451 TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) {
7452 if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
7453 nlyp->lay_clientid.qval == nd->nd_clientid.qval)
7454 break;
7455 }
7456 if (nlyp != NULL) {
7457 /* A layout already exists, so use it. */
7458 nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW));
7459 NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen);
7460 *layoutlenp = nlyp->lay_layoutlen;
7461 if (++nlyp->lay_stateid.seqid == 0)
7462 nlyp->lay_stateid.seqid = 1;
7463 stateidp->seqid = nlyp->lay_stateid.seqid;
7464 stateidp->other[0] = nlyp->lay_stateid.other[0];
7465 stateidp->other[1] = nlyp->lay_stateid.other[1];
7466 stateidp->other[2] = nlyp->lay_stateid.other[2];
7467 NFSUNLOCKLAYOUT(lhyp);
7468 return (0);
7469 }
7470
7471 /* Insert the new layout in the lists. */
7472 *lypp = NULL;
7473 atomic_add_int(&nfsrv_layoutcnt, 1);
7474 NFSD_VNET(nfsstatsv1_p)->srvlayouts++;
7475 NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
7476 *layoutlenp = lyp->lay_layoutlen;
7477 TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list);
7478 NFSUNLOCKLAYOUT(lhyp);
7479 return (0);
7480 }
7481
7482 /*
7483 * Get the devinfo for a deviceid.
7484 */
7485 int
nfsrv_getdevinfo(char * devid,int layouttype,uint32_t * maxcnt,uint32_t * notify,int * devaddrlen,char ** devaddr)7486 nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt,
7487 uint32_t *notify, int *devaddrlen, char **devaddr)
7488 {
7489 struct nfsdevice *ds;
7490
7491 if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype !=
7492 NFSLAYOUT_FLEXFILE) ||
7493 (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES))
7494 return (NFSERR_UNKNLAYOUTTYPE);
7495
7496 /*
7497 * Now, search for the device id. Note that the structures won't go
7498 * away, but the order changes in the list. As such, the lock only
7499 * needs to be held during the search through the list.
7500 */
7501 NFSDDSLOCK();
7502 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7503 if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 &&
7504 ds->nfsdev_nmp != NULL)
7505 break;
7506 }
7507 NFSDDSUNLOCK();
7508 if (ds == NULL)
7509 return (NFSERR_NOENT);
7510
7511 /* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. */
7512 *devaddrlen = 0;
7513 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
7514 *devaddrlen = ds->nfsdev_fileaddrlen;
7515 *devaddr = ds->nfsdev_fileaddr;
7516 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
7517 *devaddrlen = ds->nfsdev_flexaddrlen;
7518 *devaddr = ds->nfsdev_flexaddr;
7519 }
7520 if (*devaddrlen == 0)
7521 return (NFSERR_UNKNLAYOUTTYPE);
7522
7523 /*
7524 * The XDR overhead is 3 unsigned values: layout_type,
7525 * length_of_address and notify bitmap.
7526 * If the notify array is changed to not all zeros, the
7527 * count of unsigned values must be increased.
7528 */
7529 if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) +
7530 3 * NFSX_UNSIGNED) {
7531 *maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED;
7532 return (NFSERR_TOOSMALL);
7533 }
7534 return (0);
7535 }
7536
7537 /*
7538 * Free a list of layout state structures.
7539 */
7540 static void
nfsrv_freelayoutlist(nfsquad_t clientid)7541 nfsrv_freelayoutlist(nfsquad_t clientid)
7542 {
7543 struct nfslayouthash *lhyp;
7544 struct nfslayout *lyp, *nlyp;
7545 int i;
7546
7547 for (i = 0; i < nfsrv_layouthashsize; i++) {
7548 lhyp = &nfslayouthash[i];
7549 NFSLOCKLAYOUT(lhyp);
7550 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7551 if (lyp->lay_clientid.qval == clientid.qval)
7552 nfsrv_freelayout(&lhyp->list, lyp);
7553 }
7554 NFSUNLOCKLAYOUT(lhyp);
7555 }
7556 }
7557
7558 /*
7559 * Free up a layout.
7560 */
7561 static void
nfsrv_freelayout(struct nfslayouthead * lhp,struct nfslayout * lyp)7562 nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp)
7563 {
7564
7565 NFSD_DEBUG(4, "Freelayout=%p\n", lyp);
7566 atomic_add_int(&nfsrv_layoutcnt, -1);
7567 NFSD_VNET(nfsstatsv1_p)->srvlayouts--;
7568 TAILQ_REMOVE(lhp, lyp, lay_list);
7569 free(lyp, M_NFSDSTATE);
7570 }
7571
7572 /*
7573 * Free up a device id.
7574 */
7575 void
nfsrv_freeonedevid(struct nfsdevice * ds)7576 nfsrv_freeonedevid(struct nfsdevice *ds)
7577 {
7578 int i;
7579
7580 atomic_add_int(&nfsrv_devidcnt, -1);
7581 vrele(ds->nfsdev_dvp);
7582 for (i = 0; i < nfsrv_dsdirsize; i++)
7583 if (ds->nfsdev_dsdir[i] != NULL)
7584 vrele(ds->nfsdev_dsdir[i]);
7585 free(ds->nfsdev_fileaddr, M_NFSDSTATE);
7586 free(ds->nfsdev_flexaddr, M_NFSDSTATE);
7587 free(ds->nfsdev_host, M_NFSDSTATE);
7588 free(ds, M_NFSDSTATE);
7589 }
7590
7591 /*
7592 * Free up a device id and its mirrors.
7593 */
7594 static void
nfsrv_freedevid(struct nfsdevice * ds)7595 nfsrv_freedevid(struct nfsdevice *ds)
7596 {
7597
7598 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
7599 nfsrv_freeonedevid(ds);
7600 }
7601
7602 /*
7603 * Free all layouts and device ids.
7604 * Done when the nfsd threads are shut down since there may be a new
7605 * modified device id list created when the nfsd is restarted.
7606 */
7607 void
nfsrv_freealllayoutsanddevids(void)7608 nfsrv_freealllayoutsanddevids(void)
7609 {
7610 struct nfsdontlist *mrp, *nmrp;
7611 struct nfslayout *lyp, *nlyp;
7612
7613 /* Get rid of the deviceid structures. */
7614 nfsrv_freealldevids();
7615 TAILQ_INIT(&nfsrv_devidhead);
7616 nfsrv_devidcnt = 0;
7617
7618 /* Get rid of all layouts. */
7619 nfsrv_freealllayouts();
7620
7621 /* Get rid of any nfsdontlist entries. */
7622 LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp)
7623 free(mrp, M_NFSDSTATE);
7624 LIST_INIT(&nfsrv_dontlisthead);
7625 nfsrv_dontlistlen = 0;
7626
7627 /* Free layouts in the recall list. */
7628 TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp)
7629 nfsrv_freelayout(&nfsrv_recalllisthead, lyp);
7630 TAILQ_INIT(&nfsrv_recalllisthead);
7631 }
7632
7633 /*
7634 * Free layouts that match the arguments.
7635 */
7636 static void
nfsrv_freelayouts(nfsquad_t * clid,fsid_t * fs,int laytype,int iomode)7637 nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode)
7638 {
7639 struct nfslayouthash *lhyp;
7640 struct nfslayout *lyp, *nlyp;
7641 int i;
7642
7643 for (i = 0; i < nfsrv_layouthashsize; i++) {
7644 lhyp = &nfslayouthash[i];
7645 NFSLOCKLAYOUT(lhyp);
7646 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7647 if (clid->qval != lyp->lay_clientid.qval)
7648 continue;
7649 if (fs != NULL && fsidcmp(fs, &lyp->lay_fsid) != 0)
7650 continue;
7651 if (laytype != lyp->lay_type)
7652 continue;
7653 if ((iomode & NFSLAYOUTIOMODE_READ) != 0)
7654 lyp->lay_flags &= ~NFSLAY_READ;
7655 if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
7656 lyp->lay_flags &= ~NFSLAY_RW;
7657 if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0)
7658 nfsrv_freelayout(&lhyp->list, lyp);
7659 }
7660 NFSUNLOCKLAYOUT(lhyp);
7661 }
7662 }
7663
7664 /*
7665 * Free all layouts for the argument file.
7666 */
7667 void
nfsrv_freefilelayouts(fhandle_t * fhp)7668 nfsrv_freefilelayouts(fhandle_t *fhp)
7669 {
7670 struct nfslayouthash *lhyp;
7671 struct nfslayout *lyp, *nlyp;
7672
7673 lhyp = NFSLAYOUTHASH(fhp);
7674 NFSLOCKLAYOUT(lhyp);
7675 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7676 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0)
7677 nfsrv_freelayout(&lhyp->list, lyp);
7678 }
7679 NFSUNLOCKLAYOUT(lhyp);
7680 }
7681
7682 /*
7683 * Free all layouts.
7684 */
7685 static void
nfsrv_freealllayouts(void)7686 nfsrv_freealllayouts(void)
7687 {
7688 struct nfslayouthash *lhyp;
7689 struct nfslayout *lyp, *nlyp;
7690 int i;
7691
7692 for (i = 0; i < nfsrv_layouthashsize; i++) {
7693 lhyp = &nfslayouthash[i];
7694 NFSLOCKLAYOUT(lhyp);
7695 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp)
7696 nfsrv_freelayout(&lhyp->list, lyp);
7697 NFSUNLOCKLAYOUT(lhyp);
7698 }
7699 }
7700
7701 /*
7702 * Look up the mount path for the DS server.
7703 */
7704 static int
nfsrv_setdsserver(char * dspathp,char * mdspathp,NFSPROC_T * p,struct nfsdevice ** dsp)7705 nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
7706 struct nfsdevice **dsp)
7707 {
7708 struct nameidata nd;
7709 struct nfsdevice *ds;
7710 struct mount *mp;
7711 int error, i;
7712 char *dsdirpath;
7713 size_t dsdirsize;
7714
7715 NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp);
7716 *dsp = NULL;
7717 if (jailed(p->td_ucred)) {
7718 printf("A pNFS nfsd cannot run in a jail\n");
7719 return (EPERM);
7720 }
7721 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
7722 dspathp);
7723 error = namei(&nd);
7724 NFSD_DEBUG(4, "lookup=%d\n", error);
7725 if (error != 0)
7726 return (error);
7727 if (nd.ni_vp->v_type != VDIR) {
7728 vput(nd.ni_vp);
7729 NFSD_DEBUG(4, "dspath not dir\n");
7730 return (ENOTDIR);
7731 }
7732 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
7733 vput(nd.ni_vp);
7734 NFSD_DEBUG(4, "dspath not an NFS mount\n");
7735 return (ENXIO);
7736 }
7737
7738 /*
7739 * Allocate a DS server structure with the NFS mounted directory
7740 * vnode reference counted, so that a non-forced dismount will
7741 * fail with EBUSY.
7742 * This structure is always linked into the list, even if an error
7743 * is being returned. The caller will free the entire list upon
7744 * an error return.
7745 */
7746 *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t),
7747 M_NFSDSTATE, M_WAITOK | M_ZERO);
7748 ds->nfsdev_dvp = nd.ni_vp;
7749 ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount);
7750 NFSVOPUNLOCK(nd.ni_vp);
7751
7752 dsdirsize = strlen(dspathp) + 16;
7753 dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK);
7754 /* Now, create the DS directory structures. */
7755 for (i = 0; i < nfsrv_dsdirsize; i++) {
7756 snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i);
7757 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
7758 UIO_SYSSPACE, dsdirpath);
7759 error = namei(&nd);
7760 NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error);
7761 if (error != 0)
7762 break;
7763 if (nd.ni_vp->v_type != VDIR) {
7764 vput(nd.ni_vp);
7765 error = ENOTDIR;
7766 NFSD_DEBUG(4, "dsdirpath not a VDIR\n");
7767 break;
7768 }
7769 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
7770 vput(nd.ni_vp);
7771 error = ENXIO;
7772 NFSD_DEBUG(4, "dsdirpath not an NFS mount\n");
7773 break;
7774 }
7775 ds->nfsdev_dsdir[i] = nd.ni_vp;
7776 NFSVOPUNLOCK(nd.ni_vp);
7777 }
7778 free(dsdirpath, M_TEMP);
7779
7780 if (strlen(mdspathp) > 0) {
7781 /*
7782 * This DS stores file for a specific MDS exported file
7783 * system.
7784 */
7785 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
7786 UIO_SYSSPACE, mdspathp);
7787 error = namei(&nd);
7788 NFSD_DEBUG(4, "mds lookup=%d\n", error);
7789 if (error != 0)
7790 goto out;
7791 if (nd.ni_vp->v_type != VDIR) {
7792 vput(nd.ni_vp);
7793 error = ENOTDIR;
7794 NFSD_DEBUG(4, "mdspath not dir\n");
7795 goto out;
7796 }
7797 mp = nd.ni_vp->v_mount;
7798 if ((mp->mnt_flag & MNT_EXPORTED) == 0) {
7799 vput(nd.ni_vp);
7800 error = ENXIO;
7801 NFSD_DEBUG(4, "mdspath not an exported fs\n");
7802 goto out;
7803 }
7804 ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid;
7805 ds->nfsdev_mdsisset = 1;
7806 vput(nd.ni_vp);
7807 }
7808
7809 out:
7810 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
7811 atomic_add_int(&nfsrv_devidcnt, 1);
7812 return (error);
7813 }
7814
7815 /*
7816 * Look up the mount path for the DS server and delete it.
7817 */
7818 int
nfsrv_deldsserver(int op,char * dspathp,NFSPROC_T * p)7819 nfsrv_deldsserver(int op, char *dspathp, NFSPROC_T *p)
7820 {
7821 struct mount *mp;
7822 struct nfsmount *nmp;
7823 struct nfsdevice *ds;
7824 int error;
7825
7826 NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp);
7827 /*
7828 * Search for the path in the mount list. Avoid looking the path
7829 * up, since this mount point may be hung, with associated locked
7830 * vnodes, etc.
7831 * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked
7832 * until this completes.
7833 * As noted in the man page, this should be done before any forced
7834 * dismount on the mount point, but at least the handshake on
7835 * NFSMNTP_CANCELRPCS should make it safe.
7836 */
7837 error = 0;
7838 ds = NULL;
7839 nmp = NULL;
7840 mtx_lock(&mountlist_mtx);
7841 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
7842 if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 &&
7843 strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 &&
7844 mp->mnt_data != NULL) {
7845 nmp = VFSTONFS(mp);
7846 NFSLOCKMNT(nmp);
7847 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
7848 NFSMNTP_CANCELRPCS)) == 0) {
7849 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
7850 NFSUNLOCKMNT(nmp);
7851 } else {
7852 NFSUNLOCKMNT(nmp);
7853 nmp = NULL;
7854 }
7855 break;
7856 }
7857 }
7858 mtx_unlock(&mountlist_mtx);
7859
7860 if (nmp != NULL) {
7861 ds = nfsrv_deldsnmp(op, nmp, p);
7862 NFSD_DEBUG(4, "deldsnmp=%p\n", ds);
7863 if (ds != NULL) {
7864 nfsrv_killrpcs(nmp);
7865 NFSD_DEBUG(4, "aft killrpcs\n");
7866 } else
7867 error = ENXIO;
7868 NFSLOCKMNT(nmp);
7869 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
7870 wakeup(nmp);
7871 NFSUNLOCKMNT(nmp);
7872 } else
7873 error = EINVAL;
7874 return (error);
7875 }
7876
7877 /*
7878 * Search for and remove a DS entry which matches the "nmp" argument.
7879 * The nfsdevice structure pointer is returned so that the caller can
7880 * free it via nfsrv_freeonedevid().
7881 * For the forced case, do not try to do LayoutRecalls, since the server
7882 * must be shut down now anyhow.
7883 */
7884 struct nfsdevice *
nfsrv_deldsnmp(int op,struct nfsmount * nmp,NFSPROC_T * p)7885 nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p)
7886 {
7887 struct nfsdevice *fndds;
7888
7889 NFSD_DEBUG(4, "deldsdvp\n");
7890 NFSDDSLOCK();
7891 if (op == PNFSDOP_FORCEDELDS)
7892 fndds = nfsv4_findmirror(nmp);
7893 else
7894 fndds = nfsrv_findmirroredds(nmp);
7895 if (fndds != NULL)
7896 nfsrv_deleteds(fndds);
7897 NFSDDSUNLOCK();
7898 if (fndds != NULL) {
7899 if (op != PNFSDOP_FORCEDELDS)
7900 nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
7901 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
7902 }
7903 return (fndds);
7904 }
7905
7906 /*
7907 * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid.
7908 * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount
7909 * point.
7910 * Also, returns an error instead of the nfsdevice found.
7911 */
7912 int
nfsrv_delds(char * devid,NFSPROC_T * p)7913 nfsrv_delds(char *devid, NFSPROC_T *p)
7914 {
7915 struct nfsdevice *ds, *fndds;
7916 struct nfsmount *nmp;
7917 int fndmirror;
7918
7919 NFSD_DEBUG(4, "delds\n");
7920 /*
7921 * Search the DS server list for a match with devid.
7922 * Remove the DS entry if found and there is a mirror.
7923 */
7924 fndds = NULL;
7925 nmp = NULL;
7926 fndmirror = 0;
7927 NFSDDSLOCK();
7928 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7929 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 &&
7930 ds->nfsdev_nmp != NULL) {
7931 NFSD_DEBUG(4, "fnd main ds\n");
7932 fndds = ds;
7933 break;
7934 }
7935 }
7936 if (fndds == NULL) {
7937 NFSDDSUNLOCK();
7938 return (ENXIO);
7939 }
7940 if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
7941 fndmirror = 1;
7942 else if (fndds->nfsdev_mdsisset != 0) {
7943 /* For the fsid is set case, search for a mirror. */
7944 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7945 if (ds != fndds && ds->nfsdev_nmp != NULL &&
7946 ds->nfsdev_mdsisset != 0 &&
7947 fsidcmp(&ds->nfsdev_mdsfsid,
7948 &fndds->nfsdev_mdsfsid) == 0) {
7949 fndmirror = 1;
7950 break;
7951 }
7952 }
7953 }
7954 if (fndmirror != 0) {
7955 nmp = fndds->nfsdev_nmp;
7956 NFSLOCKMNT(nmp);
7957 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
7958 NFSMNTP_CANCELRPCS)) == 0) {
7959 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
7960 NFSUNLOCKMNT(nmp);
7961 nfsrv_deleteds(fndds);
7962 } else {
7963 NFSUNLOCKMNT(nmp);
7964 nmp = NULL;
7965 }
7966 }
7967 NFSDDSUNLOCK();
7968 if (nmp != NULL) {
7969 nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
7970 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
7971 nfsrv_killrpcs(nmp);
7972 NFSLOCKMNT(nmp);
7973 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
7974 wakeup(nmp);
7975 NFSUNLOCKMNT(nmp);
7976 return (0);
7977 }
7978 return (ENXIO);
7979 }
7980
7981 /*
7982 * Mark a DS as disabled by setting nfsdev_nmp = NULL.
7983 */
7984 static void
nfsrv_deleteds(struct nfsdevice * fndds)7985 nfsrv_deleteds(struct nfsdevice *fndds)
7986 {
7987
7988 NFSD_DEBUG(4, "deleteds: deleting a mirror\n");
7989 fndds->nfsdev_nmp = NULL;
7990 if (fndds->nfsdev_mdsisset == 0)
7991 nfsrv_faildscnt--;
7992 }
7993
7994 /*
7995 * Fill in the addr structures for the File and Flex File layouts.
7996 */
7997 static void
nfsrv_allocdevid(struct nfsdevice * ds,char * addr,char * dnshost)7998 nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost)
7999 {
8000 uint32_t *tl;
8001 char *netprot;
8002 int addrlen;
8003 static uint64_t new_devid = 0;
8004
8005 if (strchr(addr, ':') != NULL)
8006 netprot = "tcp6";
8007 else
8008 netprot = "tcp";
8009
8010 /* Fill in the device id. */
8011 NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time));
8012 new_devid++;
8013 NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)],
8014 sizeof(new_devid));
8015
8016 /*
8017 * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4
8018 * as defined in RFC5661) in XDR.
8019 */
8020 addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
8021 6 * NFSX_UNSIGNED;
8022 NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot);
8023 ds->nfsdev_fileaddrlen = addrlen;
8024 tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
8025 ds->nfsdev_fileaddr = (char *)tl;
8026 *tl++ = txdr_unsigned(1); /* One stripe with index 0. */
8027 *tl++ = 0;
8028 *tl++ = txdr_unsigned(1); /* One multipath list */
8029 *tl++ = txdr_unsigned(1); /* with one entry in it. */
8030 /* The netaddr for this one entry. */
8031 *tl++ = txdr_unsigned(strlen(netprot));
8032 NFSBCOPY(netprot, tl, strlen(netprot));
8033 tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
8034 *tl++ = txdr_unsigned(strlen(addr));
8035 NFSBCOPY(addr, tl, strlen(addr));
8036
8037 /*
8038 * Fill in the flex file addr (actually the ff_device_addr4
8039 * as defined for Flexible File Layout) in XDR.
8040 */
8041 addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
8042 14 * NFSX_UNSIGNED;
8043 ds->nfsdev_flexaddrlen = addrlen;
8044 tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
8045 ds->nfsdev_flexaddr = (char *)tl;
8046 *tl++ = txdr_unsigned(1); /* One multipath entry. */
8047 /* The netaddr for this one entry. */
8048 *tl++ = txdr_unsigned(strlen(netprot));
8049 NFSBCOPY(netprot, tl, strlen(netprot));
8050 tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
8051 *tl++ = txdr_unsigned(strlen(addr));
8052 NFSBCOPY(addr, tl, strlen(addr));
8053 tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED);
8054 *tl++ = txdr_unsigned(2); /* Two NFS Versions. */
8055 *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */
8056 *tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */
8057 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */
8058 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */
8059 *tl++ = newnfs_true; /* Tightly coupled. */
8060 *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */
8061 *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */
8062 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */
8063 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */
8064 *tl = newnfs_true; /* Tightly coupled. */
8065
8066 ds->nfsdev_hostnamelen = strlen(dnshost);
8067 ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE,
8068 M_WAITOK);
8069 NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1);
8070 }
8071
8072 /*
8073 * Create the device id list.
8074 * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument
8075 * is misconfigured.
8076 */
8077 int
nfsrv_createdevids(struct nfsd_nfsd_args * args,NFSPROC_T * p)8078 nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p)
8079 {
8080 struct nfsdevice *ds;
8081 char *addrp, *dnshostp, *dspathp, *mdspathp;
8082 int error, i;
8083
8084 addrp = args->addr;
8085 dnshostp = args->dnshost;
8086 dspathp = args->dspath;
8087 mdspathp = args->mdspath;
8088 nfsrv_maxpnfsmirror = args->mirrorcnt;
8089 if (addrp == NULL || dnshostp == NULL || dspathp == NULL ||
8090 mdspathp == NULL)
8091 return (0);
8092
8093 /*
8094 * Loop around for each nul-terminated string in args->addr,
8095 * args->dnshost, args->dnspath and args->mdspath.
8096 */
8097 while (addrp < (args->addr + args->addrlen) &&
8098 dnshostp < (args->dnshost + args->dnshostlen) &&
8099 dspathp < (args->dspath + args->dspathlen) &&
8100 mdspathp < (args->mdspath + args->mdspathlen)) {
8101 error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds);
8102 if (error != 0) {
8103 /* Free all DS servers. */
8104 nfsrv_freealldevids();
8105 nfsrv_devidcnt = 0;
8106 return (ENXIO);
8107 }
8108 nfsrv_allocdevid(ds, addrp, dnshostp);
8109 addrp += (strlen(addrp) + 1);
8110 dnshostp += (strlen(dnshostp) + 1);
8111 dspathp += (strlen(dspathp) + 1);
8112 mdspathp += (strlen(mdspathp) + 1);
8113 }
8114 if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) {
8115 /* Free all DS servers. */
8116 nfsrv_freealldevids();
8117 nfsrv_devidcnt = 0;
8118 nfsrv_maxpnfsmirror = 1;
8119 return (ENXIO);
8120 }
8121 /* We can fail at most one less DS than the mirror level. */
8122 nfsrv_faildscnt = nfsrv_maxpnfsmirror - 1;
8123
8124 /*
8125 * Allocate the nfslayout hash table now, since this is a pNFS server.
8126 * Make it 1% of the high water mark and at least 100.
8127 */
8128 if (nfslayouthash == NULL) {
8129 nfsrv_layouthashsize = nfsrv_layouthighwater / 100;
8130 if (nfsrv_layouthashsize < 100)
8131 nfsrv_layouthashsize = 100;
8132 nfslayouthash = mallocarray(nfsrv_layouthashsize,
8133 sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK |
8134 M_ZERO);
8135 for (i = 0; i < nfsrv_layouthashsize; i++) {
8136 mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF);
8137 TAILQ_INIT(&nfslayouthash[i].list);
8138 }
8139 }
8140 return (0);
8141 }
8142
8143 /*
8144 * Free all device ids.
8145 */
8146 static void
nfsrv_freealldevids(void)8147 nfsrv_freealldevids(void)
8148 {
8149 struct nfsdevice *ds, *nds;
8150
8151 TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds)
8152 nfsrv_freedevid(ds);
8153 }
8154
8155 /*
8156 * Check to see if there is a Read/Write Layout plus either:
8157 * - A Write Delegation
8158 * or
8159 * - An Open with Write_access.
8160 * Return 1 if this is the case and 0 otherwise.
8161 * This function is used by nfsrv_proxyds() to decide if doing a Proxy
8162 * Getattr RPC to the Data Server (DS) is necessary.
8163 */
8164 #define NFSCLIDVECSIZE 6
8165 int
nfsrv_checkdsattr(vnode_t vp,NFSPROC_T * p)8166 nfsrv_checkdsattr(vnode_t vp, NFSPROC_T *p)
8167 {
8168 fhandle_t fh, *tfhp;
8169 struct nfsstate *stp;
8170 struct nfslayout *lyp;
8171 struct nfslayouthash *lhyp;
8172 struct nfslockhashhead *hp;
8173 struct nfslockfile *lfp;
8174 nfsquad_t clid[NFSCLIDVECSIZE];
8175 int clidcnt, ret;
8176
8177 ret = nfsvno_getfh(vp, &fh, p);
8178 if (ret != 0)
8179 return (0);
8180
8181 /* First check for a Read/Write Layout. */
8182 clidcnt = 0;
8183 lhyp = NFSLAYOUTHASH(&fh);
8184 NFSLOCKLAYOUT(lhyp);
8185 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
8186 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8187 ((lyp->lay_flags & NFSLAY_RW) != 0 ||
8188 ((lyp->lay_flags & NFSLAY_READ) != 0 &&
8189 nfsrv_pnfsatime != 0))) {
8190 if (clidcnt < NFSCLIDVECSIZE)
8191 clid[clidcnt].qval = lyp->lay_clientid.qval;
8192 clidcnt++;
8193 }
8194 }
8195 NFSUNLOCKLAYOUT(lhyp);
8196 if (clidcnt == 0) {
8197 /* None found, so return 0. */
8198 return (0);
8199 }
8200
8201 /* Get the nfslockfile for this fh. */
8202 NFSLOCKSTATE();
8203 hp = NFSLOCKHASH(&fh);
8204 LIST_FOREACH(lfp, hp, lf_hash) {
8205 tfhp = &lfp->lf_fh;
8206 if (NFSVNO_CMPFH(&fh, tfhp))
8207 break;
8208 }
8209 if (lfp == NULL) {
8210 /* None found, so return 0. */
8211 NFSUNLOCKSTATE();
8212 return (0);
8213 }
8214
8215 /* Now, look for a Write delegation for this clientid. */
8216 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
8217 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
8218 nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
8219 break;
8220 }
8221 if (stp != NULL) {
8222 /* Found one, so return 1. */
8223 NFSUNLOCKSTATE();
8224 return (1);
8225 }
8226
8227 /* No Write delegation, so look for an Open with Write_access. */
8228 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
8229 KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0,
8230 ("nfsrv_checkdsattr: Non-open in Open list\n"));
8231 if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 &&
8232 nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
8233 break;
8234 }
8235 NFSUNLOCKSTATE();
8236 if (stp != NULL)
8237 return (1);
8238 return (0);
8239 }
8240
8241 /*
8242 * Look for a matching clientid in the vector. Return 1 if one might match.
8243 */
8244 static int
nfsrv_fndclid(nfsquad_t * clidvec,nfsquad_t clid,int clidcnt)8245 nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt)
8246 {
8247 int i;
8248
8249 /* If too many for the vector, return 1 since there might be a match. */
8250 if (clidcnt > NFSCLIDVECSIZE)
8251 return (1);
8252
8253 for (i = 0; i < clidcnt; i++)
8254 if (clidvec[i].qval == clid.qval)
8255 return (1);
8256 return (0);
8257 }
8258
8259 /*
8260 * Check the don't list for "vp" and see if issuing an rw layout is allowed.
8261 * Return 1 if issuing an rw layout isn't allowed, 0 otherwise.
8262 */
8263 static int
nfsrv_dontlayout(fhandle_t * fhp)8264 nfsrv_dontlayout(fhandle_t *fhp)
8265 {
8266 struct nfsdontlist *mrp;
8267 int ret;
8268
8269 if (nfsrv_dontlistlen == 0)
8270 return (0);
8271 ret = 0;
8272 NFSDDONTLISTLOCK();
8273 LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
8274 if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 &&
8275 (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) {
8276 ret = 1;
8277 break;
8278 }
8279 }
8280 NFSDDONTLISTUNLOCK();
8281 return (ret);
8282 }
8283
/* Size of the buffer used to copy the DS file, one block at a time. */
#define PNFSDS_COPYSIZ 65536
/*
 * Create a new file on a DS and copy the contents of an extant DS file to it.
 * This can be used for recovery of a DS file onto a recovered DS.
 * The steps are:
 * - When called, the MDS file's vnode is locked, blocking LayoutGet operations.
 * - Disable issuing of read/write layouts for the file via the nfsdontlist,
 *   so that they will be disabled after the MDS file's vnode is unlocked.
 * - Set up the nfsrv_recalllist so that recall of read/write layouts can
 *   be done.
 * - Unlock the MDS file's vnode, so that the client(s) can perform proxied
 *   writes, LayoutCommits and LayoutReturns for the file when completing the
 *   LayoutReturn requested by the LayoutRecall callback.
 * - Issue a LayoutRecall callback for all read/write layouts and wait for
 *   them to be returned. (If the LayoutRecall callback replies
 *   NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.)
 * - Exclusively lock the MDS file's vnode. This ensures that no proxied
 *   writes are in progress or can occur during the DS file copy.
 *   It also blocks Setattr operations.
 * - Create the file on the recovered mirror.
 * - Copy the file from the operational DS.
 * - Copy any ACL from the MDS file to the new DS file.
 * - Set the modify time of the new DS file to that of the MDS file.
 * - Update the extended attribute for the MDS file.
 * - Enable issuing of rw layouts by deleting the nfsdontlist entry.
 * - The caller will unlock the MDS file's vnode allowing operations
 *   to continue normally, since it is now on the mirror again.
 *
 * Arguments, as used below:
 * vp - the MDS file's vnode, locked upon entry.
 * fvp - the locked vnode of the extant DS file that is read from.
 * dvp - the directory vnode passed to nfsrv_createdsfile() for creation
 *	of the new DS file; its mount is used for vn_start_write().
 * ds, pf - passed through to nfsrv_createdsfile().
 * wpf, mirrorcnt - the array of "mirrorcnt" pnfsdsfile structures written
 *	to the "pnfsd.dsfile" extended attribute of the MDS file.
 *
 * Returns 0 upon success and an errno value otherwise.  ENXIO is returned
 * if there is already a nfsdontlist entry for the file or if a layout is
 * not returned within the retry limit, and ESTALE if vp is doomed once it
 * has been relocked.
 * For the two early error returns (nfsvno_getfh() failure and a duplicate
 * nfsdontlist entry), the lock on vp is not touched.  For all other
 * returns, vp has been unlocked and then relocked LK_EXCLUSIVE.
 */
int
nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds,
    struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt,
    struct ucred *cred, NFSPROC_T *p)
{
	struct nfsdontlist *mrp, *nmrp;
	struct nfslayouthash *lhyp;
	struct nfslayout *lyp, *nlyp;
	struct nfslayouthead thl;
	struct mount *mp, *tvmp;
	struct acl *aclp;
	struct vattr va;
	struct timespec mtime;
	fhandle_t fh;
	vnode_t tvp;
	off_t rdpos, wrpos;
	ssize_t aresid;
	char *dat;
	int didprintf, ret, retacl, xfer;

	ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp");
	ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp");
	/*
	 * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag
	 * so that no more RW layouts will get issued.
	 */
	ret = nfsvno_getfh(vp, &fh, p);
	if (ret != 0) {
		NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret);
		return (ret);
	}
	/* Allocate the entry before the lock is acquired (M_WAITOK). */
	nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK);
	nmrp->nfsmr_flags = NFSMR_DONTLAYOUT;
	NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh));
	NFSDDONTLISTLOCK();
	LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
		if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0)
			break;
	}
	if (mrp == NULL) {
		/* No entry for this file yet, so insert the new one. */
		LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list);
		mrp = nmrp;
		nmrp = NULL;
		nfsrv_dontlistlen++;
		NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n");
	} else {
		/*
		 * There is already an entry for this file handle, so
		 * fail with ENXIO without touching the existing entry.
		 */
		NFSDDONTLISTUNLOCK();
		free(nmrp, M_NFSDSTATE);
		NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n");
		return (ENXIO);
	}
	NFSDDONTLISTUNLOCK();

	/*
	 * Search for all RW layouts for this file.  Move them to the
	 * recall list, so they can be recalled and their return noted.
	 */
	lhyp = NFSLAYOUTHASH(&fh);
	NFSDRECALLLOCK();
	NFSLOCKLAYOUT(lhyp);
	TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
		if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
		    (lyp->lay_flags & NFSLAY_RW) != 0) {
			TAILQ_REMOVE(&lhyp->list, lyp, lay_list);
			TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list);
			lyp->lay_trycnt = 0;
		}
	}
	NFSUNLOCKLAYOUT(lhyp);
	NFSDRECALLUNLOCK();

	ret = 0;
	mp = tvmp = NULL;
	didprintf = 0;
	/* "thl" collects the layouts that have been returned. */
	TAILQ_INIT(&thl);
	/* Unlock the MDS vp, so that a LayoutReturn can be done on it. */
	NFSVOPUNLOCK(vp);
	/*
	 * Now, do a recall for all layouts not yet recalled.
	 * The recall lock is dropped around each nfsrv_recalllayout() call,
	 * so the list is rescanned from the start after every recall.
	 */
tryagain:
	NFSDRECALLLOCK();
	TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
		if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
		    (lyp->lay_flags & NFSLAY_RECALL) == 0) {
			lyp->lay_flags |= NFSLAY_RECALL;
			/*
			 * The layout stateid.seqid needs to be incremented
			 * before doing a LAYOUT_RECALL callback.
			 * A seqid of 0 is skipped when it wraps.
			 */
			if (++lyp->lay_stateid.seqid == 0)
				lyp->lay_stateid.seqid = 1;
			NFSDRECALLUNLOCK();
			nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
			    &lyp->lay_fh, lyp, 0, lyp->lay_type, p);
			NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n");
			goto tryagain;
		}
	}

	/*
	 * Now wait for them to be returned.
	 * The recall lock is still held here.  The scan is restarted after
	 * each matching entry is handled, since the entry is either moved
	 * to "thl" or the lock was released while sleeping.
	 */
tryagain2:
	TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
		if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) {
			if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) {
				TAILQ_REMOVE(&nfsrv_recalllisthead, lyp,
				    lay_list);
				TAILQ_INSERT_HEAD(&thl, lyp, lay_list);
				NFSD_DEBUG(4,
				    "nfsrv_copymr: layout returned\n");
			} else {
				lyp->lay_trycnt++;
				/*
				 * NOTE(review): the return value of this
				 * sleep stays in "ret" if the loop then
				 * ends because the layout is found to be
				 * returned.  If mtx_sleep() reports a
				 * timeout with a non-zero value (presumably
				 * EWOULDBLOCK), that value would fail the
				 * copy below -- confirm.
				 */
				ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR,
				    PVFS | PCATCH, "nfsmrl", hz);
				NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n",
				    ret);
				/* Give up if interrupted by a signal. */
				if (ret == EINTR || ret == ERESTART)
					break;
				if ((lyp->lay_flags & NFSLAY_RETURNED) == 0) {
					/*
					 * Give up after 60sec and return
					 * ENXIO, failing the copymr.
					 * This layout will remain on the
					 * recalllist.  It can only be cleared
					 * by restarting the nfsd.
					 * This seems the safe way to handle
					 * it, since it cannot be safely copied
					 * with an outstanding RW layout.
					 */
					if (lyp->lay_trycnt >= 60) {
						ret = ENXIO;
						break;
					}
					/* Only complain once per call. */
					if (didprintf == 0) {
						printf("nfsrv_copymr: layout "
						    "not returned\n");
						didprintf = 1;
					}
				}
			}
			goto tryagain2;
		}
	}
	NFSDRECALLUNLOCK();
	/* We can now get rid of the layouts that have been returned. */
	TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp)
		nfsrv_freelayout(&thl, lyp);

	/*
	 * Do the vn_start_write() calls here, before the MDS vnode is
	 * locked and the tvp is created (locked) in the NFS file system
	 * that dvp is in.
	 * For tvmp, this probably isn't necessary, since it will be an
	 * NFS mount and they are not suspendable at this time.
	 */
	if (ret == 0)
		ret = vn_start_write(vp, &mp, V_WAIT | V_PCATCH);
	if (ret == 0) {
		tvmp = dvp->v_mount;
		ret = vn_start_write(NULL, &tvmp, V_WAIT | V_PCATCH);
	}

	/*
	 * LK_EXCLUSIVE lock the MDS vnode, so that any
	 * proxied writes through the MDS will be blocked until we have
	 * completed the copy and update of the extended attributes.
	 * This will also ensure that any attributes and ACL will not be
	 * changed until the copy is complete.
	 * The lock is acquired even if an error has already occurred.
	 */
	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
	if (ret == 0 && VN_IS_DOOMED(vp)) {
		NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n");
		ret = ESTALE;
	}

	/* Create the data file on the recovered DS. */
	if (ret == 0)
		ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp);

	/* Copy the DS file, if created successfully. */
	if (ret == 0) {
		/*
		 * Get any NFSv4 ACL on the MDS file, so that it can be set
		 * on the new DS file.
		 * A failure here is only logged; it does not fail the copy.
		 */
		aclp = acl_alloc(M_WAITOK | M_ZERO);
		retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
		if (retacl != 0 && retacl != ENOATTR)
			NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl);
		dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK);
		/*
		 * Malloc a block of 0s used to check for holes.
		 * It is allocated once and is not freed by this function.
		 */
		if (nfsrv_zeropnfsdat == NULL)
			nfsrv_zeropnfsdat = malloc(PNFSDS_COPYSIZ, M_TEMP,
			    M_WAITOK | M_ZERO);
		rdpos = wrpos = 0;
		/* va.va_size is used below to recognize the last block. */
		ret = VOP_GETATTR(fvp, &va, cred);
		aresid = 0;
		/* Copy until an error or a short read (aresid != 0). */
		while (ret == 0 && aresid == 0) {
			ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ,
			    rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL,
			    &aresid, p);
			xfer = PNFSDS_COPYSIZ - aresid;
			if (ret == 0 && xfer > 0) {
				rdpos += xfer;
				/*
				 * Skip the write for holes, except for the
				 * last block.
				 * The write offset is advanced either way.
				 */
				if (xfer < PNFSDS_COPYSIZ || rdpos ==
				    va.va_size || NFSBCMP(dat,
				    nfsrv_zeropnfsdat, PNFSDS_COPYSIZ) != 0)
					ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer,
					    wrpos, UIO_SYSSPACE, IO_NODELOCKED,
					    cred, NULL, NULL, p);
				if (ret == 0)
					wrpos += xfer;
			}
		}

		/* If there is an ACL and the copy succeeded, set the ACL. */
		if (ret == 0 && retacl == 0) {
			ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p);
			/*
			 * Don't consider these as errors, since VOP_GETACL()
			 * can return an ACL when they are not actually
			 * supported.  For example, for UFS, VOP_GETACL()
			 * will return a trivial ACL based on the uid/gid/mode
			 * when there is no ACL on the file.
			 * This case should be recognized as a trivial ACL
			 * by UFS's VOP_SETACL() and succeed, but...
			 */
			if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM)
				ret = 0;
		}

		if (ret == 0)
			ret = VOP_FSYNC(tvp, MNT_WAIT, p);

		/* Set the DS data file's modify time to that of the MDS file. */
		if (ret == 0)
			ret = VOP_GETATTR(vp, &va, cred);
		if (ret == 0) {
			/* Only va_mtime is set, so nothing else is changed. */
			mtime = va.va_mtime;
			VATTR_NULL(&va);
			va.va_mtime = mtime;
			ret = VOP_SETATTR(tvp, &va, cred);
		}

		/* Release the new DS file's vnode and the temporary buffers. */
		vput(tvp);
		acl_free(aclp);
		free(dat, M_TEMP);
	}
	if (tvmp != NULL)
		vn_finished_write(tvmp);

	/* Update the extended attributes for the newly created DS file. */
	if (ret == 0)
		ret = vn_extattr_set(vp, IO_NODELOCKED,
		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
		    sizeof(*wpf) * mirrorcnt, (char *)wpf, p);
	if (mp != NULL)
		vn_finished_write(mp);

	/*
	 * Get rid of the dontlist entry, so that Layouts can be issued.
	 * This is done for both the success and the failure cases.
	 */
	NFSDDONTLISTLOCK();
	LIST_REMOVE(mrp, nfsmr_list);
	NFSDDONTLISTUNLOCK();
	free(mrp, M_NFSDSTATE);
	return (ret);
}
8580
8581 /*
8582 * Create a data storage file on the recovered DS.
8583 */
8584 static int
nfsrv_createdsfile(vnode_t vp,fhandle_t * fhp,struct pnfsdsfile * pf,vnode_t dvp,struct nfsdevice * ds,struct ucred * cred,NFSPROC_T * p,vnode_t * tvpp)8585 nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
8586 vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
8587 vnode_t *tvpp)
8588 {
8589 struct vattr va, nva;
8590 int error;
8591
8592 /* Make data file name based on FH. */
8593 error = VOP_GETATTR(vp, &va, cred);
8594 if (error == 0) {
8595 /* Set the attributes for "vp" to Setattr the DS vp. */
8596 VATTR_NULL(&nva);
8597 nva.va_uid = va.va_uid;
8598 nva.va_gid = va.va_gid;
8599 nva.va_mode = va.va_mode;
8600 nva.va_size = 0;
8601 VATTR_NULL(&va);
8602 va.va_type = VREG;
8603 va.va_mode = nva.va_mode;
8604 NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf);
8605 error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL,
8606 pf->dsf_filename, cred, p, tvpp);
8607 }
8608 return (error);
8609 }
8610
/*
 * Look up the MDS file shared locked, and then get the extended attribute
 * to find the extant DS file to be copied to the new mirror.
 * If successful, *vpp is set to the MDS file's vp and *nvpp is
 * set to a DS data file for the MDS file.
 * NOTE(review): this comment used to state that both are returned
 * exclusively locked, but the lookup done here uses LOCKSHARED and the
 * lock state of *nvpp is determined by nfsrv_dsgetsockmnt() -- confirm.
 * The "buf" argument has the pnfsdsfile structure from the MDS file
 * in it and buflen is set to its length.
 *
 * Arguments, as used below:
 * mdspathp - path of the MDS file, which must be a regular file.
 * dspathp - path of the directory on the NFS mount of the DS to be copied
 *	to, or NULL so that the DS is chosen via nfsrv_dsgetsockmnt().
 * curdspathp - path of the directory on the NFS mount of the current DS,
 *	or NULL.  It may only be non-NULL when dspathp is non-NULL as well.
 * fname - passed to nfsrv_dsgetsockmnt() and copied into the pnfsdsfile
 *	entry that is appended when there is no entry to reuse.
 * pfp - set to the pnfsdsfile entry within "buf" to be used for the copy.
 * dsp, fdsp - set to the nfsdevice structures for the new and the current
 *	DS respectively; both are set to NULL first.
 *
 * Returns 0 upon success.  Otherwise, an errno value is returned (EPERM,
 * EISDIR, ENOTDIR and ENXIO are generated here; others come from namei()
 * and nfsrv_dsgetsockmnt()) and every vnode acquired here has been
 * released via vput().
 */
int
nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf,
    int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp,
    struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp,
    struct nfsdevice **fdsp)
{
	struct nameidata nd;
	struct vnode *vp, *curvp;
	struct pnfsdsfile *pf;
	struct nfsmount *nmp, *curnmp;
	int dsdir, error, mirrorcnt, ippos;

	vp = NULL;
	curvp = NULL;
	curnmp = NULL;
	*dsp = NULL;
	*fdsp = NULL;
	/* A current DS can only be specified along with a new DS. */
	if (dspathp == NULL && curdspathp != NULL)
		return (EPERM);

	/*
	 * Look up the MDS file shared locked.  The lock will be upgraded
	 * to an exclusive lock after any rw layouts have been returned.
	 */
	NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp);
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
	    mdspathp);
	error = namei(&nd);
	NFSD_DEBUG(4, "lookup=%d\n", error);
	if (error != 0)
		return (error);
	/* EISDIR is returned for anything that is not a regular file. */
	if (nd.ni_vp->v_type != VREG) {
		vput(nd.ni_vp);
		NFSD_DEBUG(4, "mdspath not reg\n");
		return (EISDIR);
	}
	vp = nd.ni_vp;

	if (curdspathp != NULL) {
		/*
		 * Look up the current DS path and find the nfsdev structure for
		 * it.
		 */
		NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp);
		NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
		    UIO_SYSSPACE, curdspathp);
		error = namei(&nd);
		NFSD_DEBUG(4, "ds lookup=%d\n", error);
		if (error != 0) {
			vput(vp);
			return (error);
		}
		if (nd.ni_vp->v_type != VDIR) {
			vput(nd.ni_vp);
			vput(vp);
			NFSD_DEBUG(4, "curdspath not dir\n");
			return (ENOTDIR);
		}
		/* The directory must be on an NFS mount. */
		if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
			vput(nd.ni_vp);
			vput(vp);
			NFSD_DEBUG(4, "curdspath not an NFS mount\n");
			return (ENXIO);
		}
		curnmp = VFSTONFS(nd.ni_vp->v_mount);

		/* Search the nfsdev list for a match. */
		NFSDDSLOCK();
		*fdsp = nfsv4_findmirror(curnmp);
		NFSDDSUNLOCK();
		if (*fdsp == NULL)
			curnmp = NULL;
		if (curnmp == NULL) {
			vput(nd.ni_vp);
			vput(vp);
			NFSD_DEBUG(4, "mdscopymr: no current ds\n");
			return (ENXIO);
		}
		/* Keep the current DS directory vnode until after the lookup. */
		curvp = nd.ni_vp;
	}

	if (dspathp != NULL) {
		/* Look up the nfsdev path and find the nfsdev structure. */
		NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp);
		NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
		    UIO_SYSSPACE, dspathp);
		error = namei(&nd);
		NFSD_DEBUG(4, "ds lookup=%d\n", error);
		if (error != 0) {
			vput(vp);
			if (curvp != NULL)
				vput(curvp);
			return (error);
		}
		/*
		 * The new DS must be a directory and must not be the same
		 * vnode as the current DS (EPERM for the latter case).
		 */
		if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) {
			vput(nd.ni_vp);
			vput(vp);
			if (curvp != NULL)
				vput(curvp);
			NFSD_DEBUG(4, "dspath not dir\n");
			if (nd.ni_vp == curvp)
				return (EPERM);
			return (ENOTDIR);
		}
		/* The directory must be on an NFS mount. */
		if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
			vput(nd.ni_vp);
			vput(vp);
			if (curvp != NULL)
				vput(curvp);
			NFSD_DEBUG(4, "dspath not an NFS mount\n");
			return (ENXIO);
		}
		nmp = VFSTONFS(nd.ni_vp->v_mount);

		/*
		 * Search the nfsdevice list for a match.  If curnmp == NULL,
		 * this is a recovery and there must be a mirror.
		 */
		NFSDDSLOCK();
		if (curnmp == NULL)
			*dsp = nfsrv_findmirroredds(nmp);
		else
			*dsp = nfsv4_findmirror(nmp);
		NFSDDSUNLOCK();
		if (*dsp == NULL) {
			vput(nd.ni_vp);
			vput(vp);
			if (curvp != NULL)
				vput(curvp);
			NFSD_DEBUG(4, "mdscopymr: no ds\n");
			return (ENXIO);
		}
	} else {
		/*
		 * No new DS was specified.  nd.ni_vp == NULL is used below
		 * to recognize this case.
		 */
		nd.ni_vp = NULL;
		nmp = NULL;
	}

	/*
	 * Get a vp for an available DS data file using the extended
	 * attribute on the MDS file.
	 * If there is a valid entry for the new DS in the extended attribute
	 * on the MDS file (as checked via the nmp argument),
	 * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur.
	 */
	error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, &mirrorcnt, p,
	    NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir);
	if (curvp != NULL)
		vput(curvp);
	if (nd.ni_vp == NULL) {
		/*
		 * No new DS path was given, so use the nmp that was filled
		 * in by nfsrv_dsgetsockmnt() to find the nfsdevice.
		 */
		if (error == 0 && nmp != NULL) {
			/* Search the nfsdev list for a match. */
			NFSDDSLOCK();
			*dsp = nfsrv_findmirroredds(nmp);
			NFSDDSUNLOCK();
		}
		if (error == 0 && (nmp == NULL || *dsp == NULL)) {
			if (nvpp != NULL && *nvpp != NULL) {
				vput(*nvpp);
				*nvpp = NULL;
			}
			error = ENXIO;
		}
	} else
		vput(nd.ni_vp);

	/*
	 * When dspathp != NULL and curdspathp == NULL, this is a recovery
	 * and is only allowed if there is a 0.0.0.0 IP address entry.
	 * When curdspathp != NULL, the ippos will be set to that entry.
	 */
	if (error == 0 && dspathp != NULL && ippos == -1) {
		if (nvpp != NULL && *nvpp != NULL) {
			vput(*nvpp);
			*nvpp = NULL;
		}
		error = ENXIO;
	}
	if (error == 0) {
		*vpp = vp;

		pf = (struct pnfsdsfile *)buf;
		if (ippos == -1) {
			/*
			 * If no zeroip pnfsdsfile, add one.
			 * It is appended after the entries already in
			 * "buf" and *buflenp is grown by one entry.
			 * NOTE(review): assumes "buf" has room for the
			 * additional entry -- confirm against the callers.
			 */
			ippos = *buflenp / sizeof(*pf);
			*buflenp += sizeof(*pf);
			pf += ippos;
			pf->dsf_dir = dsdir;
			strlcpy(pf->dsf_filename, fname,
			    sizeof(pf->dsf_filename));
		} else
			pf += ippos;
		*pfp = pf;
	} else
		vput(vp);
	return (error);
}
8815
8816 /*
8817 * Search for a matching pnfsd mirror device structure, base on the nmp arg.
8818 * Return one if found, NULL otherwise.
8819 */
8820 static struct nfsdevice *
nfsrv_findmirroredds(struct nfsmount * nmp)8821 nfsrv_findmirroredds(struct nfsmount *nmp)
8822 {
8823 struct nfsdevice *ds, *fndds;
8824 int fndmirror;
8825
8826 mtx_assert(NFSDDSMUTEXPTR, MA_OWNED);
8827 /*
8828 * Search the DS server list for a match with nmp.
8829 * Remove the DS entry if found and there is a mirror.
8830 */
8831 fndds = NULL;
8832 fndmirror = 0;
8833 if (nfsrv_devidcnt == 0)
8834 return (fndds);
8835 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8836 if (ds->nfsdev_nmp == nmp) {
8837 NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n");
8838 fndds = ds;
8839 break;
8840 }
8841 }
8842 if (fndds == NULL)
8843 return (fndds);
8844 if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
8845 fndmirror = 1;
8846 else if (fndds->nfsdev_mdsisset != 0) {
8847 /* For the fsid is set case, search for a mirror. */
8848 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8849 if (ds != fndds && ds->nfsdev_nmp != NULL &&
8850 ds->nfsdev_mdsisset != 0 &&
8851 fsidcmp(&ds->nfsdev_mdsfsid,
8852 &fndds->nfsdev_mdsfsid) == 0) {
8853 fndmirror = 1;
8854 break;
8855 }
8856 }
8857 }
8858 if (fndmirror == 0) {
8859 NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n");
8860 return (NULL);
8861 }
8862 return (fndds);
8863 }
8864
8865 /*
8866 * Mark the appropriate devid and all associated layout as "out of space".
8867 */
8868 void
nfsrv_marknospc(char * devid,bool setit)8869 nfsrv_marknospc(char *devid, bool setit)
8870 {
8871 struct nfsdevice *ds;
8872 struct nfslayout *lyp;
8873 struct nfslayouthash *lhyp;
8874 int i;
8875
8876 NFSDDSLOCK();
8877 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8878 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0) {
8879 NFSD_DEBUG(1, "nfsrv_marknospc: devid %d\n", setit);
8880 ds->nfsdev_nospc = setit;
8881 }
8882 }
8883 NFSDDSUNLOCK();
8884
8885 for (i = 0; i < nfsrv_layouthashsize; i++) {
8886 lhyp = &nfslayouthash[i];
8887 NFSLOCKLAYOUT(lhyp);
8888 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
8889 if (NFSBCMP(lyp->lay_deviceid, devid,
8890 NFSX_V4DEVICEID) == 0) {
8891 NFSD_DEBUG(1, "nfsrv_marknospc: layout %d\n",
8892 setit);
8893 if (setit)
8894 lyp->lay_flags |= NFSLAY_NOSPC;
8895 else
8896 lyp->lay_flags &= ~NFSLAY_NOSPC;
8897 }
8898 }
8899 NFSUNLOCKLAYOUT(lhyp);
8900 }
8901 }
8902
8903 /*
8904 * Check to see if SP4_MACH_CRED is in use and, if it is, check that the
8905 * correct machine credential is being used.
8906 */
8907 static int
nfsrv_checkmachcred(int op,struct nfsrv_descript * nd,struct nfsclient * clp)8908 nfsrv_checkmachcred(int op, struct nfsrv_descript *nd, struct nfsclient *clp)
8909 {
8910
8911 if ((clp->lc_flags & LCL_MACHCRED) == 0 ||
8912 !NFSISSET_OPBIT(&clp->lc_mustops, op))
8913 return (0);
8914 KASSERT((nd->nd_flag & ND_NFSV41) != 0,
8915 ("nfsrv_checkmachcred: MachCred for NFSv4.0"));
8916 if ((nd->nd_flag & (ND_GSSINTEGRITY | ND_GSSPRIVACY)) != 0 &&
8917 nd->nd_princlen == clp->lc_namelen &&
8918 !NFSBCMP(nd->nd_principal, clp->lc_name, nd->nd_princlen))
8919 return (0);
8920 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
8921 }
8922
8923 /*
8924 * Issue a delegation and, optionally set rflagsp for why not.
8925 */
8926 static void
nfsrv_issuedelegation(struct vnode * vp,struct nfsclient * clp,struct nfsrv_descript * nd,int delegate,int writedeleg,int readonly,u_quad_t filerev,uint64_t rdonly,struct nfsstate ** new_delegp,struct nfsstate * new_stp,struct nfslockfile * lfp,uint32_t * rflagsp,nfsv4stateid_t * delegstateidp)8927 nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp,
8928 struct nfsrv_descript *nd, int delegate, int writedeleg, int readonly,
8929 u_quad_t filerev, uint64_t rdonly, struct nfsstate **new_delegp,
8930 struct nfsstate *new_stp, struct nfslockfile *lfp, uint32_t *rflagsp,
8931 nfsv4stateid_t *delegstateidp)
8932 {
8933 struct nfsstate *up_deleg, *new_deleg;
8934
8935 new_deleg = *new_delegp;
8936 up_deleg = LIST_FIRST(&lfp->lf_deleg);
8937 if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
8938 *rflagsp |= NFSV4OPEN_WDNOTWANTED;
8939 else if (nfsrv_issuedelegs == 0)
8940 *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
8941 else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
8942 *rflagsp |= NFSV4OPEN_WDRESOURCE;
8943 else if (delegate == 0 || !NFSVNO_DELEGOK(vp) ||
8944 (writedeleg == 0 && (readonly == 0 ||
8945 (new_stp->ls_flags & NFSLCK_WANTWDELEG) != 0)) ||
8946 (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
8947 LCL_CALLBACKSON) {
8948 /* Is this a downgrade attempt? */
8949 if (up_deleg != NULL && up_deleg->ls_clp == clp &&
8950 (up_deleg->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
8951 (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0)
8952 *rflagsp |= NFSV4OPEN_WDNOTSUPPDOWNGRADE;
8953 else
8954 *rflagsp |= NFSV4OPEN_WDCONTENTION;
8955 } else if (up_deleg != NULL &&
8956 (up_deleg->ls_flags & NFSLCK_DELEGREAD) != 0 &&
8957 (new_stp->ls_flags & NFSLCK_WANTWDELEG) != 0) {
8958 /* This is an atomic upgrade. */
8959 up_deleg->ls_stateid.seqid++;
8960 delegstateidp->seqid = up_deleg->ls_stateid.seqid;
8961 delegstateidp->other[0] = up_deleg->ls_stateid.other[0];
8962 delegstateidp->other[1] = up_deleg->ls_stateid.other[1];
8963 delegstateidp->other[2] = up_deleg->ls_stateid.other[2];
8964 up_deleg->ls_flags = (NFSLCK_DELEGWRITE |
8965 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
8966 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
8967 nfsrv_writedelegcnt++;
8968 } else {
8969 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
8970 new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
8971 = clp->lc_clientid.lval[0];
8972 new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
8973 = clp->lc_clientid.lval[1];
8974 new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
8975 = nfsrv_nextstateindex(clp);
8976 if (writedeleg && !rdonly &&
8977 (nfsrv_writedelegifpos || !readonly) &&
8978 (new_stp->ls_flags & (NFSLCK_WANTRDELEG |
8979 NFSLCK_WANTWDELEG)) != NFSLCK_WANTRDELEG) {
8980 new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
8981 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
8982 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
8983 nfsrv_writedelegcnt++;
8984 } else {
8985 new_deleg->ls_flags = (NFSLCK_DELEGREAD |
8986 NFSLCK_READACCESS);
8987 *rflagsp |= NFSV4OPEN_READDELEGATE;
8988 }
8989 new_deleg->ls_uid = new_stp->ls_uid;
8990 new_deleg->ls_lfp = lfp;
8991 new_deleg->ls_clp = clp;
8992 new_deleg->ls_filerev = filerev;
8993 new_deleg->ls_compref = nd->nd_compref;
8994 new_deleg->ls_lastrecall = 0;
8995 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
8996 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid),
8997 new_deleg, ls_hash);
8998 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
8999 *new_delegp = NULL;
9000 NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
9001 nfsrv_openpluslock++;
9002 nfsrv_delegatecnt++;
9003 }
9004 }
9005