1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2009 Rick Macklem, University of Guelph
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 */
29
30 #include <sys/cdefs.h>
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 #include <sys/extattr.h>
34 #include <fs/nfs/nfsport.h>
35
36 int nfsrv_issuedelegs = 0;
37 int nfsrv_dolocallocks = 0;
38 struct nfsv4lock nfsv4rootfs_lock;
39 time_t nfsdev_time = 0;
40 int nfsrv_layouthashsize;
41 volatile int nfsrv_layoutcnt = 0;
42
43 VNET_DEFINE(struct nfsrv_stablefirst, nfsrv_stablefirst);
44
45 VNET_DECLARE(int, nfsrv_numnfsd);
46 VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p);
47
48 extern uint32_t nfs_srvmaxio;
49 extern int nfsrv_lease;
50 extern struct timeval nfsboottime;
51 extern u_int32_t newnfs_true, newnfs_false;
52 extern struct mtx nfsrv_dslock_mtx;
53 extern struct mtx nfsrv_recalllock_mtx;
54 extern struct mtx nfsrv_dontlistlock_mtx;
55 extern int nfsd_debuglevel;
56 extern u_int nfsrv_dsdirsize;
57 extern struct nfsdevicehead nfsrv_devidhead;
58 extern int nfsrv_doflexfile;
59 extern int nfsrv_maxpnfsmirror;
60 NFSV4ROOTLOCKMUTEX;
61 NFSSTATESPINLOCK;
62 extern struct nfsdontlisthead nfsrv_dontlisthead;
63 extern volatile int nfsrv_devidcnt;
64 extern struct nfslayouthead nfsrv_recalllisthead;
65 extern char *nfsrv_zeropnfsdat;
66 extern uint64_t nfsrv_stripesiz;
67
68 SYSCTL_DECL(_vfs_nfsd);
69 int nfsrv_statehashsize = NFSSTATEHASHSIZE;
70 SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
71 &nfsrv_statehashsize, 0,
72 "Size of state hash table set via loader.conf");
73
74 int nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
75 SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
76 &nfsrv_clienthashsize, 0,
77 "Size of client hash table set via loader.conf");
78
79 int nfsrv_lockhashsize = NFSLOCKHASHSIZE;
80 SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
81 &nfsrv_lockhashsize, 0,
82 "Size of file handle hash table set via loader.conf");
83
84 int nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
85 SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
86 &nfsrv_sessionhashsize, 0,
87 "Size of session hash table set via loader.conf");
88
89 int nfsrv_layouthighwater = NFSLAYOUTHIGHWATER;
90 SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN,
91 &nfsrv_layouthighwater, 0,
92 "High water mark for number of layouts set via loader.conf");
93
94 static int nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
95 SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
96 &nfsrv_v4statelimit, 0,
97 "High water limit for NFSv4 opens+locks+delegations");
98
99 static int nfsrv_writedelegifpos = 0;
100 SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
101 &nfsrv_writedelegifpos, 0,
102 "Issue a write delegation for read opens if possible");
103
104 static int nfsrv_allowreadforwriteopen = 1;
105 SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW,
106 &nfsrv_allowreadforwriteopen, 0,
107 "Allow Reads to be done with Write Access StateIDs");
108
109 int nfsrv_pnfsatime = 0;
110 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW,
111 &nfsrv_pnfsatime, 0,
112 "For pNFS service, do Getattr ops to keep atime up-to-date");
113
114 int nfsrv_flexlinuxhack = 0;
115 SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW,
116 &nfsrv_flexlinuxhack, 0,
117 "For Linux clients, hack around Flex File Layout bug");
118
119 VNET_DEFINE_STATIC(bool, nfsd_disable_grace) = false;
120 SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, testing_disable_grace,
121 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nfsd_disable_grace),
122 0, "Disable grace for testing");
123
124 /*
125 * Hash lists for nfs V4.
126 */
127 VNET_DEFINE(struct nfsclienthashhead *, nfsclienthash);
128 VNET_DEFINE(struct nfslockhashhead *, nfslockhash);
129 VNET_DEFINE(struct nfssessionhash *, nfssessionhash);
130
131 struct nfslayouthash *nfslayouthash;
132 volatile int nfsrv_dontlistlen = 0;
133
134 static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
135 static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
136 static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
137 static int nfsrv_nogsscallback = 0;
138 static volatile int nfsrv_writedelegcnt = 0;
139 static int nfsrv_faildscnt;
140
141 VNET_DEFINE_STATIC(time_t, nfsrvboottime);
142
143 /* local functions */
144 static void nfsrv_dumpaclient(struct nfsclient *clp,
145 struct nfsd_dumpclients *dumpp);
146 static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
147 NFSPROC_T *p);
148 static void nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
149 NFSPROC_T *p);
150 static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
151 NFSPROC_T *p);
152 static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
153 int cansleep, NFSPROC_T *p);
154 static void nfsrv_freenfslock(struct nfslock *lop);
155 static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
156 static void nfsrv_freedeleg(struct nfsstate *);
157 static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
158 u_int32_t flags, struct nfsstate **stpp);
159 static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
160 struct nfsstate **stpp);
161 static int nfsrv_getlockfh(vnode_t vp, u_short flags,
162 struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
163 static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
164 struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
165 static void nfsrv_insertlock(struct nfslock *new_lop,
166 struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
167 static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
168 struct nfslock **other_lopp, struct nfslockfile *lfp);
169 static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
170 nfsv4stateid_t *stateidp, int specialid);
171 static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
172 u_int32_t flags);
173 static int nfsrv_docallback(struct nfsclient *clp, int procnum,
174 nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
175 struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p);
176 static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
177 uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
178 int *slotposp);
179 static u_int32_t nfsrv_nextclientindex(void);
180 static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
181 static void nfsrv_markstable(struct nfsclient *clp);
182 static void nfsrv_markreclaim(struct nfsclient *clp);
183 static int nfsrv_checkstable(struct nfsclient *clp);
184 static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct
185 vnode *vp, NFSPROC_T *p);
186 static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
187 NFSPROC_T *p, vnode_t vp);
188 static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
189 struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
190 static int nfsrv_notsamecredname(int op, struct nfsrv_descript *nd,
191 struct nfsclient *clp);
192 static time_t nfsrv_leaseexpiry(void);
193 static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
194 static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
195 struct nfsstate *stp, struct nfsrvcache *op);
196 static int nfsrv_nootherstate(struct nfsstate *stp);
197 static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
198 uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
199 static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
200 uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
201 static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
202 int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
203 NFSPROC_T *p);
204 static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
205 NFSPROC_T *p);
206 static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
207 uint64_t first, uint64_t end);
208 static void nfsrv_locklf(struct nfslockfile *lfp);
209 static void nfsrv_unlocklf(struct nfslockfile *lfp);
210 static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
211 static int nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
212 uint8_t *sessionid, bool locked, SVCXPRT **old_xprtp);
213 static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
214 int dont_replycache, struct nfsdsession **sepp, int *slotposp);
215 static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
216 static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
217 nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p);
218 static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp);
219 static void nfsrv_freelayoutlist(nfsquad_t clientid);
220 static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype,
221 int iomode);
222 static void nfsrv_freealllayouts(void);
223 static void nfsrv_freedevid(struct nfsdevice *ds);
224 static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
225 struct nfsdevice **dsp);
226 static void nfsrv_deleteds(struct nfsdevice *fndds);
227 static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost);
228 static void nfsrv_freealldevids(void);
229 static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp,
230 int maxcnt, NFSPROC_T *p);
231 static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp,
232 fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype,
233 NFSPROC_T *p);
234 static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
235 NFSPROC_T *, struct nfslayout **lypp);
236 static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt);
237 static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode,
238 fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
239 static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode,
240 int mirrorcnt, uint64_t stripesiz, int stripecnt, fhandle_t *fhp,
241 fhandle_t *dsfhp, char *devid, fsid_t fs);
242 static int nfsrv_dontlayout(fhandle_t *fhp);
243 static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
244 vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
245 vnode_t *tvpp);
246 static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp);
247 static int nfsrv_checkmachcred(int op, struct nfsrv_descript *nd,
248 struct nfsclient *clp);
249 static void nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp,
250 struct nfsrv_descript *nd, int delegate, int writedeleg, int readonly,
251 u_quad_t filerev, uint64_t rdonly, struct nfsstate **new_delegp,
252 struct nfsstate *new_stp, struct nfslockfile *lfp, uint32_t *rflagsp,
253 nfsv4stateid_t *delegstateidp);
254 static void nfsrv_clientlock(bool mlocked);
255 static void nfsrv_clientunlock(bool mlocked);
256
257 /*
258 * Lock the client structure, either with the mutex or the exclusive nfsd lock.
259 */
260 static void
nfsrv_clientlock(bool mlocked)261 nfsrv_clientlock(bool mlocked)
262 {
263 int igotlock;
264
265 if (mlocked) {
266 NFSLOCKSTATE();
267 } else {
268 NFSLOCKV4ROOTMUTEX();
269 nfsv4_relref(&nfsv4rootfs_lock);
270 do {
271 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
272 NFSV4ROOTLOCKMUTEXPTR, NULL);
273 } while (!igotlock);
274 NFSUNLOCKV4ROOTMUTEX();
275 }
276 }
277
278 /*
279 * Unlock the client structure.
280 */
281 static void
nfsrv_clientunlock(bool mlocked)282 nfsrv_clientunlock(bool mlocked)
283 {
284
285 if (mlocked) {
286 NFSUNLOCKSTATE();
287 } else {
288 NFSLOCKV4ROOTMUTEX();
289 nfsv4_unlock(&nfsv4rootfs_lock, 1);
290 NFSUNLOCKV4ROOTMUTEX();
291 }
292 }
293
294 /*
295 * Scan the client list for a match and either return the current one,
296 * create a new entry or return an error.
297 * If returning a non-error, the clp structure must either be linked into
298 * the client list or free'd.
299 */
300 int
nfsrv_setclient(struct nfsrv_descript * nd,struct nfsclient ** new_clpp,nfsquad_t * clientidp,nfsquad_t * confirmp,NFSPROC_T * p)301 nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
302 nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
303 {
304 struct nfsclient *clp = NULL, *new_clp = *new_clpp;
305 int i, error = 0, ret;
306 struct nfsstate *stp, *tstp;
307 #ifdef INET
308 struct sockaddr_in *sin, *rin;
309 #endif
310 #ifdef INET6
311 struct sockaddr_in6 *sin6, *rin6;
312 #endif
313 struct nfsdsession *sep, *nsep;
314 SVCXPRT *old_xprt;
315 struct nfssessionhead old_sess;
316 int zapit = 0, gotit, hasstate = 0;
317 bool mlocked;
318 static u_int64_t confirm_index = 0;
319
320 /*
321 * Check for state resource limit exceeded.
322 */
323 if (nfsrv_openpluslock > nfsrv_v4statelimit) {
324 error = NFSERR_RESOURCE;
325 goto out;
326 }
327
328 if (nfsrv_issuedelegs == 0 ||
329 ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
330 /*
331 * Don't do callbacks when delegations are disabled or
332 * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
333 * If establishing a callback connection is attempted
334 * when a firewall is blocking the callback path, the
335 * server may wait too long for the connect attempt to
336 * succeed during the Open. Some clients, such as Linux,
337 * may timeout and give up on the Open before the server
338 * replies. Also, since AUTH_GSS callbacks are not
339 * yet interoperability tested, they might cause the
340 * server to crap out, if they get past the Init call to
341 * the client.
342 */
343 new_clp->lc_program = 0;
344
345 mlocked = true;
346 if (nfsrv_dolocallocks != 0)
347 mlocked = false;
348 /* Lock out other nfsd threads */
349 nfsrv_clientlock(mlocked);
350
351 /*
352 * Search for a match in the client list.
353 */
354 gotit = i = 0;
355 while (i < nfsrv_clienthashsize && !gotit) {
356 LIST_FOREACH(clp, &VNET(nfsclienthash)[i], lc_hash) {
357 if (new_clp->lc_idlen == clp->lc_idlen &&
358 !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
359 gotit = 1;
360 break;
361 }
362 }
363 if (gotit == 0)
364 i++;
365 }
366 old_xprt = NULL;
367 if (!gotit ||
368 (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
369 if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
370 /*
371 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
372 * client is trying to update a confirmed clientid.
373 */
374 nfsrv_clientunlock(mlocked);
375 confirmp->lval[1] = 0;
376 error = NFSERR_NOENT;
377 goto out;
378 }
379 /*
380 * Get rid of the old one.
381 */
382 if (i != nfsrv_clienthashsize) {
383 LIST_REMOVE(clp, lc_hash);
384 if (mlocked)
385 nfsrv_cleanclient(clp, p, true, &old_xprt);
386 else
387 nfsrv_cleanclient(clp, p, false, NULL);
388 nfsrv_freedeleglist(&clp->lc_deleg);
389 nfsrv_freedeleglist(&clp->lc_olddeleg);
390 zapit = 1;
391 }
392 /*
393 * Add it after assigning a client id to it.
394 */
395 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
396 if ((nd->nd_flag & ND_NFSV41) != 0) {
397 confirmp->lval[0] = ++confirm_index;
398 new_clp->lc_confirm.lval[0] = confirmp->lval[0] - 1;
399 } else
400 confirmp->qval = new_clp->lc_confirm.qval =
401 ++confirm_index;
402 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
403 VNET(nfsrvboottime);
404 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
405 nfsrv_nextclientindex();
406 new_clp->lc_stateindex = 0;
407 new_clp->lc_statemaxindex = 0;
408 new_clp->lc_prevsess = 0;
409 new_clp->lc_cbref = 0;
410 new_clp->lc_expiry = nfsrv_leaseexpiry();
411 LIST_INIT(&new_clp->lc_open);
412 LIST_INIT(&new_clp->lc_deleg);
413 LIST_INIT(&new_clp->lc_olddeleg);
414 LIST_INIT(&new_clp->lc_session);
415 for (i = 0; i < nfsrv_statehashsize; i++)
416 LIST_INIT(&new_clp->lc_stateid[i]);
417 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
418 lc_hash);
419 VNET(nfsstatsv1_p)->srvclients++;
420 nfsrv_openpluslock++;
421 nfsrv_clients++;
422 nfsrv_clientunlock(mlocked);
423 if (zapit != 0) {
424 if (old_xprt != NULL)
425 SVC_RELEASE(old_xprt);
426 nfsrv_zapclient(clp, p);
427 }
428 *new_clpp = NULL;
429 goto out;
430 }
431
432 /*
433 * Now, handle the cases where the id is already issued.
434 */
435 if (nfsrv_notsamecredname(NFSV4OP_EXCHANGEID, nd, clp)) {
436 /*
437 * Check to see if there is expired state that should go away.
438 */
439 if (clp->lc_expiry < NFSD_MONOSEC &&
440 (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
441 if (mlocked)
442 nfsrv_cleanclient(clp, p, true, &old_xprt);
443 else
444 nfsrv_cleanclient(clp, p, false, NULL);
445 nfsrv_freedeleglist(&clp->lc_deleg);
446 }
447
448 /*
449 * If there is outstanding state, then reply NFSERR_CLIDINUSE per
450 * RFC3530 Sec. 8.1.2 last para.
451 */
452 if (!LIST_EMPTY(&clp->lc_deleg)) {
453 hasstate = 1;
454 } else if (LIST_EMPTY(&clp->lc_open)) {
455 hasstate = 0;
456 } else {
457 hasstate = 0;
458 /* Look for an Open on the OpenOwner */
459 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
460 if (!LIST_EMPTY(&stp->ls_open)) {
461 hasstate = 1;
462 break;
463 }
464 }
465 }
466 if (hasstate) {
467 /*
468 * If the uid doesn't match, return NFSERR_CLIDINUSE after
469 * filling out the correct ipaddr and portnum.
470 */
471 switch (clp->lc_req.nr_nam->sa_family) {
472 #ifdef INET
473 case AF_INET:
474 sin = (struct sockaddr_in *)new_clp->lc_req.nr_nam;
475 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
476 sin->sin_addr.s_addr = rin->sin_addr.s_addr;
477 sin->sin_port = rin->sin_port;
478 break;
479 #endif
480 #ifdef INET6
481 case AF_INET6:
482 sin6 = (struct sockaddr_in6 *)new_clp->lc_req.nr_nam;
483 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
484 sin6->sin6_addr = rin6->sin6_addr;
485 sin6->sin6_port = rin6->sin6_port;
486 break;
487 #endif
488 }
489 nfsrv_clientunlock(mlocked);
490 if (old_xprt != NULL)
491 SVC_RELEASE(old_xprt);
492 error = NFSERR_CLIDINUSE;
493 goto out;
494 }
495 }
496
497 if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
498 /*
499 * If the verifier has changed, the client has rebooted
500 * and a new client id is issued. The old state info
501 * can be thrown away once the SetClientID_Confirm or
502 * Create_Session that confirms the clientid occurs.
503 */
504 LIST_REMOVE(clp, lc_hash);
505
506 LIST_NEWHEAD(&old_sess, &clp->lc_session, sess_list);
507
508 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
509 if ((nd->nd_flag & ND_NFSV41) != 0) {
510 confirmp->lval[0] = ++confirm_index;
511 new_clp->lc_confirm.lval[0] = confirmp->lval[0] - 1;
512 } else
513 confirmp->qval = new_clp->lc_confirm.qval =
514 ++confirm_index;
515 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
516 VNET(nfsrvboottime);
517 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
518 nfsrv_nextclientindex();
519 new_clp->lc_stateindex = 0;
520 new_clp->lc_statemaxindex = 0;
521 new_clp->lc_prevsess = 0;
522 new_clp->lc_cbref = 0;
523 new_clp->lc_expiry = nfsrv_leaseexpiry();
524
525 /*
526 * Save the state until confirmed.
527 */
528 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
529 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
530 tstp->ls_clp = new_clp;
531 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
532 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
533 tstp->ls_clp = new_clp;
534 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
535 ls_list);
536 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
537 tstp->ls_clp = new_clp;
538 for (i = 0; i < nfsrv_statehashsize; i++) {
539 LIST_NEWHEAD(&new_clp->lc_stateid[i],
540 &clp->lc_stateid[i], ls_hash);
541 LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
542 tstp->ls_clp = new_clp;
543 }
544 LIST_INIT(&new_clp->lc_session);
545 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
546 lc_hash);
547 VNET(nfsstatsv1_p)->srvclients++;
548 nfsrv_openpluslock++;
549 nfsrv_clients++;
550 if (!mlocked) {
551 nfsrv_clientunlock(mlocked);
552 NFSLOCKSTATE();
553 }
554
555 /*
556 * Must wait until any outstanding callback on the old clp
557 * completes.
558 */
559 while (clp->lc_cbref) {
560 clp->lc_flags |= LCL_WAKEUPWANTED;
561 (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PVFS,
562 "nfsd clp", 10 * hz);
563 }
564 NFSUNLOCKSTATE();
565 if (old_xprt != NULL)
566 SVC_RELEASE(old_xprt);
567 /* Get rid of all sessions on this clientid. */
568 LIST_FOREACH_SAFE(sep, &old_sess, sess_list, nsep) {
569 ret = nfsrv_freesession(NULL, sep, NULL, false, NULL);
570 if (ret != 0)
571 printf("nfsrv_setclient: verifier changed free"
572 " session failed=%d\n", ret);
573 }
574
575 nfsrv_zapclient(clp, p);
576 *new_clpp = NULL;
577 goto out;
578 }
579
580 /* For NFSv4.1, mark that we found a confirmed clientid. */
581 if ((nd->nd_flag & ND_NFSV41) != 0) {
582 clientidp->lval[0] = clp->lc_clientid.lval[0];
583 clientidp->lval[1] = clp->lc_clientid.lval[1];
584 confirmp->lval[0] = 0; /* Ignored by client */
585 confirmp->lval[1] = 1;
586 } else {
587 /*
588 * id and verifier match, so update the net address info
589 * and get rid of any existing callback authentication
590 * handle, so a new one will be acquired.
591 */
592 LIST_REMOVE(clp, lc_hash);
593 new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
594 new_clp->lc_expiry = nfsrv_leaseexpiry();
595 confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
596 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
597 clp->lc_clientid.lval[0];
598 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
599 clp->lc_clientid.lval[1];
600 new_clp->lc_delegtime = clp->lc_delegtime;
601 new_clp->lc_stateindex = clp->lc_stateindex;
602 new_clp->lc_statemaxindex = clp->lc_statemaxindex;
603 new_clp->lc_cbref = 0;
604 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
605 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
606 tstp->ls_clp = new_clp;
607 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
608 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
609 tstp->ls_clp = new_clp;
610 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
611 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
612 tstp->ls_clp = new_clp;
613 for (i = 0; i < nfsrv_statehashsize; i++) {
614 LIST_NEWHEAD(&new_clp->lc_stateid[i],
615 &clp->lc_stateid[i], ls_hash);
616 LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
617 tstp->ls_clp = new_clp;
618 }
619 LIST_INIT(&new_clp->lc_session);
620 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
621 lc_hash);
622 VNET(nfsstatsv1_p)->srvclients++;
623 nfsrv_openpluslock++;
624 nfsrv_clients++;
625 }
626 if (!mlocked)
627 nfsrv_clientunlock(mlocked);
628
629 if ((nd->nd_flag & ND_NFSV41) == 0) {
630 /*
631 * Must wait until any outstanding callback on the old clp
632 * completes.
633 */
634 if (!mlocked)
635 NFSLOCKSTATE();
636 while (clp->lc_cbref) {
637 clp->lc_flags |= LCL_WAKEUPWANTED;
638 (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PVFS,
639 "nfsdclp", 10 * hz);
640 }
641 NFSUNLOCKSTATE();
642 if (old_xprt != NULL)
643 SVC_RELEASE(old_xprt);
644 nfsrv_zapclient(clp, p);
645 *new_clpp = NULL;
646 } else {
647 if (mlocked)
648 NFSUNLOCKSTATE();
649 if (old_xprt != NULL)
650 SVC_RELEASE(old_xprt);
651 }
652
653 out:
654 NFSEXITCODE2(error, nd);
655 return (error);
656 }
657
658 /*
659 * Check to see if the client id exists and optionally confirm it.
660 */
661 int
nfsrv_getclient(nfsquad_t clientid,int opflags,struct nfsclient ** clpp,struct nfsdsession * nsep,nfsquad_t confirm,uint32_t cbprogram,struct nfsrv_descript * nd,NFSPROC_T * p)662 nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
663 struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
664 struct nfsrv_descript *nd, NFSPROC_T *p)
665 {
666 struct nfsclient *clp;
667 struct nfsstate *stp;
668 int i;
669 struct nfsclienthashhead *hp;
670 int error = 0, doneok, igotlock;
671 struct nfssessionhash *shp;
672 struct nfsdsession *sep;
673 uint64_t sessid[2];
674 CLIENT *client;
675 SVCXPRT *old_xprt;
676 bool mlocked, sess_replay;
677 static uint64_t next_sess = 0;
678
679 if (clpp)
680 *clpp = NULL;
681 if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
682 opflags != CLOPS_RENEW) && VNET(nfsrvboottime) !=
683 clientid.lval[0]) {
684 error = NFSERR_STALECLIENTID;
685 goto out;
686 }
687
688 /*
689 * If called with opflags == CLOPS_RENEW, the State Lock is
690 * already held. Otherwise, we need to get either that or,
691 * for the case of Confirm, lock out the nfsd threads.
692 */
693 client = NULL;
694 old_xprt = NULL;
695 mlocked = true;
696 if (nfsrv_dolocallocks != 0)
697 mlocked = false;
698 if (opflags & CLOPS_CONFIRM) {
699 if (nsep != NULL &&
700 (nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
701 client = (struct __rpc_client *)
702 clnt_bck_create(nd->nd_xprt->xp_socket,
703 cbprogram, NFSV4_CBVERS);
704 if (mlocked) {
705 nfsrv_clientlock(mlocked);
706 } else {
707 NFSLOCKV4ROOTMUTEX();
708 nfsv4_relref(&nfsv4rootfs_lock);
709 do {
710 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1,
711 NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
712 } while (!igotlock);
713 }
714 /*
715 * Create a new sessionid here, since we need to do it where
716 * there is a mutex held to serialize update of next_sess.
717 */
718 if ((nd->nd_flag & ND_NFSV41) != 0) {
719 sessid[0] = ++next_sess;
720 sessid[1] = clientid.qval;
721 }
722 if (!mlocked)
723 NFSUNLOCKV4ROOTMUTEX();
724 } else if (opflags != CLOPS_RENEW) {
725 NFSLOCKSTATE();
726 }
727
728 /* For NFSv4.1, the clp is acquired from the associated session. */
729 if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
730 opflags == CLOPS_RENEW) {
731 clp = NULL;
732 if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
733 shp = NFSSESSIONHASH(nd->nd_sessionid);
734 NFSLOCKSESSION(shp);
735 sep = nfsrv_findsession(nd->nd_sessionid);
736 if (sep != NULL)
737 clp = sep->sess_clp;
738 NFSUNLOCKSESSION(shp);
739 }
740 } else {
741 hp = NFSCLIENTHASH(clientid);
742 LIST_FOREACH(clp, hp, lc_hash) {
743 if (clp->lc_clientid.lval[1] == clientid.lval[1])
744 break;
745 }
746 }
747 if (clp == NULL) {
748 if (opflags & CLOPS_CONFIRM)
749 error = NFSERR_STALECLIENTID;
750 else
751 error = NFSERR_EXPIRED;
752 } else if (clp->lc_flags & LCL_ADMINREVOKED) {
753 /*
754 * If marked admin revoked, just return the error.
755 */
756 error = NFSERR_ADMINREVOKED;
757 }
758 if (error) {
759 if (opflags & CLOPS_CONFIRM) {
760 nfsrv_clientunlock(mlocked);
761 if (client != NULL)
762 CLNT_RELEASE(client);
763 } else if (opflags != CLOPS_RENEW) {
764 NFSUNLOCKSTATE();
765 }
766 goto out;
767 }
768
769 /*
770 * Perform any operations specified by the opflags.
771 */
772 if (opflags & CLOPS_CONFIRM) {
773 sess_replay = false;
774 if ((nd->nd_flag & ND_NFSV41) != 0) {
775 /*
776 * For the case where lc_confirm.lval[0] == confirm.lval[0],
777 * use the new session, but with the previous sessionid.
778 * This is not exactly what the RFC describes, but should
779 * result in the same reply as the previous CreateSession.
780 */
781 if (clp->lc_confirm.lval[0] + 1 == confirm.lval[0]) {
782 clp->lc_confirm.lval[0] = confirm.lval[0];
783 clp->lc_prevsess = sessid[0];
784 } else if (clp->lc_confirm.lval[0] == confirm.lval[0]) {
785 if (clp->lc_prevsess == 0)
786 error = NFSERR_SEQMISORDERED;
787 else
788 sessid[0] = clp->lc_prevsess;
789 sess_replay = true;
790 } else
791 error = NFSERR_SEQMISORDERED;
792 } else if ((nd->nd_flag & ND_NFSV41) == 0 &&
793 clp->lc_confirm.qval != confirm.qval)
794 error = NFSERR_STALECLIENTID;
795 if (error == 0 && nfsrv_notsamecredname(NFSV4OP_CREATESESSION,
796 nd, clp))
797 error = NFSERR_CLIDINUSE;
798
799 if (!error) {
800 if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
801 LCL_NEEDSCONFIRM) {
802 /*
803 * Hang onto the delegations (as old delegations)
804 * for an Open with CLAIM_DELEGATE_PREV unless in
805 * grace, but get rid of the rest of the state.
806 */
807 if (mlocked)
808 nfsrv_cleanclient(clp, p, true, &old_xprt);
809 else
810 nfsrv_cleanclient(clp, p, false, NULL);
811 nfsrv_freedeleglist(&clp->lc_olddeleg);
812 if (nfsrv_checkgrace(nd, clp, 0)) {
813 /* In grace, so just delete delegations */
814 nfsrv_freedeleglist(&clp->lc_deleg);
815 } else {
816 LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
817 stp->ls_flags |= NFSLCK_OLDDELEG;
818 clp->lc_delegtime = NFSD_MONOSEC +
819 nfsrv_lease + NFSRV_LEASEDELTA;
820 LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
821 ls_list);
822 }
823 if ((nd->nd_flag & ND_NFSV41) != 0)
824 clp->lc_program = cbprogram;
825 }
826 clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
827 if (clp->lc_program)
828 clp->lc_flags |= LCL_NEEDSCBNULL;
829 /* For NFSv4.1, link the session onto the client. */
830 if (nsep != NULL) {
831 /* Hold a reference on the xprt for a backchannel. */
832 if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
833 != 0 && !sess_replay) {
834 if (clp->lc_req.nr_client == NULL) {
835 clp->lc_req.nr_client = client;
836 client = NULL;
837 }
838 if (clp->lc_req.nr_client != NULL) {
839 SVC_ACQUIRE(nd->nd_xprt);
840 CLNT_ACQUIRE(clp->lc_req.nr_client);
841 nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
842 /* Disable idle timeout. */
843 nd->nd_xprt->xp_idletimeout = 0;
844 nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
845 } else
846 nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
847 }
848 NFSBCOPY(sessid, nsep->sess_sessionid,
849 NFSX_V4SESSIONID);
850 NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
851 NFSX_V4SESSIONID);
852 if (!sess_replay) {
853 shp = NFSSESSIONHASH(nsep->sess_sessionid);
854 if (!mlocked)
855 NFSLOCKSTATE();
856 NFSLOCKSESSION(shp);
857 LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
858 LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
859 nsep->sess_clp = clp;
860 NFSUNLOCKSESSION(shp);
861 if (!mlocked)
862 NFSUNLOCKSTATE();
863 }
864 }
865 }
866 } else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
867 error = NFSERR_EXPIRED;
868 }
869
870 /*
871 * If called by the Renew Op, we must check the principal.
872 */
873 if (!error && (opflags & CLOPS_RENEWOP)) {
874 if (nfsrv_notsamecredname(0, nd, clp)) {
875 doneok = 0;
876 for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
877 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
878 if ((stp->ls_flags & NFSLCK_OPEN) &&
879 stp->ls_uid == nd->nd_cred->cr_uid) {
880 doneok = 1;
881 break;
882 }
883 }
884 }
885 if (!doneok)
886 error = NFSERR_ACCES;
887 }
888 if (!error && (clp->lc_flags & LCL_CBDOWN))
889 error = NFSERR_CBPATHDOWN;
890 }
891 if ((!error || error == NFSERR_CBPATHDOWN) &&
892 (opflags & CLOPS_RENEW)) {
893 clp->lc_expiry = nfsrv_leaseexpiry();
894 }
895 if (opflags & CLOPS_CONFIRM) {
896 nfsrv_clientunlock(mlocked);
897 if (client != NULL)
898 CLNT_RELEASE(client);
899 if (old_xprt != NULL)
900 SVC_RELEASE(old_xprt);
901 } else if (opflags != CLOPS_RENEW) {
902 NFSUNLOCKSTATE();
903 }
904 if (clpp)
905 *clpp = clp;
906
907 out:
908 NFSEXITCODE2(error, nd);
909 return (error);
910 }
911
912 /*
913 * Perform the NFSv4.1 destroy clientid.
914 */
915 int
nfsrv_destroyclient(struct nfsrv_descript * nd,nfsquad_t clientid,NFSPROC_T * p)916 nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
917 {
918 struct nfsclient *clp;
919 struct nfsclienthashhead *hp;
920 SVCXPRT *old_xprt;
921 int error = 0, i;
922 bool mlocked;
923
924 if (VNET(nfsrvboottime) != clientid.lval[0]) {
925 error = NFSERR_STALECLIENTID;
926 goto out;
927 }
928
929 mlocked = true;
930 if (nfsrv_dolocallocks != 0)
931 mlocked = false;
932 /* Lock out other nfsd threads */
933 nfsrv_clientlock(mlocked);
934
935 hp = NFSCLIENTHASH(clientid);
936 LIST_FOREACH(clp, hp, lc_hash) {
937 if (clp->lc_clientid.lval[1] == clientid.lval[1])
938 break;
939 }
940 if (clp == NULL) {
941 nfsrv_clientunlock(mlocked);
942 /* Just return ok, since it is gone. */
943 goto out;
944 }
945
946 /* Check for the SP4_MACH_CRED case. */
947 error = nfsrv_checkmachcred(NFSV4OP_DESTROYCLIENTID, nd, clp);
948 if (error != 0) {
949 nfsrv_clientunlock(mlocked);
950 goto out;
951 }
952
953 /*
954 * Free up all layouts on the clientid. Should the client return the
955 * layouts?
956 */
957 nfsrv_freelayoutlist(clientid);
958
959 /* Scan for state on the clientid. */
960 for (i = 0; i < nfsrv_statehashsize; i++)
961 if (!LIST_EMPTY(&clp->lc_stateid[i])) {
962 nfsrv_clientunlock(mlocked);
963 error = NFSERR_CLIENTIDBUSY;
964 goto out;
965 }
966 if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
967 nfsrv_clientunlock(mlocked);
968 error = NFSERR_CLIENTIDBUSY;
969 goto out;
970 }
971
972 /* Destroy the clientid and return ok. */
973 old_xprt = NULL;
974 if (mlocked)
975 nfsrv_cleanclient(clp, p, true, &old_xprt);
976 else
977 nfsrv_cleanclient(clp, p, false, NULL);
978 nfsrv_freedeleglist(&clp->lc_deleg);
979 nfsrv_freedeleglist(&clp->lc_olddeleg);
980 LIST_REMOVE(clp, lc_hash);
981 nfsrv_clientunlock(mlocked);
982 if (old_xprt != NULL)
983 SVC_RELEASE(old_xprt);
984 nfsrv_zapclient(clp, p);
985 out:
986 NFSEXITCODE2(error, nd);
987 return (error);
988 }
989
990 /*
991 * Called from the new nfssvc syscall to admin revoke a clientid.
992 * Returns 0 for success, error otherwise.
993 */
994 int
nfsrv_adminrevoke(struct nfsd_clid * revokep,NFSPROC_T * p)995 nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
996 {
997 struct nfsclient *clp = NULL;
998 int i, error = 0;
999 int gotit, igotlock;
1000
1001 /*
1002 * First, lock out the nfsd so that state won't change while the
1003 * revocation record is being written to the stable storage restart
1004 * file.
1005 */
1006 NFSLOCKV4ROOTMUTEX();
1007 do {
1008 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
1009 NFSV4ROOTLOCKMUTEXPTR, NULL);
1010 } while (!igotlock);
1011 NFSUNLOCKV4ROOTMUTEX();
1012
1013 /*
1014 * Search for a match in the client list.
1015 */
1016 gotit = i = 0;
1017 while (i < nfsrv_clienthashsize && !gotit) {
1018 LIST_FOREACH(clp, &VNET(nfsclienthash)[i], lc_hash) {
1019 if (revokep->nclid_idlen == clp->lc_idlen &&
1020 !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
1021 gotit = 1;
1022 break;
1023 }
1024 }
1025 i++;
1026 }
1027 if (!gotit) {
1028 NFSLOCKV4ROOTMUTEX();
1029 nfsv4_unlock(&nfsv4rootfs_lock, 0);
1030 NFSUNLOCKV4ROOTMUTEX();
1031 error = EPERM;
1032 goto out;
1033 }
1034
1035 /*
1036 * Now, write out the revocation record
1037 */
1038 nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
1039 nfsrv_backupstable();
1040
1041 /*
1042 * and clear out the state, marking the clientid revoked.
1043 */
1044 clp->lc_flags &= ~LCL_CALLBACKSON;
1045 clp->lc_flags |= LCL_ADMINREVOKED;
1046 nfsrv_cleanclient(clp, p, false, NULL);
1047 nfsrv_freedeleglist(&clp->lc_deleg);
1048 nfsrv_freedeleglist(&clp->lc_olddeleg);
1049 NFSLOCKV4ROOTMUTEX();
1050 nfsv4_unlock(&nfsv4rootfs_lock, 0);
1051 NFSUNLOCKV4ROOTMUTEX();
1052
1053 out:
1054 NFSEXITCODE(error);
1055 return (error);
1056 }
1057
1058 /*
1059 * Dump out stats for all clients. Called from nfssvc(2), that is used
1060 * nfsstatsv1.
1061 */
1062 void
nfsrv_dumpclients(struct nfsd_dumpclients * dumpp,int maxcnt)1063 nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
1064 {
1065 struct nfsclient *clp;
1066 int i = 0, cnt = 0;
1067
1068 /*
1069 * First, get a reference on the nfsv4rootfs_lock so that an
1070 * exclusive lock cannot be acquired while dumping the clients.
1071 */
1072 NFSLOCKV4ROOTMUTEX();
1073 nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
1074 NFSUNLOCKV4ROOTMUTEX();
1075 NFSLOCKSTATE();
1076 /*
1077 * Rattle through the client lists until done.
1078 */
1079 while (i < nfsrv_clienthashsize && cnt < maxcnt) {
1080 clp = LIST_FIRST(&VNET(nfsclienthash)[i]);
1081 while (clp != LIST_END(&VNET(nfsclienthash)[i]) && cnt <
1082 maxcnt) {
1083 nfsrv_dumpaclient(clp, &dumpp[cnt]);
1084 cnt++;
1085 clp = LIST_NEXT(clp, lc_hash);
1086 }
1087 i++;
1088 }
1089 if (cnt < maxcnt)
1090 dumpp[cnt].ndcl_clid.nclid_idlen = 0;
1091 NFSUNLOCKSTATE();
1092 NFSLOCKV4ROOTMUTEX();
1093 nfsv4_relref(&nfsv4rootfs_lock);
1094 NFSUNLOCKV4ROOTMUTEX();
1095 }
1096
1097 /*
1098 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
1099 */
1100 static void
nfsrv_dumpaclient(struct nfsclient * clp,struct nfsd_dumpclients * dumpp)1101 nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
1102 {
1103 struct nfsstate *stp, *openstp, *lckownstp;
1104 struct nfslock *lop;
1105 sa_family_t af;
1106 #ifdef INET
1107 struct sockaddr_in *rin;
1108 #endif
1109 #ifdef INET6
1110 struct sockaddr_in6 *rin6;
1111 #endif
1112
1113 dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
1114 dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
1115 dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
1116 dumpp->ndcl_flags = clp->lc_flags;
1117 dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
1118 NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
1119 af = clp->lc_req.nr_nam->sa_family;
1120 dumpp->ndcl_addrfam = af;
1121 switch (af) {
1122 #ifdef INET
1123 case AF_INET:
1124 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
1125 dumpp->ndcl_cbaddr.sin_addr = rin->sin_addr;
1126 break;
1127 #endif
1128 #ifdef INET6
1129 case AF_INET6:
1130 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
1131 dumpp->ndcl_cbaddr.sin6_addr = rin6->sin6_addr;
1132 break;
1133 #endif
1134 }
1135
1136 /*
1137 * Now, scan the state lists and total up the opens and locks.
1138 */
1139 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
1140 dumpp->ndcl_nopenowners++;
1141 LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
1142 dumpp->ndcl_nopens++;
1143 LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
1144 dumpp->ndcl_nlockowners++;
1145 LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
1146 dumpp->ndcl_nlocks++;
1147 }
1148 }
1149 }
1150 }
1151
1152 /*
1153 * and the delegation lists.
1154 */
1155 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
1156 dumpp->ndcl_ndelegs++;
1157 }
1158 LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
1159 dumpp->ndcl_nolddelegs++;
1160 }
1161 }
1162
1163 /*
1164 * Dump out lock stats for a file.
1165 */
1166 void
nfsrv_dumplocks(vnode_t vp,struct nfsd_dumplocks * ldumpp,int maxcnt,NFSPROC_T * p)1167 nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
1168 NFSPROC_T *p)
1169 {
1170 struct nfsstate *stp;
1171 struct nfslock *lop;
1172 int cnt = 0;
1173 struct nfslockfile *lfp;
1174 sa_family_t af;
1175 #ifdef INET
1176 struct sockaddr_in *rin;
1177 #endif
1178 #ifdef INET6
1179 struct sockaddr_in6 *rin6;
1180 #endif
1181 int ret;
1182 fhandle_t nfh;
1183
1184 ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
1185 /*
1186 * First, get a reference on the nfsv4rootfs_lock so that an
1187 * exclusive lock on it cannot be acquired while dumping the locks.
1188 */
1189 NFSLOCKV4ROOTMUTEX();
1190 nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
1191 NFSUNLOCKV4ROOTMUTEX();
1192 NFSLOCKSTATE();
1193 if (!ret)
1194 ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
1195 if (ret) {
1196 ldumpp[0].ndlck_clid.nclid_idlen = 0;
1197 NFSUNLOCKSTATE();
1198 NFSLOCKV4ROOTMUTEX();
1199 nfsv4_relref(&nfsv4rootfs_lock);
1200 NFSUNLOCKV4ROOTMUTEX();
1201 return;
1202 }
1203
1204 /*
1205 * For each open share on file, dump it out.
1206 */
1207 stp = LIST_FIRST(&lfp->lf_open);
1208 while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
1209 ldumpp[cnt].ndlck_flags = stp->ls_flags;
1210 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1211 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1212 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1213 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1214 ldumpp[cnt].ndlck_owner.nclid_idlen =
1215 stp->ls_openowner->ls_ownerlen;
1216 NFSBCOPY(stp->ls_openowner->ls_owner,
1217 ldumpp[cnt].ndlck_owner.nclid_id,
1218 stp->ls_openowner->ls_ownerlen);
1219 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1220 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1221 stp->ls_clp->lc_idlen);
1222 af = stp->ls_clp->lc_req.nr_nam->sa_family;
1223 ldumpp[cnt].ndlck_addrfam = af;
1224 switch (af) {
1225 #ifdef INET
1226 case AF_INET:
1227 rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1228 ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1229 break;
1230 #endif
1231 #ifdef INET6
1232 case AF_INET6:
1233 rin6 = (struct sockaddr_in6 *)
1234 stp->ls_clp->lc_req.nr_nam;
1235 ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1236 break;
1237 #endif
1238 }
1239 stp = LIST_NEXT(stp, ls_file);
1240 cnt++;
1241 }
1242
1243 /*
1244 * and all locks.
1245 */
1246 lop = LIST_FIRST(&lfp->lf_lock);
1247 while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
1248 stp = lop->lo_stp;
1249 ldumpp[cnt].ndlck_flags = lop->lo_flags;
1250 ldumpp[cnt].ndlck_first = lop->lo_first;
1251 ldumpp[cnt].ndlck_end = lop->lo_end;
1252 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1253 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1254 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1255 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1256 ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
1257 NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
1258 stp->ls_ownerlen);
1259 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1260 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1261 stp->ls_clp->lc_idlen);
1262 af = stp->ls_clp->lc_req.nr_nam->sa_family;
1263 ldumpp[cnt].ndlck_addrfam = af;
1264 switch (af) {
1265 #ifdef INET
1266 case AF_INET:
1267 rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1268 ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1269 break;
1270 #endif
1271 #ifdef INET6
1272 case AF_INET6:
1273 rin6 = (struct sockaddr_in6 *)
1274 stp->ls_clp->lc_req.nr_nam;
1275 ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1276 break;
1277 #endif
1278 }
1279 lop = LIST_NEXT(lop, lo_lckfile);
1280 cnt++;
1281 }
1282
1283 /*
1284 * and the delegations.
1285 */
1286 stp = LIST_FIRST(&lfp->lf_deleg);
1287 while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
1288 ldumpp[cnt].ndlck_flags = stp->ls_flags;
1289 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1290 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1291 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1292 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1293 ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
1294 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1295 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1296 stp->ls_clp->lc_idlen);
1297 af = stp->ls_clp->lc_req.nr_nam->sa_family;
1298 ldumpp[cnt].ndlck_addrfam = af;
1299 switch (af) {
1300 #ifdef INET
1301 case AF_INET:
1302 rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1303 ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1304 break;
1305 #endif
1306 #ifdef INET6
1307 case AF_INET6:
1308 rin6 = (struct sockaddr_in6 *)
1309 stp->ls_clp->lc_req.nr_nam;
1310 ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1311 break;
1312 #endif
1313 }
1314 stp = LIST_NEXT(stp, ls_file);
1315 cnt++;
1316 }
1317
1318 /*
1319 * If list isn't full, mark end of list by setting the client name
1320 * to zero length.
1321 */
1322 if (cnt < maxcnt)
1323 ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
1324 NFSUNLOCKSTATE();
1325 NFSLOCKV4ROOTMUTEX();
1326 nfsv4_relref(&nfsv4rootfs_lock);
1327 NFSUNLOCKV4ROOTMUTEX();
1328 }
1329
1330 /*
1331 * Server timer routine. It can scan any linked list, so long
1332 * as it holds the spin/mutex lock and there is no exclusive lock on
1333 * nfsv4rootfs_lock.
1334 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
1335 * to do this from a callout, since the spin locks work. For
1336 * Darwin, I'm not sure what will work correctly yet.)
1337 * Should be called once per second.
1338 */
1339 void
nfsrv_servertimer(void * arg __unused)1340 nfsrv_servertimer(void *arg __unused)
1341 {
1342 struct nfsclient *clp, *nclp;
1343 struct nfsstate *stp, *nstp;
1344 int got_ref, i;
1345
1346 /*
1347 * Make sure nfsboottime is set. This is used by V3 as well
1348 * as V4. Note that nfsboottime is not nfsrvboottime, which is
1349 * only used by the V4 server for leases.
1350 */
1351 if (nfsboottime.tv_sec == 0)
1352 NFSSETBOOTTIME(nfsboottime);
1353
1354 /*
1355 * If server hasn't started yet, just return.
1356 */
1357 NFSLOCKSTATE();
1358 if (VNET(nfsrv_stablefirst).nsf_eograce == 0) {
1359 NFSUNLOCKSTATE();
1360 return;
1361 }
1362 if (!(VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE)) {
1363 if (!(VNET(nfsrv_stablefirst).nsf_flags &
1364 NFSNSF_GRACEOVER) &&
1365 NFSD_MONOSEC > VNET(nfsrv_stablefirst).nsf_eograce)
1366 VNET(nfsrv_stablefirst).nsf_flags |=
1367 (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
1368 NFSUNLOCKSTATE();
1369 return;
1370 }
1371
1372 /*
1373 * Try and get a reference count on the nfsv4rootfs_lock so that
1374 * no nfsd thread can acquire an exclusive lock on it before this
1375 * call is done. If it is already exclusively locked, just return.
1376 */
1377 NFSLOCKV4ROOTMUTEX();
1378 got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
1379 NFSUNLOCKV4ROOTMUTEX();
1380 if (got_ref == 0) {
1381 NFSUNLOCKSTATE();
1382 return;
1383 }
1384
1385 /*
1386 * For each client...
1387 */
1388 for (i = 0; i < nfsrv_clienthashsize; i++) {
1389 clp = LIST_FIRST(&VNET(nfsclienthash)[i]);
1390 while (clp != LIST_END(&VNET(nfsclienthash)[i])) {
1391 nclp = LIST_NEXT(clp, lc_hash);
1392 if (!(clp->lc_flags & LCL_EXPIREIT)) {
1393 if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
1394 && ((LIST_EMPTY(&clp->lc_deleg)
1395 && LIST_EMPTY(&clp->lc_open)) ||
1396 nfsrv_clients > nfsrv_clienthighwater)) ||
1397 (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
1398 (clp->lc_expiry < NFSD_MONOSEC &&
1399 (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
1400 /*
1401 * Lease has expired several nfsrv_lease times ago:
1402 * PLUS
1403 * - no state is associated with it
1404 * OR
1405 * - above high water mark for number of clients
1406 * (nfsrv_clienthighwater should be large enough
1407 * that this only occurs when clients fail to
1408 * use the same nfs_client_id4.id. Maybe somewhat
1409 * higher that the maximum number of clients that
1410 * will mount this server?)
1411 * OR
1412 * Lease has expired a very long time ago
1413 * OR
1414 * Lease has expired PLUS the number of opens + locks
1415 * has exceeded 90% of capacity
1416 *
1417 * --> Mark for expiry. The actual expiry will be done
1418 * by an nfsd sometime soon.
1419 */
1420 clp->lc_flags |= LCL_EXPIREIT;
1421 VNET(nfsrv_stablefirst).nsf_flags |=
1422 (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
1423 } else {
1424 /*
1425 * If there are no opens, increment no open tick cnt
1426 * If time exceeds NFSNOOPEN, mark it to be thrown away
1427 * otherwise, if there is an open, reset no open time
1428 * Hopefully, this will avoid excessive re-creation
1429 * of open owners and subsequent open confirms.
1430 */
1431 stp = LIST_FIRST(&clp->lc_open);
1432 while (stp != LIST_END(&clp->lc_open)) {
1433 nstp = LIST_NEXT(stp, ls_list);
1434 if (LIST_EMPTY(&stp->ls_open)) {
1435 stp->ls_noopens++;
1436 if (stp->ls_noopens > NFSNOOPEN ||
1437 (nfsrv_openpluslock * 2) >
1438 nfsrv_v4statelimit)
1439 VNET(nfsrv_stablefirst).nsf_flags |=
1440 NFSNSF_NOOPENS;
1441 } else {
1442 stp->ls_noopens = 0;
1443 }
1444 stp = nstp;
1445 }
1446 }
1447 }
1448 clp = nclp;
1449 }
1450 }
1451 NFSUNLOCKSTATE();
1452 NFSLOCKV4ROOTMUTEX();
1453 nfsv4_relref(&nfsv4rootfs_lock);
1454 NFSUNLOCKV4ROOTMUTEX();
1455 }
1456
1457 /*
1458 * The following set of functions free up the various data structures.
1459 */
1460 /*
1461 * Clear out all open/lock state related to this nfsclient.
1462 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
1463 * there are no other active nfsd threads.
1464 */
1465 void
nfsrv_cleanclient(struct nfsclient * clp,NFSPROC_T * p,bool locked,SVCXPRT ** old_xprtp)1466 nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p, bool locked,
1467 SVCXPRT **old_xprtp)
1468 {
1469 struct nfsstate *stp, *nstp;
1470 struct nfsdsession *sep, *nsep;
1471
1472 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
1473 if (locked)
1474 nfsrv_freeopenowner(stp, 0, p);
1475 else
1476 nfsrv_freeopenowner(stp, 1, p);
1477 }
1478 if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
1479 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
1480 (void)nfsrv_freesession(NULL, sep, NULL, locked,
1481 old_xprtp);
1482 }
1483
/*
 * Free a client that has been cleaned. It should also already have been
 * removed from the lists.
 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
 * softclock interrupts are enabled.)
 *
 * The connection described by clp->lc_req is disconnected, everything
 * hanging off lc_req and the client structure itself are released and,
 * finally, the client counters are decremented while holding the
 * NFSSTATELOCK.  clp must not be referenced after this call.
 */
void
nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
{

#ifdef notyet
	/*
	 * Compiled out unless "notyet" is defined: for a client with both
	 * LCL_GSS and LCL_CALLBACKSON set and a completed handle, mark the
	 * handle destroyed and do a CB_NULL callback.
	 */
	if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
	     (LCL_GSS | LCL_CALLBACKSON) &&
	    (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
	    clp->lc_handlelen > 0) {
		clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
		clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
		(void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
			NULL, 0, NULL, NULL, NULL, 0, p);
	}
#endif
	/* Tear down the connection before releasing what lc_req refers to. */
	newnfs_disconnect(NULL, &clp->lc_req);
	free(clp->lc_req.nr_nam, M_SONAME);
	NFSFREEMUTEX(&clp->lc_req.nr_mtx);
	crfree(clp->lc_req.nr_cred);
	/* The stateid hash table is a separate allocation; free it first. */
	free(clp->lc_stateid, M_NFSDCLIENT);
	free(clp, M_NFSDCLIENT);
	/* The global counters are updated under the NFSSTATELOCK. */
	NFSLOCKSTATE();
	VNET(nfsstatsv1_p)->srvclients--;
	nfsrv_openpluslock--;
	nfsrv_clients--;
	NFSUNLOCKSTATE();
}
1517
1518 /*
1519 * Free a list of delegation state structures.
1520 * (This function will also free all nfslockfile structures that no
1521 * longer have associated state.)
1522 */
1523 void
nfsrv_freedeleglist(struct nfsstatehead * sthp)1524 nfsrv_freedeleglist(struct nfsstatehead *sthp)
1525 {
1526 struct nfsstate *stp, *nstp;
1527
1528 LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1529 nfsrv_freedeleg(stp);
1530 }
1531 LIST_INIT(sthp);
1532 }
1533
1534 /*
1535 * Free up a delegation.
1536 */
1537 static void
nfsrv_freedeleg(struct nfsstate * stp)1538 nfsrv_freedeleg(struct nfsstate *stp)
1539 {
1540 struct nfslockfile *lfp;
1541
1542 LIST_REMOVE(stp, ls_hash);
1543 LIST_REMOVE(stp, ls_list);
1544 LIST_REMOVE(stp, ls_file);
1545 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
1546 nfsrv_writedelegcnt--;
1547 lfp = stp->ls_lfp;
1548 if (LIST_EMPTY(&lfp->lf_open) &&
1549 LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1550 LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1551 lfp->lf_usecount == 0 &&
1552 nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1553 nfsrv_freenfslockfile(lfp);
1554 free(stp, M_NFSDSTATE);
1555 VNET(nfsstatsv1_p)->srvdelegates--;
1556 nfsrv_openpluslock--;
1557 nfsrv_delegatecnt--;
1558 }
1559
1560 /*
1561 * This function frees an open owner and all associated opens.
1562 */
1563 static void
nfsrv_freeopenowner(struct nfsstate * stp,int cansleep,NFSPROC_T * p)1564 nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1565 {
1566 struct nfsstate *nstp, *tstp;
1567
1568 LIST_REMOVE(stp, ls_list);
1569 /*
1570 * Now, free all associated opens.
1571 */
1572 nstp = LIST_FIRST(&stp->ls_open);
1573 while (nstp != LIST_END(&stp->ls_open)) {
1574 tstp = nstp;
1575 nstp = LIST_NEXT(nstp, ls_list);
1576 nfsrv_freeopen(tstp, NULL, cansleep, p);
1577 }
1578 if (stp->ls_op)
1579 nfsrvd_derefcache(stp->ls_op);
1580 free(stp, M_NFSDSTATE);
1581 VNET(nfsstatsv1_p)->srvopenowners--;
1582 nfsrv_openpluslock--;
1583 }
1584
/*
 * This function frees an open (nfsstate open structure) with all associated
 * lock_owners and locks. It also frees the nfslockfile structure when no
 * opens, delegations, locks, local locks or rollback entries remain on it,
 * it is not in use and its local lock is not held.
 * Nothing is returned.
 * When vp != NULL, cansleep must be non-zero and the NFSSTATELOCK must be
 * held; both the NFSSTATELOCK and the vnode lock are dropped and then
 * re-acquired while the locks are being freed.
 */
static void
nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
{
	struct nfsstate *nstp, *tstp;
	struct nfslockfile *lfp;

	/* Unlink the open from the lists it is on. */
	LIST_REMOVE(stp, ls_hash);
	LIST_REMOVE(stp, ls_list);
	LIST_REMOVE(stp, ls_file);

	lfp = stp->ls_lfp;
	/*
	 * Now, free all lockowners associated with this open.
	 * Note that, if vp != NULL, nfsrv_freelockowner() will
	 * not call nfsrv_freeallnfslocks(), so it needs to be called, below.
	 */
	LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
		nfsrv_freelockowner(tstp, vp, cansleep, p);

	if (vp != NULL) {
		KASSERT(cansleep != 0, ("nfsrv_freeopen: cansleep == 0"));
		mtx_assert(NFSSTATEMUTEXPTR, MA_OWNED);
		/*
		 * Only called with vp != NULL for Close when
		 * vfs.nfsd.enable_locallocks != 0.
		 * Lock the lfp so that it will not go away and do the
		 * nfsrv_freeallnfslocks() call that was not done by
		 * nfsrv_freelockowner().
		 * The state lock and the vnode lock are released around
		 * that call and re-acquired in the opposite order.
		 */
		nfsrv_locklf(lfp);
		NFSUNLOCKSTATE();
		NFSVOPUNLOCK(vp);
		nfsrv_freeallnfslocks(stp, vp, cansleep, p);
		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
		NFSLOCKSTATE();
		nfsrv_unlocklf(lfp);
	}

	/*
	 * The nfslockfile is freed here if there are no locks
	 * associated with the open.
	 * If there are locks associated with the open, the
	 * nfslockfile structure can be freed via nfsrv_freelockowner().
	 */
	if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
	    LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
	    lfp->lf_usecount == 0 &&
	    nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
		nfsrv_freenfslockfile(lfp);
	free(stp, M_NFSDSTATE);
	VNET(nfsstatsv1_p)->srvopens--;
	nfsrv_openpluslock--;
}
1645
1646 /*
1647 * Frees a lockowner and all associated locks.
1648 */
1649 static void
nfsrv_freelockowner(struct nfsstate * stp,vnode_t vp,int cansleep,NFSPROC_T * p)1650 nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1651 NFSPROC_T *p)
1652 {
1653
1654 LIST_REMOVE(stp, ls_hash);
1655 LIST_REMOVE(stp, ls_list);
1656 if (vp == NULL)
1657 nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1658 if (stp->ls_op)
1659 nfsrvd_derefcache(stp->ls_op);
1660 free(stp, M_NFSDSTATE);
1661 VNET(nfsstatsv1_p)->srvlockowners--;
1662 nfsrv_openpluslock--;
1663 }
1664
1665 /*
1666 * Free all the nfs locks on a lockowner.
1667 */
1668 static void
nfsrv_freeallnfslocks(struct nfsstate * stp,vnode_t vp,int cansleep,NFSPROC_T * p)1669 nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1670 NFSPROC_T *p)
1671 {
1672 struct nfslock *lop, *nlop;
1673 struct nfsrollback *rlp, *nrlp;
1674 struct nfslockfile *lfp = NULL;
1675 int gottvp = 0;
1676 vnode_t tvp = NULL;
1677 uint64_t first, end;
1678
1679 if (vp != NULL)
1680 ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
1681 lop = LIST_FIRST(&stp->ls_lock);
1682 while (lop != LIST_END(&stp->ls_lock)) {
1683 nlop = LIST_NEXT(lop, lo_lckowner);
1684 /*
1685 * Since all locks should be for the same file, lfp should
1686 * not change.
1687 */
1688 if (lfp == NULL)
1689 lfp = lop->lo_lfp;
1690 else if (lfp != lop->lo_lfp)
1691 panic("allnfslocks");
1692 /*
1693 * If vp is NULL and cansleep != 0, a vnode must be acquired
1694 * from the file handle. This only occurs when called from
1695 * nfsrv_cleanclient().
1696 */
1697 if (gottvp == 0) {
1698 if (nfsrv_dolocallocks == 0)
1699 tvp = NULL;
1700 else if (vp == NULL && cansleep != 0) {
1701 tvp = nfsvno_getvp(&lfp->lf_fh);
1702 if (tvp != NULL)
1703 NFSVOPUNLOCK(tvp);
1704 } else
1705 tvp = vp;
1706 gottvp = 1;
1707 }
1708
1709 if (tvp != NULL) {
1710 if (cansleep == 0)
1711 panic("allnfs2");
1712 first = lop->lo_first;
1713 end = lop->lo_end;
1714 nfsrv_freenfslock(lop);
1715 nfsrv_localunlock(tvp, lfp, first, end, p);
1716 LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1717 nrlp)
1718 free(rlp, M_NFSDROLLBACK);
1719 LIST_INIT(&lfp->lf_rollback);
1720 } else
1721 nfsrv_freenfslock(lop);
1722 lop = nlop;
1723 }
1724 if (vp == NULL && tvp != NULL)
1725 vrele(tvp);
1726 }
1727
1728 /*
1729 * Free an nfslock structure.
1730 */
1731 static void
nfsrv_freenfslock(struct nfslock * lop)1732 nfsrv_freenfslock(struct nfslock *lop)
1733 {
1734
1735 if (lop->lo_lckfile.le_prev != NULL) {
1736 LIST_REMOVE(lop, lo_lckfile);
1737 VNET(nfsstatsv1_p)->srvlocks--;
1738 nfsrv_openpluslock--;
1739 }
1740 LIST_REMOVE(lop, lo_lckowner);
1741 free(lop, M_NFSDLOCK);
1742 }
1743
/*
 * This function frees an nfslockfile structure.
 * The structure is removed from its hash list first.  The callers visible
 * here only do this once all lists on the structure are empty, its use
 * count is zero and its local lock is not held.
 */
static void
nfsrv_freenfslockfile(struct nfslockfile *lfp)
{

	/* Unhash, then release the memory. */
	LIST_REMOVE(lfp, lf_hash);
	free(lfp, M_NFSDLOCKFILE);
}
1754
1755 /*
1756 * This function looks up an nfsstate structure via stateid.
1757 */
1758 static int
nfsrv_getstate(struct nfsclient * clp,nfsv4stateid_t * stateidp,__unused u_int32_t flags,struct nfsstate ** stpp)1759 nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1760 struct nfsstate **stpp)
1761 {
1762 struct nfsstate *stp;
1763 struct nfsstatehead *hp;
1764 int error = 0;
1765
1766 *stpp = NULL;
1767 hp = NFSSTATEHASH(clp, *stateidp);
1768 LIST_FOREACH(stp, hp, ls_hash) {
1769 if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1770 NFSX_STATEIDOTHER))
1771 break;
1772 }
1773
1774 /*
1775 * If no state id in list, return NFSERR_BADSTATEID.
1776 */
1777 if (stp == LIST_END(hp)) {
1778 error = NFSERR_BADSTATEID;
1779 goto out;
1780 }
1781 *stpp = stp;
1782
1783 out:
1784 NFSEXITCODE(error);
1785 return (error);
1786 }
1787
1788 /*
1789 * This function gets an nfsstate structure via owner string.
1790 */
1791 static void
nfsrv_getowner(struct nfsstatehead * hp,struct nfsstate * new_stp,struct nfsstate ** stpp)1792 nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1793 struct nfsstate **stpp)
1794 {
1795 struct nfsstate *stp;
1796
1797 *stpp = NULL;
1798 LIST_FOREACH(stp, hp, ls_list) {
1799 if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1800 !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1801 *stpp = stp;
1802 return;
1803 }
1804 }
1805 }
1806
1807 /*
1808 * Lock control function called to update lock status.
1809 * Returns 0 upon success, -1 if there is no lock and the flags indicate
1810 * that one isn't to be created and an NFSERR_xxx for other errors.
1811 * The structures new_stp and new_lop are passed in as pointers that should
1812 * be set to NULL if the structure is used and shouldn't be free'd.
1813 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
1814 * never used and can safely be allocated on the stack. For all other
1815 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
1816 * in case they are used.
1817 */
1818 int
nfsrv_lockctrl(vnode_t vp,struct nfsstate ** new_stpp,struct nfslock ** new_lopp,struct nfslockconflict * cfp,nfsquad_t clientid,nfsv4stateid_t * stateidp,__unused struct nfsexstuff * exp,struct nfsrv_descript * nd,NFSPROC_T * p)1819 nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
1820 struct nfslock **new_lopp, struct nfslockconflict *cfp,
1821 nfsquad_t clientid, nfsv4stateid_t *stateidp,
1822 __unused struct nfsexstuff *exp,
1823 struct nfsrv_descript *nd, NFSPROC_T *p)
1824 {
1825 struct nfslock *lop;
1826 struct nfsstate *new_stp = *new_stpp;
1827 struct nfslock *new_lop = *new_lopp;
1828 struct nfsstate *tstp, *mystp, *nstp;
1829 int specialid = 0;
1830 struct nfslockfile *lfp;
1831 struct nfslock *other_lop = NULL;
1832 struct nfsstate *stp, *lckstp = NULL;
1833 struct nfsclient *clp = NULL;
1834 u_int32_t bits;
1835 int error = 0, haslock = 0, ret, reterr;
1836 int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
1837 fhandle_t nfh;
1838 uint64_t first, end;
1839 uint32_t lock_flags;
1840
1841 if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1842 /*
1843 * Note the special cases of "all 1s" or "all 0s" stateids and
1844 * let reads with all 1s go ahead.
1845 */
1846 if (new_stp->ls_stateid.seqid == 0x0 &&
1847 new_stp->ls_stateid.other[0] == 0x0 &&
1848 new_stp->ls_stateid.other[1] == 0x0 &&
1849 new_stp->ls_stateid.other[2] == 0x0)
1850 specialid = 1;
1851 else if (new_stp->ls_stateid.seqid == 0xffffffff &&
1852 new_stp->ls_stateid.other[0] == 0xffffffff &&
1853 new_stp->ls_stateid.other[1] == 0xffffffff &&
1854 new_stp->ls_stateid.other[2] == 0xffffffff)
1855 specialid = 2;
1856 }
1857
1858 /*
1859 * Check for restart conditions (client and server).
1860 */
1861 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1862 &new_stp->ls_stateid, specialid);
1863 if (error)
1864 goto out;
1865
1866 /*
1867 * Check for state resource limit exceeded.
1868 */
1869 if ((new_stp->ls_flags & NFSLCK_LOCK) &&
1870 nfsrv_openpluslock > nfsrv_v4statelimit) {
1871 error = NFSERR_RESOURCE;
1872 goto out;
1873 }
1874
1875 /*
1876 * For the lock case, get another nfslock structure,
1877 * just in case we need it.
1878 * Malloc now, before we start sifting through the linked lists,
1879 * in case we have to wait for memory.
1880 */
1881 tryagain:
1882 if (new_stp->ls_flags & NFSLCK_LOCK)
1883 other_lop = malloc(sizeof (struct nfslock),
1884 M_NFSDLOCK, M_WAITOK);
1885 filestruct_locked = 0;
1886 reterr = 0;
1887 lfp = NULL;
1888
1889 /*
1890 * Get the lockfile structure for CFH now, so we can do a sanity
1891 * check against the stateid, before incrementing the seqid#, since
1892 * we want to return NFSERR_BADSTATEID on failure and the seqid#
1893 * shouldn't be incremented for this case.
1894 * If nfsrv_getlockfile() returns -1, it means "not found", which
1895 * will be handled later.
1896 * If we are doing Lock/LockU and local locking is enabled, sleep
1897 * lock the nfslockfile structure.
1898 */
1899 getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
1900 NFSLOCKSTATE();
1901 if (getlckret == 0) {
1902 if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
1903 nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
1904 getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1905 &lfp, &nfh, 1);
1906 if (getlckret == 0)
1907 filestruct_locked = 1;
1908 } else
1909 getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1910 &lfp, &nfh, 0);
1911 }
1912 if (getlckret != 0 && getlckret != -1)
1913 reterr = getlckret;
1914
1915 if (filestruct_locked != 0) {
1916 LIST_INIT(&lfp->lf_rollback);
1917 if ((new_stp->ls_flags & NFSLCK_LOCK)) {
1918 /*
1919 * For local locking, do the advisory locking now, so
1920 * that any conflict can be detected. A failure later
1921 * can be rolled back locally. If an error is returned,
1922 * struct nfslockfile has been unlocked and any local
1923 * locking rolled back.
1924 */
1925 NFSUNLOCKSTATE();
1926 if (vnode_unlocked == 0) {
1927 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
1928 vnode_unlocked = 1;
1929 NFSVOPUNLOCK(vp);
1930 }
1931 reterr = nfsrv_locallock(vp, lfp,
1932 (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
1933 new_lop->lo_first, new_lop->lo_end, cfp, p);
1934 NFSLOCKSTATE();
1935 }
1936 }
1937
1938 if (specialid == 0) {
1939 if (new_stp->ls_flags & NFSLCK_TEST) {
1940 /*
1941 * RFC 3530 does not list LockT as an op that renews a
1942 * lease, but the consensus seems to be that it is ok
1943 * for a server to do so.
1944 */
1945 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1946 (nfsquad_t)((u_quad_t)0), 0, nd, p);
1947
1948 /*
1949 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
1950 * error returns for LockT, just go ahead and test for a lock,
1951 * since there are no locks for this client, but other locks
1952 * can conflict. (ie. same client will always be false)
1953 */
1954 if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
1955 error = 0;
1956 lckstp = new_stp;
1957 } else {
1958 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1959 (nfsquad_t)((u_quad_t)0), 0, nd, p);
1960 if (error == 0)
1961 /*
1962 * Look up the stateid
1963 */
1964 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
1965 new_stp->ls_flags, &stp);
1966 /*
1967 * do some sanity checks for an unconfirmed open or a
1968 * stateid that refers to the wrong file, for an open stateid
1969 */
1970 if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
1971 ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
1972 (getlckret == 0 && stp->ls_lfp != lfp))){
1973 /*
1974 * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID
1975 * The only exception is using SETATTR with SIZE.
1976 * */
1977 if ((new_stp->ls_flags &
1978 (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR)
1979 error = NFSERR_BADSTATEID;
1980 }
1981
1982 /*
1983 * Sanity check the stateid for the Lock/LockU cases.
1984 */
1985 if (error == 0 && (new_stp->ls_flags & NFSLCK_LOCK) != 0 &&
1986 (((new_stp->ls_flags & NFSLCK_OPENTOLOCK) != 0 &&
1987 (stp->ls_flags & NFSLCK_OPEN) == 0) ||
1988 ((new_stp->ls_flags & NFSLCK_OPENTOLOCK) == 0 &&
1989 (stp->ls_flags & NFSLCK_LOCK) == 0)))
1990 error = NFSERR_BADSTATEID;
1991 if (error == 0 && (new_stp->ls_flags & NFSLCK_UNLOCK) != 0 &&
1992 (stp->ls_flags & NFSLCK_LOCK) == 0)
1993 error = NFSERR_BADSTATEID;
1994
1995 /* Sanity check the delegation stateid. */
1996 if (error == 0 &&
1997 (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
1998 getlckret == 0 && stp->ls_lfp != lfp)
1999 error = NFSERR_BADSTATEID;
2000
2001 /*
2002 * If the lockowner stateid doesn't refer to the same file,
2003 * I believe that is considered ok, since some clients will
2004 * only create a single lockowner and use that for all locks
2005 * on all files.
2006 * For now, log it as a diagnostic, instead of considering it
2007 * a BadStateid.
2008 */
2009 if (error == 0 && (stp->ls_flags &
2010 (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
2011 getlckret == 0 && stp->ls_lfp != lfp) {
2012 #ifdef DIAGNOSTIC
2013 printf("Got a lock statid for different file open\n");
2014 #endif
2015 /*
2016 error = NFSERR_BADSTATEID;
2017 */
2018 }
2019
2020 if (error == 0) {
2021 if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
2022 /*
2023 * If haslock set, we've already checked the seqid.
2024 */
2025 if (!haslock) {
2026 if (stp->ls_flags & NFSLCK_OPEN)
2027 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
2028 stp->ls_openowner, new_stp->ls_op);
2029 else
2030 error = NFSERR_BADSTATEID;
2031 }
2032 if (!error)
2033 nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
2034 if (lckstp) {
2035 /*
2036 * For NFSv4.1 and NFSv4.2 allow an
2037 * open_to_lock_owner when the lock_owner already
2038 * exists. Just clear NFSLCK_OPENTOLOCK so that
2039 * a new lock_owner will not be created.
2040 * RFC7530 states that the error for NFSv4.0
2041 * is NFS4ERR_BAD_SEQID.
2042 */
2043 if ((nd->nd_flag & ND_NFSV41) != 0)
2044 new_stp->ls_flags &= ~NFSLCK_OPENTOLOCK;
2045 else
2046 error = NFSERR_BADSEQID;
2047 } else
2048 lckstp = new_stp;
2049 } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
2050 /*
2051 * If haslock set, ditto above.
2052 */
2053 if (!haslock) {
2054 if (stp->ls_flags & NFSLCK_OPEN)
2055 error = NFSERR_BADSTATEID;
2056 else
2057 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
2058 stp, new_stp->ls_op);
2059 }
2060 lckstp = stp;
2061 } else {
2062 lckstp = stp;
2063 }
2064 }
2065 /*
2066 * If the seqid part of the stateid isn't the same, return
2067 * NFSERR_OLDSTATEID for cases other than I/O Ops.
2068 * For I/O Ops, only return NFSERR_OLDSTATEID if
2069 * nfsrv_returnoldstateid is set. (The consensus on the email
2070 * list was that most clients would prefer to not receive
2071 * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
2072 * is what will happen, so I use the nfsrv_returnoldstateid to
2073 * allow for either server configuration.)
2074 */
2075 if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
2076 (((nd->nd_flag & ND_NFSV41) == 0 &&
2077 (!(new_stp->ls_flags & NFSLCK_CHECK) ||
2078 nfsrv_returnoldstateid)) ||
2079 ((nd->nd_flag & ND_NFSV41) != 0 &&
2080 new_stp->ls_stateid.seqid != 0)))
2081 error = NFSERR_OLDSTATEID;
2082 }
2083 }
2084
2085 /*
2086 * Now we can check for grace.
2087 */
2088 if (!error)
2089 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2090 if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2091 nfsrv_checkstable(clp))
2092 error = NFSERR_NOGRACE;
2093 /*
2094 * If we successfully Reclaimed state, note that.
2095 */
2096 if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
2097 nfsrv_markstable(clp);
2098
2099 /*
2100 * At this point, either error == NFSERR_BADSTATEID or the
2101 * seqid# has been updated, so we can return any error.
2102 * If error == 0, there may be an error in:
2103 * nd_repstat - Set by the calling function.
2104 * reterr - Set above, if getting the nfslockfile structure
2105 * or acquiring the local lock failed.
2106 * (If both of these are set, nd_repstat should probably be
2107 * returned, since that error was detected before this
2108 * function call.)
2109 */
2110 if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
2111 if (error == 0) {
2112 if (nd->nd_repstat != 0)
2113 error = nd->nd_repstat;
2114 else
2115 error = reterr;
2116 }
2117 if (filestruct_locked != 0) {
2118 /* Roll back local locks. */
2119 NFSUNLOCKSTATE();
2120 if (vnode_unlocked == 0) {
2121 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
2122 vnode_unlocked = 1;
2123 NFSVOPUNLOCK(vp);
2124 }
2125 nfsrv_locallock_rollback(vp, lfp, p);
2126 NFSLOCKSTATE();
2127 nfsrv_unlocklf(lfp);
2128 }
2129 NFSUNLOCKSTATE();
2130 goto out;
2131 }
2132
2133 /*
2134 * Check the nfsrv_getlockfile return.
2135 * Returned -1 if no structure found.
2136 */
2137 if (getlckret == -1) {
2138 error = NFSERR_EXPIRED;
2139 /*
2140 * Called from lockt, so no lock is OK.
2141 */
2142 if (new_stp->ls_flags & NFSLCK_TEST) {
2143 error = 0;
2144 } else if (new_stp->ls_flags &
2145 (NFSLCK_CHECK | NFSLCK_SETATTR)) {
2146 /*
2147 * Called to check for a lock, OK if the stateid is all
2148 * 1s or all 0s, but there should be an nfsstate
2149 * otherwise.
2150 * (ie. If there is no open, I'll assume no share
2151 * deny bits.)
2152 */
2153 if (specialid)
2154 error = 0;
2155 else
2156 error = NFSERR_BADSTATEID;
2157 }
2158 NFSUNLOCKSTATE();
2159 goto out;
2160 }
2161
2162 /*
2163 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
2164 * For NFSLCK_CHECK, allow a read if write access is granted,
2165 * but check for a deny. For NFSLCK_LOCK, require correct access,
2166 * which implies a conflicting deny can't exist.
2167 */
2168 if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
2169 /*
2170 * Four kinds of state id:
2171 * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
2172 * - stateid for an open
2173 * - stateid for a delegation
2174 * - stateid for a lock owner
2175 */
2176 if (!specialid) {
2177 if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
2178 delegation = 1;
2179 mystp = stp;
2180 nfsrv_delaydelegtimeout(stp);
2181 } else if (stp->ls_flags & NFSLCK_OPEN) {
2182 mystp = stp;
2183 } else {
2184 mystp = stp->ls_openstp;
2185 }
2186 /*
2187 * If locking or checking, require correct access
2188 * bit set.
2189 */
2190 if (((new_stp->ls_flags & NFSLCK_LOCK) &&
2191 !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
2192 mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
2193 ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
2194 (NFSLCK_CHECK | NFSLCK_READACCESS) &&
2195 !(mystp->ls_flags & NFSLCK_READACCESS) &&
2196 nfsrv_allowreadforwriteopen == 0) ||
2197 ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
2198 (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
2199 !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
2200 if (filestruct_locked != 0) {
2201 /* Roll back local locks. */
2202 NFSUNLOCKSTATE();
2203 if (vnode_unlocked == 0) {
2204 ASSERT_VOP_ELOCKED(vp,
2205 "nfsrv_lockctrl3");
2206 vnode_unlocked = 1;
2207 NFSVOPUNLOCK(vp);
2208 }
2209 nfsrv_locallock_rollback(vp, lfp, p);
2210 NFSLOCKSTATE();
2211 nfsrv_unlocklf(lfp);
2212 }
2213 NFSUNLOCKSTATE();
2214 error = NFSERR_OPENMODE;
2215 goto out;
2216 }
2217 } else
2218 mystp = NULL;
2219 if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
2220 /*
2221 * Check for a conflicting deny bit.
2222 */
2223 LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
2224 if (tstp != mystp) {
2225 bits = tstp->ls_flags;
2226 bits >>= NFSLCK_SHIFT;
2227 if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
2228 KASSERT(vnode_unlocked == 0,
2229 ("nfsrv_lockctrl: vnode unlocked1"));
2230 ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
2231 vp, p);
2232 if (ret == 1) {
2233 /*
2234 * nfsrv_clientconflict unlocks state
2235 * when it returns non-zero.
2236 */
2237 lckstp = NULL;
2238 goto tryagain;
2239 }
2240 if (ret == 0)
2241 NFSUNLOCKSTATE();
2242 if (ret == 2)
2243 error = NFSERR_PERM;
2244 else
2245 error = NFSERR_OPENMODE;
2246 goto out;
2247 }
2248 }
2249 }
2250
2251 /* We're outta here */
2252 NFSUNLOCKSTATE();
2253 goto out;
2254 }
2255 }
2256
2257 /*
2258 * For setattr, just get rid of all the Delegations for other clients.
2259 */
2260 if (new_stp->ls_flags & NFSLCK_SETATTR) {
2261 KASSERT(vnode_unlocked == 0,
2262 ("nfsrv_lockctrl: vnode unlocked2"));
2263 ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
2264 if (ret) {
2265 /*
2266 * nfsrv_cleandeleg() unlocks state when it
2267 * returns non-zero.
2268 */
2269 if (ret == -1) {
2270 lckstp = NULL;
2271 goto tryagain;
2272 }
2273 error = ret;
2274 goto out;
2275 }
2276 if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
2277 (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
2278 LIST_EMPTY(&lfp->lf_deleg))) {
2279 NFSUNLOCKSTATE();
2280 goto out;
2281 }
2282 }
2283
2284 /*
2285 * Check for a conflicting delegation. If one is found, call
2286 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2287 * been set yet, it will get the lock. Otherwise, it will recall
2288 * the delegation. Then, we try try again...
2289 * I currently believe the conflict algorithm to be:
2290 * For Lock Ops (Lock/LockT/LockU)
2291 * - there is a conflict iff a different client has a write delegation
2292 * For Reading (Read Op)
2293 * - there is a conflict iff a different client has a write delegation
2294 * (the specialids are always a different client)
2295 * For Writing (Write/Setattr of size)
2296 * - there is a conflict if a different client has any delegation
2297 * - there is a conflict if the same client has a read delegation
2298 * (I don't understand why this isn't allowed, but that seems to be
2299 * the current consensus?)
2300 */
2301 tstp = LIST_FIRST(&lfp->lf_deleg);
2302 while (tstp != LIST_END(&lfp->lf_deleg)) {
2303 nstp = LIST_NEXT(tstp, ls_file);
2304 if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
2305 ((new_stp->ls_flags & NFSLCK_CHECK) &&
2306 (new_lop->lo_flags & NFSLCK_READ))) &&
2307 clp != tstp->ls_clp &&
2308 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
2309 ((new_stp->ls_flags & NFSLCK_CHECK) &&
2310 (new_lop->lo_flags & NFSLCK_WRITE) &&
2311 (clp != tstp->ls_clp ||
2312 (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
2313 ret = 0;
2314 if (filestruct_locked != 0) {
2315 /* Roll back local locks. */
2316 NFSUNLOCKSTATE();
2317 if (vnode_unlocked == 0) {
2318 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
2319 NFSVOPUNLOCK(vp);
2320 }
2321 nfsrv_locallock_rollback(vp, lfp, p);
2322 NFSLOCKSTATE();
2323 nfsrv_unlocklf(lfp);
2324 NFSUNLOCKSTATE();
2325 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2326 vnode_unlocked = 0;
2327 if (VN_IS_DOOMED(vp))
2328 ret = NFSERR_SERVERFAULT;
2329 NFSLOCKSTATE();
2330 }
2331 if (ret == 0)
2332 ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
2333 if (ret) {
2334 /*
2335 * nfsrv_delegconflict unlocks state when it
2336 * returns non-zero, which it always does.
2337 */
2338 if (other_lop) {
2339 free(other_lop, M_NFSDLOCK);
2340 other_lop = NULL;
2341 }
2342 if (ret == -1) {
2343 lckstp = NULL;
2344 goto tryagain;
2345 }
2346 error = ret;
2347 goto out;
2348 }
2349 /* Never gets here. */
2350 }
2351 tstp = nstp;
2352 }
2353
2354 /*
2355 * Handle the unlock case by calling nfsrv_updatelock().
2356 * (Should I have done some access checking above for unlock? For now,
2357 * just let it happen.)
2358 */
2359 if (new_stp->ls_flags & NFSLCK_UNLOCK) {
2360 first = new_lop->lo_first;
2361 end = new_lop->lo_end;
2362 nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
2363 stateidp->seqid = ++(stp->ls_stateid.seqid);
2364 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2365 stateidp->seqid = stp->ls_stateid.seqid = 1;
2366 stateidp->other[0] = stp->ls_stateid.other[0];
2367 stateidp->other[1] = stp->ls_stateid.other[1];
2368 stateidp->other[2] = stp->ls_stateid.other[2];
2369 if (filestruct_locked != 0) {
2370 NFSUNLOCKSTATE();
2371 if (vnode_unlocked == 0) {
2372 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
2373 vnode_unlocked = 1;
2374 NFSVOPUNLOCK(vp);
2375 }
2376 /* Update the local locks. */
2377 nfsrv_localunlock(vp, lfp, first, end, p);
2378 NFSLOCKSTATE();
2379 nfsrv_unlocklf(lfp);
2380 }
2381 NFSUNLOCKSTATE();
2382 goto out;
2383 }
2384
2385 /*
2386 * Search for a conflicting lock. A lock conflicts if:
2387 * - the lock range overlaps and
2388 * - at least one lock is a write lock and
2389 * - it is not owned by the same lock owner
2390 */
2391 if (!delegation) {
2392 LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
2393 if (new_lop->lo_end > lop->lo_first &&
2394 new_lop->lo_first < lop->lo_end &&
2395 (new_lop->lo_flags == NFSLCK_WRITE ||
2396 lop->lo_flags == NFSLCK_WRITE) &&
2397 lckstp != lop->lo_stp &&
2398 (clp != lop->lo_stp->ls_clp ||
2399 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
2400 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
2401 lckstp->ls_ownerlen))) {
2402 if (other_lop) {
2403 free(other_lop, M_NFSDLOCK);
2404 other_lop = NULL;
2405 }
2406 if (vnode_unlocked != 0)
2407 ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2408 NULL, p);
2409 else
2410 ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2411 vp, p);
2412 if (ret == 1) {
2413 if (filestruct_locked != 0) {
2414 if (vnode_unlocked == 0) {
2415 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
2416 NFSVOPUNLOCK(vp);
2417 }
2418 /* Roll back local locks. */
2419 nfsrv_locallock_rollback(vp, lfp, p);
2420 NFSLOCKSTATE();
2421 nfsrv_unlocklf(lfp);
2422 NFSUNLOCKSTATE();
2423 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2424 vnode_unlocked = 0;
2425 if (VN_IS_DOOMED(vp)) {
2426 error = NFSERR_SERVERFAULT;
2427 goto out;
2428 }
2429 }
2430 /*
2431 * nfsrv_clientconflict() unlocks state when it
2432 * returns non-zero.
2433 */
2434 lckstp = NULL;
2435 goto tryagain;
2436 }
2437 /*
2438 * Found a conflicting lock, so record the conflict and
2439 * return the error.
2440 */
2441 if (cfp != NULL && ret == 0) {
2442 cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
2443 cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
2444 cfp->cl_first = lop->lo_first;
2445 cfp->cl_end = lop->lo_end;
2446 cfp->cl_flags = lop->lo_flags;
2447 cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
2448 NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
2449 cfp->cl_ownerlen);
2450 }
2451 if (ret == 2)
2452 error = NFSERR_PERM;
2453 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2454 error = NFSERR_RECLAIMCONFLICT;
2455 else if (new_stp->ls_flags & NFSLCK_CHECK)
2456 error = NFSERR_LOCKED;
2457 else
2458 error = NFSERR_DENIED;
2459 if (filestruct_locked != 0 && ret == 0) {
2460 /* Roll back local locks. */
2461 NFSUNLOCKSTATE();
2462 if (vnode_unlocked == 0) {
2463 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
2464 vnode_unlocked = 1;
2465 NFSVOPUNLOCK(vp);
2466 }
2467 nfsrv_locallock_rollback(vp, lfp, p);
2468 NFSLOCKSTATE();
2469 nfsrv_unlocklf(lfp);
2470 }
2471 if (ret == 0)
2472 NFSUNLOCKSTATE();
2473 goto out;
2474 }
2475 }
2476 }
2477
2478 /*
2479 * We only get here if there was no lock that conflicted.
2480 */
2481 if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
2482 NFSUNLOCKSTATE();
2483 goto out;
2484 }
2485
2486 /*
2487 * We only get here when we are creating or modifying a lock.
2488 * There are two variants:
2489 * - exist_lock_owner where lock_owner exists
2490 * - open_to_lock_owner with new lock_owner
2491 */
2492 first = new_lop->lo_first;
2493 end = new_lop->lo_end;
2494 lock_flags = new_lop->lo_flags;
2495 if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
2496 nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
2497 stateidp->seqid = ++(lckstp->ls_stateid.seqid);
2498 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2499 stateidp->seqid = lckstp->ls_stateid.seqid = 1;
2500 stateidp->other[0] = lckstp->ls_stateid.other[0];
2501 stateidp->other[1] = lckstp->ls_stateid.other[1];
2502 stateidp->other[2] = lckstp->ls_stateid.other[2];
2503 } else {
2504 /*
2505 * The new open_to_lock_owner case.
2506 * Link the new nfsstate into the lists.
2507 */
2508 new_stp->ls_seq = new_stp->ls_opentolockseq;
2509 nfsrvd_refcache(new_stp->ls_op);
2510 stateidp->seqid = new_stp->ls_stateid.seqid = 1;
2511 stateidp->other[0] = new_stp->ls_stateid.other[0] =
2512 clp->lc_clientid.lval[0];
2513 stateidp->other[1] = new_stp->ls_stateid.other[1] =
2514 clp->lc_clientid.lval[1];
2515 stateidp->other[2] = new_stp->ls_stateid.other[2] =
2516 nfsrv_nextstateindex(clp);
2517 new_stp->ls_clp = clp;
2518 LIST_INIT(&new_stp->ls_lock);
2519 new_stp->ls_openstp = stp;
2520 new_stp->ls_lfp = lfp;
2521 nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
2522 lfp);
2523 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
2524 new_stp, ls_hash);
2525 LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
2526 *new_lopp = NULL;
2527 *new_stpp = NULL;
2528 VNET(nfsstatsv1_p)->srvlockowners++;
2529 nfsrv_openpluslock++;
2530 }
2531 if (filestruct_locked != 0) {
2532 NFSUNLOCKSTATE();
2533 nfsrv_locallock_commit(lfp, lock_flags, first, end);
2534 NFSLOCKSTATE();
2535 nfsrv_unlocklf(lfp);
2536 }
2537 NFSUNLOCKSTATE();
2538
2539 out:
2540 if (haslock) {
2541 NFSLOCKV4ROOTMUTEX();
2542 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2543 NFSUNLOCKV4ROOTMUTEX();
2544 }
2545 if (vnode_unlocked != 0) {
2546 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2547 if (error == 0 && VN_IS_DOOMED(vp))
2548 error = NFSERR_SERVERFAULT;
2549 }
2550 if (other_lop)
2551 free(other_lop, M_NFSDLOCK);
2552 NFSEXITCODE2(error, nd);
2553 return (error);
2554 }
2555
/*
 * Check for state errors for Open.
 * repstat is passed back out as an error if more critical errors
 * are not detected.
 *
 * Arguments, as used by the code below:
 *	clientid - client to look up with nfsrv_getclient() (its lease is
 *		   looked up with CLOPS_RENEW).
 *	stateidp - only examined for NFSLCK_DELEGCUR, where it is matched
 *		   against the delegations recorded for the file.
 *	new_stp	 - the proposed open; ls_flags, ls_seq, ls_op and
 *		   ls_stateid are read.
 *	vp	 - vnode of the file being opened, or NULL when the file
 *		   does not exist yet, in which case only the
 *		   client/openowner/grace checks are performed.
 *	nd, p	 - passed through to the helper functions; nd->nd_flag is
 *		   also tested for ND_NFSV41.
 *	repstat	 - status supplied by the caller, returned only when no
 *		   error is detected by the checks up to and including
 *		   the grace check.
 *
 * Returns 0 when no problem was found, otherwise an error value:
 * the value from nfsrv_checkrestart(), nfsrv_getclient(),
 * nfsrv_checkseqid(), nfsrv_checkgrace(), nfsrv_getlockfh(),
 * nfsrv_getlockfile() or nfsrv_delegconflict(), or one of
 * NFSERR_RESOURCE, NFSERR_NOGRACE, NFSERR_EXPIRED, NFSERR_PERM,
 * NFSERR_RECLAIMCONFLICT, NFSERR_SHAREDENIED, or repstat.
 *
 * Locking, as visible here: NFSLOCKSTATE() is taken after the "tryagain"
 * label and every exit past that point either calls NFSUNLOCKSTATE() or
 * follows a callee that is documented below as having unlocked state.
 * haslock is only changed through the &haslock argument handed to
 * nfsrv_clientconflict() and nfsrv_delegconflict(); when it is non-zero
 * nfsv4rootfs_lock is released with nfsv4_unlock() before returning.
 */
int
nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
    struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
    NFSPROC_T *p, int repstat)
{
	struct nfsstate *stp, *nstp;
	struct nfsclient *clp;
	struct nfsstate *ownerstp;
	struct nfslockfile *lfp, *new_lfp;
	int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;

	/*
	 * readonly is set when the share bits of the request are exactly
	 * NFSLCK_READACCESS. It selects which delegations are treated as
	 * conflicting further down.
	 */
	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
		readonly = 1;
	/*
	 * Check for restart conditions (client and server).
	 */
	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
	    &new_stp->ls_stateid, 0);
	if (error)
		goto out;

	/*
	 * Check for state resource limit exceeded.
	 * Technically this should be SMP protected, but the worst
	 * case error is "out by one or two" on the count when it
	 * returns NFSERR_RESOURCE and the limit is just a rather
	 * arbitrary high water mark, so no harm is done.
	 */
	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
		error = NFSERR_RESOURCE;
		goto out;
	}

	/*
	 * Restart point. It is re-entered when nfsrv_clientconflict()
	 * returns 1 or nfsrv_delegconflict() returns -1; per the comments
	 * at those call sites the state lock has been dropped by then.
	 * A fresh nfslockfile is allocated (M_WAITOK) and the file handle
	 * is fetched before the state lock is acquired.
	 */
tryagain:
	new_lfp = malloc(sizeof (struct nfslockfile),
	    M_NFSDLOCKFILE, M_WAITOK);
	/* With no vnode there is nothing to look up; getfhret stays 0. */
	if (vp)
		getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
		    NULL, p);
	NFSLOCKSTATE();
	/*
	 * Get the nfsclient structure.
	 */
	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
	    (nfsquad_t)((u_quad_t)0), 0, nd, p);

	/*
	 * Look up the open owner. See if it needs confirmation and
	 * check the seq#, as required.
	 * (ownerstp is only assigned, and only read, when error == 0.)
	 */
	if (!error)
		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);

	if (!error && ownerstp) {
		error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
		    new_stp->ls_op);
		/*
		 * If the OpenOwner hasn't been confirmed, assume the
		 * old one was a replay and this one is ok.
		 * See: RFC3530 Sec. 14.2.18.
		 */
		if (error == NFSERR_BADSEQID &&
		    (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
			error = 0;
	}

	/*
	 * Check for grace.
	 * A reclaim that passes the grace check is still refused with
	 * NFSERR_NOGRACE when nfsrv_checkstable() returns non-zero for
	 * the client.
	 */
	if (!error)
		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
	    nfsrv_checkstable(clp))
		error = NFSERR_NOGRACE;

	/*
	 * If none of the above errors occurred, let repstat be
	 * returned.
	 */
	if (repstat && !error)
		error = repstat;
	if (error) {
		/* Drop the state lock, then the v4root lock if it is held. */
		NFSUNLOCKSTATE();
		if (haslock) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		free(new_lfp, M_NFSDLOCKFILE);
		goto out;
	}

	/*
	 * If vp == NULL, the file doesn't exist yet, so return ok.
	 * (This always happens on the first pass, so haslock must be 0.)
	 */
	if (vp == NULL) {
		NFSUNLOCKSTATE();
		free(new_lfp, M_NFSDLOCKFILE);
		goto out;
	}

	/*
	 * Get the structure for the underlying file.
	 * A failure from the earlier nfsrv_getlockfh() call is reported
	 * here, after the client/owner/grace checks, and in that case
	 * nfsrv_getlockfile() is not called.
	 */
	if (getfhret)
		error = getfhret;
	else
		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
		    NULL, 0);
	/*
	 * Free whatever is still referenced by new_lfp. nfsrv_getlockfile()
	 * is handed &new_lfp, so presumably it sets new_lfp to NULL when it
	 * keeps the structure -- TODO confirm against nfsrv_getlockfile().
	 */
	if (new_lfp)
		free(new_lfp, M_NFSDLOCKFILE);
	if (error) {
		NFSUNLOCKSTATE();
		if (haslock) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		goto out;
	}

	/*
	 * Search for a conflicting open/share.
	 */
	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
		/*
		 * For Delegate_Cur, search for the matching Delegation,
		 * which indicates no conflict.
		 * An old delegation should have been recovered by the
		 * client doing a Claim_DELEGATE_Prev, so I won't let
		 * it match and return NFSERR_EXPIRED. Should I let it
		 * match?
		 *
		 * A delegation matches when it is not marked
		 * NFSLCK_OLDDELEG, its "other" field equals the one in
		 * stateidp, and the seqid is equal (or, for ND_NFSV41
		 * requests, the supplied seqid is 0).
		 */
		LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
			if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
			    (((nd->nd_flag & ND_NFSV41) != 0 &&
			    stateidp->seqid == 0) ||
			    stateidp->seqid == stp->ls_stateid.seqid) &&
			    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
			    NFSX_STATEIDOTHER))
				break;
		}
		/*
		 * Fail with NFSERR_EXPIRED when nothing matched, or when
		 * write access is requested but the matching delegation
		 * is a read delegation. (stp is only dereferenced after
		 * the end-of-list test.)
		 */
		if (stp == LIST_END(&lfp->lf_deleg) ||
		    ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
		    (stp->ls_flags & NFSLCK_DELEGREAD))) {
			NFSUNLOCKSTATE();
			if (haslock) {
				NFSLOCKV4ROOTMUTEX();
				nfsv4_unlock(&nfsv4rootfs_lock, 1);
				NFSUNLOCKV4ROOTMUTEX();
			}
			error = NFSERR_EXPIRED;
			goto out;
		}
	}

	/*
	 * Check for access/deny bit conflicts. I check for the same
	 * owner as well, in case the client didn't bother.
	 * (Skipped entirely for NFSLCK_DELEGCUR.) The test is symmetric:
	 * the requested access bits are checked against the existing
	 * open's flags shifted right by NFSLCK_SHIFT, and the existing
	 * open's access bits against the request's flags shifted the
	 * same way.
	 */
	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
		    (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
		    ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
		    ((stp->ls_flags & NFSLCK_ACCESSBITS) &
		    ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
			if (ret == 1) {
				/*
				 * nfsrv_clientconflict() unlocks
				 * state when it returns non-zero.
				 */
				goto tryagain;
			}
			/*
			 * ret is 0 or 2 here (assuming those are the only
			 * other return values): 2 maps to NFSERR_PERM,
			 * otherwise the conflict is reported as a reclaim
			 * conflict or a share denial.
			 */
			if (ret == 2)
				error = NFSERR_PERM;
			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
				error = NFSERR_RECLAIMCONFLICT;
			else
				error = NFSERR_SHAREDENIED;
			/* State is still locked only when ret == 0. */
			if (ret == 0)
				NFSUNLOCKSTATE();
			if (haslock) {
				NFSLOCKV4ROOTMUTEX();
				nfsv4_unlock(&nfsv4rootfs_lock, 1);
				NFSUNLOCKV4ROOTMUTEX();
			}
			goto out;
		}
	}

	/*
	 * Check for a conflicting delegation. If one is found, call
	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
	 * been set yet, it will get the lock. Otherwise, it will recall
	 * the delegation. Then, we try, try again...
	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
	 * isn't a conflict.)
	 * I currently believe the conflict algorithm to be:
	 * For Open with Read Access and Deny None
	 * - there is a conflict iff a different client has a write delegation
	 * For Open with other Write Access or any Deny except None
	 * - there is a conflict if a different client has any delegation
	 * - there is a conflict if the same client has a read delegation
	 *   (The current consensus is that this last case should be
	 *   considered a conflict since the client with a read delegation
	 *   could have done an Open with ReadAccess and WriteDeny
	 *   locally and then not have checked for the WriteDeny.)
	 *   The exception is a NFSv4.1/4.2 client that has requested
	 *   an atomic upgrade to a write delegation.
	 * Don't check for a Reclaim, since that will be dealt with
	 * by nfsrv_openctrl().
	 */
	if (!(new_stp->ls_flags &
	    (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
		stp = LIST_FIRST(&lfp->lf_deleg);
		while (stp != LIST_END(&lfp->lf_deleg)) {
			/* Fetch the successor before stp is handed off. */
			nstp = LIST_NEXT(stp, ls_file);
			if ((readonly && stp->ls_clp != clp &&
			    (stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ||
			    (!readonly && (stp->ls_clp != clp ||
			    ((stp->ls_flags & NFSLCK_DELEGREAD) != 0 &&
			    (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)))) {
				ret = nfsrv_delegconflict(stp, &haslock, p, vp);
				if (ret) {
					/*
					 * nfsrv_delegconflict() unlocks state
					 * when it returns non-zero.
					 *
					 * NOTE(review): unlike the other
					 * exits, this path does not release
					 * nfsv4rootfs_lock when haslock is
					 * set. Presumably
					 * nfsrv_delegconflict() has already
					 * released it and cleared haslock
					 * whenever it returns a value other
					 * than -1 -- confirm.
					 */
					if (ret == -1)
						goto tryagain;
					error = ret;
					goto out;
				}
			}
			stp = nstp;
		}
	}
	/* No conflict found: release the locks and return error == 0. */
	NFSUNLOCKSTATE();
	if (haslock) {
		NFSLOCKV4ROOTMUTEX();
		nfsv4_unlock(&nfsv4rootfs_lock, 1);
		NFSUNLOCKV4ROOTMUTEX();
	}

out:
	NFSEXITCODE2(error, nd);
	return (error);
}
2811
2812 /*
2813 * Open control function to create/update open state for an open.
2814 */
2815 int
nfsrv_openctrl(struct nfsrv_descript * nd,vnode_t vp,struct nfsstate ** new_stpp,nfsquad_t clientid,nfsv4stateid_t * stateidp,nfsv4stateid_t * delegstateidp,u_int32_t * rflagsp,struct nfsexstuff * exp,NFSPROC_T * p,u_quad_t filerev)2816 nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
2817 struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
2818 nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
2819 NFSPROC_T *p, u_quad_t filerev)
2820 {
2821 struct nfsstate *new_stp = *new_stpp;
2822 struct nfsstate *stp, *nstp;
2823 struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
2824 struct nfslockfile *lfp, *new_lfp;
2825 struct nfsclient *clp;
2826 int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
2827 int readonly = 0, cbret = 1, getfhret = 0;
2828 int gotstate = 0, len = 0;
2829 u_char *clidp = NULL;
2830
2831 if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2832 readonly = 1;
2833 /*
2834 * Check for restart conditions (client and server).
2835 * (Paranoia, should have been detected by nfsrv_opencheck().)
	 * If an error does show up, return NFSERR_EXPIRED, since the
	 * seqid# has already been incremented.
2838 */
2839 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2840 &new_stp->ls_stateid, 0);
2841 if (error) {
2842 printf("Nfsd: openctrl unexpected restart err=%d\n",
2843 error);
2844 error = NFSERR_EXPIRED;
2845 goto out;
2846 }
2847
2848 clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
2849 tryagain:
2850 new_lfp = malloc(sizeof (struct nfslockfile),
2851 M_NFSDLOCKFILE, M_WAITOK);
2852 new_open = malloc(sizeof (struct nfsstate),
2853 M_NFSDSTATE, M_WAITOK);
2854 new_deleg = malloc(sizeof (struct nfsstate),
2855 M_NFSDSTATE, M_WAITOK);
2856 getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2857 NULL, p);
2858 NFSLOCKSTATE();
2859 /*
2860 * Get the client structure. Since the linked lists could be changed
2861 * by other nfsd processes if this process does a tsleep(), one of
2862 * two things must be done.
2863 * 1 - don't tsleep()
2864 * or
2865 * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
2866 * before using the lists, since this lock stops the other
2867 * nfsd. This should only be used for rare cases, since it
2868 * essentially single threads the nfsd.
2869 * At this time, it is only done for cases where the stable
2870 * storage file must be written prior to completion of state
2871 * expiration.
2872 */
2873 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2874 (nfsquad_t)((u_quad_t)0), 0, nd, p);
2875 if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
2876 clp->lc_program) {
2877 /*
2878 * This happens on the first open for a client
2879 * that supports callbacks.
2880 */
2881 NFSUNLOCKSTATE();
2882 /*
2883 * Although nfsrv_docallback() will sleep, clp won't
2884 * go away, since they are only removed when the
2885 * nfsv4_lock() has blocked the nfsd threads. The
2886 * fields in clp can change, but having multiple
2887 * threads do this Null callback RPC should be
2888 * harmless.
2889 */
2890 cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
2891 NULL, 0, NULL, NULL, NULL, 0, p);
2892 NFSLOCKSTATE();
2893 clp->lc_flags &= ~LCL_NEEDSCBNULL;
2894 if (!cbret)
2895 clp->lc_flags |= LCL_CALLBACKSON;
2896 }
2897
2898 /*
2899 * Look up the open owner. See if it needs confirmation and
2900 * check the seq#, as required.
2901 */
2902 if (!error)
2903 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2904
2905 if (error) {
2906 NFSUNLOCKSTATE();
2907 printf("Nfsd: openctrl unexpected state err=%d\n",
2908 error);
2909 free(new_lfp, M_NFSDLOCKFILE);
2910 free(new_open, M_NFSDSTATE);
2911 free(new_deleg, M_NFSDSTATE);
2912 if (haslock) {
2913 NFSLOCKV4ROOTMUTEX();
2914 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2915 NFSUNLOCKV4ROOTMUTEX();
2916 }
2917 error = NFSERR_EXPIRED;
2918 goto out;
2919 }
2920
2921 if (new_stp->ls_flags & NFSLCK_RECLAIM)
2922 nfsrv_markstable(clp);
2923
2924 /*
2925 * Get the structure for the underlying file.
2926 */
2927 if (getfhret)
2928 error = getfhret;
2929 else
2930 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2931 NULL, 0);
2932 if (new_lfp)
2933 free(new_lfp, M_NFSDLOCKFILE);
2934 if (error) {
2935 NFSUNLOCKSTATE();
2936 printf("Nfsd openctrl unexpected getlockfile err=%d\n",
2937 error);
2938 free(new_open, M_NFSDSTATE);
2939 free(new_deleg, M_NFSDSTATE);
2940 if (haslock) {
2941 NFSLOCKV4ROOTMUTEX();
2942 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2943 NFSUNLOCKV4ROOTMUTEX();
2944 }
2945 goto out;
2946 }
2947
2948 /*
2949 * Search for a conflicting open/share.
2950 */
2951 if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2952 /*
2953 * For Delegate_Cur, search for the matching Delegation,
2954 * which indicates no conflict.
2955 * An old delegation should have been recovered by the
2956 * client doing a Claim_DELEGATE_Prev, so I won't let
2957 * it match and return NFSERR_EXPIRED. Should I let it
2958 * match?
2959 */
2960 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2961 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2962 (((nd->nd_flag & ND_NFSV41) != 0 &&
2963 stateidp->seqid == 0) ||
2964 stateidp->seqid == stp->ls_stateid.seqid) &&
2965 !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2966 NFSX_STATEIDOTHER))
2967 break;
2968 }
2969 if (stp == LIST_END(&lfp->lf_deleg) ||
2970 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2971 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2972 NFSUNLOCKSTATE();
2973 printf("Nfsd openctrl unexpected expiry\n");
2974 free(new_open, M_NFSDSTATE);
2975 free(new_deleg, M_NFSDSTATE);
2976 if (haslock) {
2977 NFSLOCKV4ROOTMUTEX();
2978 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2979 NFSUNLOCKV4ROOTMUTEX();
2980 }
2981 error = NFSERR_EXPIRED;
2982 goto out;
2983 }
2984
2985 /*
2986 * Don't issue a Delegation, since one already exists and
2987 * delay delegation timeout, as required.
2988 */
2989 delegate = 0;
2990 nfsrv_delaydelegtimeout(stp);
2991 }
2992
2993 /*
2994 * Check for access/deny bit conflicts. I also check for the
2995 * same owner, since the client might not have bothered to check.
2996 * Also, note an open for the same file and owner, if found,
2997 * which is all we do here for Delegate_Cur, since conflict
2998 * checking is already done.
2999 */
3000 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
3001 if (ownerstp && stp->ls_openowner == ownerstp)
3002 openstp = stp;
3003 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
3004 /*
3005 * If another client has the file open, the only
3006 * delegation that can be issued is a Read delegation
3007 * and only if it is a Read open with Deny none.
3008 */
3009 if (clp != stp->ls_clp) {
3010 if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
3011 NFSLCK_READACCESS)
3012 writedeleg = 0;
3013 else
3014 delegate = 0;
3015 }
3016 if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
3017 ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
3018 ((stp->ls_flags & NFSLCK_ACCESSBITS) &
3019 ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
3020 ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
3021 if (ret == 1) {
3022 /*
3023 * nfsrv_clientconflict() unlocks state
3024 * when it returns non-zero.
3025 */
3026 free(new_open, M_NFSDSTATE);
3027 free(new_deleg, M_NFSDSTATE);
3028 openstp = NULL;
3029 goto tryagain;
3030 }
3031 if (ret == 2)
3032 error = NFSERR_PERM;
3033 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
3034 error = NFSERR_RECLAIMCONFLICT;
3035 else
3036 error = NFSERR_SHAREDENIED;
3037 if (ret == 0)
3038 NFSUNLOCKSTATE();
3039 if (haslock) {
3040 NFSLOCKV4ROOTMUTEX();
3041 nfsv4_unlock(&nfsv4rootfs_lock, 1);
3042 NFSUNLOCKV4ROOTMUTEX();
3043 }
3044 free(new_open, M_NFSDSTATE);
3045 free(new_deleg, M_NFSDSTATE);
3046 printf("nfsd openctrl unexpected client cnfl\n");
3047 goto out;
3048 }
3049 }
3050 }
3051
3052 /*
3053 * Check for a conflicting delegation. If one is found, call
3054 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
3055 * been set yet, it will get the lock. Otherwise, it will recall
3056 * the delegation. Then, we try try again...
3057 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
3058 * isn't a conflict.)
3059 * I currently believe the conflict algorithm to be:
3060 * For Open with Read Access and Deny None
3061 * - there is a conflict iff a different client has a write delegation
3062 * For Open with other Write Access or any Deny except None
3063 * - there is a conflict if a different client has any delegation
3064 * - there is a conflict if the same client has a read delegation
3065 * (The current consensus is that this last case should be
3066 * considered a conflict since the client with a read delegation
3067 * could have done an Open with ReadAccess and WriteDeny
3068 * locally and then not have checked for the WriteDeny.)
3069 * The exception is a NFSv4.1/4.2 client that has requested
3070 * an atomic upgrade to a write delegation.
3071 */
3072 if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
3073 stp = LIST_FIRST(&lfp->lf_deleg);
3074 while (stp != LIST_END(&lfp->lf_deleg)) {
3075 nstp = LIST_NEXT(stp, ls_file);
3076 if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
3077 writedeleg = 0;
3078 else if (stp->ls_clp != clp ||
3079 (stp->ls_flags & NFSLCK_DELEGWRITE) != 0 ||
3080 (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)
3081 delegate = 0;
3082 if ((readonly && stp->ls_clp != clp &&
3083 (stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ||
3084 (!readonly && (stp->ls_clp != clp ||
3085 ((stp->ls_flags & NFSLCK_DELEGREAD) != 0 &&
3086 (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)))) {
3087 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3088 delegate = 2;
3089 } else {
3090 ret = nfsrv_delegconflict(stp, &haslock, p, vp);
3091 if (ret) {
3092 /*
3093 * nfsrv_delegconflict() unlocks state
3094 * when it returns non-zero.
3095 */
3096 printf("Nfsd openctrl unexpected deleg cnfl\n");
3097 free(new_open, M_NFSDSTATE);
3098 free(new_deleg, M_NFSDSTATE);
3099 if (ret == -1) {
3100 openstp = NULL;
3101 goto tryagain;
3102 }
3103 error = ret;
3104 goto out;
3105 }
3106 }
3107 }
3108 stp = nstp;
3109 }
3110 }
3111
3112 /*
3113 * We only get here if there was no open that conflicted.
3114 * If an open for the owner exists, or in the access/deny bits.
3115 * Otherwise it is a new open. If the open_owner hasn't been
3116 * confirmed, replace the open with the new one needing confirmation,
3117 * otherwise add the open.
3118 */
3119 if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
3120 /*
3121 * Handle NFSLCK_DELEGPREV by searching the old delegations for
3122 * a match. If found, just move the old delegation to the current
3123 * delegation list and issue open. If not found, return
3124 * NFSERR_EXPIRED.
3125 */
3126 LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
3127 if (stp->ls_lfp == lfp) {
3128 /* Found it */
3129 if (stp->ls_clp != clp)
3130 panic("olddeleg clp");
3131 LIST_REMOVE(stp, ls_list);
3132 LIST_REMOVE(stp, ls_hash);
3133 stp->ls_flags &= ~NFSLCK_OLDDELEG;
3134 stp->ls_stateid.seqid = delegstateidp->seqid = 1;
3135 stp->ls_stateid.other[0] = delegstateidp->other[0] =
3136 clp->lc_clientid.lval[0];
3137 stp->ls_stateid.other[1] = delegstateidp->other[1] =
3138 clp->lc_clientid.lval[1];
3139 stp->ls_stateid.other[2] = delegstateidp->other[2] =
3140 nfsrv_nextstateindex(clp);
3141 stp->ls_compref = nd->nd_compref;
3142 LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
3143 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3144 stp->ls_stateid), stp, ls_hash);
3145 if (stp->ls_flags & NFSLCK_DELEGWRITE)
3146 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3147 else
3148 *rflagsp |= NFSV4OPEN_READDELEGATE;
3149 clp->lc_delegtime = NFSD_MONOSEC +
3150 nfsrv_lease + NFSRV_LEASEDELTA;
3151
3152 /*
3153 * Now, do the associated open.
3154 */
3155 new_open->ls_stateid.seqid = 1;
3156 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3157 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3158 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3159 new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
3160 NFSLCK_OPEN;
3161 if (stp->ls_flags & NFSLCK_DELEGWRITE)
3162 new_open->ls_flags |= (NFSLCK_READACCESS |
3163 NFSLCK_WRITEACCESS);
3164 else
3165 new_open->ls_flags |= NFSLCK_READACCESS;
3166 new_open->ls_uid = new_stp->ls_uid;
3167 new_open->ls_lfp = lfp;
3168 new_open->ls_clp = clp;
3169 LIST_INIT(&new_open->ls_open);
3170 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3171 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3172 new_open, ls_hash);
3173 /*
3174 * and handle the open owner
3175 */
3176 if (ownerstp) {
3177 new_open->ls_openowner = ownerstp;
3178 LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
3179 } else {
3180 new_open->ls_openowner = new_stp;
3181 new_stp->ls_flags = 0;
3182 nfsrvd_refcache(new_stp->ls_op);
3183 new_stp->ls_noopens = 0;
3184 LIST_INIT(&new_stp->ls_open);
3185 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3186 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3187 *new_stpp = NULL;
3188 VNET(nfsstatsv1_p)->srvopenowners++;
3189 nfsrv_openpluslock++;
3190 }
3191 openstp = new_open;
3192 new_open = NULL;
3193 VNET(nfsstatsv1_p)->srvopens++;
3194 nfsrv_openpluslock++;
3195 break;
3196 }
3197 }
3198 if (stp == LIST_END(&clp->lc_olddeleg))
3199 error = NFSERR_EXPIRED;
3200 } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
3201 /*
3202 * Scan to see that no delegation for this client and file
3203 * doesn't already exist.
3204 * There also shouldn't yet be an Open for this file and
3205 * openowner.
3206 */
3207 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
3208 if (stp->ls_clp == clp)
3209 break;
3210 }
3211 if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
3212 /*
3213 * This is the Claim_Previous case with a delegation
3214 * type != Delegate_None.
3215 */
3216 /*
3217 * First, add the delegation. (Although we must issue the
3218 * delegation, we can also ask for an immediate return.)
3219 */
3220 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3221 new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
3222 clp->lc_clientid.lval[0];
3223 new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
3224 clp->lc_clientid.lval[1];
3225 new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
3226 nfsrv_nextstateindex(clp);
3227 if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
3228 new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3229 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3230 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3231 nfsrv_writedelegcnt++;
3232 } else {
3233 new_deleg->ls_flags = (NFSLCK_DELEGREAD |
3234 NFSLCK_READACCESS);
3235 *rflagsp |= NFSV4OPEN_READDELEGATE;
3236 }
3237 new_deleg->ls_uid = new_stp->ls_uid;
3238 new_deleg->ls_lfp = lfp;
3239 new_deleg->ls_clp = clp;
3240 new_deleg->ls_filerev = filerev;
3241 new_deleg->ls_compref = nd->nd_compref;
3242 new_deleg->ls_lastrecall = 0;
3243 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3244 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3245 new_deleg->ls_stateid), new_deleg, ls_hash);
3246 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3247 new_deleg = NULL;
3248 if (delegate == 2 || nfsrv_issuedelegs == 0 ||
3249 (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3250 LCL_CALLBACKSON ||
3251 NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
3252 !NFSVNO_DELEGOK(vp))
3253 *rflagsp |= NFSV4OPEN_RECALL;
3254 VNET(nfsstatsv1_p)->srvdelegates++;
3255 nfsrv_openpluslock++;
3256 nfsrv_delegatecnt++;
3257
3258 /*
3259 * Now, do the associated open.
3260 */
3261 new_open->ls_stateid.seqid = 1;
3262 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3263 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3264 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3265 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
3266 NFSLCK_OPEN;
3267 if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
3268 new_open->ls_flags |= (NFSLCK_READACCESS |
3269 NFSLCK_WRITEACCESS);
3270 else
3271 new_open->ls_flags |= NFSLCK_READACCESS;
3272 new_open->ls_uid = new_stp->ls_uid;
3273 new_open->ls_lfp = lfp;
3274 new_open->ls_clp = clp;
3275 LIST_INIT(&new_open->ls_open);
3276 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3277 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3278 new_open, ls_hash);
3279 /*
3280 * and handle the open owner
3281 */
3282 if (ownerstp) {
3283 new_open->ls_openowner = ownerstp;
3284 LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3285 } else {
3286 new_open->ls_openowner = new_stp;
3287 new_stp->ls_flags = 0;
3288 nfsrvd_refcache(new_stp->ls_op);
3289 new_stp->ls_noopens = 0;
3290 LIST_INIT(&new_stp->ls_open);
3291 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3292 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3293 *new_stpp = NULL;
3294 VNET(nfsstatsv1_p)->srvopenowners++;
3295 nfsrv_openpluslock++;
3296 }
3297 openstp = new_open;
3298 new_open = NULL;
3299 VNET(nfsstatsv1_p)->srvopens++;
3300 nfsrv_openpluslock++;
3301 } else {
3302 error = NFSERR_RECLAIMCONFLICT;
3303 }
3304 } else if (ownerstp) {
3305 if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
3306 /* Replace the open */
3307 if (ownerstp->ls_op)
3308 nfsrvd_derefcache(ownerstp->ls_op);
3309 ownerstp->ls_op = new_stp->ls_op;
3310 nfsrvd_refcache(ownerstp->ls_op);
3311 ownerstp->ls_seq = new_stp->ls_seq;
3312 *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3313 stp = LIST_FIRST(&ownerstp->ls_open);
3314 stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3315 NFSLCK_OPEN;
3316 stp->ls_stateid.seqid = 1;
3317 stp->ls_uid = new_stp->ls_uid;
3318 if (lfp != stp->ls_lfp) {
3319 LIST_REMOVE(stp, ls_file);
3320 LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
3321 stp->ls_lfp = lfp;
3322 }
3323 openstp = stp;
3324 } else if (openstp) {
3325 openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
3326 openstp->ls_stateid.seqid++;
3327 if ((nd->nd_flag & ND_NFSV41) != 0 &&
3328 openstp->ls_stateid.seqid == 0)
3329 openstp->ls_stateid.seqid = 1;
3330
3331 /*
3332 * This is where we can choose to issue a delegation.
3333 */
3334 nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
3335 readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
3336 new_stp, lfp, rflagsp, delegstateidp);
3337 } else {
3338 new_open->ls_stateid.seqid = 1;
3339 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3340 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3341 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3342 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
3343 NFSLCK_OPEN;
3344 new_open->ls_uid = new_stp->ls_uid;
3345 new_open->ls_openowner = ownerstp;
3346 new_open->ls_lfp = lfp;
3347 new_open->ls_clp = clp;
3348 LIST_INIT(&new_open->ls_open);
3349 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3350 LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3351 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3352 new_open, ls_hash);
3353 openstp = new_open;
3354 new_open = NULL;
3355 VNET(nfsstatsv1_p)->srvopens++;
3356 nfsrv_openpluslock++;
3357
3358 /*
3359 * This is where we can choose to issue a delegation.
3360 */
3361 nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
3362 readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
3363 new_stp, lfp, rflagsp, delegstateidp);
3364 }
3365 } else {
3366 /*
3367 * New owner case. Start the open_owner sequence with a
3368 * Needs confirmation (unless a reclaim) and hang the
3369 * new open off it.
3370 */
3371 new_open->ls_stateid.seqid = 1;
3372 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3373 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3374 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3375 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3376 NFSLCK_OPEN;
3377 new_open->ls_uid = new_stp->ls_uid;
3378 LIST_INIT(&new_open->ls_open);
3379 new_open->ls_openowner = new_stp;
3380 new_open->ls_lfp = lfp;
3381 new_open->ls_clp = clp;
3382 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3383 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3384 new_stp->ls_flags = 0;
3385 } else if ((nd->nd_flag & ND_NFSV41) != 0) {
3386 /*
3387 * This is where we can choose to issue a delegation.
3388 */
3389 nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
3390 readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
3391 new_stp, lfp, rflagsp, delegstateidp);
3392 /* NFSv4.1 never needs confirmation. */
3393 new_stp->ls_flags = 0;
3394
3395 /*
3396 * Since NFSv4.1 never does an OpenConfirm, the first
3397 * open state will be acquired here.
3398 */
3399 if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3400 clp->lc_flags |= LCL_STAMPEDSTABLE;
3401 len = clp->lc_idlen;
3402 NFSBCOPY(clp->lc_id, clidp, len);
3403 gotstate = 1;
3404 }
3405 } else {
3406 *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3407 new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
3408 }
3409 nfsrvd_refcache(new_stp->ls_op);
3410 new_stp->ls_noopens = 0;
3411 LIST_INIT(&new_stp->ls_open);
3412 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3413 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3414 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3415 new_open, ls_hash);
3416 openstp = new_open;
3417 new_open = NULL;
3418 *new_stpp = NULL;
3419 VNET(nfsstatsv1_p)->srvopens++;
3420 nfsrv_openpluslock++;
3421 VNET(nfsstatsv1_p)->srvopenowners++;
3422 nfsrv_openpluslock++;
3423 }
3424 if (!error) {
3425 stateidp->seqid = openstp->ls_stateid.seqid;
3426 stateidp->other[0] = openstp->ls_stateid.other[0];
3427 stateidp->other[1] = openstp->ls_stateid.other[1];
3428 stateidp->other[2] = openstp->ls_stateid.other[2];
3429 }
3430 NFSUNLOCKSTATE();
3431 if (haslock) {
3432 NFSLOCKV4ROOTMUTEX();
3433 nfsv4_unlock(&nfsv4rootfs_lock, 1);
3434 NFSUNLOCKV4ROOTMUTEX();
3435 }
3436 if (new_open)
3437 free(new_open, M_NFSDSTATE);
3438 if (new_deleg)
3439 free(new_deleg, M_NFSDSTATE);
3440
3441 /*
3442 * If the NFSv4.1 client just acquired its first open, write a timestamp
3443 * to the stable storage file.
3444 */
3445 if (gotstate != 0) {
3446 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3447 nfsrv_backupstable();
3448 }
3449
3450 out:
3451 free(clidp, M_TEMP);
3452 NFSEXITCODE2(error, nd);
3453 return (error);
3454 }
3455
3456 /*
3457 * Open update. Does the confirm, downgrade and close.
3458 */
3459 int
nfsrv_openupdate(vnode_t vp,struct nfsstate * new_stp,nfsquad_t clientid,nfsv4stateid_t * stateidp,struct nfsrv_descript * nd,NFSPROC_T * p,int * retwriteaccessp)3460 nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
3461 nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p,
3462 int *retwriteaccessp)
3463 {
3464 struct nfsstate *stp;
3465 struct nfsclient *clp;
3466 u_int32_t bits;
3467 int error = 0, gotstate = 0, len = 0;
3468 u_char *clidp = NULL;
3469
3470 /*
3471 * Check for restart conditions (client and server).
3472 */
3473 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3474 &new_stp->ls_stateid, 0);
3475 if (error)
3476 goto out;
3477
3478 clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
3479 NFSLOCKSTATE();
3480 /*
3481 * Get the open structure via clientid and stateid.
3482 */
3483 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3484 (nfsquad_t)((u_quad_t)0), 0, nd, p);
3485 if (!error)
3486 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
3487 new_stp->ls_flags, &stp);
3488
3489 /*
3490 * Sanity check the open.
3491 */
3492 if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
3493 (!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
3494 (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
3495 ((new_stp->ls_flags & NFSLCK_CONFIRM) &&
3496 (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
3497 error = NFSERR_BADSTATEID;
3498
3499 if (!error)
3500 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
3501 stp->ls_openowner, new_stp->ls_op);
3502 if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
3503 (((nd->nd_flag & ND_NFSV41) == 0 &&
3504 !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
3505 ((nd->nd_flag & ND_NFSV41) != 0 &&
3506 new_stp->ls_stateid.seqid != 0)))
3507 error = NFSERR_OLDSTATEID;
3508 if (!error && vp->v_type != VREG) {
3509 if (vp->v_type == VDIR)
3510 error = NFSERR_ISDIR;
3511 else
3512 error = NFSERR_INVAL;
3513 }
3514
3515 if (error) {
3516 /*
3517 * If a client tries to confirm an Open with a bad
3518 * seqid# and there are no byte range locks or other Opens
3519 * on the openowner, just throw it away, so the next use of the
3520 * openowner will start a fresh seq#.
3521 */
3522 if (error == NFSERR_BADSEQID &&
3523 (new_stp->ls_flags & NFSLCK_CONFIRM) &&
3524 nfsrv_nootherstate(stp))
3525 nfsrv_freeopenowner(stp->ls_openowner, 0, p);
3526 NFSUNLOCKSTATE();
3527 goto out;
3528 }
3529
3530 /*
3531 * Set the return stateid.
3532 */
3533 stateidp->seqid = stp->ls_stateid.seqid + 1;
3534 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
3535 stateidp->seqid = 1;
3536 stateidp->other[0] = stp->ls_stateid.other[0];
3537 stateidp->other[1] = stp->ls_stateid.other[1];
3538 stateidp->other[2] = stp->ls_stateid.other[2];
3539 /*
3540 * Now, handle the three cases.
3541 */
3542 if (new_stp->ls_flags & NFSLCK_CONFIRM) {
3543 /*
3544 * If the open doesn't need confirmation, it seems to me that
3545 * there is a client error, but I'll just log it and keep going?
3546 */
3547 if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
3548 printf("Nfsv4d: stray open confirm\n");
3549 stp->ls_openowner->ls_flags = 0;
3550 stp->ls_stateid.seqid++;
3551 if ((nd->nd_flag & ND_NFSV41) != 0 &&
3552 stp->ls_stateid.seqid == 0)
3553 stp->ls_stateid.seqid = 1;
3554 if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3555 clp->lc_flags |= LCL_STAMPEDSTABLE;
3556 len = clp->lc_idlen;
3557 NFSBCOPY(clp->lc_id, clidp, len);
3558 gotstate = 1;
3559 }
3560 } else if (new_stp->ls_flags & NFSLCK_CLOSE) {
3561 if (retwriteaccessp != NULL) {
3562 if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0)
3563 *retwriteaccessp = 1;
3564 else
3565 *retwriteaccessp = 0;
3566 }
3567 if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
3568 ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
3569 nfsrv_freeopen(stp, vp, 1, p);
3570 } else {
3571 nfsrv_freeopen(stp, NULL, 0, p);
3572 }
3573 } else {
3574 /*
3575 * Update the share bits, making sure that the new set are a
3576 * subset of the old ones.
3577 */
3578 bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
3579 if (~(stp->ls_flags) & bits) {
3580 NFSUNLOCKSTATE();
3581 error = NFSERR_INVAL;
3582 goto out;
3583 }
3584 stp->ls_flags = (bits | NFSLCK_OPEN);
3585 stp->ls_stateid.seqid++;
3586 if ((nd->nd_flag & ND_NFSV41) != 0 &&
3587 stp->ls_stateid.seqid == 0)
3588 stp->ls_stateid.seqid = 1;
3589 }
3590 NFSUNLOCKSTATE();
3591
3592 /*
3593 * If the client just confirmed its first open, write a timestamp
3594 * to the stable storage file.
3595 */
3596 if (gotstate != 0) {
3597 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3598 nfsrv_backupstable();
3599 }
3600
3601 out:
3602 free(clidp, M_TEMP);
3603 NFSEXITCODE2(error, nd);
3604 return (error);
3605 }
3606
3607 /*
3608 * Delegation update. Does the purge and return.
3609 */
3610 int
nfsrv_delegupdate(struct nfsrv_descript * nd,nfsquad_t clientid,nfsv4stateid_t * stateidp,vnode_t vp,int op,struct ucred * cred,NFSPROC_T * p,int * retwriteaccessp)3611 nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
3612 nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
3613 NFSPROC_T *p, int *retwriteaccessp)
3614 {
3615 struct nfsstate *stp;
3616 struct nfsclient *clp;
3617 int error = 0;
3618 fhandle_t fh;
3619
3620 /*
3621 * Do a sanity check against the file handle for DelegReturn.
3622 */
3623 if (vp) {
3624 error = nfsvno_getfh(vp, &fh, p);
3625 if (error)
3626 goto out;
3627 }
3628 /*
3629 * Check for restart conditions (client and server).
3630 */
3631 if (op == NFSV4OP_DELEGRETURN)
3632 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3633 stateidp, 0);
3634 else
3635 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3636 stateidp, 0);
3637
3638 NFSLOCKSTATE();
3639 /*
3640 * Get the open structure via clientid and stateid.
3641 */
3642 if (!error)
3643 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3644 (nfsquad_t)((u_quad_t)0), 0, nd, p);
3645 if (error) {
3646 if (error == NFSERR_CBPATHDOWN)
3647 error = 0;
3648 if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3649 error = NFSERR_STALESTATEID;
3650 }
3651 if (!error && op == NFSV4OP_DELEGRETURN) {
3652 error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3653 if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
3654 ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
3655 error = NFSERR_OLDSTATEID;
3656 }
3657 /*
3658 * NFSERR_EXPIRED means that the state has gone away,
3659 * so Delegations have been purged. Just return ok.
3660 */
3661 if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3662 NFSUNLOCKSTATE();
3663 error = 0;
3664 goto out;
3665 }
3666 if (error) {
3667 NFSUNLOCKSTATE();
3668 goto out;
3669 }
3670
3671 if (op == NFSV4OP_DELEGRETURN) {
3672 if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3673 sizeof (fhandle_t))) {
3674 NFSUNLOCKSTATE();
3675 error = NFSERR_BADSTATEID;
3676 goto out;
3677 }
3678 if (retwriteaccessp != NULL) {
3679 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
3680 *retwriteaccessp = 1;
3681 else
3682 *retwriteaccessp = 0;
3683 }
3684 nfsrv_freedeleg(stp);
3685 } else {
3686 nfsrv_freedeleglist(&clp->lc_olddeleg);
3687 }
3688 NFSUNLOCKSTATE();
3689 error = 0;
3690
3691 out:
3692 NFSEXITCODE(error);
3693 return (error);
3694 }
3695
3696 /*
3697 * Release lock owner.
3698 */
3699 int
nfsrv_releaselckown(struct nfsstate * new_stp,nfsquad_t clientid,NFSPROC_T * p)3700 nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3701 NFSPROC_T *p)
3702 {
3703 struct nfsstate *stp, *nstp, *openstp, *ownstp;
3704 struct nfsclient *clp;
3705 int error = 0;
3706
3707 /*
3708 * Check for restart conditions (client and server).
3709 */
3710 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3711 &new_stp->ls_stateid, 0);
3712 if (error)
3713 goto out;
3714
3715 NFSLOCKSTATE();
3716 /*
3717 * Get the lock owner by name.
3718 */
3719 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3720 (nfsquad_t)((u_quad_t)0), 0, NULL, p);
3721 if (error) {
3722 NFSUNLOCKSTATE();
3723 goto out;
3724 }
3725 LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3726 LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3727 stp = LIST_FIRST(&openstp->ls_open);
3728 while (stp != LIST_END(&openstp->ls_open)) {
3729 nstp = LIST_NEXT(stp, ls_list);
3730 /*
3731 * If the owner matches, check for locks and
3732 * then free or return an error.
3733 */
3734 if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3735 !NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3736 stp->ls_ownerlen)){
3737 if (LIST_EMPTY(&stp->ls_lock)) {
3738 nfsrv_freelockowner(stp, NULL, 0, p);
3739 } else {
3740 NFSUNLOCKSTATE();
3741 error = NFSERR_LOCKSHELD;
3742 goto out;
3743 }
3744 }
3745 stp = nstp;
3746 }
3747 }
3748 }
3749 NFSUNLOCKSTATE();
3750
3751 out:
3752 NFSEXITCODE(error);
3753 return (error);
3754 }
3755
3756 /*
3757 * Get the file handle for a lock structure.
3758 */
3759 static int
nfsrv_getlockfh(vnode_t vp,u_short flags,struct nfslockfile * new_lfp,fhandle_t * nfhp,NFSPROC_T * p)3760 nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
3761 fhandle_t *nfhp, NFSPROC_T *p)
3762 {
3763 fhandle_t *fhp = NULL;
3764 int error;
3765
3766 /*
3767 * For lock, use the new nfslock structure, otherwise just
3768 * a fhandle_t on the stack.
3769 */
3770 if (flags & NFSLCK_OPEN) {
3771 KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
3772 fhp = &new_lfp->lf_fh;
3773 } else if (nfhp) {
3774 fhp = nfhp;
3775 } else {
3776 panic("nfsrv_getlockfh");
3777 }
3778 error = nfsvno_getfh(vp, fhp, p);
3779 NFSEXITCODE(error);
3780 return (error);
3781 }
3782
3783 /*
3784 * Get an nfs lock structure. Allocate one, as required, and return a
3785 * pointer to it.
3786 * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3787 */
3788 static int
nfsrv_getlockfile(u_short flags,struct nfslockfile ** new_lfpp,struct nfslockfile ** lfpp,fhandle_t * nfhp,int lockit)3789 nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3790 struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3791 {
3792 struct nfslockfile *lfp;
3793 fhandle_t *fhp = NULL, *tfhp;
3794 struct nfslockhashhead *hp;
3795 struct nfslockfile *new_lfp = NULL;
3796
3797 /*
3798 * For lock, use the new nfslock structure, otherwise just
3799 * a fhandle_t on the stack.
3800 */
3801 if (flags & NFSLCK_OPEN) {
3802 new_lfp = *new_lfpp;
3803 fhp = &new_lfp->lf_fh;
3804 } else if (nfhp) {
3805 fhp = nfhp;
3806 } else {
3807 panic("nfsrv_getlockfile");
3808 }
3809
3810 hp = NFSLOCKHASH(fhp);
3811 LIST_FOREACH(lfp, hp, lf_hash) {
3812 tfhp = &lfp->lf_fh;
3813 if (NFSVNO_CMPFH(fhp, tfhp)) {
3814 if (lockit)
3815 nfsrv_locklf(lfp);
3816 *lfpp = lfp;
3817 return (0);
3818 }
3819 }
3820 if (!(flags & NFSLCK_OPEN))
3821 return (-1);
3822
3823 /*
3824 * No match, so chain the new one into the list.
3825 */
3826 LIST_INIT(&new_lfp->lf_open);
3827 LIST_INIT(&new_lfp->lf_lock);
3828 LIST_INIT(&new_lfp->lf_deleg);
3829 LIST_INIT(&new_lfp->lf_locallock);
3830 LIST_INIT(&new_lfp->lf_rollback);
3831 new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3832 new_lfp->lf_locallock_lck.nfslock_lock = 0;
3833 new_lfp->lf_usecount = 0;
3834 LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3835 *lfpp = new_lfp;
3836 *new_lfpp = NULL;
3837 return (0);
3838 }
3839
3840 /*
3841 * This function adds a nfslock lock structure to the list for the associated
3842 * nfsstate and nfslockfile structures. It will be inserted after the
3843 * entry pointed at by insert_lop.
3844 */
3845 static void
nfsrv_insertlock(struct nfslock * new_lop,struct nfslock * insert_lop,struct nfsstate * stp,struct nfslockfile * lfp)3846 nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3847 struct nfsstate *stp, struct nfslockfile *lfp)
3848 {
3849 struct nfslock *lop, *nlop;
3850
3851 new_lop->lo_stp = stp;
3852 new_lop->lo_lfp = lfp;
3853
3854 if (stp != NULL) {
3855 /* Insert in increasing lo_first order */
3856 lop = LIST_FIRST(&lfp->lf_lock);
3857 if (lop == LIST_END(&lfp->lf_lock) ||
3858 new_lop->lo_first <= lop->lo_first) {
3859 LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3860 } else {
3861 nlop = LIST_NEXT(lop, lo_lckfile);
3862 while (nlop != LIST_END(&lfp->lf_lock) &&
3863 nlop->lo_first < new_lop->lo_first) {
3864 lop = nlop;
3865 nlop = LIST_NEXT(lop, lo_lckfile);
3866 }
3867 LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3868 }
3869 } else {
3870 new_lop->lo_lckfile.le_prev = NULL; /* list not used */
3871 }
3872
3873 /*
3874 * Insert after insert_lop, which is overloaded as stp or lfp for
3875 * an empty list.
3876 */
3877 if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3878 LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3879 else if ((struct nfsstate *)insert_lop == stp)
3880 LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3881 else
3882 LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3883 if (stp != NULL) {
3884 VNET(nfsstatsv1_p)->srvlocks++;
3885 nfsrv_openpluslock++;
3886 }
3887 }
3888
/*
 * This function updates the locking for a lock owner and given file. It
 * maintains a list of lock ranges ordered on increasing file offset that
 * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
 *
 * Arguments:
 *   stp        - the lock owner state whose ls_lock list is updated. When
 *                NULL, the file's lf_locallock list is updated instead.
 *   new_lopp   - in/out: *new_lopp is the lock (or unlock, when
 *                NFSLCK_UNLOCK is set in lo_flags) range to apply. It is
 *                set to NULL once the structure has been linked into a
 *                list, so a non-NULL value on return means the structure
 *                was not consumed by this function.
 *   other_lopp - in/out: *other_lopp is a spare lock structure used for
 *                the second half of an existing lock that gets split by
 *                the new range. It is set to NULL if that split occurs.
 *                It may point at NULL only for an unlock (otherwise the
 *                split case panics), in which case new_lop is reused.
 *   lfp        - the lock file the range applies to; only list entries
 *                with lo_lfp == lfp are examined.
 *
 * For a lock request new_lop is always inserted in the list. For an
 * unlock, new_lop is only used to trim/absorb existing ranges and is not
 * inserted (unless it is recycled as the split remainder noted above).
 */
static void
nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
    struct nfslock **other_lopp, struct nfslockfile *lfp)
{
	struct nfslock *new_lop = *new_lopp;
	struct nfslock *lop, *tlop, *ilop;
	struct nfslock *other_lop = *other_lopp;
	int unlock = 0, myfile = 0;
	u_int64_t tmp;

	/*
	 * Work down the list until the lock is merged.
	 */
	if (new_lop->lo_flags & NFSLCK_UNLOCK)
		unlock = 1;
	/*
	 * ilop tracks the insertion point handed to nfsrv_insertlock().
	 * It starts out as the list owner (stp or lfp) cast to a lock
	 * pointer, which nfsrv_insertlock() recognizes as "insert at the
	 * head of the list".
	 */
	if (stp != NULL) {
		ilop = (struct nfslock *)stp;
		lop = LIST_FIRST(&stp->ls_lock);
	} else {
		ilop = (struct nfslock *)lfp;
		lop = LIST_FIRST(&lfp->lf_locallock);
	}
	while (lop != NULL) {
		/*
		 * Only check locks for this file that aren't before the start of
		 * new lock's range.
		 */
		if (lop->lo_lfp == lfp) {
			/* Note that at least one entry for this file was seen. */
			myfile = 1;
			if (lop->lo_end >= new_lop->lo_first) {
				if (new_lop->lo_end < lop->lo_first) {
					/*
					 * If the new lock ends before the start of the
					 * current lock's range, no merge, just insert
					 * the new lock.
					 */
					break;
				}
				if (new_lop->lo_flags == lop->lo_flags ||
				    (new_lop->lo_first <= lop->lo_first &&
				    new_lop->lo_end >= lop->lo_end)) {
					/*
					 * This lock can be absorbed by the new lock/unlock.
					 * This happens when it covers the entire range
					 * of the old lock or is contiguous
					 * with the old lock and is of the same type or an
					 * unlock.
					 * The new range is widened to the union of both
					 * ranges and the old entry is freed. The next
					 * pointer is fetched before the free, since lop
					 * must not be referenced afterwards.
					 */
					if (lop->lo_first < new_lop->lo_first)
						new_lop->lo_first = lop->lo_first;
					if (lop->lo_end > new_lop->lo_end)
						new_lop->lo_end = lop->lo_end;
					tlop = lop;
					lop = LIST_NEXT(lop, lo_lckowner);
					nfsrv_freenfslock(tlop);
					continue;
				}

				/*
				 * All these cases are for contiguous locks that are not the
				 * same type, so they can't be merged.
				 */
				if (new_lop->lo_first <= lop->lo_first) {
					/*
					 * This case is where the new lock overlaps with the
					 * first part of the old lock. Move the start of the
					 * old lock to just past the end of the new lock. The
					 * new lock will be inserted in front of the old, since
					 * ilop hasn't been updated. (We are done now.)
					 * NOTE(review): assigning lo_end directly to
					 * lo_first suggests lo_end is an exclusive bound;
					 * confirm against the nfslock definition.
					 */
					lop->lo_first = new_lop->lo_end;
					break;
				}
				if (new_lop->lo_end >= lop->lo_end) {
					/*
					 * This case is where the new lock overlaps with the
					 * end of the old lock's range. Move the old lock's
					 * end to just before the new lock's first and insert
					 * the new lock after the old lock.
					 * Might not be done yet, since the new lock could
					 * overlap further locks with higher ranges.
					 */
					lop->lo_end = new_lop->lo_first;
					ilop = lop;
					lop = LIST_NEXT(lop, lo_lckowner);
					continue;
				}
				/*
				 * The final case is where the new lock's range is in the
				 * middle of the current lock's and splits the current lock
				 * up. Use *other_lopp to handle the second part of the
				 * split old lock range. (We are done now.)
				 * For unlock, we use new_lop as other_lop and tmp, since
				 * other_lop and new_lop are the same for this case.
				 * We noted the unlock case above, so we don't need
				 * new_lop->lo_flags any longer.
				 */
				/* Save lo_first: other_lop may alias new_lop below. */
				tmp = new_lop->lo_first;
				if (other_lop == NULL) {
					if (!unlock)
						panic("nfsd srv update unlock");
					other_lop = new_lop;
					*new_lopp = NULL;
				}
				other_lop->lo_first = new_lop->lo_end;
				other_lop->lo_end = lop->lo_end;
				other_lop->lo_flags = lop->lo_flags;
				other_lop->lo_stp = stp;
				other_lop->lo_lfp = lfp;
				lop->lo_end = tmp;
				nfsrv_insertlock(other_lop, lop, stp, lfp);
				*other_lopp = NULL;
				ilop = lop;
				break;
			}
		}
		ilop = lop;
		lop = LIST_NEXT(lop, lo_lckowner);
		/*
		 * Stop once the run of entries for this file has been passed
		 * (or the list has ended), leaving ilop at its last entry.
		 */
		if (myfile && (lop == NULL || lop->lo_lfp != lfp))
			break;
	}

	/*
	 * Insert the new lock in the list at the appropriate place.
	 * (Nothing is inserted for an unlock.)
	 */
	if (!unlock) {
		nfsrv_insertlock(new_lop, ilop, stp, lfp);
		*new_lopp = NULL;
	}
}
4026
4027 /*
4028 * This function handles sequencing of locks, etc.
4029 * It returns an error that indicates what the caller should do.
4030 */
4031 static int
nfsrv_checkseqid(struct nfsrv_descript * nd,u_int32_t seqid,struct nfsstate * stp,struct nfsrvcache * op)4032 nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
4033 struct nfsstate *stp, struct nfsrvcache *op)
4034 {
4035 int error = 0;
4036
4037 if ((nd->nd_flag & ND_NFSV41) != 0)
4038 /* NFSv4.1 ignores the open_seqid and lock_seqid. */
4039 goto out;
4040 if (op != nd->nd_rp)
4041 panic("nfsrvstate checkseqid");
4042 if (!(op->rc_flag & RC_INPROG))
4043 panic("nfsrvstate not inprog");
4044 if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
4045 printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
4046 panic("nfsrvstate op refcnt");
4047 }
4048
4049 /* If ND_ERELOOKUP is set, the seqid has already been handled. */
4050 if ((nd->nd_flag & ND_ERELOOKUP) != 0)
4051 goto out;
4052
4053 if ((stp->ls_seq + 1) == seqid) {
4054 if (stp->ls_op)
4055 nfsrvd_derefcache(stp->ls_op);
4056 stp->ls_op = op;
4057 nfsrvd_refcache(op);
4058 stp->ls_seq = seqid;
4059 goto out;
4060 } else if (stp->ls_seq == seqid && stp->ls_op &&
4061 op->rc_xid == stp->ls_op->rc_xid &&
4062 op->rc_refcnt == 0 &&
4063 op->rc_reqlen == stp->ls_op->rc_reqlen &&
4064 op->rc_cksum == stp->ls_op->rc_cksum) {
4065 if (stp->ls_op->rc_flag & RC_INPROG) {
4066 error = NFSERR_DONTREPLY;
4067 goto out;
4068 }
4069 nd->nd_rp = stp->ls_op;
4070 nd->nd_rp->rc_flag |= RC_INPROG;
4071 nfsrvd_delcache(op);
4072 error = NFSERR_REPLYFROMCACHE;
4073 goto out;
4074 }
4075 error = NFSERR_BADSEQID;
4076
4077 out:
4078 NFSEXITCODE2(error, nd);
4079 return (error);
4080 }
4081
4082 /*
4083 * Just set lc_program to 0 to indicate no callbacks are possible.
4084 * Set the address to the client's transport address. This won't be used
4085 * for callbacks, but can be printed out by nfsstats for info.
4086 * Return error if the xdr can't be parsed, 0 otherwise.
4087 */
4088 int
nfsrv_getclientipaddr(struct nfsrv_descript * nd,struct nfsclient * clp)4089 nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
4090 {
4091 uint32_t *tl;
4092 int error = 0, i;
4093 #ifdef INET
4094 struct sockaddr_in *rin, *sin;
4095 #endif
4096 #ifdef INET6
4097 struct sockaddr_in6 *rin6, *sin6;
4098 #endif
4099
4100 clp->lc_req.nr_client = NULL;
4101 clp->lc_req.nr_lock = 0;
4102 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4103 i = fxdr_unsigned(int, *tl);
4104 if (i < 0) {
4105 error = NFSERR_BADXDR;
4106 goto nfsmout;
4107 } else if (i > 0) {
4108 error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
4109 if (error)
4110 goto nfsmout;
4111 }
4112 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4113 i = fxdr_unsigned(int, *tl);
4114 if (i < 0) {
4115 error = NFSERR_BADXDR;
4116 goto nfsmout;
4117 } else if (i > 0) {
4118 error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
4119 if (error)
4120 goto nfsmout;
4121 }
4122 switch (nd->nd_nam->sa_family) {
4123 #ifdef INET
4124 case AF_INET:
4125 sin = (struct sockaddr_in *)nd->nd_nam;
4126 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
4127 rin->sin_family = AF_INET;
4128 rin->sin_len = sizeof(struct sockaddr_in);
4129 rin->sin_addr.s_addr = sin->sin_addr.s_addr;
4130 rin->sin_port = 0x0;
4131 break;
4132 #endif
4133 #ifdef INET6
4134 case AF_INET6:
4135 sin6 = (struct sockaddr_in6 *)nd->nd_nam;
4136 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
4137 rin6->sin6_family = AF_INET6;
4138 rin6->sin6_len = sizeof(struct sockaddr_in6);
4139 rin6->sin6_addr = sin6->sin6_addr;
4140 rin6->sin6_port = 0x0;
4141 break;
4142 #endif
4143 }
4144 clp->lc_program = 0;
4145 nfsmout:
4146 NFSEXITCODE2(error, nd);
4147 return (error);
4148 }
4149
4150 /*
4151 * This function checks for restart conditions.
4152 */
4153 static int
nfsrv_checkrestart(nfsquad_t clientid,u_int32_t flags,nfsv4stateid_t * stateidp,int specialid)4154 nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
4155 nfsv4stateid_t *stateidp, int specialid)
4156 {
4157 int ret = 0;
4158
4159 /*
4160 * First check for a server restart. Open, LockT, ReleaseLockOwner
4161 * and DelegPurge have a clientid, the rest a stateid.
4162 */
4163 if (flags &
4164 (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
4165 if (clientid.lval[0] != VNET(nfsrvboottime)) {
4166 ret = NFSERR_STALECLIENTID;
4167 goto out;
4168 }
4169 } else if (stateidp->other[0] != VNET(nfsrvboottime) &&
4170 specialid == 0) {
4171 ret = NFSERR_STALESTATEID;
4172 goto out;
4173 }
4174
4175 /*
4176 * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
4177 * not use a lock/open owner seqid#, so the check can be done now.
4178 * (The others will be checked, as required, later.)
4179 */
4180 if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
4181 goto out;
4182
4183 NFSLOCKSTATE();
4184 ret = nfsrv_checkgrace(NULL, NULL, flags);
4185 NFSUNLOCKSTATE();
4186
4187 out:
4188 NFSEXITCODE(ret);
4189 return (ret);
4190 }
4191
4192 /*
4193 * Check for grace.
4194 */
4195 static int
nfsrv_checkgrace(struct nfsrv_descript * nd,struct nfsclient * clp,u_int32_t flags)4196 nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
4197 u_int32_t flags)
4198 {
4199 int error = 0, notreclaimed;
4200 struct nfsrv_stable *sp;
4201
4202 if ((VNET(nfsrv_stablefirst).nsf_flags & (NFSNSF_UPDATEDONE |
4203 NFSNSF_GRACEOVER)) == 0) {
4204 /*
4205 * First, check to see if all of the clients have done a
4206 * ReclaimComplete. If so, grace can end now.
4207 */
4208 notreclaimed = 0;
4209 if (!VNET(nfsd_disable_grace)) {
4210 LIST_FOREACH(sp, &VNET(nfsrv_stablefirst).nsf_head,
4211 nst_list) {
4212 if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
4213 notreclaimed = 1;
4214 break;
4215 }
4216 }
4217 }
4218 if (notreclaimed == 0)
4219 VNET(nfsrv_stablefirst).nsf_flags |=
4220 (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4221 }
4222
4223 if ((VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_GRACEOVER) != 0) {
4224 if (flags & NFSLCK_RECLAIM) {
4225 error = NFSERR_NOGRACE;
4226 goto out;
4227 }
4228 } else {
4229 if (!(flags & NFSLCK_RECLAIM)) {
4230 error = NFSERR_GRACE;
4231 goto out;
4232 }
4233 if (nd != NULL && clp != NULL &&
4234 (nd->nd_flag & ND_NFSV41) != 0 &&
4235 (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
4236 error = NFSERR_NOGRACE;
4237 goto out;
4238 }
4239
4240 /*
4241 * If grace is almost over and we are still getting Reclaims,
4242 * extend grace a bit.
4243 */
4244 if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
4245 VNET(nfsrv_stablefirst).nsf_eograce)
4246 VNET(nfsrv_stablefirst).nsf_eograce =
4247 NFSD_MONOSEC + NFSRV_LEASEDELTA;
4248 }
4249
4250 out:
4251 NFSEXITCODE(error);
4252 return (error);
4253 }
4254
/*
 * Do a server callback.
 *
 * Builds the callback request selected by procnum, sends it to the client
 * and updates the client's callback path flags according to the outcome.
 *
 * Arguments:
 *   clp      - the client to call back; panics if LCL_NEEDSCONFIRM is set.
 *   procnum  - NFSV4OP_CBGETATTR, NFSV4OP_CBRECALL, NFSV4OP_CBLAYOUTRECALL
 *              or NFSV4PROC_CBNULL. Anything else yields
 *              NFSERR_SERVERFAULT.
 *   stateidp - stateid put in the CBRECALL and CBLAYOUTRECALL arguments.
 *   trunc    - slightly overloaded: it fills in different boolean
 *              arguments for CBRECALL and CBLAYOUTRECALL.
 *   fhp      - file handle put in the request (not used for CBNULL).
 *   nap      - for CBGETATTR, passed to nfsv4_loadattr() to receive the
 *              attributes parsed from the reply.
 *   attrbitp - for CBGETATTR, the attribute bits being requested.
 *   laytype  - for CBLAYOUTRECALL, the layout type.
 *   p        - passed through to nfsv4_loadattr().
 *
 * Returns 0 on success, the connect/request error when the callback path
 * failed (LCL_CBDOWN gets set), or the status from the client's reply
 * (nd_repstat) when the callback itself failed on the client.
 */
static int
nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
    int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp,
    int laytype, NFSPROC_T *p)
{
	struct mbuf *m;
	u_int32_t *tl;
	struct nfsrv_descript *nd;
	struct ucred *cred;
	int error = 0, slotpos;
	u_int32_t callback;
	struct nfsdsession *sep = NULL;
	uint64_t tval;
	bool dotls;

	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	cred = newnfs_getcred();
	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
		NFSUNLOCKSTATE();
		panic("docallb");
	}
	/* Count this callback as in progress; dropped again near the end. */
	clp->lc_cbref++;

	/*
	 * Fill the callback program# and version into the request
	 * structure for newnfs_connect() to use.
	 */
	clp->lc_req.nr_prog = clp->lc_program;
#ifdef notnow
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		clp->lc_req.nr_vers = NFSV41_CBVERS;
	else
#endif
		clp->lc_req.nr_vers = NFSV4_CBVERS;

	/*
	 * First, fill in some of the fields of nd and cr.
	 * The minor version flags mirror the ones recorded for the client.
	 */
	nd->nd_flag = ND_NFSV4;
	if (clp->lc_flags & LCL_GSS)
		nd->nd_flag |= ND_KERBV;
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		nd->nd_flag |= ND_NFSV41;
	if ((clp->lc_flags & LCL_NFSV42) != 0)
		nd->nd_flag |= ND_NFSV42;
	nd->nd_repstat = 0;
	cred->cr_uid = clp->lc_uid;
	cred->cr_gid = clp->lc_gid;
	callback = clp->lc_callback;
	NFSUNLOCKSTATE();
	cred->cr_ngroups = 1;

	/*
	 * Get the first mbuf for the request.
	 */
	MGET(m, M_WAITOK, MT_DATA);
	m->m_len = 0;
	nd->nd_mreq = nd->nd_mb = m;
	nd->nd_bpos = mtod(m, caddr_t);

	/*
	 * and build the callback request.
	 * On any failure in this section the request mbuf chain is freed
	 * here, before jumping to errout.
	 */
	if (procnum == NFSV4OP_CBGETATTR) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
		    "CB Getattr", &sep, &slotpos);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		(void)nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
		(void)nfsrv_putattrbit(nd, attrbitp);
	} else if (procnum == NFSV4OP_CBRECALL) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
		    "CB Recall", &sep, &slotpos);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		/* Arguments: stateid, the "trunc" boolean, then the file handle. */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
		    NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		if (trunc)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
		(void)nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
	} else if (procnum == NFSV4OP_CBLAYOUTRECALL) {
		NFSD_DEBUG(4, "docallback layout recall\n");
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback,
		    NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep, &slotpos);
		NFSD_DEBUG(4, "aft cbcallargs=%d\n", error);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		/*
		 * Arguments: layout type, iomode (any), the "trunc" boolean
		 * and the recall type (file), followed by the file handle,
		 * the byte range 0 through UINT64_MAX and the stateid.
		 */
		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(laytype);
		*tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY);
		if (trunc)
			*tl++ = newnfs_true;
		else
			*tl++ = newnfs_false;
		*tl = txdr_unsigned(NFSV4LAYOUTRET_FILE);
		(void)nfsm_fhtom(NULL, nd, (uint8_t *)fhp, NFSX_MYFH, 0);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID);
		tval = 0;
		txdr_hyper(tval, tl); tl += 2;
		tval = UINT64_MAX;
		txdr_hyper(tval, tl); tl += 2;
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		NFSD_DEBUG(4, "aft args\n");
	} else if (procnum == NFSV4PROC_CBNULL) {
		nd->nd_procnum = NFSV4PROC_CBNULL;
		/* A Null RPC has no arguments, but NFSv4.1 needs a session. */
		if ((clp->lc_flags & LCL_NFSV41) != 0) {
			error = nfsv4_getcbsession(clp, &sep);
			if (error != 0) {
				m_freem(nd->nd_mreq);
				goto errout;
			}
		}
	} else {
		error = NFSERR_SERVERFAULT;
		m_freem(nd->nd_mreq);
		goto errout;
	}

	/*
	 * Call newnfs_connect(), as required, and then newnfs_request().
	 */
	dotls = false;
	if ((clp->lc_flags & LCL_TLSCB) != 0)
		dotls = true;
	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
	if (clp->lc_req.nr_client == NULL) {
		if ((clp->lc_flags & LCL_NFSV41) != 0) {
			/*
			 * No connect is attempted for NFSv4.1 here. The
			 * callback fails with ECONNREFUSED and the slot
			 * (except for CBNULL, which did not acquire one
			 * above) and the session reference are released.
			 */
			error = ECONNREFUSED;
			if (procnum != NFSV4PROC_CBNULL)
				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
				    true);
			nfsrv_freesession(NULL, sep, NULL, false, NULL);
		} else if (nd->nd_procnum == NFSV4PROC_CBNULL)
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 1, dotls, &clp->lc_req.nr_client);
		else
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 3, dotls, &clp->lc_req.nr_client);
	}
	newnfs_sndunlock(&clp->lc_req.nr_lock);
	NFSD_DEBUG(4, "aft sndunlock=%d\n", error);
	if (!error) {
		if ((nd->nd_flag & ND_NFSV41) != 0) {
			KASSERT(sep != NULL, ("sep NULL"));
			if (sep->sess_cbsess.nfsess_xprt != NULL)
				error = newnfs_request(nd, NULL, clp,
				    &clp->lc_req, NULL, NULL, cred,
				    clp->lc_program, clp->lc_req.nr_vers, NULL,
				    1, NULL, &sep->sess_cbsess);
			else {
				/*
				 * This should probably never occur, but if a
				 * client somehow does an RPC without a
				 * SequenceID Op that causes a callback just
				 * after the nfsd threads have been terminated
				 * and restarted we could conceivably get here
				 * without a backchannel xprt.
				 */
				printf("nfsrv_docallback: no xprt\n");
				error = ECONNREFUSED;
			}
			NFSD_DEBUG(4, "aft newnfs_request=%d\n", error);
			if (error != 0 && procnum != NFSV4PROC_CBNULL) {
				/*
				 * It is likely that the callback was never
				 * processed by the client and, as such,
				 * the sequence# for the session slot needs
				 * to be backed up by one to avoid a
				 * NFSERR_SEQMISORDERED error reply.
				 * For the unlikely case where the callback
				 * was processed by the client, this will
				 * make the next callback on the slot
				 * appear to be a retry.
				 * Since callbacks never specify that the
				 * reply be cached, this "apparent retry"
				 * should not be a problem.
				 */
				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
				    true);
			}
			nfsrv_freesession(NULL, sep, NULL, false, NULL);
		} else
			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
			    NULL, NULL, cred, clp->lc_program,
			    clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
	}
errout:
	NFSFREECRED(cred);

	/*
	 * If error is set here, the Callback path isn't working
	 * properly, so twiddle the appropriate LCL_ flags.
	 * (nd_repstat != 0 indicates the Callback path is working,
	 *  but the callback failed on the client.)
	 */
	if (error) {
		/*
		 * Mark the callback pathway down, which disables issuing
		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
		 */
		NFSLOCKSTATE();
		clp->lc_flags |= LCL_CBDOWN;
		NFSUNLOCKSTATE();
	} else {
		/*
		 * Callback worked. If the callback path was down, disable
		 * callbacks, so no more delegations will be issued. (This
		 * is done on the assumption that the callback pathway is
		 * flakey.)
		 */
		NFSLOCKSTATE();
		if (clp->lc_flags & LCL_CBDOWN)
			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
		NFSUNLOCKSTATE();
		if (nd->nd_repstat) {
			error = nd->nd_repstat;
			NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n",
			    procnum, error);
		} else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
			/* Parse the returned attributes into *nap. */
			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
			    NULL, NULL, NULL, p, NULL);
		m_freem(nd->nd_mrep);
	}
	/*
	 * Drop the in-progress count and wake up anyone that asked to be
	 * told (LCL_WAKEUPWANTED) when it reaches zero.
	 */
	NFSLOCKSTATE();
	clp->lc_cbref--;
	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
		clp->lc_flags &= ~LCL_WAKEUPWANTED;
		wakeup(clp);
	}
	NFSUNLOCKSTATE();

	free(nd, M_TEMP);
	NFSEXITCODE(error);
	return (error);
}
4513
4514 /*
4515 * Set up the compound RPC for the callback.
4516 */
4517 static int
nfsrv_cbcallargs(struct nfsrv_descript * nd,struct nfsclient * clp,uint32_t callback,int op,const char * optag,struct nfsdsession ** sepp,int * slotposp)4518 nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
4519 uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
4520 int *slotposp)
4521 {
4522 uint32_t *tl;
4523 int error, len;
4524
4525 len = strlen(optag);
4526 (void)nfsm_strtom(nd, optag, len);
4527 NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4528 if ((nd->nd_flag & ND_NFSV41) != 0) {
4529 if ((nd->nd_flag & ND_NFSV42) != 0)
4530 *tl++ = txdr_unsigned(NFSV42_MINORVERSION);
4531 else
4532 *tl++ = txdr_unsigned(NFSV41_MINORVERSION);
4533 *tl++ = txdr_unsigned(callback);
4534 *tl++ = txdr_unsigned(2);
4535 *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
4536 error = nfsv4_setcbsequence(nd, clp, 1, sepp, slotposp);
4537 if (error != 0)
4538 return (error);
4539 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4540 *tl = txdr_unsigned(op);
4541 } else {
4542 *tl++ = txdr_unsigned(NFSV4_MINORVERSION);
4543 *tl++ = txdr_unsigned(callback);
4544 *tl++ = txdr_unsigned(1);
4545 *tl = txdr_unsigned(op);
4546 }
4547 return (0);
4548 }
4549
/*
 * Hand out the next index# for a clientid by bumping a static 32bit
 * counter, so the first value returned is 1.
 * Should the counter ever wrap around to 0, a message is logged and 0 is
 * returned; the server really should be rebooted at that point.
 * At an average rate of one new client per second, that takes
 * approximately 136 years. (I think the server will have been shut
 * down or rebooted before then.)
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t client_index = 0;

	if (++client_index == 0)
		printf("%s: out of clientids\n", __func__);
	return (client_index);
}
4570
4571 /*
4572 * Return the next index# for a stateid. Mostly just increment and return
4573 * the next one, but... if the 32bit unsigned does actually wrap around
4574 * (will a BSD server stay up that long?), find
4575 * new start and end values.
4576 */
4577 static u_int32_t
nfsrv_nextstateindex(struct nfsclient * clp)4578 nfsrv_nextstateindex(struct nfsclient *clp)
4579 {
4580 struct nfsstate *stp;
4581 int i;
4582 u_int32_t canuse, min_index, max_index;
4583
4584 if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
4585 clp->lc_stateindex++;
4586 if (clp->lc_stateindex != clp->lc_statemaxindex)
4587 return (clp->lc_stateindex);
4588 }
4589
4590 /*
4591 * Yuck, we've hit the end.
4592 * Look for a new min and max.
4593 */
4594 min_index = 0;
4595 max_index = 0xffffffff;
4596 for (i = 0; i < nfsrv_statehashsize; i++) {
4597 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4598 if (stp->ls_stateid.other[2] > 0x80000000) {
4599 if (stp->ls_stateid.other[2] < max_index)
4600 max_index = stp->ls_stateid.other[2];
4601 } else {
4602 if (stp->ls_stateid.other[2] > min_index)
4603 min_index = stp->ls_stateid.other[2];
4604 }
4605 }
4606 }
4607
4608 /*
4609 * Yikes, highly unlikely, but I'll handle it anyhow.
4610 */
4611 if (min_index == 0x80000000 && max_index == 0x80000001) {
4612 canuse = 0;
4613 /*
4614 * Loop around until we find an unused entry. Return that
4615 * and set LCL_INDEXNOTOK, so the search will continue next time.
4616 * (This is one of those rare cases where a goto is the
4617 * cleanest way to code the loop.)
4618 */
4619 tryagain:
4620 for (i = 0; i < nfsrv_statehashsize; i++) {
4621 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4622 if (stp->ls_stateid.other[2] == canuse) {
4623 canuse++;
4624 goto tryagain;
4625 }
4626 }
4627 }
4628 clp->lc_flags |= LCL_INDEXNOTOK;
4629 return (canuse);
4630 }
4631
4632 /*
4633 * Ok to start again from min + 1.
4634 */
4635 clp->lc_stateindex = min_index + 1;
4636 clp->lc_statemaxindex = max_index;
4637 clp->lc_flags &= ~LCL_INDEXNOTOK;
4638 return (clp->lc_stateindex);
4639 }
4640
4641 /*
4642 * The following functions handle the stable storage file that deals with
4643 * the edge conditions described in RFC3530 Sec. 8.6.3.
4644 * The file is as follows:
4645 * - a single record at the beginning that has the lease time of the
4646 * previous server instance (before the last reboot) and the nfsrvboottime
4647 * values for the previous server boots.
4648 * These previous boot times are used to ensure that the current
4649 * nfsrvboottime does not, somehow, get set to a previous one.
4650 * (This is important so that Stale ClientIDs and StateIDs can
4651 * be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
4653 * - followed by some number of appended records with:
4654 * - client id string
4655 * - flag that indicates it is a record revoking state via lease
4656 * expiration or similar
4657 * OR has successfully acquired state.
4658 * These structures vary in length, with the client string at the end, up
4659 * to NFSV4_OPAQUELIMIT in size.
4660 *
4661 * At the end of the grace period, the file is truncated, the first
4662 * record is rewritten with updated information and any acquired state
4663 * records for successful reclaims of state are written.
4664 *
4665 * Subsequent records are appended when the first state is issued to
4666 * a client and when state is revoked for a client.
4667 *
4668 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological order.
4670 * If, for some reason, the file can't be read, the grace period is
4671 * immediately terminated and all reclaims get NFSERR_NOGRACE.
4672 */
4673
4674 /*
4675 * Read in the stable storage file. Called by nfssvc() before the nfsd
4676 * processes start servicing requests.
4677 */
4678 void
nfsrv_setupstable(NFSPROC_T * p)4679 nfsrv_setupstable(NFSPROC_T *p)
4680 {
4681 struct nfsrv_stablefirst *sf = &VNET(nfsrv_stablefirst);
4682 struct nfsrv_stable *sp, *nsp;
4683 struct nfst_rec *tsp;
4684 int error, i, tryagain;
4685 off_t off = 0;
4686 ssize_t aresid, len;
4687
4688 /*
4689 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
4690 * a reboot, so state has not been lost.
4691 */
4692 if (sf->nsf_flags & NFSNSF_UPDATEDONE)
4693 return;
4694 /*
4695 * Set Grace over just until the file reads successfully.
4696 */
4697 VNET(nfsrvboottime) = time_second;
4698 LIST_INIT(&sf->nsf_head);
4699 sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4700 sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
4701 if (sf->nsf_fp == NULL)
4702 return;
4703 error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4704 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
4705 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4706 if (error || aresid || sf->nsf_numboots == 0 ||
4707 sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
4708 return;
4709
4710 /*
4711 * Now, read in the boottimes.
4712 */
4713 sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
4714 sizeof(time_t), M_TEMP, M_WAITOK);
4715 off = sizeof (struct nfsf_rec);
4716 error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4717 (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
4718 UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4719 if (error || aresid) {
4720 free(sf->nsf_bootvals, M_TEMP);
4721 sf->nsf_bootvals = NULL;
4722 return;
4723 }
4724
4725 /*
4726 * Make sure this nfsrvboottime is different from all recorded
4727 * previous ones.
4728 */
4729 do {
4730 tryagain = 0;
4731 for (i = 0; i < sf->nsf_numboots; i++) {
4732 if (VNET(nfsrvboottime) == sf->nsf_bootvals[i]) {
4733 VNET(nfsrvboottime)++;
4734 tryagain = 1;
4735 break;
4736 }
4737 }
4738 } while (tryagain);
4739
4740 sf->nsf_flags |= NFSNSF_OK;
4741 off += (sf->nsf_numboots * sizeof (time_t));
4742
4743 /*
4744 * Read through the file, building a list of records for grace
4745 * checking.
4746 * Each record is between sizeof (struct nfst_rec) and
4747 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
4748 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
4749 */
4750 tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4751 NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
4752 do {
4753 error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4754 (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
4755 off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4756 len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
4757 if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
4758 len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
4759 /*
4760 * Yuck, the file has been corrupted, so just return
4761 * after clearing out any restart state, so the grace period
4762 * is over.
4763 */
4764 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4765 LIST_REMOVE(sp, nst_list);
4766 free(sp, M_TEMP);
4767 }
4768 free(tsp, M_TEMP);
4769 sf->nsf_flags &= ~NFSNSF_OK;
4770 free(sf->nsf_bootvals, M_TEMP);
4771 sf->nsf_bootvals = NULL;
4772 return;
4773 }
4774 if (len > 0) {
4775 off += sizeof (struct nfst_rec) + tsp->len - 1;
4776 /*
4777 * Search the list for a matching client.
4778 */
4779 LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
4780 if (tsp->len == sp->nst_len &&
4781 !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
4782 break;
4783 }
4784 if (sp == LIST_END(&sf->nsf_head)) {
4785 sp = (struct nfsrv_stable *)malloc(tsp->len +
4786 sizeof (struct nfsrv_stable) - 1, M_TEMP,
4787 M_WAITOK);
4788 NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
4789 sizeof (struct nfst_rec) + tsp->len - 1);
4790 LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
4791 } else {
4792 if (tsp->flag == NFSNST_REVOKE)
4793 sp->nst_flag |= NFSNST_REVOKE;
4794 else
4795 /*
4796 * A subsequent timestamp indicates the client
4797 * did a setclientid/confirm and any previous
4798 * revoke is no longer relevant.
4799 */
4800 sp->nst_flag &= ~NFSNST_REVOKE;
4801 }
4802 }
4803 } while (len > 0);
4804 free(tsp, M_TEMP);
4805 sf->nsf_flags = NFSNSF_OK;
4806 sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
4807 NFSRV_LEASEDELTA;
4808 }
4809
4810 /*
4811 * Update the stable storage file, now that the grace period is over.
4812 */
4813 void
nfsrv_updatestable(NFSPROC_T * p)4814 nfsrv_updatestable(NFSPROC_T *p)
4815 {
4816 struct nfsrv_stablefirst *sf = &VNET(nfsrv_stablefirst);
4817 struct nfsrv_stable *sp, *nsp;
4818 int i;
4819 struct nfsvattr nva;
4820 vnode_t vp;
4821 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
4822 mount_t mp = NULL;
4823 #endif
4824 int error;
4825
4826 if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
4827 return;
4828 sf->nsf_flags |= NFSNSF_UPDATEDONE;
4829 /*
4830 * Ok, we need to rewrite the stable storage file.
4831 * - truncate to 0 length
4832 * - write the new first structure
4833 * - loop through the data structures, writing out any that
4834 * have timestamps older than the old boot
4835 */
4836 if (sf->nsf_bootvals) {
4837 sf->nsf_numboots++;
4838 for (i = sf->nsf_numboots - 2; i >= 0; i--)
4839 sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
4840 } else {
4841 sf->nsf_numboots = 1;
4842 sf->nsf_bootvals = (time_t *)malloc(sizeof(time_t),
4843 M_TEMP, M_WAITOK);
4844 }
4845 sf->nsf_bootvals[0] = VNET(nfsrvboottime);
4846 sf->nsf_lease = nfsrv_lease;
4847 NFSVNO_ATTRINIT(&nva);
4848 NFSVNO_SETATTRVAL(&nva, size, 0);
4849 vp = NFSFPVNODE(sf->nsf_fp);
4850 vn_start_write(vp, &mp, V_WAIT);
4851 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
4852 error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
4853 NULL);
4854 NFSVOPUNLOCK(vp);
4855 } else
4856 error = EPERM;
4857 vn_finished_write(mp);
4858 if (!error)
4859 error = NFSD_RDWR(UIO_WRITE, vp,
4860 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
4861 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4862 if (!error)
4863 error = NFSD_RDWR(UIO_WRITE, vp,
4864 (caddr_t)sf->nsf_bootvals,
4865 sf->nsf_numboots * sizeof (time_t),
4866 (off_t)(sizeof (struct nfsf_rec)),
4867 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4868 free(sf->nsf_bootvals, M_TEMP);
4869 sf->nsf_bootvals = NULL;
4870 if (error) {
4871 sf->nsf_flags &= ~NFSNSF_OK;
4872 printf("EEK! Can't write NfsV4 stable storage file\n");
4873 return;
4874 }
4875 sf->nsf_flags |= NFSNSF_OK;
4876
4877 /*
4878 * Loop through the list and write out timestamp records for
4879 * any clients that successfully reclaimed state.
4880 */
4881 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4882 if (sp->nst_flag & NFSNST_GOTSTATE) {
4883 nfsrv_writestable(sp->nst_client, sp->nst_len,
4884 NFSNST_NEWSTATE, p);
4885 sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
4886 }
4887 LIST_REMOVE(sp, nst_list);
4888 free(sp, M_TEMP);
4889 }
4890 nfsrv_backupstable();
4891 }
4892
4893 /*
4894 * Append a record to the stable storage file.
4895 */
4896 void
nfsrv_writestable(u_char * client,int len,int flag,NFSPROC_T * p)4897 nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
4898 {
4899 struct nfsrv_stablefirst *sf = &VNET(nfsrv_stablefirst);
4900 struct nfst_rec *sp;
4901 int error;
4902
4903 if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
4904 return;
4905 sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4906 len - 1, M_TEMP, M_WAITOK);
4907 sp->len = len;
4908 NFSBCOPY(client, sp->client, len);
4909 sp->flag = flag;
4910 error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
4911 (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
4912 UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
4913 free(sp, M_TEMP);
4914 if (error) {
4915 sf->nsf_flags &= ~NFSNSF_OK;
4916 printf("EEK! Can't write NfsV4 stable storage file\n");
4917 }
4918 }
4919
4920 /*
4921 * This function is called during the grace period to mark a client
4922 * that successfully reclaimed state.
4923 */
4924 static void
nfsrv_markstable(struct nfsclient * clp)4925 nfsrv_markstable(struct nfsclient *clp)
4926 {
4927 struct nfsrv_stable *sp;
4928
4929 /*
4930 * First find the client structure.
4931 */
4932 LIST_FOREACH(sp, &VNET(nfsrv_stablefirst).nsf_head, nst_list) {
4933 if (sp->nst_len == clp->lc_idlen &&
4934 !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4935 break;
4936 }
4937 if (sp == LIST_END(&VNET(nfsrv_stablefirst).nsf_head))
4938 return;
4939
4940 /*
4941 * Now, just mark it and set the nfsclient back pointer.
4942 */
4943 sp->nst_flag |= NFSNST_GOTSTATE;
4944 sp->nst_clp = clp;
4945 }
4946
4947 /*
4948 * This function is called when a NFSv4.1 client does a ReclaimComplete.
4949 * Very similar to nfsrv_markstable(), except for the flag being set.
4950 */
4951 static void
nfsrv_markreclaim(struct nfsclient * clp)4952 nfsrv_markreclaim(struct nfsclient *clp)
4953 {
4954 struct nfsrv_stable *sp;
4955
4956 /*
4957 * First find the client structure.
4958 */
4959 LIST_FOREACH(sp, &VNET(nfsrv_stablefirst).nsf_head, nst_list) {
4960 if (sp->nst_len == clp->lc_idlen &&
4961 !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4962 break;
4963 }
4964 if (sp == LIST_END(&VNET(nfsrv_stablefirst).nsf_head))
4965 return;
4966
4967 /*
4968 * Now, just set the flag.
4969 */
4970 sp->nst_flag |= NFSNST_RECLAIMED;
4971
4972 /*
4973 * Free up any old delegations.
4974 */
4975 nfsrv_freedeleglist(&clp->lc_olddeleg);
4976 }
4977
4978 /*
4979 * This function is called for a reclaim, to see if it gets grace.
4980 * It returns 0 if a reclaim is allowed, 1 otherwise.
4981 */
4982 static int
nfsrv_checkstable(struct nfsclient * clp)4983 nfsrv_checkstable(struct nfsclient *clp)
4984 {
4985 struct nfsrv_stable *sp;
4986
4987 /*
4988 * First, find the entry for the client.
4989 */
4990 LIST_FOREACH(sp, &VNET(nfsrv_stablefirst).nsf_head, nst_list) {
4991 if (sp->nst_len == clp->lc_idlen &&
4992 !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4993 break;
4994 }
4995
4996 /*
4997 * If not in the list, state was revoked or no state was issued
4998 * since the previous reboot, a reclaim is denied.
4999 */
5000 if (sp == LIST_END(&VNET(nfsrv_stablefirst).nsf_head) ||
5001 (sp->nst_flag & NFSNST_REVOKE) ||
5002 !(VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_OK))
5003 return (1);
5004 return (0);
5005 }
5006
5007 /*
5008 * Test for and try to clear out a conflicting client. This is called by
5009 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
5010 * a found.
5011 * The trick here is that it can't revoke a conflicting client with an
5012 * expired lease unless it holds the v4root lock, so...
5013 * If no v4root lock, get the lock and return 1 to indicate "try again".
5014 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
5015 * the revocation worked and the conflicting client is "bye, bye", so it
5016 * can be tried again.
5017 * Return 2 to indicate that the vnode is VIRF_DOOMED after NFSVOPLOCK().
5018 * Unlocks State before a non-zero value is returned.
5019 */
5020 static int
nfsrv_clientconflict(struct nfsclient * clp,int * haslockp,vnode_t vp,NFSPROC_T * p)5021 nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
5022 NFSPROC_T *p)
5023 {
5024 int gotlock, lktype = 0;
5025
5026 /*
5027 * If lease hasn't expired, we can't fix it.
5028 */
5029 if (clp->lc_expiry >= NFSD_MONOSEC ||
5030 !(VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE))
5031 return (0);
5032 if (*haslockp == 0) {
5033 NFSUNLOCKSTATE();
5034 if (vp != NULL) {
5035 lktype = NFSVOPISLOCKED(vp);
5036 NFSVOPUNLOCK(vp);
5037 }
5038 NFSLOCKV4ROOTMUTEX();
5039 nfsv4_relref(&nfsv4rootfs_lock);
5040 do {
5041 gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
5042 NFSV4ROOTLOCKMUTEXPTR, NULL);
5043 } while (!gotlock);
5044 NFSUNLOCKV4ROOTMUTEX();
5045 *haslockp = 1;
5046 if (vp != NULL) {
5047 NFSVOPLOCK(vp, lktype | LK_RETRY);
5048 if (VN_IS_DOOMED(vp))
5049 return (2);
5050 }
5051 return (1);
5052 }
5053 NFSUNLOCKSTATE();
5054
5055 /*
5056 * Ok, we can expire the conflicting client.
5057 */
5058 nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
5059 nfsrv_backupstable();
5060 nfsrv_cleanclient(clp, p, false, NULL);
5061 nfsrv_freedeleglist(&clp->lc_deleg);
5062 nfsrv_freedeleglist(&clp->lc_olddeleg);
5063 LIST_REMOVE(clp, lc_hash);
5064 nfsrv_zapclient(clp, p);
5065 return (1);
5066 }
5067
5068 /*
5069 * Resolve a delegation conflict.
5070 * Returns 0 to indicate the conflict was resolved without sleeping.
5071 * Return -1 to indicate that the caller should check for conflicts again.
5072 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
5073 *
5074 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
5075 * for a return of 0, since there was no sleep and it could be required
5076 * later. It is released for a return of NFSERR_DELAY, since the caller
5077 * will return that error. It is released when a sleep was done waiting
5078 * for the delegation to be returned or expire (so that other nfsds can
5079 * handle ops). Then, it must be acquired for the write to stable storage.
5080 * (This function is somewhat similar to nfsrv_clientconflict(), but
5081 * the semantics differ in a couple of subtle ways. The return of 0
5082 * indicates the conflict was resolved without sleeping here, not
5083 * that the conflict can't be resolved and the handling of nfsv4root_lock
5084 * differs, as noted above.)
5085 * Unlocks State before returning a non-zero value.
5086 */
5087 static int
nfsrv_delegconflict(struct nfsstate * stp,int * haslockp,NFSPROC_T * p,vnode_t vp)5088 nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
5089 vnode_t vp)
5090 {
5091 struct nfsclient *clp = stp->ls_clp;
5092 int gotlock, error, lktype = 0, retrycnt, zapped_clp;
5093 nfsv4stateid_t tstateid;
5094 fhandle_t tfh;
5095
5096 /*
5097 * If the conflict is with an old delegation...
5098 */
5099 if (stp->ls_flags & NFSLCK_OLDDELEG) {
5100 /*
5101 * You can delete it, if it has expired.
5102 */
5103 if (clp->lc_delegtime < NFSD_MONOSEC) {
5104 nfsrv_freedeleg(stp);
5105 NFSUNLOCKSTATE();
5106 error = -1;
5107 goto out;
5108 }
5109 NFSUNLOCKSTATE();
5110 /*
5111 * During this delay, the old delegation could expire or it
5112 * could be recovered by the client via an Open with
5113 * CLAIM_DELEGATE_PREV.
5114 * Release the nfsv4root_lock, if held.
5115 */
5116 if (*haslockp) {
5117 *haslockp = 0;
5118 NFSLOCKV4ROOTMUTEX();
5119 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5120 NFSUNLOCKV4ROOTMUTEX();
5121 }
5122 error = NFSERR_DELAY;
5123 goto out;
5124 }
5125
5126 /*
5127 * It's a current delegation, so:
5128 * - check to see if the delegation has expired
5129 * - if so, get the v4root lock and then expire it
5130 */
5131 if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0 || (stp->ls_lastrecall <
5132 NFSD_MONOSEC && clp->lc_expiry >= NFSD_MONOSEC &&
5133 stp->ls_delegtime >= NFSD_MONOSEC)) {
5134 /*
5135 * - do a recall callback, since not yet done
5136 * For now, never allow truncate to be set. To use
5137 * truncate safely, it must be guaranteed that the
5138 * Remove, Rename or Setattr with size of 0 will
5139 * succeed and that would require major changes to
5140 * the VFS/Vnode OPs.
5141 * Set the expiry time large enough so that it won't expire
5142 * until after the callback, then set it correctly, once
5143 * the callback is done. (The delegation will now time
5144 * out whether or not the Recall worked ok. The timeout
5145 * will be extended when ops are done on the delegation
5146 * stateid, up to the timelimit.)
5147 */
5148 if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) {
5149 stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
5150 NFSRV_LEASEDELTA;
5151 stp->ls_delegtimelimit = NFSD_MONOSEC + (6 *
5152 nfsrv_lease) + NFSRV_LEASEDELTA;
5153 stp->ls_flags |= NFSLCK_DELEGRECALL;
5154 }
5155 stp->ls_lastrecall = time_uptime + 1;
5156
5157 /*
5158 * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
5159 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
5160 * in order to try and avoid a race that could happen
5161 * when a CBRecall request passed the Open reply with
5162 * the delegation in it when transitting the network.
5163 * Since nfsrv_docallback will sleep, don't use stp after
5164 * the call.
5165 */
5166 NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
5167 sizeof (tstateid));
5168 NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
5169 sizeof (tfh));
5170 NFSUNLOCKSTATE();
5171 if (*haslockp) {
5172 *haslockp = 0;
5173 NFSLOCKV4ROOTMUTEX();
5174 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5175 NFSUNLOCKV4ROOTMUTEX();
5176 }
5177 retrycnt = 0;
5178 do {
5179 error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
5180 &tstateid, 0, &tfh, NULL, NULL, 0, p);
5181 retrycnt++;
5182 } while ((error == NFSERR_BADSTATEID ||
5183 error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
5184 error = NFSERR_DELAY;
5185 goto out;
5186 }
5187
5188 if (clp->lc_expiry >= NFSD_MONOSEC &&
5189 stp->ls_delegtime >= NFSD_MONOSEC) {
5190 NFSUNLOCKSTATE();
5191 /*
5192 * A recall has been done, but it has not yet expired.
5193 * So, RETURN_DELAY.
5194 */
5195 if (*haslockp) {
5196 *haslockp = 0;
5197 NFSLOCKV4ROOTMUTEX();
5198 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5199 NFSUNLOCKV4ROOTMUTEX();
5200 }
5201 error = NFSERR_DELAY;
5202 goto out;
5203 }
5204
5205 /*
5206 * If we don't yet have the lock, just get it and then return,
5207 * since we need that before deleting expired state, such as
5208 * this delegation.
5209 * When getting the lock, unlock the vnode, so other nfsds that
5210 * are in progress, won't get stuck waiting for the vnode lock.
5211 */
5212 if (*haslockp == 0) {
5213 NFSUNLOCKSTATE();
5214 if (vp != NULL) {
5215 lktype = NFSVOPISLOCKED(vp);
5216 NFSVOPUNLOCK(vp);
5217 }
5218 NFSLOCKV4ROOTMUTEX();
5219 nfsv4_relref(&nfsv4rootfs_lock);
5220 do {
5221 gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
5222 NFSV4ROOTLOCKMUTEXPTR, NULL);
5223 } while (!gotlock);
5224 NFSUNLOCKV4ROOTMUTEX();
5225 *haslockp = 1;
5226 if (vp != NULL) {
5227 NFSVOPLOCK(vp, lktype | LK_RETRY);
5228 if (VN_IS_DOOMED(vp)) {
5229 *haslockp = 0;
5230 NFSLOCKV4ROOTMUTEX();
5231 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5232 NFSUNLOCKV4ROOTMUTEX();
5233 error = NFSERR_PERM;
5234 goto out;
5235 }
5236 }
5237 error = -1;
5238 goto out;
5239 }
5240
5241 NFSUNLOCKSTATE();
5242 /*
5243 * Ok, we can delete the expired delegation.
5244 * First, write the Revoke record to stable storage and then
5245 * clear out the conflict.
5246 * Since all other nfsd threads are now blocked, we can safely
5247 * sleep without the state changing.
5248 */
5249 nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
5250 nfsrv_backupstable();
5251 if (clp->lc_expiry < NFSD_MONOSEC) {
5252 nfsrv_cleanclient(clp, p, false, NULL);
5253 nfsrv_freedeleglist(&clp->lc_deleg);
5254 nfsrv_freedeleglist(&clp->lc_olddeleg);
5255 LIST_REMOVE(clp, lc_hash);
5256 zapped_clp = 1;
5257 } else {
5258 nfsrv_freedeleg(stp);
5259 zapped_clp = 0;
5260 }
5261 if (zapped_clp)
5262 nfsrv_zapclient(clp, p);
5263 error = -1;
5264
5265 out:
5266 NFSEXITCODE(error);
5267 return (error);
5268 }
5269
5270 /*
5271 * Check for a remove allowed, if remove is set to 1 and get rid of
5272 * delegations.
5273 */
5274 int
nfsrv_checkremove(vnode_t vp,int remove,struct nfsrv_descript * nd,nfsquad_t clientid,NFSPROC_T * p)5275 nfsrv_checkremove(vnode_t vp, int remove, struct nfsrv_descript *nd,
5276 nfsquad_t clientid, NFSPROC_T *p)
5277 {
5278 struct nfsclient *clp;
5279 struct nfsstate *stp;
5280 struct nfslockfile *lfp;
5281 int error, haslock = 0;
5282 fhandle_t nfh;
5283
5284 clp = NULL;
5285 /*
5286 * First, get the lock file structure.
5287 * (A return of -1 means no associated state, so remove ok.)
5288 */
5289 error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5290 tryagain:
5291 NFSLOCKSTATE();
5292 if (error == 0 && clientid.qval != 0)
5293 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
5294 (nfsquad_t)((u_quad_t)0), 0, nd, p);
5295 if (!error)
5296 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5297 if (error) {
5298 NFSUNLOCKSTATE();
5299 if (haslock) {
5300 NFSLOCKV4ROOTMUTEX();
5301 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5302 NFSUNLOCKV4ROOTMUTEX();
5303 }
5304 if (error == -1)
5305 error = 0;
5306 goto out;
5307 }
5308
5309 /*
5310 * Now, we must Recall any delegations.
5311 */
5312 error = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
5313 if (error) {
5314 /*
5315 * nfsrv_cleandeleg() unlocks state for non-zero
5316 * return.
5317 */
5318 if (error == -1)
5319 goto tryagain;
5320 if (haslock) {
5321 NFSLOCKV4ROOTMUTEX();
5322 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5323 NFSUNLOCKV4ROOTMUTEX();
5324 }
5325 goto out;
5326 }
5327
5328 /*
5329 * Now, look for a conflicting open share.
5330 */
5331 if (remove) {
5332 /*
5333 * If the entry in the directory was the last reference to the
5334 * corresponding filesystem object, the object can be destroyed
5335 * */
5336 if(lfp->lf_usecount>1)
5337 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
5338 if (stp->ls_flags & NFSLCK_WRITEDENY) {
5339 error = NFSERR_FILEOPEN;
5340 break;
5341 }
5342 }
5343 }
5344
5345 NFSUNLOCKSTATE();
5346 if (haslock) {
5347 NFSLOCKV4ROOTMUTEX();
5348 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5349 NFSUNLOCKV4ROOTMUTEX();
5350 }
5351
5352 out:
5353 NFSEXITCODE(error);
5354 return (error);
5355 }
5356
5357 /*
5358 * Clear out all delegations for the file referred to by lfp.
5359 * May return NFSERR_DELAY, if there will be a delay waiting for
5360 * delegations to expire.
5361 * Returns -1 to indicate it slept while recalling a delegation.
5362 * This function has the side effect of deleting the nfslockfile structure,
5363 * if it no longer has associated state and didn't have to sleep.
5364 * Unlocks State before a non-zero value is returned.
5365 */
5366 static int
nfsrv_cleandeleg(vnode_t vp,struct nfslockfile * lfp,struct nfsclient * clp,int * haslockp,NFSPROC_T * p)5367 nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
5368 struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
5369 {
5370 struct nfsstate *stp, *nstp;
5371 int ret = 0;
5372
5373 stp = LIST_FIRST(&lfp->lf_deleg);
5374 while (stp != LIST_END(&lfp->lf_deleg)) {
5375 nstp = LIST_NEXT(stp, ls_file);
5376 if (stp->ls_clp != clp) {
5377 ret = nfsrv_delegconflict(stp, haslockp, p, vp);
5378 if (ret) {
5379 /*
5380 * nfsrv_delegconflict() unlocks state
5381 * when it returns non-zero.
5382 */
5383 goto out;
5384 }
5385 }
5386 stp = nstp;
5387 }
5388 out:
5389 NFSEXITCODE(ret);
5390 return (ret);
5391 }
5392
5393 /*
5394 * There are certain operations that, when being done outside of NFSv4,
5395 * require that any NFSv4 delegation for the file be recalled.
5396 * This function is to be called for those cases:
5397 * VOP_RENAME() - When a delegation is being recalled for any reason,
5398 * the client may have to do Opens against the server, using the file's
5399 * final component name. If the file has been renamed on the server,
5400 * that component name will be incorrect and the Open will fail.
5401 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
5402 * been removed on the server, if there is a delegation issued to
5403 * that client for the file. I say "theoretically" since clients
5404 * normally do an Access Op before the Open and that Access Op will
5405 * fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
5406 * they will detect the file's removal in the same manner. (There is
5407 * one case where RFC3530 allows a client to do an Open without first
5408 * doing an Access Op, which is passage of a check against the ACE
5409 * returned with a Write delegation, but current practice is to ignore
5410 * the ACE and always do an Access Op.)
5411 * Since the functions can only be called with an unlocked vnode, this
5412 * can't be done at this time.
5413 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
5414 * locks locally in the client, which are not visible to the server. To
5415 * deal with this, issuing of delegations for a vnode must be disabled
5416 * and all delegations for the vnode recalled. This is done via the
5417 * second function, using the VV_DISABLEDELEG vflag on the vnode.
5418 */
5419 void
nfsd_recalldelegation(vnode_t vp,NFSPROC_T * p)5420 nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
5421 {
5422 time_t starttime;
5423 int error;
5424
5425 /*
5426 * First, check to see if the server is currently running and it has
5427 * been called for a regular file when issuing delegations.
5428 */
5429 if (VNET(nfsrv_numnfsd) == 0 || vp->v_type != VREG ||
5430 nfsrv_issuedelegs == 0)
5431 return;
5432
5433 KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
5434 /*
5435 * First, get a reference on the nfsv4rootfs_lock so that an
5436 * exclusive lock cannot be acquired by another thread.
5437 */
5438 NFSLOCKV4ROOTMUTEX();
5439 nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
5440 NFSUNLOCKV4ROOTMUTEX();
5441
5442 /*
5443 * Now, call nfsrv_checkremove() in a loop while it returns
5444 * NFSERR_DELAY. Return upon any other error or when timed out.
5445 */
5446 starttime = NFSD_MONOSEC;
5447 do {
5448 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5449 error = nfsrv_checkremove(vp, 0, NULL,
5450 (nfsquad_t)((u_quad_t)0), p);
5451 NFSVOPUNLOCK(vp);
5452 } else
5453 error = EPERM;
5454 if (error == NFSERR_DELAY) {
5455 if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
5456 break;
5457 /* Sleep for a short period of time */
5458 (void) nfs_catnap(PZERO, 0, "nfsremove");
5459 }
5460 } while (error == NFSERR_DELAY);
5461 NFSLOCKV4ROOTMUTEX();
5462 nfsv4_relref(&nfsv4rootfs_lock);
5463 NFSUNLOCKV4ROOTMUTEX();
5464 }
5465
5466 void
nfsd_disabledelegation(vnode_t vp,NFSPROC_T * p)5467 nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
5468 {
5469
5470 #ifdef VV_DISABLEDELEG
5471 /*
5472 * First, flag issuance of delegations disabled.
5473 */
5474 atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
5475 #endif
5476
5477 /*
5478 * Then call nfsd_recalldelegation() to get rid of all extant
5479 * delegations.
5480 */
5481 nfsd_recalldelegation(vp, p);
5482 }
5483
5484 /*
5485 * Check for conflicting locks, etc. and then get rid of delegations.
5486 * (At one point I thought that I should get rid of delegations for any
5487 * Setattr, since it could potentially disallow the I/O op (read or write)
5488 * allowed by the delegation. However, Setattr Ops that aren't changing
5489 * the size get a stateid of all 0s, so you can't tell if it is a delegation
5490 * for the same client or a different one, so I decided to only get rid
5491 * of delegations for other clients when the size is being changed.)
5492 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
5493 * as Write backs, even if there is no delegation, so it really isn't any
5494 * different?)
5495 */
5496 int
nfsrv_checksetattr(vnode_t vp,struct nfsrv_descript * nd,nfsv4stateid_t * stateidp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,struct nfsexstuff * exp,NFSPROC_T * p)5497 nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
5498 nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
5499 struct nfsexstuff *exp, NFSPROC_T *p)
5500 {
5501 struct nfsstate st, *stp = &st;
5502 struct nfslock lo, *lop = &lo;
5503 int error = 0;
5504 nfsquad_t clientid;
5505
5506 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
5507 stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
5508 lop->lo_first = nvap->na_size;
5509 } else {
5510 stp->ls_flags = 0;
5511 lop->lo_first = 0;
5512 }
5513 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
5514 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
5515 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
5516 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
5517 stp->ls_flags |= NFSLCK_SETATTR;
5518 if (stp->ls_flags == 0)
5519 goto out;
5520 lop->lo_end = NFS64BITSSET;
5521 lop->lo_flags = NFSLCK_WRITE;
5522 stp->ls_ownerlen = 0;
5523 stp->ls_op = NULL;
5524 stp->ls_uid = nd->nd_cred->cr_uid;
5525 stp->ls_stateid.seqid = stateidp->seqid;
5526 clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
5527 clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
5528 stp->ls_stateid.other[2] = stateidp->other[2];
5529 error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
5530 stateidp, exp, nd, p);
5531
5532 out:
5533 NFSEXITCODE2(error, nd);
5534 return (error);
5535 }
5536
5537 /*
5538 * Check for a write delegation and do a CBGETATTR if there is one, updating
5539 * the attributes, as required.
5540 * Should I return an error if I can't get the attributes? (For now, I'll
5541 * just return ok.
5542 */
5543 int
nfsrv_checkgetattr(struct nfsrv_descript * nd,vnode_t vp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,NFSPROC_T * p)5544 nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
5545 struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
5546 {
5547 struct nfsstate *stp;
5548 struct nfslockfile *lfp;
5549 struct nfsclient *clp;
5550 struct nfsvattr nva;
5551 fhandle_t nfh;
5552 int error = 0;
5553 nfsattrbit_t cbbits;
5554 u_quad_t delegfilerev;
5555
5556 NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
5557 if (!NFSNONZERO_ATTRBIT(&cbbits))
5558 goto out;
5559 if (nfsrv_writedelegcnt == 0)
5560 goto out;
5561
5562 /*
5563 * Get the lock file structure.
5564 * (A return of -1 means no associated state, so return ok.)
5565 */
5566 error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5567 NFSLOCKSTATE();
5568 if (!error)
5569 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5570 if (error) {
5571 NFSUNLOCKSTATE();
5572 if (error == -1)
5573 error = 0;
5574 goto out;
5575 }
5576
5577 /*
5578 * Now, look for a write delegation.
5579 */
5580 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
5581 if (stp->ls_flags & NFSLCK_DELEGWRITE)
5582 break;
5583 }
5584 if (stp == LIST_END(&lfp->lf_deleg)) {
5585 NFSUNLOCKSTATE();
5586 goto out;
5587 }
5588 clp = stp->ls_clp;
5589
5590 /* If the clientid is not confirmed, ignore the delegation. */
5591 if (clp->lc_flags & LCL_NEEDSCONFIRM) {
5592 NFSUNLOCKSTATE();
5593 goto out;
5594 }
5595
5596 delegfilerev = stp->ls_filerev;
5597 /*
5598 * If the Write delegation was issued as a part of this Compound RPC
5599 * or if we have an Implied Clientid (used in a previous Op in this
5600 * compound) and it is the client the delegation was issued to,
5601 * just return ok.
5602 * I also assume that it is from the same client iff the network
5603 * host IP address is the same as the callback address. (Not
5604 * exactly correct by the RFC, but avoids a lot of Getattr
5605 * callbacks.)
5606 */
5607 if (nd->nd_compref == stp->ls_compref ||
5608 ((nd->nd_flag & ND_IMPLIEDCLID) &&
5609 clp->lc_clientid.qval == nd->nd_clientid.qval) ||
5610 nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
5611 NFSUNLOCKSTATE();
5612 goto out;
5613 }
5614
5615 /*
5616 * We are now done with the delegation state structure,
5617 * so the statelock can be released and we can now tsleep().
5618 */
5619
5620 /*
5621 * Now, we must do the CB Getattr callback, to see if Change or Size
5622 * has changed.
5623 */
5624 if (clp->lc_expiry >= NFSD_MONOSEC) {
5625 NFSUNLOCKSTATE();
5626 NFSVNO_ATTRINIT(&nva);
5627 nva.na_filerev = NFS64BITSSET;
5628 error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
5629 0, &nfh, &nva, &cbbits, 0, p);
5630 if (!error) {
5631 if ((nva.na_filerev != NFS64BITSSET &&
5632 nva.na_filerev > delegfilerev) ||
5633 (NFSVNO_ISSETSIZE(&nva) &&
5634 nva.na_size != nvap->na_size)) {
5635 error = nfsvno_updfilerev(vp, nvap, nd, p);
5636 if (NFSVNO_ISSETSIZE(&nva))
5637 nvap->na_size = nva.na_size;
5638 }
5639 } else
5640 error = 0; /* Ignore callback errors for now. */
5641 } else {
5642 NFSUNLOCKSTATE();
5643 }
5644
5645 out:
5646 NFSEXITCODE2(error, nd);
5647 return (error);
5648 }
5649
5650 /*
5651 * This function looks for openowners that haven't had any opens for
5652 * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
5653 * is set.
5654 */
5655 void
nfsrv_throwawayopens(NFSPROC_T * p)5656 nfsrv_throwawayopens(NFSPROC_T *p)
5657 {
5658 struct nfsclient *clp, *nclp;
5659 struct nfsstate *stp, *nstp;
5660 int i;
5661
5662 NFSLOCKSTATE();
5663 VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_NOOPENS;
5664 /*
5665 * For each client...
5666 */
5667 for (i = 0; i < nfsrv_clienthashsize; i++) {
5668 LIST_FOREACH_SAFE(clp, &VNET(nfsclienthash)[i], lc_hash,
5669 nclp) {
5670 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
5671 if (LIST_EMPTY(&stp->ls_open) &&
5672 (stp->ls_noopens > NFSNOOPEN ||
5673 (nfsrv_openpluslock * 2) >
5674 nfsrv_v4statelimit))
5675 nfsrv_freeopenowner(stp, 0, p);
5676 }
5677 }
5678 }
5679 NFSUNLOCKSTATE();
5680 }
5681
5682 /*
5683 * This function checks to see if the credentials are the same.
5684 * The check for same credentials is needed for state management operations
5685 * for NFSv4.0 or NFSv4.1/4.2 when SP4_MACH_CRED is configured via
5686 * ExchangeID.
5687 * Returns 1 for not same, 0 otherwise.
5688 */
5689 static int
nfsrv_notsamecredname(int op,struct nfsrv_descript * nd,struct nfsclient * clp)5690 nfsrv_notsamecredname(int op, struct nfsrv_descript *nd, struct nfsclient *clp)
5691 {
5692
5693 /* Check for the SP4_MACH_CRED case. */
5694 if (op != 0 && nfsrv_checkmachcred(op, nd, clp) != 0)
5695 return (1);
5696
5697 /* For NFSv4.1/4.2, SP4_NONE always allows this. */
5698 if ((nd->nd_flag & ND_NFSV41) != 0)
5699 return (0);
5700
5701 if (nd->nd_flag & ND_GSS) {
5702 if (!(clp->lc_flags & LCL_GSS))
5703 return (1);
5704 if (clp->lc_flags & LCL_NAME) {
5705 if (nd->nd_princlen != clp->lc_namelen ||
5706 NFSBCMP(nd->nd_principal, clp->lc_name,
5707 clp->lc_namelen))
5708 return (1);
5709 else
5710 return (0);
5711 }
5712 if (nd->nd_cred->cr_uid == clp->lc_uid)
5713 return (0);
5714 else
5715 return (1);
5716 } else if (clp->lc_flags & LCL_GSS)
5717 return (1);
5718 /*
5719 * For AUTH_SYS, allow the same uid or root. (This is underspecified
5720 * in RFC3530, which talks about principals, but doesn't say anything
5721 * about uids for AUTH_SYS.)
5722 */
5723 if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
5724 return (0);
5725 else
5726 return (1);
5727 }
5728
5729 /*
5730 * Calculate the lease expiry time.
5731 */
5732 static time_t
nfsrv_leaseexpiry(void)5733 nfsrv_leaseexpiry(void)
5734 {
5735
5736 if (VNET(nfsrv_stablefirst).nsf_eograce > NFSD_MONOSEC)
5737 return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
5738 return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
5739 }
5740
5741 /*
5742 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
5743 */
5744 static void
nfsrv_delaydelegtimeout(struct nfsstate * stp)5745 nfsrv_delaydelegtimeout(struct nfsstate *stp)
5746 {
5747
5748 if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
5749 return;
5750
5751 if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
5752 stp->ls_delegtime < stp->ls_delegtimelimit) {
5753 stp->ls_delegtime += nfsrv_lease;
5754 if (stp->ls_delegtime > stp->ls_delegtimelimit)
5755 stp->ls_delegtime = stp->ls_delegtimelimit;
5756 }
5757 }
5758
5759 /*
5760 * This function checks to see if there is any other state associated
5761 * with the openowner for this Open.
5762 * It returns 1 if there is no other state, 0 otherwise.
5763 */
5764 static int
nfsrv_nootherstate(struct nfsstate * stp)5765 nfsrv_nootherstate(struct nfsstate *stp)
5766 {
5767 struct nfsstate *tstp;
5768
5769 LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
5770 if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
5771 return (0);
5772 }
5773 return (1);
5774 }
5775
5776 /*
5777 * Create a list of lock deltas (changes to local byte range locking
5778 * that can be rolled back using the list) and apply the changes via
5779 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
5780 * the rollback or update function will be called after this.
5781 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
5782 * call fails. If it returns an error, it will unlock the list.
5783 */
5784 static int
nfsrv_locallock(vnode_t vp,struct nfslockfile * lfp,int flags,uint64_t first,uint64_t end,struct nfslockconflict * cfp,NFSPROC_T * p)5785 nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
5786 uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5787 {
5788 struct nfslock *lop, *nlop;
5789 int error = 0;
5790
5791 /* Loop through the list of locks. */
5792 lop = LIST_FIRST(&lfp->lf_locallock);
5793 while (first < end && lop != NULL) {
5794 nlop = LIST_NEXT(lop, lo_lckowner);
5795 if (first >= lop->lo_end) {
5796 /* not there yet */
5797 lop = nlop;
5798 } else if (first < lop->lo_first) {
5799 /* new one starts before entry in list */
5800 if (end <= lop->lo_first) {
5801 /* no overlap between old and new */
5802 error = nfsrv_dolocal(vp, lfp, flags,
5803 NFSLCK_UNLOCK, first, end, cfp, p);
5804 if (error != 0)
5805 break;
5806 first = end;
5807 } else {
5808 /* handle fragment overlapped with new one */
5809 error = nfsrv_dolocal(vp, lfp, flags,
5810 NFSLCK_UNLOCK, first, lop->lo_first, cfp,
5811 p);
5812 if (error != 0)
5813 break;
5814 first = lop->lo_first;
5815 }
5816 } else {
5817 /* new one overlaps this entry in list */
5818 if (end <= lop->lo_end) {
5819 /* overlaps all of new one */
5820 error = nfsrv_dolocal(vp, lfp, flags,
5821 lop->lo_flags, first, end, cfp, p);
5822 if (error != 0)
5823 break;
5824 first = end;
5825 } else {
5826 /* handle fragment overlapped with new one */
5827 error = nfsrv_dolocal(vp, lfp, flags,
5828 lop->lo_flags, first, lop->lo_end, cfp, p);
5829 if (error != 0)
5830 break;
5831 first = lop->lo_end;
5832 lop = nlop;
5833 }
5834 }
5835 }
5836 if (first < end && error == 0)
5837 /* handle fragment past end of list */
5838 error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
5839 end, cfp, p);
5840
5841 NFSEXITCODE(error);
5842 return (error);
5843 }
5844
5845 /*
5846 * Local lock unlock. Unlock all byte ranges that are no longer locked
5847 * by NFSv4. To do this, unlock any subranges of first-->end that
5848 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
5849 * list. This list has all locks for the file held by other
5850 * <clientid, lockowner> tuples. The list is ordered by increasing
5851 * lo_first value, but may have entries that overlap each other, for
5852 * the case of read locks.
5853 */
5854 static void
nfsrv_localunlock(vnode_t vp,struct nfslockfile * lfp,uint64_t init_first,uint64_t init_end,NFSPROC_T * p)5855 nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
5856 uint64_t init_end, NFSPROC_T *p)
5857 {
5858 struct nfslock *lop;
5859 uint64_t first, end, prevfirst __unused;
5860
5861 first = init_first;
5862 end = init_end;
5863 while (first < init_end) {
5864 /* Loop through all nfs locks, adjusting first and end */
5865 prevfirst = 0;
5866 LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
5867 KASSERT(prevfirst <= lop->lo_first,
5868 ("nfsv4 locks out of order"));
5869 KASSERT(lop->lo_first < lop->lo_end,
5870 ("nfsv4 bogus lock"));
5871 prevfirst = lop->lo_first;
5872 if (first >= lop->lo_first &&
5873 first < lop->lo_end)
5874 /*
5875 * Overlaps with initial part, so trim
5876 * off that initial part by moving first past
5877 * it.
5878 */
5879 first = lop->lo_end;
5880 else if (end > lop->lo_first &&
5881 lop->lo_first > first) {
5882 /*
5883 * This lock defines the end of the
5884 * segment to unlock, so set end to the
5885 * start of it and break out of the loop.
5886 */
5887 end = lop->lo_first;
5888 break;
5889 }
5890 if (first >= end)
5891 /*
5892 * There is no segment left to do, so
5893 * break out of this loop and then exit
5894 * the outer while() since first will be set
5895 * to end, which must equal init_end here.
5896 */
5897 break;
5898 }
5899 if (first < end) {
5900 /* Unlock this segment */
5901 (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
5902 NFSLCK_READ, first, end, NULL, p);
5903 nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
5904 first, end);
5905 }
5906 /*
5907 * Now move past this segment and look for any further
5908 * segment in the range, if there is one.
5909 */
5910 first = end;
5911 end = init_end;
5912 }
5913 }
5914
5915 /*
5916 * Do the local lock operation and update the rollback list, as required.
5917 * Perform the rollback and return the error if nfsvno_advlock() fails.
5918 */
5919 static int
nfsrv_dolocal(vnode_t vp,struct nfslockfile * lfp,int flags,int oldflags,uint64_t first,uint64_t end,struct nfslockconflict * cfp,NFSPROC_T * p)5920 nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
5921 uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5922 {
5923 struct nfsrollback *rlp;
5924 int error = 0, ltype, oldltype;
5925
5926 if (flags & NFSLCK_WRITE)
5927 ltype = F_WRLCK;
5928 else if (flags & NFSLCK_READ)
5929 ltype = F_RDLCK;
5930 else
5931 ltype = F_UNLCK;
5932 if (oldflags & NFSLCK_WRITE)
5933 oldltype = F_WRLCK;
5934 else if (oldflags & NFSLCK_READ)
5935 oldltype = F_RDLCK;
5936 else
5937 oldltype = F_UNLCK;
5938 if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
5939 /* nothing to do */
5940 goto out;
5941 error = nfsvno_advlock(vp, ltype, first, end, p);
5942 if (error != 0) {
5943 if (cfp != NULL) {
5944 cfp->cl_clientid.lval[0] = 0;
5945 cfp->cl_clientid.lval[1] = 0;
5946 cfp->cl_first = 0;
5947 cfp->cl_end = NFS64BITSSET;
5948 cfp->cl_flags = NFSLCK_WRITE;
5949 cfp->cl_ownerlen = 5;
5950 NFSBCOPY("LOCAL", cfp->cl_owner, 5);
5951 }
5952 nfsrv_locallock_rollback(vp, lfp, p);
5953 } else if (ltype != F_UNLCK) {
5954 rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
5955 M_WAITOK);
5956 rlp->rlck_first = first;
5957 rlp->rlck_end = end;
5958 rlp->rlck_type = oldltype;
5959 LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
5960 }
5961
5962 out:
5963 NFSEXITCODE(error);
5964 return (error);
5965 }
5966
5967 /*
5968 * Roll back local lock changes and free up the rollback list.
5969 */
5970 static void
nfsrv_locallock_rollback(vnode_t vp,struct nfslockfile * lfp,NFSPROC_T * p)5971 nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
5972 {
5973 struct nfsrollback *rlp, *nrlp;
5974
5975 LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
5976 (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
5977 rlp->rlck_end, p);
5978 free(rlp, M_NFSDROLLBACK);
5979 }
5980 LIST_INIT(&lfp->lf_rollback);
5981 }
5982
5983 /*
5984 * Update local lock list and delete rollback list (ie now committed to the
5985 * local locks). Most of the work is done by the internal function.
5986 */
5987 static void
nfsrv_locallock_commit(struct nfslockfile * lfp,int flags,uint64_t first,uint64_t end)5988 nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
5989 uint64_t end)
5990 {
5991 struct nfsrollback *rlp, *nrlp;
5992 struct nfslock *new_lop, *other_lop;
5993
5994 new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
5995 if (flags & (NFSLCK_READ | NFSLCK_WRITE))
5996 other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
5997 M_WAITOK);
5998 else
5999 other_lop = NULL;
6000 new_lop->lo_flags = flags;
6001 new_lop->lo_first = first;
6002 new_lop->lo_end = end;
6003 nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
6004 if (new_lop != NULL)
6005 free(new_lop, M_NFSDLOCK);
6006 if (other_lop != NULL)
6007 free(other_lop, M_NFSDLOCK);
6008
6009 /* and get rid of the rollback list */
6010 LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
6011 free(rlp, M_NFSDROLLBACK);
6012 LIST_INIT(&lfp->lf_rollback);
6013 }
6014
6015 /*
6016 * Lock the struct nfslockfile for local lock updating.
6017 */
6018 static void
nfsrv_locklf(struct nfslockfile * lfp)6019 nfsrv_locklf(struct nfslockfile *lfp)
6020 {
6021 int gotlock;
6022
6023 /* lf_usecount ensures *lfp won't be free'd */
6024 lfp->lf_usecount++;
6025 do {
6026 gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
6027 NFSSTATEMUTEXPTR, NULL);
6028 } while (gotlock == 0);
6029 lfp->lf_usecount--;
6030 }
6031
6032 /*
6033 * Unlock the struct nfslockfile after local lock updating.
6034 */
6035 static void
nfsrv_unlocklf(struct nfslockfile * lfp)6036 nfsrv_unlocklf(struct nfslockfile *lfp)
6037 {
6038
6039 nfsv4_unlock(&lfp->lf_locallock_lck, 0);
6040 }
6041
6042 /*
6043 * Clear out all state for the NFSv4 server.
6044 * Must be called by a thread that can sleep when no nfsds are running.
6045 */
6046 void
nfsrv_throwawayallstate(NFSPROC_T * p)6047 nfsrv_throwawayallstate(NFSPROC_T *p)
6048 {
6049 struct nfsclient *clp, *nclp;
6050 struct nfslockfile *lfp, *nlfp;
6051 int i;
6052
6053 /*
6054 * For each client, clean out the state and then free the structure.
6055 */
6056 for (i = 0; i < nfsrv_clienthashsize; i++) {
6057 LIST_FOREACH_SAFE(clp, &VNET(nfsclienthash)[i], lc_hash,
6058 nclp) {
6059 nfsrv_cleanclient(clp, p, false, NULL);
6060 nfsrv_freedeleglist(&clp->lc_deleg);
6061 nfsrv_freedeleglist(&clp->lc_olddeleg);
6062 free(clp->lc_stateid, M_NFSDCLIENT);
6063 free(clp, M_NFSDCLIENT);
6064 }
6065 }
6066
6067 /*
6068 * Also, free up any remaining lock file structures.
6069 */
6070 for (i = 0; i < nfsrv_lockhashsize; i++) {
6071 LIST_FOREACH_SAFE(lfp, &VNET(nfslockhash)[i], lf_hash,
6072 nlfp) {
6073 printf("nfsd unload: fnd a lock file struct\n");
6074 nfsrv_freenfslockfile(lfp);
6075 }
6076 }
6077
6078 /* And get rid of the deviceid structures and layouts. */
6079 nfsrv_freealllayoutsanddevids();
6080 }
6081
6082 /*
6083 * Check the sequence# for the session and slot provided as an argument.
6084 * Also, renew the lease if the session will return NFS_OK.
6085 */
6086 int
nfsrv_checksequence(struct nfsrv_descript * nd,uint32_t sequenceid,uint32_t * highest_slotidp,uint32_t * target_highest_slotidp,int cache_this,uint32_t * sflagsp,NFSPROC_T * p)6087 nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
6088 uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
6089 uint32_t *sflagsp, NFSPROC_T *p)
6090 {
6091 struct nfsdsession *sep;
6092 struct nfssessionhash *shp;
6093 int error;
6094
6095 shp = NFSSESSIONHASH(nd->nd_sessionid);
6096 NFSLOCKSESSION(shp);
6097 sep = nfsrv_findsession(nd->nd_sessionid);
6098 if (sep == NULL) {
6099 NFSUNLOCKSESSION(shp);
6100 return (NFSERR_BADSESSION);
6101 }
6102 error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
6103 sep->sess_slots, NULL, NFSV4_SLOTS - 1);
6104 if (error != 0) {
6105 NFSUNLOCKSESSION(shp);
6106 return (error);
6107 }
6108 if (cache_this != 0)
6109 nd->nd_flag |= ND_SAVEREPLY;
6110 /* Renew the lease. */
6111 sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
6112 nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
6113 nd->nd_flag |= ND_IMPLIEDCLID;
6114
6115 /* Handle the SP4_MECH_CRED case for NFSv4.1/4.2. */
6116 if ((sep->sess_clp->lc_flags & LCL_MACHCRED) != 0 &&
6117 (nd->nd_flag & (ND_GSSINTEGRITY | ND_GSSPRIVACY)) != 0 &&
6118 nd->nd_princlen == sep->sess_clp->lc_namelen &&
6119 !NFSBCMP(sep->sess_clp->lc_name, nd->nd_principal,
6120 nd->nd_princlen)) {
6121 nd->nd_flag |= ND_MACHCRED;
6122 NFSSET_OPBIT(&nd->nd_allowops, &sep->sess_clp->lc_allowops);
6123 }
6124
6125 /* Save maximum request and reply sizes. */
6126 nd->nd_maxreq = sep->sess_maxreq;
6127 nd->nd_maxresp = sep->sess_maxresp;
6128
6129 *sflagsp = 0;
6130 if (sep->sess_clp->lc_req.nr_client == NULL ||
6131 (sep->sess_clp->lc_flags & LCL_CBDOWN) != 0)
6132 *sflagsp |= NFSV4SEQ_CBPATHDOWN;
6133 NFSUNLOCKSESSION(shp);
6134 if (error == NFSERR_EXPIRED) {
6135 *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
6136 error = 0;
6137 } else if (error == NFSERR_ADMINREVOKED) {
6138 *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
6139 error = 0;
6140 }
6141 *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
6142 return (0);
6143 }
6144
6145 /*
6146 * Check/set reclaim complete for this session/clientid.
6147 */
6148 int
nfsrv_checkreclaimcomplete(struct nfsrv_descript * nd,int onefs)6149 nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs)
6150 {
6151 struct nfsdsession *sep;
6152 struct nfssessionhash *shp;
6153 int error = 0;
6154
6155 shp = NFSSESSIONHASH(nd->nd_sessionid);
6156 NFSLOCKSTATE();
6157 NFSLOCKSESSION(shp);
6158 sep = nfsrv_findsession(nd->nd_sessionid);
6159 if (sep == NULL) {
6160 NFSUNLOCKSESSION(shp);
6161 NFSUNLOCKSTATE();
6162 return (NFSERR_BADSESSION);
6163 }
6164
6165 if (onefs != 0)
6166 sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS;
6167 /* Check to see if reclaim complete has already happened. */
6168 else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
6169 error = NFSERR_COMPLETEALREADY;
6170 else {
6171 sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
6172 nfsrv_markreclaim(sep->sess_clp);
6173 }
6174 NFSUNLOCKSESSION(shp);
6175 NFSUNLOCKSTATE();
6176 return (error);
6177 }
6178
6179 /*
6180 * Cache the reply in a session slot.
6181 */
6182 void
nfsrv_cache_session(struct nfsrv_descript * nd,struct mbuf ** m)6183 nfsrv_cache_session(struct nfsrv_descript *nd, struct mbuf **m)
6184 {
6185 struct nfsdsession *sep;
6186 struct nfssessionhash *shp;
6187 char *buf, *cp;
6188 #ifdef INET
6189 struct sockaddr_in *sin;
6190 #endif
6191 #ifdef INET6
6192 struct sockaddr_in6 *sin6;
6193 #endif
6194
6195 shp = NFSSESSIONHASH(nd->nd_sessionid);
6196 NFSLOCKSESSION(shp);
6197 sep = nfsrv_findsession(nd->nd_sessionid);
6198 if (sep == NULL) {
6199 NFSUNLOCKSESSION(shp);
6200 if ((VNET(nfsrv_stablefirst).nsf_flags &
6201 NFSNSF_GRACEOVER) != 0) {
6202 buf = malloc(INET6_ADDRSTRLEN, M_TEMP, M_WAITOK);
6203 switch (nd->nd_nam->sa_family) {
6204 #ifdef INET
6205 case AF_INET:
6206 sin = (struct sockaddr_in *)nd->nd_nam;
6207 cp = inet_ntop(sin->sin_family,
6208 &sin->sin_addr.s_addr, buf,
6209 INET6_ADDRSTRLEN);
6210 break;
6211 #endif
6212 #ifdef INET6
6213 case AF_INET6:
6214 sin6 = (struct sockaddr_in6 *)nd->nd_nam;
6215 cp = inet_ntop(sin6->sin6_family,
6216 &sin6->sin6_addr, buf, INET6_ADDRSTRLEN);
6217 break;
6218 #endif
6219 default:
6220 cp = NULL;
6221 }
6222 if (cp != NULL)
6223 printf("nfsrv_cache_session: no session "
6224 "IPaddr=%s, check NFS clients for unique "
6225 "/etc/hostid's\n", cp);
6226 else
6227 printf("nfsrv_cache_session: no session, "
6228 "check NFS clients for unique "
6229 "/etc/hostid's\n");
6230 free(buf, M_TEMP);
6231 }
6232 m_freem(*m);
6233 return;
6234 }
6235 nfsv4_seqsess_cacherep(nd->nd_slotid, sep->sess_slots, nd->nd_repstat,
6236 m);
6237 NFSUNLOCKSESSION(shp);
6238 }
6239
6240 /*
6241 * Search for a session that matches the sessionid.
6242 */
6243 static struct nfsdsession *
nfsrv_findsession(uint8_t * sessionid)6244 nfsrv_findsession(uint8_t *sessionid)
6245 {
6246 struct nfsdsession *sep;
6247 struct nfssessionhash *shp;
6248
6249 shp = NFSSESSIONHASH(sessionid);
6250 LIST_FOREACH(sep, &shp->list, sess_hash) {
6251 if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
6252 break;
6253 }
6254 return (sep);
6255 }
6256
6257 /*
6258 * Destroy a session.
6259 */
6260 int
nfsrv_destroysession(struct nfsrv_descript * nd,uint8_t * sessionid)6261 nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
6262 {
6263 int error, igotlock, samesess;
6264
6265 samesess = 0;
6266 if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) &&
6267 (nd->nd_flag & ND_HASSEQUENCE) != 0) {
6268 samesess = 1;
6269 if ((nd->nd_flag & ND_LASTOP) == 0)
6270 return (NFSERR_BADSESSION);
6271 }
6272
6273 /* Lock out other nfsd threads */
6274 NFSLOCKV4ROOTMUTEX();
6275 nfsv4_relref(&nfsv4rootfs_lock);
6276 do {
6277 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
6278 NFSV4ROOTLOCKMUTEXPTR, NULL);
6279 } while (igotlock == 0);
6280 NFSUNLOCKV4ROOTMUTEX();
6281
6282 error = nfsrv_freesession(nd, NULL, sessionid, false, NULL);
6283 if (error == 0 && samesess != 0)
6284 nd->nd_flag &= ~ND_HASSEQUENCE;
6285
6286 NFSLOCKV4ROOTMUTEX();
6287 nfsv4_unlock(&nfsv4rootfs_lock, 1);
6288 NFSUNLOCKV4ROOTMUTEX();
6289 return (error);
6290 }
6291
6292 /*
6293 * Bind a connection to a session.
6294 * For now, only certain variants are supported, since the current session
6295 * structure can only handle a single backchannel entry, which will be
6296 * applied to all connections if it is set.
6297 */
6298 int
nfsrv_bindconnsess(struct nfsrv_descript * nd,uint8_t * sessionid,int * foreaftp)6299 nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp)
6300 {
6301 struct nfssessionhash *shp;
6302 struct nfsdsession *sep;
6303 struct nfsclient *clp;
6304 SVCXPRT *savxprt;
6305 int error;
6306
6307 error = 0;
6308 savxprt = NULL;
6309 shp = NFSSESSIONHASH(sessionid);
6310 NFSLOCKSTATE();
6311 NFSLOCKSESSION(shp);
6312 sep = nfsrv_findsession(sessionid);
6313 if (sep != NULL) {
6314 clp = sep->sess_clp;
6315 error = nfsrv_checkmachcred(NFSV4OP_BINDCONNTOSESS, nd, clp);
6316 if (error != 0)
6317 goto out;
6318 if (*foreaftp == NFSCDFC4_BACK ||
6319 *foreaftp == NFSCDFC4_BACK_OR_BOTH ||
6320 *foreaftp == NFSCDFC4_FORE_OR_BOTH) {
6321 /* Try to set up a backchannel. */
6322 if (clp->lc_req.nr_client == NULL) {
6323 NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire "
6324 "backchannel\n");
6325 clp->lc_req.nr_client = (struct __rpc_client *)
6326 clnt_bck_create(nd->nd_xprt->xp_socket,
6327 sep->sess_cbprogram, NFSV4_CBVERS);
6328 }
6329 if (clp->lc_req.nr_client != NULL) {
6330 NFSD_DEBUG(2, "nfsrv_bindconnsess: set up "
6331 "backchannel\n");
6332 savxprt = sep->sess_cbsess.nfsess_xprt;
6333 SVC_ACQUIRE(nd->nd_xprt);
6334 CLNT_ACQUIRE(clp->lc_req.nr_client);
6335 nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
6336 /* Disable idle timeout. */
6337 nd->nd_xprt->xp_idletimeout = 0;
6338 sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
6339 sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN;
6340 clp->lc_flags |= LCL_DONEBINDCONN |
6341 LCL_NEEDSCBNULL;
6342 clp->lc_flags &= ~LCL_CBDOWN;
6343 if (*foreaftp == NFSCDFS4_BACK)
6344 *foreaftp = NFSCDFS4_BACK;
6345 else
6346 *foreaftp = NFSCDFS4_BOTH;
6347 } else if (*foreaftp != NFSCDFC4_BACK) {
6348 NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set "
6349 "up backchannel\n");
6350 sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
6351 clp->lc_flags |= LCL_DONEBINDCONN;
6352 *foreaftp = NFSCDFS4_FORE;
6353 } else {
6354 error = NFSERR_NOTSUPP;
6355 printf("nfsrv_bindconnsess: Can't add "
6356 "backchannel\n");
6357 }
6358 } else {
6359 NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n");
6360 clp->lc_flags |= LCL_DONEBINDCONN;
6361 *foreaftp = NFSCDFS4_FORE;
6362 }
6363 } else
6364 error = NFSERR_BADSESSION;
6365 out:
6366 NFSUNLOCKSESSION(shp);
6367 NFSUNLOCKSTATE();
6368 if (savxprt != NULL)
6369 SVC_RELEASE(savxprt);
6370 return (error);
6371 }
6372
6373 /*
6374 * Free up a session structure.
6375 */
6376 static int
nfsrv_freesession(struct nfsrv_descript * nd,struct nfsdsession * sep,uint8_t * sessionid,bool locked,SVCXPRT ** old_xprtp)6377 nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
6378 uint8_t *sessionid, bool locked, SVCXPRT **old_xprtp)
6379 {
6380 struct nfssessionhash *shp;
6381 int i;
6382
6383 if (!locked)
6384 NFSLOCKSTATE();
6385 if (sep == NULL) {
6386 shp = NFSSESSIONHASH(sessionid);
6387 NFSLOCKSESSION(shp);
6388 sep = nfsrv_findsession(sessionid);
6389 } else {
6390 shp = NFSSESSIONHASH(sep->sess_sessionid);
6391 NFSLOCKSESSION(shp);
6392 }
6393 if (sep != NULL) {
6394 /* Check for the SP4_MACH_CRED case. */
6395 if (nd != NULL && nfsrv_checkmachcred(NFSV4OP_DESTROYSESSION,
6396 nd, sep->sess_clp) != 0) {
6397 NFSUNLOCKSESSION(shp);
6398 if (!locked)
6399 NFSUNLOCKSTATE();
6400 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
6401 }
6402
6403 sep->sess_refcnt--;
6404 if (sep->sess_refcnt > 0) {
6405 NFSUNLOCKSESSION(shp);
6406 if (!locked)
6407 NFSUNLOCKSTATE();
6408 return (NFSERR_BACKCHANBUSY);
6409 }
6410 LIST_REMOVE(sep, sess_hash);
6411 LIST_REMOVE(sep, sess_list);
6412 }
6413 NFSUNLOCKSESSION(shp);
6414 if (!locked)
6415 NFSUNLOCKSTATE();
6416 if (sep == NULL)
6417 return (NFSERR_BADSESSION);
6418 for (i = 0; i < NFSV4_SLOTS; i++)
6419 if (sep->sess_slots[i].nfssl_reply != NULL)
6420 m_freem(sep->sess_slots[i].nfssl_reply);
6421 if (!locked) {
6422 if (sep->sess_cbsess.nfsess_xprt != NULL)
6423 SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
6424 if (old_xprtp != NULL)
6425 *old_xprtp = NULL;
6426 } else if (old_xprtp != NULL)
6427 *old_xprtp = sep->sess_cbsess.nfsess_xprt;
6428 free(sep, M_NFSDSESSION);
6429 return (0);
6430 }
6431
6432 /*
6433 * Free a stateid.
6434 * RFC5661 says that it should fail when there are associated opens, locks
6435 * or delegations. Since stateids represent opens, I don't see how you can
6436 * free an open stateid (it will be free'd when closed), so this function
6437 * only works for lock stateids (freeing the lock_owner) or delegations.
6438 */
6439 int
nfsrv_freestateid(struct nfsrv_descript * nd,nfsv4stateid_t * stateidp,NFSPROC_T * p)6440 nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6441 NFSPROC_T *p)
6442 {
6443 struct nfsclient *clp;
6444 struct nfsstate *stp;
6445 int error;
6446
6447 NFSLOCKSTATE();
6448 /*
6449 * Look up the stateid
6450 */
6451 error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
6452 NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6453 if (error == 0) {
6454 /* First, check for a delegation. */
6455 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
6456 if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
6457 NFSX_STATEIDOTHER))
6458 break;
6459 }
6460 if (stp != NULL) {
6461 nfsrv_freedeleg(stp);
6462 NFSUNLOCKSTATE();
6463 return (error);
6464 }
6465 }
6466 /* Not a delegation, try for a lock_owner. */
6467 if (error == 0)
6468 error = nfsrv_getstate(clp, stateidp, 0, &stp);
6469 if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
6470 NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
6471 /* Not a lock_owner stateid. */
6472 error = NFSERR_LOCKSHELD;
6473 if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
6474 error = NFSERR_LOCKSHELD;
6475 if (error == 0)
6476 nfsrv_freelockowner(stp, NULL, 0, p);
6477 NFSUNLOCKSTATE();
6478 return (error);
6479 }
6480
6481 /*
6482 * Test a stateid.
6483 */
6484 int
nfsrv_teststateid(struct nfsrv_descript * nd,nfsv4stateid_t * stateidp,NFSPROC_T * p)6485 nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6486 NFSPROC_T *p)
6487 {
6488 struct nfsclient *clp;
6489 struct nfsstate *stp;
6490 int error;
6491
6492 NFSLOCKSTATE();
6493 /*
6494 * Look up the stateid
6495 */
6496 error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
6497 NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6498 if (error == 0)
6499 error = nfsrv_getstate(clp, stateidp, 0, &stp);
6500 if (error == 0 && stateidp->seqid != 0 &&
6501 SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid))
6502 error = NFSERR_OLDSTATEID;
6503 NFSUNLOCKSTATE();
6504 return (error);
6505 }
6506
6507 /*
6508 * Generate the xdr for an NFSv4.1 CBSequence Operation.
6509 */
6510 static int
nfsv4_setcbsequence(struct nfsrv_descript * nd,struct nfsclient * clp,int dont_replycache,struct nfsdsession ** sepp,int * slotposp)6511 nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
6512 int dont_replycache, struct nfsdsession **sepp, int *slotposp)
6513 {
6514 struct nfsdsession *sep;
6515 uint32_t *tl, slotseq = 0;
6516 int maxslot;
6517 uint8_t sessionid[NFSX_V4SESSIONID];
6518 int error;
6519
6520 error = nfsv4_getcbsession(clp, sepp);
6521 if (error != 0)
6522 return (error);
6523 sep = *sepp;
6524 nfsv4_sequencelookup(NULL, &sep->sess_cbsess, slotposp, &maxslot,
6525 &slotseq, sessionid, true);
6526 KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
6527
6528 /* Build the Sequence arguments. */
6529 NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
6530 bcopy(sessionid, tl, NFSX_V4SESSIONID);
6531 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
6532 nd->nd_slotseq = tl;
6533 nd->nd_slotid = *slotposp;
6534 nd->nd_flag |= ND_HASSLOTID;
6535 *tl++ = txdr_unsigned(slotseq);
6536 *tl++ = txdr_unsigned(*slotposp);
6537 *tl++ = txdr_unsigned(maxslot);
6538 if (dont_replycache == 0)
6539 *tl++ = newnfs_true;
6540 else
6541 *tl++ = newnfs_false;
6542 *tl = 0; /* No referring call list, for now. */
6543 nd->nd_flag |= ND_HASSEQUENCE;
6544 return (0);
6545 }
6546
6547 /*
6548 * Get a session for the callback.
6549 */
6550 static int
nfsv4_getcbsession(struct nfsclient * clp,struct nfsdsession ** sepp)6551 nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
6552 {
6553 struct nfsdsession *sep;
6554
6555 NFSLOCKSTATE();
6556 LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6557 if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
6558 break;
6559 }
6560 if (sep == NULL) {
6561 NFSUNLOCKSTATE();
6562 return (NFSERR_BADSESSION);
6563 }
6564 sep->sess_refcnt++;
6565 *sepp = sep;
6566 NFSUNLOCKSTATE();
6567 return (0);
6568 }
6569
6570 /*
6571 * Free up all backchannel xprts. This needs to be done when the nfsd threads
6572 * exit, since those transports will all be going away.
6573 * This is only called after all the nfsd threads are done performing RPCs,
6574 * so locking shouldn't be an issue.
6575 */
6576 void
nfsrv_freeallbackchannel_xprts(void)6577 nfsrv_freeallbackchannel_xprts(void)
6578 {
6579 struct nfsdsession *sep;
6580 struct nfsclient *clp;
6581 SVCXPRT *xprt;
6582 int i;
6583
6584 for (i = 0; i < nfsrv_clienthashsize; i++) {
6585 LIST_FOREACH(clp, &VNET(nfsclienthash)[i], lc_hash) {
6586 LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6587 xprt = sep->sess_cbsess.nfsess_xprt;
6588 sep->sess_cbsess.nfsess_xprt = NULL;
6589 if (xprt != NULL)
6590 SVC_RELEASE(xprt);
6591 }
6592 }
6593 }
6594 }
6595
6596 /*
6597 * Do a layout commit. Actually just call nfsrv_updatemdsattr().
6598 * I have no idea if the rest of these arguments will ever be useful?
6599 */
6600 int
nfsrv_layoutcommit(struct nfsrv_descript * nd,vnode_t vp,int layouttype,int hasnewoff,uint64_t newoff,uint64_t offset,uint64_t len,int hasnewmtime,struct timespec * newmtimep,int reclaim,nfsv4stateid_t * stateidp,int maxcnt,char * layp,int * hasnewsizep,uint64_t * newsizep,struct ucred * cred,NFSPROC_T * p)6601 nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype,
6602 int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len,
6603 int hasnewmtime, struct timespec *newmtimep, int reclaim,
6604 nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep,
6605 uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p)
6606 {
6607 struct nfsvattr na;
6608 int error;
6609
6610 error = nfsrv_updatemdsattr(vp, &na, p);
6611 if (error == 0) {
6612 *hasnewsizep = 1;
6613 *newsizep = na.na_size;
6614 }
6615 return (error);
6616 }
6617
6618 /*
6619 * Try and get a layout.
6620 */
6621 int
nfsrv_layoutget(struct nfsrv_descript * nd,vnode_t vp,struct nfsexstuff * exp,int layouttype,int * iomode,uint64_t * offset,uint64_t * len,uint64_t minlen,nfsv4stateid_t * stateidp,int maxcnt,int * retonclose,int * layoutlenp,char * layp,struct ucred * cred,NFSPROC_T * p)6622 nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp,
6623 int layouttype, int *iomode, uint64_t *offset, uint64_t *len,
6624 uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose,
6625 int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p)
6626 {
6627 struct nfslayouthash *lhyp;
6628 struct nfslayout *lyp;
6629 char *devid;
6630 fhandle_t fh, *dsfhp;
6631 int error, mirrorcnt, stripecnt;
6632 uint64_t stripesiz;
6633
6634 if (nfsrv_devidcnt == 0)
6635 return (NFSERR_UNKNLAYOUTTYPE);
6636
6637 if (*offset != 0)
6638 printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset,
6639 (uintmax_t)*len);
6640 error = nfsvno_getfh(vp, &fh, p);
6641 NFSD_DEBUG(4, "layoutget getfh=%d\n", error);
6642 if (error != 0)
6643 return (error);
6644
6645 /*
6646 * For now, all layouts are for entire files.
6647 * Only issue Read/Write layouts if requested for a non-readonly fs.
6648 */
6649 if (NFSVNO_EXRDONLY(exp)) {
6650 if (*iomode == NFSLAYOUTIOMODE_RW)
6651 return (NFSERR_LAYOUTTRYLATER);
6652 *iomode = NFSLAYOUTIOMODE_READ;
6653 }
6654 if (*iomode != NFSLAYOUTIOMODE_RW)
6655 *iomode = NFSLAYOUTIOMODE_READ;
6656
6657 /*
6658 * Check to see if a write layout can be issued for this file.
6659 * This is used during mirror recovery to avoid RW layouts being
6660 * issued for a file while it is being copied to the recovered
6661 * mirror.
6662 */
6663 if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0)
6664 return (NFSERR_LAYOUTTRYLATER);
6665
6666 *retonclose = 0;
6667 *offset = 0;
6668 *len = UINT64_MAX;
6669
6670 /* First, see if a layout already exists and return if found. */
6671 lhyp = NFSLAYOUTHASH(&fh);
6672 NFSLOCKLAYOUT(lhyp);
6673 error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp);
6674 NFSD_DEBUG(4, "layoutget findlay=%d\n", error);
6675 /*
6676 * Not sure if the seqid must be the same, so I won't check it.
6677 */
6678 if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] ||
6679 stateidp->other[1] != lyp->lay_stateid.other[1] ||
6680 stateidp->other[2] != lyp->lay_stateid.other[2])) {
6681 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
6682 NFSUNLOCKLAYOUT(lhyp);
6683 NFSD_DEBUG(1, "ret bad stateid\n");
6684 return (NFSERR_BADSTATEID);
6685 }
6686 /*
6687 * I believe we get here because there is a race between
6688 * the client processing the CBLAYOUTRECALL and the layout
6689 * being deleted here on the server.
6690 * The client has now done a LayoutGet with a non-layout
6691 * stateid, as it would when there is no layout.
6692 * As such, free this layout and set error == NFSERR_BADSTATEID
6693 * so the code below will create a new layout structure as
6694 * would happen if no layout was found.
6695 * "lyp" will be set before being used below, but set it NULL
6696 * as a safety belt.
6697 */
6698 nfsrv_freelayout(&lhyp->list, lyp);
6699 lyp = NULL;
6700 error = NFSERR_BADSTATEID;
6701 }
6702 if (error == 0) {
6703 if (lyp->lay_layoutlen > maxcnt) {
6704 NFSUNLOCKLAYOUT(lhyp);
6705 NFSD_DEBUG(1, "ret layout too small\n");
6706 return (NFSERR_TOOSMALL);
6707 }
6708 if (*iomode == NFSLAYOUTIOMODE_RW) {
6709 if ((lyp->lay_flags & NFSLAY_NOSPC) != 0) {
6710 NFSUNLOCKLAYOUT(lhyp);
6711 NFSD_DEBUG(1, "ret layout nospace\n");
6712 return (NFSERR_NOSPC);
6713 }
6714 lyp->lay_flags |= NFSLAY_RW;
6715 } else
6716 lyp->lay_flags |= NFSLAY_READ;
6717 NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
6718 *layoutlenp = lyp->lay_layoutlen;
6719 if (++lyp->lay_stateid.seqid == 0)
6720 lyp->lay_stateid.seqid = 1;
6721 stateidp->seqid = lyp->lay_stateid.seqid;
6722 NFSUNLOCKLAYOUT(lhyp);
6723 NFSD_DEBUG(4, "ret fnd layout\n");
6724 return (0);
6725 }
6726 NFSUNLOCKLAYOUT(lhyp);
6727
6728 /* Find the device id and file handle. */
6729 error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, &stripesiz, &stripecnt,
6730 &dsfhp, &devid);
6731 NFSD_DEBUG(4, "layoutget devandfh=%d\n", error);
6732 if (error == 0) {
6733 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
6734 if (NFSX_V4FILELAYOUT > maxcnt)
6735 error = NFSERR_TOOSMALL;
6736 else
6737 lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp,
6738 devid, vp->v_mount->mnt_stat.f_fsid);
6739 } else {
6740 if (NFSX_V4FLEXLAYOUT(mirrorcnt, stripecnt) > maxcnt)
6741 error = NFSERR_TOOSMALL;
6742 else
6743 lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt,
6744 stripesiz, stripecnt, &fh, dsfhp, devid,
6745 vp->v_mount->mnt_stat.f_fsid);
6746 }
6747 }
6748 free(dsfhp, M_TEMP);
6749 free(devid, M_TEMP);
6750 if (error != 0)
6751 return (error);
6752
6753 /*
6754 * Now, add this layout to the list.
6755 */
6756 error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p);
6757 NFSD_DEBUG(4, "layoutget addl=%d\n", error);
6758 /*
6759 * The lyp will be set to NULL by nfsrv_addlayout() if it
6760 * linked the new structure into the lists.
6761 */
6762 free(lyp, M_NFSDSTATE);
6763 return (error);
6764 }
6765
6766 /*
6767 * Generate a File Layout.
6768 */
6769 static struct nfslayout *
nfsrv_filelayout(struct nfsrv_descript * nd,int iomode,fhandle_t * fhp,fhandle_t * dsfhp,char * devid,fsid_t fs)6770 nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
6771 fhandle_t *dsfhp, char *devid, fsid_t fs)
6772 {
6773 uint32_t *tl;
6774 struct nfslayout *lyp;
6775 uint64_t pattern_offset;
6776
6777 lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE,
6778 M_WAITOK | M_ZERO);
6779 lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES;
6780 if (iomode == NFSLAYOUTIOMODE_RW)
6781 lyp->lay_flags = NFSLAY_RW;
6782 else
6783 lyp->lay_flags = NFSLAY_READ;
6784 NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
6785 lyp->lay_clientid.qval = nd->nd_clientid.qval;
6786 lyp->lay_fsid = fs;
6787 NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
6788
6789 /* Fill in the xdr for the files layout. */
6790 tl = (uint32_t *)lyp->lay_xdr;
6791 NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */
6792 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6793
6794 /* Set the stripe size to the maximum I/O size. */
6795 *tl++ = txdr_unsigned(nfs_srvmaxio & NFSFLAYUTIL_STRIPE_MASK);
6796 *tl++ = 0; /* 1st stripe index. */
6797 pattern_offset = 0;
6798 txdr_hyper(pattern_offset, tl); tl += 2; /* Pattern offset. */
6799 *tl++ = txdr_unsigned(1); /* 1 file handle. */
6800 *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
6801 NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
6802 lyp->lay_layoutlen = NFSX_V4FILELAYOUT;
6803 return (lyp);
6804 }
6805
6806 #define FLEX_OWNERID "999"
6807 #define FLEX_UID0 "0"
6808 /*
6809 * Generate a Flex File Layout.
6810 * The FLEX_OWNERID can be any string of 3 decimal digits. Although this
6811 * string goes on the wire, it isn't supposed to be used by the client,
6812 * since this server uses tight coupling.
6813 * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use
6814 * a string of "0". This works around the Linux Flex File Layout driver bug
6815 * which uses the synthetic uid/gid strings for the "tightly coupled" case.
6816 */
6817 static struct nfslayout *
nfsrv_flexlayout(struct nfsrv_descript * nd,int iomode,int mirrorcnt,uint64_t stripesiz,int stripecnt,fhandle_t * fhp,fhandle_t * dsfhp,char * devid,fsid_t fs)6818 nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt,
6819 uint64_t stripesiz, int stripecnt, fhandle_t *fhp, fhandle_t *dsfhp,
6820 char *devid, fsid_t fs)
6821 {
6822 uint32_t *tl;
6823 struct nfslayout *lyp;
6824 int i, j;
6825
6826 lyp = malloc(sizeof(struct nfslayout) +
6827 NFSX_V4FLEXLAYOUT(mirrorcnt, stripecnt), M_NFSDSTATE,
6828 M_WAITOK | M_ZERO);
6829 lyp->lay_type = NFSLAYOUT_FLEXFILE;
6830 if (iomode == NFSLAYOUTIOMODE_RW)
6831 lyp->lay_flags = NFSLAY_RW;
6832 else
6833 lyp->lay_flags = NFSLAY_READ;
6834 NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
6835 lyp->lay_clientid.qval = nd->nd_clientid.qval;
6836 lyp->lay_fsid = fs;
6837 lyp->lay_mirrorcnt = mirrorcnt;
6838 NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
6839
6840 /* Fill in the xdr for the files layout. */
6841 tl = (uint32_t *)lyp->lay_xdr;
6842 txdr_hyper(stripesiz, tl); tl += 2; /* Stripe unit. */
6843 *tl++ = txdr_unsigned(mirrorcnt); /* # of mirrors. */
6844 for (i = 0; i < mirrorcnt; i++) {
6845 *tl++ = txdr_unsigned(stripecnt); /* Stripe cnt. */
6846 for (j = 0; j < stripecnt; j++) {
6847 NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */
6848 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6849 devid += NFSX_V4DEVICEID;
6850 *tl++ = txdr_unsigned(1); /* Efficiency. */
6851 *tl++ = 0; /* Proxy Stateid. */
6852 *tl++ = 0x55555555;
6853 *tl++ = 0x55555555;
6854 *tl++ = 0x55555555;
6855 *tl++ = txdr_unsigned(1); /* 1 file handle. */
6856 *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
6857 NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
6858 tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED);
6859 dsfhp++;
6860 if (nfsrv_flexlinuxhack != 0) {
6861 *tl++ = txdr_unsigned(strlen(FLEX_UID0));
6862 *tl = 0; /* 0 pad string. */
6863 NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
6864 *tl++ = txdr_unsigned(strlen(FLEX_UID0));
6865 *tl = 0; /* 0 pad string. */
6866 NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
6867 } else {
6868 *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
6869 NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
6870 *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
6871 NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
6872 }
6873 }
6874 }
6875 *tl++ = txdr_unsigned(0); /* ff_flags. */
6876 *tl = txdr_unsigned(60); /* Status interval hint. */
6877 lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt, stripecnt);
6878 return (lyp);
6879 }
6880
6881 /*
6882 * Parse and process Flex File errors returned via LayoutReturn.
6883 */
6884 static void
nfsrv_flexlayouterr(struct nfsrv_descript * nd,uint32_t * layp,int maxcnt,NFSPROC_T * p)6885 nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt,
6886 NFSPROC_T *p)
6887 {
6888 uint32_t *tl;
6889 int cnt, errcnt, i, j, opnum, stat;
6890 char devid[NFSX_V4DEVICEID];
6891
6892 tl = layp;
6893 maxcnt -= NFSX_UNSIGNED;
6894 if (maxcnt > 0)
6895 cnt = fxdr_unsigned(int, *tl++);
6896 else
6897 cnt = 0;
6898 NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt);
6899 for (i = 0; i < cnt; i++) {
6900 maxcnt -= NFSX_STATEID + 2 * NFSX_HYPER +
6901 NFSX_UNSIGNED;
6902 if (maxcnt <= 0)
6903 break;
6904 /* Skip offset, length and stateid for now. */
6905 tl += (4 + NFSX_STATEID / NFSX_UNSIGNED);
6906 errcnt = fxdr_unsigned(int, *tl++);
6907 NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt);
6908 for (j = 0; j < errcnt; j++) {
6909 maxcnt -= NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED;
6910 if (maxcnt < 0)
6911 break;
6912 NFSBCOPY(tl, devid, NFSX_V4DEVICEID);
6913 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6914 stat = fxdr_unsigned(int, *tl++);
6915 opnum = fxdr_unsigned(int, *tl++);
6916 NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum,
6917 stat);
6918 /*
6919 * Except for NFSERR_ACCES, NFSERR_STALE and
6920 * NFSERR_NOSPC errors, disable the mirror.
6921 */
6922 if (stat != NFSERR_ACCES && stat != NFSERR_STALE &&
6923 stat != NFSERR_NOSPC)
6924 nfsrv_delds(devid, p);
6925
6926 /* For NFSERR_NOSPC, mark all devids and layouts. */
6927 if (stat == NFSERR_NOSPC)
6928 nfsrv_marknospc(devid, true);
6929 }
6930 }
6931 }
6932
6933 /*
6934 * This function removes all flex file layouts which has a mirror with
6935 * a device id that matches the argument.
6936 * Called when the DS represented by the device id has failed.
6937 */
6938 void
nfsrv_flexmirrordel(char * devid,NFSPROC_T * p)6939 nfsrv_flexmirrordel(char *devid, NFSPROC_T *p)
6940 {
6941 uint32_t *tl;
6942 struct nfslayout *lyp, *nlyp;
6943 struct nfslayouthash *lhyp;
6944 struct nfslayouthead loclyp;
6945 int i, j;
6946
6947 NFSD_DEBUG(4, "flexmirrordel\n");
6948 /* Move all layouts found onto a local list. */
6949 TAILQ_INIT(&loclyp);
6950 for (i = 0; i < nfsrv_layouthashsize; i++) {
6951 lhyp = &nfslayouthash[i];
6952 NFSLOCKLAYOUT(lhyp);
6953 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
6954 if (lyp->lay_type == NFSLAYOUT_FLEXFILE &&
6955 lyp->lay_mirrorcnt > 1) {
6956 NFSD_DEBUG(4, "possible match\n");
6957 tl = lyp->lay_xdr;
6958 tl += 3;
6959 for (j = 0; j < lyp->lay_mirrorcnt; j++) {
6960 tl++;
6961 if (NFSBCMP(devid, tl, NFSX_V4DEVICEID)
6962 == 0) {
6963 /* Found one. */
6964 NFSD_DEBUG(4, "fnd one\n");
6965 TAILQ_REMOVE(&lhyp->list, lyp,
6966 lay_list);
6967 TAILQ_INSERT_HEAD(&loclyp, lyp,
6968 lay_list);
6969 break;
6970 }
6971 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED +
6972 NFSM_RNDUP(NFSX_V4PNFSFH) /
6973 NFSX_UNSIGNED + 11 * NFSX_UNSIGNED);
6974 }
6975 }
6976 }
6977 NFSUNLOCKLAYOUT(lhyp);
6978 }
6979
6980 /* Now, try to do a Layout recall for each one found. */
6981 TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) {
6982 NFSD_DEBUG(4, "do layout recall\n");
6983 /*
6984 * The layout stateid.seqid needs to be incremented
6985 * before doing a LAYOUT_RECALL callback.
6986 */
6987 if (++lyp->lay_stateid.seqid == 0)
6988 lyp->lay_stateid.seqid = 1;
6989 nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
6990 &lyp->lay_fh, lyp, 1, lyp->lay_type, p);
6991 nfsrv_freelayout(&loclyp, lyp);
6992 }
6993 }
6994
6995 /*
6996 * Do a recall callback to the client for this layout.
6997 */
6998 static int
nfsrv_recalllayout(nfsquad_t clid,nfsv4stateid_t * stateidp,fhandle_t * fhp,struct nfslayout * lyp,int changed,int laytype,NFSPROC_T * p)6999 nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp,
7000 struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p)
7001 {
7002 struct nfsclient *clp;
7003 int error;
7004
7005 NFSD_DEBUG(4, "nfsrv_recalllayout\n");
7006 error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0),
7007 0, NULL, p);
7008 NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error);
7009 if (error != 0) {
7010 printf("nfsrv_recalllayout: getclient err=%d\n", error);
7011 return (error);
7012 }
7013 if ((clp->lc_flags & LCL_NFSV41) != 0) {
7014 error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL,
7015 stateidp, changed, fhp, NULL, NULL, laytype, p);
7016 /* If lyp != NULL, handle an error return here. */
7017 if (error != 0 && lyp != NULL) {
7018 NFSDRECALLLOCK();
7019 /*
7020 * Mark it returned, since no layout recall
7021 * has been done.
7022 * All errors seem to be non-recoverable, although
7023 * NFSERR_NOMATCHLAYOUT is a normal event.
7024 */
7025 if ((lyp->lay_flags & NFSLAY_RECALL) != 0) {
7026 lyp->lay_flags |= NFSLAY_RETURNED;
7027 wakeup(lyp);
7028 }
7029 NFSDRECALLUNLOCK();
7030 if (error != NFSERR_NOMATCHLAYOUT)
7031 printf("nfsrv_recalllayout: err=%d\n", error);
7032 }
7033 } else
7034 printf("nfsrv_recalllayout: clp not NFSv4.1\n");
7035 return (error);
7036 }
7037
7038 /*
7039 * Find a layout to recall when we exceed our high water mark.
7040 */
7041 void
nfsrv_recalloldlayout(NFSPROC_T * p)7042 nfsrv_recalloldlayout(NFSPROC_T *p)
7043 {
7044 struct nfslayouthash *lhyp;
7045 struct nfslayout *lyp;
7046 nfsquad_t clientid;
7047 nfsv4stateid_t stateid;
7048 fhandle_t fh;
7049 int error, laytype = 0, ret;
7050
7051 lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize];
7052 NFSLOCKLAYOUT(lhyp);
7053 TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) {
7054 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
7055 lyp->lay_flags |= NFSLAY_CALLB;
7056 /*
7057 * The layout stateid.seqid needs to be incremented
7058 * before doing a LAYOUT_RECALL callback.
7059 */
7060 if (++lyp->lay_stateid.seqid == 0)
7061 lyp->lay_stateid.seqid = 1;
7062 clientid = lyp->lay_clientid;
7063 stateid = lyp->lay_stateid;
7064 NFSBCOPY(&lyp->lay_fh, &fh, sizeof(fh));
7065 laytype = lyp->lay_type;
7066 break;
7067 }
7068 }
7069 NFSUNLOCKLAYOUT(lhyp);
7070 if (lyp != NULL) {
7071 error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, 0,
7072 laytype, p);
7073 if (error != 0 && error != NFSERR_NOMATCHLAYOUT)
7074 NFSD_DEBUG(4, "recallold=%d\n", error);
7075 if (error != 0) {
7076 NFSLOCKLAYOUT(lhyp);
7077 /*
7078 * Since the hash list was unlocked, we need to
7079 * find it again.
7080 */
7081 ret = nfsrv_findlayout(&clientid, &fh, laytype, p,
7082 &lyp);
7083 if (ret == 0 &&
7084 (lyp->lay_flags & NFSLAY_CALLB) != 0 &&
7085 lyp->lay_stateid.other[0] == stateid.other[0] &&
7086 lyp->lay_stateid.other[1] == stateid.other[1] &&
7087 lyp->lay_stateid.other[2] == stateid.other[2]) {
7088 /*
7089 * The client no longer knows this layout, so
7090 * it can be free'd now.
7091 */
7092 if (error == NFSERR_NOMATCHLAYOUT)
7093 nfsrv_freelayout(&lhyp->list, lyp);
7094 else {
7095 /*
7096 * Leave it to be tried later by
7097 * clearing NFSLAY_CALLB and moving
7098 * it to the head of the list, so it
7099 * won't be tried again for a while.
7100 */
7101 lyp->lay_flags &= ~NFSLAY_CALLB;
7102 TAILQ_REMOVE(&lhyp->list, lyp,
7103 lay_list);
7104 TAILQ_INSERT_HEAD(&lhyp->list, lyp,
7105 lay_list);
7106 }
7107 }
7108 NFSUNLOCKLAYOUT(lhyp);
7109 }
7110 }
7111 }
7112
7113 /*
7114 * Try and return layout(s).
7115 */
7116 int
nfsrv_layoutreturn(struct nfsrv_descript * nd,vnode_t vp,int layouttype,int iomode,uint64_t offset,uint64_t len,int reclaim,int kind,nfsv4stateid_t * stateidp,int maxcnt,uint32_t * layp,int * fndp,struct ucred * cred,NFSPROC_T * p)7117 nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp,
7118 int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim,
7119 int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp,
7120 struct ucred *cred, NFSPROC_T *p)
7121 {
7122 struct nfsvattr na;
7123 struct nfslayouthash *lhyp;
7124 struct nfslayout *lyp;
7125 fhandle_t fh;
7126 int error = 0;
7127
7128 *fndp = 0;
7129 if (kind == NFSV4LAYOUTRET_FILE) {
7130 error = nfsvno_getfh(vp, &fh, p);
7131 if (error == 0) {
7132 error = nfsrv_updatemdsattr(vp, &na, p);
7133 if (error != 0 && error != ESTALE)
7134 printf("nfsrv_layoutreturn: updatemdsattr"
7135 " failed=%d\n", error);
7136 }
7137 if (error == 0) {
7138 if (reclaim == newnfs_true) {
7139 error = nfsrv_checkgrace(NULL, NULL,
7140 NFSLCK_RECLAIM);
7141 if (error != NFSERR_NOGRACE)
7142 error = 0;
7143 return (error);
7144 }
7145 lhyp = NFSLAYOUTHASH(&fh);
7146 NFSDRECALLLOCK();
7147 NFSLOCKLAYOUT(lhyp);
7148 error = nfsrv_findlayout(&nd->nd_clientid, &fh,
7149 layouttype, p, &lyp);
7150 NFSD_DEBUG(4, "layoutret findlay=%d\n", error);
7151 if (error == 0 &&
7152 stateidp->other[0] == lyp->lay_stateid.other[0] &&
7153 stateidp->other[1] == lyp->lay_stateid.other[1] &&
7154 stateidp->other[2] == lyp->lay_stateid.other[2]) {
7155 NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d"
7156 " %x %x %x laystateid %d %x %x %x"
7157 " off=%ju len=%ju flgs=0x%x\n",
7158 stateidp->seqid, stateidp->other[0],
7159 stateidp->other[1], stateidp->other[2],
7160 lyp->lay_stateid.seqid,
7161 lyp->lay_stateid.other[0],
7162 lyp->lay_stateid.other[1],
7163 lyp->lay_stateid.other[2],
7164 (uintmax_t)offset, (uintmax_t)len,
7165 lyp->lay_flags);
7166 if (++lyp->lay_stateid.seqid == 0)
7167 lyp->lay_stateid.seqid = 1;
7168 stateidp->seqid = lyp->lay_stateid.seqid;
7169 if (offset == 0 && len == UINT64_MAX) {
7170 if ((iomode & NFSLAYOUTIOMODE_READ) !=
7171 0)
7172 lyp->lay_flags &= ~NFSLAY_READ;
7173 if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
7174 lyp->lay_flags &= ~NFSLAY_RW;
7175 if ((lyp->lay_flags & (NFSLAY_READ |
7176 NFSLAY_RW)) == 0)
7177 nfsrv_freelayout(&lhyp->list,
7178 lyp);
7179 else
7180 *fndp = 1;
7181 } else
7182 *fndp = 1;
7183 }
7184 NFSUNLOCKLAYOUT(lhyp);
7185 /* Search the nfsrv_recalllist for a match. */
7186 TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
7187 if (NFSBCMP(&lyp->lay_fh, &fh,
7188 sizeof(fh)) == 0 &&
7189 lyp->lay_clientid.qval ==
7190 nd->nd_clientid.qval &&
7191 stateidp->other[0] ==
7192 lyp->lay_stateid.other[0] &&
7193 stateidp->other[1] ==
7194 lyp->lay_stateid.other[1] &&
7195 stateidp->other[2] ==
7196 lyp->lay_stateid.other[2]) {
7197 lyp->lay_flags |= NFSLAY_RETURNED;
7198 wakeup(lyp);
7199 error = 0;
7200 }
7201 }
7202 NFSDRECALLUNLOCK();
7203 }
7204 if (layouttype == NFSLAYOUT_FLEXFILE && layp != NULL)
7205 nfsrv_flexlayouterr(nd, layp, maxcnt, p);
7206 } else if (kind == NFSV4LAYOUTRET_FSID)
7207 nfsrv_freelayouts(&nd->nd_clientid,
7208 &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode);
7209 else if (kind == NFSV4LAYOUTRET_ALL)
7210 nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode);
7211 else
7212 error = NFSERR_INVAL;
7213 if (error == -1)
7214 error = 0;
7215 return (error);
7216 }
7217
7218 /*
7219 * Look for an existing layout.
7220 */
7221 static int
nfsrv_findlayout(nfsquad_t * clientidp,fhandle_t * fhp,int laytype,NFSPROC_T * p,struct nfslayout ** lypp)7222 nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
7223 NFSPROC_T *p, struct nfslayout **lypp)
7224 {
7225 struct nfslayouthash *lhyp;
7226 struct nfslayout *lyp;
7227 int ret;
7228
7229 *lypp = NULL;
7230 ret = 0;
7231 lhyp = NFSLAYOUTHASH(fhp);
7232 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
7233 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
7234 lyp->lay_clientid.qval == clientidp->qval &&
7235 lyp->lay_type == laytype)
7236 break;
7237 }
7238 if (lyp != NULL)
7239 *lypp = lyp;
7240 else
7241 ret = -1;
7242 return (ret);
7243 }
7244
7245 /*
7246 * Add the new layout, as required.
7247 */
7248 static int
nfsrv_addlayout(struct nfsrv_descript * nd,struct nfslayout ** lypp,nfsv4stateid_t * stateidp,char * layp,int * layoutlenp,NFSPROC_T * p)7249 nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
7250 nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p)
7251 {
7252 struct nfsclient *clp;
7253 struct nfslayouthash *lhyp;
7254 struct nfslayout *lyp, *nlyp;
7255 fhandle_t *fhp;
7256 int error;
7257
7258 KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0,
7259 ("nfsrv_layoutget: no nd_clientid\n"));
7260 lyp = *lypp;
7261 fhp = &lyp->lay_fh;
7262 NFSLOCKSTATE();
7263 error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
7264 NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
7265 if (error != 0) {
7266 NFSUNLOCKSTATE();
7267 return (error);
7268 }
7269 lyp->lay_stateid.seqid = stateidp->seqid = 1;
7270 lyp->lay_stateid.other[0] = stateidp->other[0] =
7271 clp->lc_clientid.lval[0];
7272 lyp->lay_stateid.other[1] = stateidp->other[1] =
7273 clp->lc_clientid.lval[1];
7274 lyp->lay_stateid.other[2] = stateidp->other[2] =
7275 nfsrv_nextstateindex(clp);
7276 NFSUNLOCKSTATE();
7277
7278 lhyp = NFSLAYOUTHASH(fhp);
7279 NFSLOCKLAYOUT(lhyp);
7280 TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) {
7281 if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
7282 nlyp->lay_clientid.qval == nd->nd_clientid.qval)
7283 break;
7284 }
7285 if (nlyp != NULL) {
7286 /* A layout already exists, so use it. */
7287 nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW));
7288 NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen);
7289 *layoutlenp = nlyp->lay_layoutlen;
7290 if (++nlyp->lay_stateid.seqid == 0)
7291 nlyp->lay_stateid.seqid = 1;
7292 stateidp->seqid = nlyp->lay_stateid.seqid;
7293 stateidp->other[0] = nlyp->lay_stateid.other[0];
7294 stateidp->other[1] = nlyp->lay_stateid.other[1];
7295 stateidp->other[2] = nlyp->lay_stateid.other[2];
7296 NFSUNLOCKLAYOUT(lhyp);
7297 return (0);
7298 }
7299
7300 /* Insert the new layout in the lists. */
7301 *lypp = NULL;
7302 atomic_add_int(&nfsrv_layoutcnt, 1);
7303 VNET(nfsstatsv1_p)->srvlayouts++;
7304 NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
7305 *layoutlenp = lyp->lay_layoutlen;
7306 TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list);
7307 NFSUNLOCKLAYOUT(lhyp);
7308 return (0);
7309 }
7310
7311 /*
7312 * Get the devinfo for a deviceid.
7313 */
7314 int
nfsrv_getdevinfo(char * devid,int layouttype,uint32_t * maxcnt,uint32_t * notify,int * devaddrlen,char ** devaddr)7315 nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt,
7316 uint32_t *notify, int *devaddrlen, char **devaddr)
7317 {
7318 struct nfsdevice *ds;
7319
7320 if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype !=
7321 NFSLAYOUT_FLEXFILE) ||
7322 (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES))
7323 return (NFSERR_UNKNLAYOUTTYPE);
7324
7325 /*
7326 * Now, search for the device id. Note that the structures won't go
7327 * away, but the order changes in the list. As such, the lock only
7328 * needs to be held during the search through the list.
7329 */
7330 NFSDDSLOCK();
7331 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7332 if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 &&
7333 ds->nfsdev_nmp != NULL)
7334 break;
7335 }
7336 NFSDDSUNLOCK();
7337 if (ds == NULL)
7338 return (NFSERR_NOENT);
7339
7340 /* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. */
7341 *devaddrlen = 0;
7342 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
7343 *devaddrlen = ds->nfsdev_fileaddrlen;
7344 *devaddr = ds->nfsdev_fileaddr;
7345 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
7346 *devaddrlen = ds->nfsdev_flexaddrlen;
7347 *devaddr = ds->nfsdev_flexaddr;
7348 }
7349 if (*devaddrlen == 0)
7350 return (NFSERR_UNKNLAYOUTTYPE);
7351
7352 /*
7353 * The XDR overhead is 3 unsigned values: layout_type,
7354 * length_of_address and notify bitmap.
7355 * If the notify array is changed to not all zeros, the
7356 * count of unsigned values must be increased.
7357 */
7358 if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) +
7359 3 * NFSX_UNSIGNED) {
7360 *maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED;
7361 return (NFSERR_TOOSMALL);
7362 }
7363 return (0);
7364 }
7365
7366 /*
7367 * Free a list of layout state structures.
7368 */
7369 static void
nfsrv_freelayoutlist(nfsquad_t clientid)7370 nfsrv_freelayoutlist(nfsquad_t clientid)
7371 {
7372 struct nfslayouthash *lhyp;
7373 struct nfslayout *lyp, *nlyp;
7374 int i;
7375
7376 for (i = 0; i < nfsrv_layouthashsize; i++) {
7377 lhyp = &nfslayouthash[i];
7378 NFSLOCKLAYOUT(lhyp);
7379 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7380 if (lyp->lay_clientid.qval == clientid.qval)
7381 nfsrv_freelayout(&lhyp->list, lyp);
7382 }
7383 NFSUNLOCKLAYOUT(lhyp);
7384 }
7385 }
7386
7387 /*
7388 * Free up a layout.
7389 */
7390 static void
nfsrv_freelayout(struct nfslayouthead * lhp,struct nfslayout * lyp)7391 nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp)
7392 {
7393
7394 NFSD_DEBUG(4, "Freelayout=%p\n", lyp);
7395 atomic_add_int(&nfsrv_layoutcnt, -1);
7396 VNET(nfsstatsv1_p)->srvlayouts--;
7397 TAILQ_REMOVE(lhp, lyp, lay_list);
7398 free(lyp, M_NFSDSTATE);
7399 }
7400
7401 /*
7402 * Free up a device id.
7403 */
7404 void
nfsrv_freeonedevid(struct nfsdevice * ds)7405 nfsrv_freeonedevid(struct nfsdevice *ds)
7406 {
7407 int i;
7408
7409 atomic_add_int(&nfsrv_devidcnt, -1);
7410 vrele(ds->nfsdev_dvp);
7411 for (i = 0; i < nfsrv_dsdirsize; i++)
7412 if (ds->nfsdev_dsdir[i] != NULL)
7413 vrele(ds->nfsdev_dsdir[i]);
7414 free(ds->nfsdev_fileaddr, M_NFSDSTATE);
7415 free(ds->nfsdev_flexaddr, M_NFSDSTATE);
7416 free(ds->nfsdev_host, M_NFSDSTATE);
7417 free(ds, M_NFSDSTATE);
7418 }
7419
7420 /*
7421 * Free up a device id and its mirrors.
7422 */
7423 static void
nfsrv_freedevid(struct nfsdevice * ds)7424 nfsrv_freedevid(struct nfsdevice *ds)
7425 {
7426
7427 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
7428 nfsrv_freeonedevid(ds);
7429 }
7430
7431 /*
7432 * Free all layouts and device ids.
7433 * Done when the nfsd threads are shut down since there may be a new
7434 * modified device id list created when the nfsd is restarted.
7435 */
7436 void
nfsrv_freealllayoutsanddevids(void)7437 nfsrv_freealllayoutsanddevids(void)
7438 {
7439 struct nfsdontlist *mrp, *nmrp;
7440 struct nfslayout *lyp, *nlyp;
7441
7442 /* Get rid of the deviceid structures. */
7443 nfsrv_freealldevids();
7444 TAILQ_INIT(&nfsrv_devidhead);
7445 nfsrv_devidcnt = 0;
7446
7447 /* Get rid of all layouts. */
7448 nfsrv_freealllayouts();
7449
7450 /* Get rid of any nfsdontlist entries. */
7451 LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp)
7452 free(mrp, M_NFSDSTATE);
7453 LIST_INIT(&nfsrv_dontlisthead);
7454 nfsrv_dontlistlen = 0;
7455
7456 /* Free layouts in the recall list. */
7457 TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp)
7458 nfsrv_freelayout(&nfsrv_recalllisthead, lyp);
7459 TAILQ_INIT(&nfsrv_recalllisthead);
7460 }
7461
7462 /*
7463 * Free layouts that match the arguments.
7464 */
7465 static void
nfsrv_freelayouts(nfsquad_t * clid,fsid_t * fs,int laytype,int iomode)7466 nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode)
7467 {
7468 struct nfslayouthash *lhyp;
7469 struct nfslayout *lyp, *nlyp;
7470 int i;
7471
7472 for (i = 0; i < nfsrv_layouthashsize; i++) {
7473 lhyp = &nfslayouthash[i];
7474 NFSLOCKLAYOUT(lhyp);
7475 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7476 if (clid->qval != lyp->lay_clientid.qval)
7477 continue;
7478 if (fs != NULL && fsidcmp(fs, &lyp->lay_fsid) != 0)
7479 continue;
7480 if (laytype != lyp->lay_type)
7481 continue;
7482 if ((iomode & NFSLAYOUTIOMODE_READ) != 0)
7483 lyp->lay_flags &= ~NFSLAY_READ;
7484 if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
7485 lyp->lay_flags &= ~NFSLAY_RW;
7486 if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0)
7487 nfsrv_freelayout(&lhyp->list, lyp);
7488 }
7489 NFSUNLOCKLAYOUT(lhyp);
7490 }
7491 }
7492
7493 /*
7494 * Free all layouts for the argument file.
7495 */
7496 void
nfsrv_freefilelayouts(fhandle_t * fhp)7497 nfsrv_freefilelayouts(fhandle_t *fhp)
7498 {
7499 struct nfslayouthash *lhyp;
7500 struct nfslayout *lyp, *nlyp;
7501
7502 lhyp = NFSLAYOUTHASH(fhp);
7503 NFSLOCKLAYOUT(lhyp);
7504 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7505 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0)
7506 nfsrv_freelayout(&lhyp->list, lyp);
7507 }
7508 NFSUNLOCKLAYOUT(lhyp);
7509 }
7510
7511 /*
7512 * Free all layouts.
7513 */
7514 static void
nfsrv_freealllayouts(void)7515 nfsrv_freealllayouts(void)
7516 {
7517 struct nfslayouthash *lhyp;
7518 struct nfslayout *lyp, *nlyp;
7519 int i;
7520
7521 for (i = 0; i < nfsrv_layouthashsize; i++) {
7522 lhyp = &nfslayouthash[i];
7523 NFSLOCKLAYOUT(lhyp);
7524 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp)
7525 nfsrv_freelayout(&lhyp->list, lyp);
7526 NFSUNLOCKLAYOUT(lhyp);
7527 }
7528 }
7529
7530 /*
7531 * Look up the mount path for the DS server.
7532 */
7533 static int
nfsrv_setdsserver(char * dspathp,char * mdspathp,NFSPROC_T * p,struct nfsdevice ** dsp)7534 nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
7535 struct nfsdevice **dsp)
7536 {
7537 struct nameidata nd;
7538 struct nfsdevice *ds;
7539 struct mount *mp;
7540 int error, i;
7541 char *cp, *dsdirpath, *endcp;
7542 size_t dsdirsize;
7543 u_quad_t stripesiz;
7544
7545 NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp);
7546 *dsp = NULL;
7547 if (jailed(p->td_ucred)) {
7548 printf("A pNFS nfsd cannot run in a jail\n");
7549 return (EPERM);
7550 }
7551 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
7552 dspathp);
7553 error = namei(&nd);
7554 NFSD_DEBUG(4, "lookup=%d\n", error);
7555 if (error != 0)
7556 return (error);
7557 NDFREE_PNBUF(&nd);
7558 if (nd.ni_vp->v_type != VDIR) {
7559 vput(nd.ni_vp);
7560 NFSD_DEBUG(4, "dspath not dir\n");
7561 return (ENOTDIR);
7562 }
7563 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
7564 vput(nd.ni_vp);
7565 NFSD_DEBUG(4, "dspath not an NFS mount\n");
7566 return (ENXIO);
7567 }
7568
7569 /*
7570 * Allocate a DS server structure with the NFS mounted directory
7571 * vnode reference counted, so that a non-forced dismount will
7572 * fail with EBUSY.
7573 * This structure is always linked into the list, even if an error
7574 * is being returned. The caller will free the entire list upon
7575 * an error return.
7576 */
7577 *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t),
7578 M_NFSDSTATE, M_WAITOK | M_ZERO);
7579 ds->nfsdev_dvp = nd.ni_vp;
7580 ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount);
7581 ds->nfsdev_mdsstripesiz = nfsrv_stripesiz;
7582 NFSVOPUNLOCK(nd.ni_vp);
7583
7584 dsdirsize = strlen(dspathp) + 16;
7585 dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK);
7586 /* Now, create the DS directory structures. */
7587 for (i = 0; i < nfsrv_dsdirsize; i++) {
7588 snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i);
7589 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
7590 UIO_SYSSPACE, dsdirpath);
7591 error = namei(&nd);
7592 NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error);
7593 if (error != 0)
7594 break;
7595 NDFREE_PNBUF(&nd);
7596 if (nd.ni_vp->v_type != VDIR) {
7597 vput(nd.ni_vp);
7598 error = ENOTDIR;
7599 NFSD_DEBUG(4, "dsdirpath not a VDIR\n");
7600 break;
7601 }
7602 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
7603 vput(nd.ni_vp);
7604 error = ENXIO;
7605 NFSD_DEBUG(4, "dsdirpath not an NFS mount\n");
7606 break;
7607 }
7608 ds->nfsdev_dsdir[i] = nd.ni_vp;
7609 NFSVOPUNLOCK(nd.ni_vp);
7610 }
7611 free(dsdirpath, M_TEMP);
7612
7613 if (strlen(mdspathp) > 0) {
7614 cp = strchr(mdspathp, '@');
7615 if (cp != NULL)
7616 *cp = '\0';
7617 /*
7618 * This DS stores file for a specific MDS exported file
7619 * system.
7620 */
7621 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
7622 UIO_SYSSPACE, mdspathp);
7623 error = namei(&nd);
7624 NFSD_DEBUG(4, "mds lookup=%d\n", error);
7625 if (error != 0)
7626 goto out;
7627 NDFREE_PNBUF(&nd);
7628 if (nd.ni_vp->v_type != VDIR) {
7629 vput(nd.ni_vp);
7630 error = ENOTDIR;
7631 NFSD_DEBUG(4, "mdspath not dir\n");
7632 goto out;
7633 }
7634 mp = nd.ni_vp->v_mount;
7635 if ((mp->mnt_flag & MNT_EXPORTED) == 0) {
7636 vput(nd.ni_vp);
7637 error = ENXIO;
7638 NFSD_DEBUG(4, "mdspath not an exported fs\n");
7639 goto out;
7640 }
7641 ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid;
7642 ds->nfsdev_mdsisset = 1;
7643 vput(nd.ni_vp);
7644 if (cp != NULL) {
7645 /* There is a stripesiz specified. */
7646 endcp = NULL;
7647 if (*(cp + 1) != '\0')
7648 stripesiz = strtouq(cp + 1, &endcp, 10);
7649 if (endcp == NULL || *endcp != '\0') {
7650 error = ENXIO;
7651 NFSD_DEBUG(4, "mds stripesiz invalid\n");
7652 goto out;
7653 }
7654 ds->nfsdev_mdsstripesiz = stripesiz;
7655 *cp = '@';
7656 }
7657 }
7658
7659 out:
7660 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
7661 atomic_add_int(&nfsrv_devidcnt, 1);
7662 return (error);
7663 }
7664
7665 /*
7666 * Look up the mount path for the DS server and delete it.
7667 */
7668 int
nfsrv_deldsserver(int op,char * dspathp,NFSPROC_T * p)7669 nfsrv_deldsserver(int op, char *dspathp, NFSPROC_T *p)
7670 {
7671 struct mount *mp;
7672 struct nfsmount *nmp;
7673 struct nfsdevice *ds;
7674 int error;
7675
7676 NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp);
7677 /*
7678 * Search for the path in the mount list. Avoid looking the path
7679 * up, since this mount point may be hung, with associated locked
7680 * vnodes, etc.
7681 * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked
7682 * until this completes.
7683 * As noted in the man page, this should be done before any forced
7684 * dismount on the mount point, but at least the handshake on
7685 * NFSMNTP_CANCELRPCS should make it safe.
7686 */
7687 error = 0;
7688 ds = NULL;
7689 nmp = NULL;
7690 mtx_lock(&mountlist_mtx);
7691 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
7692 if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 &&
7693 strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 &&
7694 mp->mnt_data != NULL) {
7695 nmp = VFSTONFS(mp);
7696 NFSLOCKMNT(nmp);
7697 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
7698 NFSMNTP_CANCELRPCS)) == 0) {
7699 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
7700 NFSUNLOCKMNT(nmp);
7701 } else {
7702 NFSUNLOCKMNT(nmp);
7703 nmp = NULL;
7704 }
7705 break;
7706 }
7707 }
7708 mtx_unlock(&mountlist_mtx);
7709
7710 if (nmp != NULL) {
7711 ds = nfsrv_deldsnmp(op, nmp, p);
7712 NFSD_DEBUG(4, "deldsnmp=%p\n", ds);
7713 if (ds != NULL) {
7714 nfsrv_killrpcs(nmp);
7715 NFSD_DEBUG(4, "aft killrpcs\n");
7716 } else
7717 error = ENXIO;
7718 NFSLOCKMNT(nmp);
7719 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
7720 wakeup(nmp);
7721 NFSUNLOCKMNT(nmp);
7722 } else
7723 error = EINVAL;
7724 return (error);
7725 }
7726
7727 /*
7728 * Search for and remove a DS entry which matches the "nmp" argument.
7729 * The nfsdevice structure pointer is returned so that the caller can
7730 * free it via nfsrv_freeonedevid().
7731 * For the forced case, do not try to do LayoutRecalls, since the server
7732 * must be shut down now anyhow.
7733 */
7734 struct nfsdevice *
nfsrv_deldsnmp(int op,struct nfsmount * nmp,NFSPROC_T * p)7735 nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p)
7736 {
7737 struct nfsdevice *fndds;
7738
7739 NFSD_DEBUG(4, "deldsdvp\n");
7740 NFSDDSLOCK();
7741 if (op == PNFSDOP_FORCEDELDS)
7742 fndds = nfsv4_findmirror(nmp);
7743 else
7744 fndds = nfsrv_findmirroredds(nmp);
7745 if (fndds != NULL)
7746 nfsrv_deleteds(fndds);
7747 NFSDDSUNLOCK();
7748 if (fndds != NULL) {
7749 if (op != PNFSDOP_FORCEDELDS)
7750 nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
7751 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
7752 }
7753 return (fndds);
7754 }
7755
7756 /*
7757 * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid.
7758 * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount
7759 * point.
7760 * Also, returns an error instead of the nfsdevice found.
7761 */
7762 int
nfsrv_delds(char * devid,NFSPROC_T * p)7763 nfsrv_delds(char *devid, NFSPROC_T *p)
7764 {
7765 struct nfsdevice *ds, *fndds;
7766 struct nfsmount *nmp;
7767 int fndmirror;
7768
7769 NFSD_DEBUG(4, "delds\n");
7770 /*
7771 * Search the DS server list for a match with devid.
7772 * Remove the DS entry if found and there is a mirror.
7773 */
7774 fndds = NULL;
7775 nmp = NULL;
7776 fndmirror = 0;
7777 NFSDDSLOCK();
7778 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7779 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 &&
7780 ds->nfsdev_nmp != NULL) {
7781 NFSD_DEBUG(4, "fnd main ds\n");
7782 fndds = ds;
7783 break;
7784 }
7785 }
7786 if (fndds == NULL) {
7787 NFSDDSUNLOCK();
7788 return (ENXIO);
7789 }
7790 if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
7791 fndmirror = 1;
7792 else if (fndds->nfsdev_mdsisset != 0) {
7793 /* For the fsid is set case, search for a mirror. */
7794 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7795 if (ds != fndds && ds->nfsdev_nmp != NULL &&
7796 ds->nfsdev_mdsisset != 0 &&
7797 fsidcmp(&ds->nfsdev_mdsfsid,
7798 &fndds->nfsdev_mdsfsid) == 0) {
7799 fndmirror = 1;
7800 break;
7801 }
7802 }
7803 }
7804 if (fndmirror != 0) {
7805 nmp = fndds->nfsdev_nmp;
7806 NFSLOCKMNT(nmp);
7807 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
7808 NFSMNTP_CANCELRPCS)) == 0) {
7809 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
7810 NFSUNLOCKMNT(nmp);
7811 nfsrv_deleteds(fndds);
7812 } else {
7813 NFSUNLOCKMNT(nmp);
7814 nmp = NULL;
7815 }
7816 }
7817 NFSDDSUNLOCK();
7818 if (nmp != NULL) {
7819 nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
7820 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
7821 nfsrv_killrpcs(nmp);
7822 NFSLOCKMNT(nmp);
7823 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
7824 wakeup(nmp);
7825 NFSUNLOCKMNT(nmp);
7826 return (0);
7827 }
7828 return (ENXIO);
7829 }
7830
7831 /*
7832 * Mark a DS as disabled by setting nfsdev_nmp = NULL.
7833 */
7834 static void
nfsrv_deleteds(struct nfsdevice * fndds)7835 nfsrv_deleteds(struct nfsdevice *fndds)
7836 {
7837
7838 NFSD_DEBUG(4, "deleteds: deleting a mirror\n");
7839 fndds->nfsdev_nmp = NULL;
7840 if (fndds->nfsdev_mdsisset == 0)
7841 nfsrv_faildscnt--;
7842 }
7843
7844 /*
7845 * Fill in the addr structures for the File and Flex File layouts.
7846 */
7847 static void
nfsrv_allocdevid(struct nfsdevice * ds,char * addr,char * dnshost)7848 nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost)
7849 {
7850 uint32_t *tl;
7851 char *netprot;
7852 int addrlen;
7853 static uint64_t new_devid = 0;
7854
7855 if (strchr(addr, ':') != NULL)
7856 netprot = "tcp6";
7857 else
7858 netprot = "tcp";
7859
7860 /* Fill in the device id. */
7861 NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time));
7862 new_devid++;
7863 NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)],
7864 sizeof(new_devid));
7865
7866 /*
7867 * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4
7868 * as defined in RFC5661) in XDR.
7869 */
7870 addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
7871 6 * NFSX_UNSIGNED;
7872 NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot);
7873 ds->nfsdev_fileaddrlen = addrlen;
7874 tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
7875 ds->nfsdev_fileaddr = (char *)tl;
7876 *tl++ = txdr_unsigned(1); /* One stripe with index 0. */
7877 *tl++ = 0;
7878 *tl++ = txdr_unsigned(1); /* One multipath list */
7879 *tl++ = txdr_unsigned(1); /* with one entry in it. */
7880 /* The netaddr for this one entry. */
7881 *tl++ = txdr_unsigned(strlen(netprot));
7882 NFSBCOPY(netprot, tl, strlen(netprot));
7883 tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
7884 *tl++ = txdr_unsigned(strlen(addr));
7885 NFSBCOPY(addr, tl, strlen(addr));
7886
7887 /*
7888 * Fill in the flex file addr (actually the ff_device_addr4
7889 * as defined for Flexible File Layout) in XDR.
7890 */
7891 addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
7892 14 * NFSX_UNSIGNED;
7893 ds->nfsdev_flexaddrlen = addrlen;
7894 tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
7895 ds->nfsdev_flexaddr = (char *)tl;
7896 *tl++ = txdr_unsigned(1); /* One multipath entry. */
7897 /* The netaddr for this one entry. */
7898 *tl++ = txdr_unsigned(strlen(netprot));
7899 NFSBCOPY(netprot, tl, strlen(netprot));
7900 tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
7901 *tl++ = txdr_unsigned(strlen(addr));
7902 NFSBCOPY(addr, tl, strlen(addr));
7903 tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED);
7904 *tl++ = txdr_unsigned(2); /* Two NFS Versions. */
7905 *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */
7906 *tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */
7907 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */
7908 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */
7909 *tl++ = newnfs_true; /* Tightly coupled. */
7910 *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */
7911 *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */
7912 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */
7913 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */
7914 *tl = newnfs_true; /* Tightly coupled. */
7915
7916 ds->nfsdev_hostnamelen = strlen(dnshost);
7917 ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE,
7918 M_WAITOK);
7919 NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1);
7920 }
7921
7922 /*
7923 * Create the device id list.
7924 * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument
7925 * is misconfigured.
7926 */
7927 int
nfsrv_createdevids(struct nfsd_nfsd_args * args,NFSPROC_T * p)7928 nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p)
7929 {
7930 struct nfsdevice *ds;
7931 char *addrp, *dnshostp, *dspathp, *mdspathp;
7932 int error, i;
7933
7934 addrp = args->addr;
7935 dnshostp = args->dnshost;
7936 dspathp = args->dspath;
7937 mdspathp = args->mdspath;
7938 nfsrv_maxpnfsmirror = args->mirrorcnt;
7939 if (addrp == NULL || dnshostp == NULL || dspathp == NULL ||
7940 mdspathp == NULL)
7941 return (0);
7942
7943 /*
7944 * Loop around for each nul-terminated string in args->addr,
7945 * args->dnshost, args->dnspath and args->mdspath.
7946 */
7947 while (addrp < (args->addr + args->addrlen) &&
7948 dnshostp < (args->dnshost + args->dnshostlen) &&
7949 dspathp < (args->dspath + args->dspathlen) &&
7950 mdspathp < (args->mdspath + args->mdspathlen)) {
7951 error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds);
7952 if (error != 0) {
7953 /* Free all DS servers. */
7954 nfsrv_freealldevids();
7955 nfsrv_devidcnt = 0;
7956 return (ENXIO);
7957 }
7958 nfsrv_allocdevid(ds, addrp, dnshostp);
7959 addrp += (strlen(addrp) + 1);
7960 dnshostp += (strlen(dnshostp) + 1);
7961 dspathp += (strlen(dspathp) + 1);
7962 mdspathp += (strlen(mdspathp) + 1);
7963 }
7964 if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) {
7965 /* Free all DS servers. */
7966 nfsrv_freealldevids();
7967 nfsrv_devidcnt = 0;
7968 nfsrv_maxpnfsmirror = 1;
7969 return (ENXIO);
7970 }
7971 /* We can fail at most one less DS than the mirror level. */
7972 nfsrv_faildscnt = nfsrv_maxpnfsmirror - 1;
7973
7974 /*
7975 * Allocate the nfslayout hash table now, since this is a pNFS server.
7976 * Make it 1% of the high water mark and at least 100.
7977 */
7978 if (nfslayouthash == NULL) {
7979 nfsrv_layouthashsize = nfsrv_layouthighwater / 100;
7980 if (nfsrv_layouthashsize < 100)
7981 nfsrv_layouthashsize = 100;
7982 nfslayouthash = mallocarray(nfsrv_layouthashsize,
7983 sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK |
7984 M_ZERO);
7985 for (i = 0; i < nfsrv_layouthashsize; i++) {
7986 mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF);
7987 TAILQ_INIT(&nfslayouthash[i].list);
7988 }
7989 }
7990 return (0);
7991 }
7992
7993 /*
7994 * Free all device ids.
7995 */
7996 static void
nfsrv_freealldevids(void)7997 nfsrv_freealldevids(void)
7998 {
7999 struct nfsdevice *ds, *nds;
8000
8001 TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds)
8002 nfsrv_freedevid(ds);
8003 }
8004
8005 /*
8006 * Check to see if there is a Read/Write Layout plus either:
8007 * - A Write Delegation
8008 * or
8009 * - An Open with Write_access.
8010 * Return 1 if this is the case and 0 otherwise.
8011 * This function is used by nfsrv_proxyds() to decide if doing a Proxy
8012 * Getattr RPC to the Data Server (DS) is necessary.
8013 */
8014 #define NFSCLIDVECSIZE 6
8015 int
nfsrv_checkdsattr(vnode_t vp,NFSPROC_T * p)8016 nfsrv_checkdsattr(vnode_t vp, NFSPROC_T *p)
8017 {
8018 fhandle_t fh, *tfhp;
8019 struct nfsstate *stp;
8020 struct nfslayout *lyp;
8021 struct nfslayouthash *lhyp;
8022 struct nfslockhashhead *hp;
8023 struct nfslockfile *lfp;
8024 nfsquad_t clid[NFSCLIDVECSIZE];
8025 int clidcnt, ret;
8026
8027 ret = nfsvno_getfh(vp, &fh, p);
8028 if (ret != 0)
8029 return (0);
8030
8031 /* First check for a Read/Write Layout. */
8032 clidcnt = 0;
8033 lhyp = NFSLAYOUTHASH(&fh);
8034 NFSLOCKLAYOUT(lhyp);
8035 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
8036 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8037 ((lyp->lay_flags & NFSLAY_RW) != 0 ||
8038 ((lyp->lay_flags & NFSLAY_READ) != 0 &&
8039 nfsrv_pnfsatime != 0))) {
8040 if (clidcnt < NFSCLIDVECSIZE)
8041 clid[clidcnt].qval = lyp->lay_clientid.qval;
8042 clidcnt++;
8043 }
8044 }
8045 NFSUNLOCKLAYOUT(lhyp);
8046 if (clidcnt == 0) {
8047 /* None found, so return 0. */
8048 return (0);
8049 }
8050
8051 /* Get the nfslockfile for this fh. */
8052 NFSLOCKSTATE();
8053 hp = NFSLOCKHASH(&fh);
8054 LIST_FOREACH(lfp, hp, lf_hash) {
8055 tfhp = &lfp->lf_fh;
8056 if (NFSVNO_CMPFH(&fh, tfhp))
8057 break;
8058 }
8059 if (lfp == NULL) {
8060 /* None found, so return 0. */
8061 NFSUNLOCKSTATE();
8062 return (0);
8063 }
8064
8065 /* Now, look for a Write delegation for this clientid. */
8066 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
8067 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
8068 nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
8069 break;
8070 }
8071 if (stp != NULL) {
8072 /* Found one, so return 1. */
8073 NFSUNLOCKSTATE();
8074 return (1);
8075 }
8076
8077 /* No Write delegation, so look for an Open with Write_access. */
8078 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
8079 KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0,
8080 ("nfsrv_checkdsattr: Non-open in Open list\n"));
8081 if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 &&
8082 nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
8083 break;
8084 }
8085 NFSUNLOCKSTATE();
8086 if (stp != NULL)
8087 return (1);
8088 return (0);
8089 }
8090
8091 /*
8092 * Look for a matching clientid in the vector. Return 1 if one might match.
8093 */
8094 static int
nfsrv_fndclid(nfsquad_t * clidvec,nfsquad_t clid,int clidcnt)8095 nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt)
8096 {
8097 int i;
8098
8099 /* If too many for the vector, return 1 since there might be a match. */
8100 if (clidcnt > NFSCLIDVECSIZE)
8101 return (1);
8102
8103 for (i = 0; i < clidcnt; i++)
8104 if (clidvec[i].qval == clid.qval)
8105 return (1);
8106 return (0);
8107 }
8108
8109 /*
8110 * Check the don't list for "vp" and see if issuing an rw layout is allowed.
8111 * Return 1 if issuing an rw layout isn't allowed, 0 otherwise.
8112 */
8113 static int
nfsrv_dontlayout(fhandle_t * fhp)8114 nfsrv_dontlayout(fhandle_t *fhp)
8115 {
8116 struct nfsdontlist *mrp;
8117 int ret;
8118
8119 if (nfsrv_dontlistlen == 0)
8120 return (0);
8121 ret = 0;
8122 NFSDDONTLISTLOCK();
8123 LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
8124 if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 &&
8125 (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) {
8126 ret = 1;
8127 break;
8128 }
8129 }
8130 NFSDDONTLISTUNLOCK();
8131 return (ret);
8132 }
8133
8134 #define PNFSDS_COPYSIZ 65536
8135 /*
8136 * Create a new file on a DS and copy the contents of an extant DS file to it.
8137 * This can be used for recovery of a DS file onto a recovered DS.
8138 * The steps are:
8139 * - When called, the MDS file's vnode is locked, blocking LayoutGet operations.
8140 * - Disable issuing of read/write layouts for the file via the nfsdontlist,
8141 * so that they will be disabled after the MDS file's vnode is unlocked.
8142 * - Set up the nfsrv_recalllist so that recall of read/write layouts can
8143 * be done.
8144 * - Unlock the MDS file's vnode, so that the client(s) can perform proxied
8145 * writes, LayoutCommits and LayoutReturns for the file when completing the
8146 * LayoutReturn requested by the LayoutRecall callback.
8147 * - Issue a LayoutRecall callback for all read/write layouts and wait for
8148 * them to be returned. (If the LayoutRecall callback replies
8149 * NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.)
8150 * - Exclusively lock the MDS file's vnode. This ensures that no proxied
8151 * writes are in progress or can occur during the DS file copy.
8152 * It also blocks Setattr operations.
8153 * - Create the file on the recovered mirror.
8154 * - Copy the file from the operational DS.
8155 * - Copy any ACL from the MDS file to the new DS file.
8156 * - Set the modify time of the new DS file to that of the MDS file.
8157 * - Update the extended attribute for the MDS file.
8158 * - Enable issuing of rw layouts by deleting the nfsdontlist entry.
8159 * - The caller will unlock the MDS file's vnode allowing operations
8160 * to continue normally, since it is now on the mirror again.
8161 */
/*
 * Arguments:
 *   vp        - MDS file vnode; asserted locked on entry.  It is unlocked
 *               while layouts are recalled and re-locked LK_EXCLUSIVE
 *               before the copy.  The two early error returns (file handle
 *               lookup failure, duplicate dontlist entry) happen before
 *               the unlock.
 *   fvp       - vnode of the extant DS file that is read; asserted locked.
 *   dvp       - passed to nfsrv_createdsfile() as the place to create the
 *               new DS file; its mount is used for vn_start_write().
 *   ds, pf    - passed through to nfsrv_createdsfile().
 *   wpf       - array of mirrorcnt pnfsdsfile entries written to the
 *               "pnfsd.dsfile" system extended attribute of vp on success.
 *   mirrorcnt - number of entries in wpf.
 *   cred, p   - credentials and thread used for the VOP calls.
 * Returns 0 on success, otherwise an errno value.  ENXIO is returned when
 * a dontlist entry for the file already exists or when a layout is still
 * not returned after 60 sleeps of about one second each (hz ticks);
 * ESTALE when vp is found doomed after being re-locked.
 */
8162 int
nfsrv_copymr(vnode_t vp,vnode_t fvp,vnode_t dvp,struct nfsdevice * ds,struct pnfsdsfile * pf,struct pnfsdsfile * wpf,int mirrorcnt,struct ucred * cred,NFSPROC_T * p)8163 nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds,
8164 struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt,
8165 struct ucred *cred, NFSPROC_T *p)
8166 {
8167 struct nfsdontlist *mrp, *nmrp;
8168 struct nfslayouthash *lhyp;
8169 struct nfslayout *lyp, *nlyp;
8170 struct nfslayouthead thl;
8171 struct mount *mp, *tvmp;
8172 struct acl *aclp;
8173 struct vattr va;
8174 struct timespec mtime;
8175 fhandle_t fh;
8176 vnode_t tvp;
8177 off_t rdpos, wrpos;
8178 ssize_t aresid;
8179 char *dat;
8180 int didprintf, ret, retacl, xfer;
8181
8182 ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp");
8183 ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp");
8184 /*
8185 * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag
8186 * so that no more RW layouts will get issued.
8187 */
8188 ret = nfsvno_getfh(vp, &fh, p);
8189 if (ret != 0) {
8190 NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret);
8191 return (ret);
8192 }
/* Allocate before taking the list lock, since M_WAITOK may sleep. */
8193 nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK);
8194 nmrp->nfsmr_flags = NFSMR_DONTLAYOUT;
8195 NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh));
8196 NFSDDONTLISTLOCK();
8197 LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
8198 if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0)
8199 break;
8200 }
8201 if (mrp == NULL) {
8202 LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list);
8203 mrp = nmrp;
8204 nmrp = NULL;
/*
 * NOTE(review): nfsrv_dontlistlen is incremented here but is not
 * decremented when the entry is removed at the end of this function;
 * confirm whether that is intentional.
 */
8205 nfsrv_dontlistlen++;
8206 NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n");
8207 } else {
/* An entry for this file handle already exists: fail with ENXIO. */
8208 NFSDDONTLISTUNLOCK();
8209 free(nmrp, M_NFSDSTATE);
8210 NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n");
8211 return (ENXIO);
8212 }
8213 NFSDDONTLISTUNLOCK();
8214
8215 /*
8216 * Search for all RW layouts for this file. Move them to the
8217 * recall list, so they can be recalled and their return noted.
8218 */
8219 lhyp = NFSLAYOUTHASH(&fh);
8220 NFSDRECALLLOCK();
8221 NFSLOCKLAYOUT(lhyp);
8222 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
8223 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8224 (lyp->lay_flags & NFSLAY_RW) != 0) {
8225 TAILQ_REMOVE(&lhyp->list, lyp, lay_list);
8226 TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list);
8227 lyp->lay_trycnt = 0;
8228 }
8229 }
8230 NFSUNLOCKLAYOUT(lhyp);
8231 NFSDRECALLUNLOCK();
8232
8233 ret = 0;
8234 mp = tvmp = NULL;
8235 didprintf = 0;
8236 TAILQ_INIT(&thl);
8237 /* Unlock the MDS vp, so that a LayoutReturn can be done on it. */
8238 NFSVOPUNLOCK(vp);
8239 /* Now, do a recall for all layouts not yet recalled. */
/*
 * The recall lock is dropped around each nfsrv_recalllayout() call, so
 * the scan restarts from the list head after every callback.
 */
8240 tryagain:
8241 NFSDRECALLLOCK();
8242 TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
8243 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8244 (lyp->lay_flags & NFSLAY_RECALL) == 0) {
8245 lyp->lay_flags |= NFSLAY_RECALL;
8246 /*
8247 * The layout stateid.seqid needs to be incremented
8248 * before doing a LAYOUT_RECALL callback.
8249 */
8250 if (++lyp->lay_stateid.seqid == 0)
8251 lyp->lay_stateid.seqid = 1;
8252 NFSDRECALLUNLOCK();
8253 nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
8254 &lyp->lay_fh, lyp, 0, lyp->lay_type, p);
8255 NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n");
8256 goto tryagain;
8257 }
8258 }
8259
/*
 * The loop above falls through with the recall lock still held; it is
 * the mutex handed to mtx_sleep() below and is released after the wait
 * loop.
 */
8260 /* Now wait for them to be returned. */
8261 tryagain2:
8262 TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
8263 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) {
8264 if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) {
/* Returned: park it on the private list to be freed later. */
8265 TAILQ_REMOVE(&nfsrv_recalllisthead, lyp,
8266 lay_list);
8267 TAILQ_INSERT_HEAD(&thl, lyp, lay_list);
8268 NFSD_DEBUG(4,
8269 "nfsrv_copymr: layout returned\n");
8270 } else {
8271 lyp->lay_trycnt++;
/*
 * NOTE(review): only EINTR and ERESTART are checked after the sleep, and
 * ret keeps whatever mtx_sleep() returned.  If the last sleep before the
 * loop ends timed out (presumably a non-zero return, confirm against
 * mtx_sleep(9)) and the layout is then found returned on the rescan, a
 * non-zero ret appears to reach the "ret == 0" checks below and skip the
 * copy; confirm.
 */
8272 ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR,
8273 PVFS | PCATCH, "nfsmrl", hz);
8274 NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n",
8275 ret);
8276 if (ret == EINTR || ret == ERESTART)
8277 break;
8278 if ((lyp->lay_flags & NFSLAY_RETURNED) == 0) {
8279 /*
8280 * Give up after 60sec and return
8281 * ENXIO, failing the copymr.
8282 * This layout will remain on the
8283 * recalllist. It can only be cleared
8284 * by restarting the nfsd.
8285 * This seems the safe way to handle
8286 * it, since it cannot be safely copied
8287 * with an outstanding RW layout.
8288 */
8289 if (lyp->lay_trycnt >= 60) {
8290 ret = ENXIO;
8291 break;
8292 }
8293 if (didprintf == 0) {
8294 printf("nfsrv_copymr: layout "
8295 "not returned\n");
8296 didprintf = 1;
8297 }
8298 }
8299 }
/* The list may have changed while asleep, so rescan from the head. */
8300 goto tryagain2;
8301 }
8302 }
8303 NFSDRECALLUNLOCK();
8304 /* We can now get rid of the layouts that have been returned. */
8305 TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp)
8306 nfsrv_freelayout(&thl, lyp);
8307
8308 /*
8309 * Do the vn_start_write() calls here, before the MDS vnode is
8310 * locked and the tvp is created (locked) in the NFS file system
8311 * that dvp is in.
8312 * For tvmp, this probably isn't necessary, since it will be an
8313 * NFS mount and they are not suspendable at this time.
8314 */
8315 if (ret == 0)
8316 ret = vn_start_write(vp, &mp, V_WAIT | V_PCATCH);
8317 if (ret == 0) {
8318 tvmp = dvp->v_mount;
8319 ret = vn_start_write(NULL, &tvmp, V_WAIT | V_PCATCH);
8320 }
8321
8322 /*
8323 * LK_EXCLUSIVE lock the MDS vnode, so that any
8324 * proxied writes through the MDS will be blocked until we have
8325 * completed the copy and update of the extended attributes.
8326 * This will also ensure that any attributes and ACL will not be
8327 * changed until the copy is complete.
8328 */
/* The lock is taken even when ret != 0, so vp is locked on return. */
8329 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
8330 if (ret == 0 && VN_IS_DOOMED(vp)) {
8331 NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n");
8332 ret = ESTALE;
8333 }
8334
8335 /* Create the data file on the recovered DS. */
8336 if (ret == 0)
8337 ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp);
8338
8339 /* Copy the DS file, if created successfully. */
8340 if (ret == 0) {
8341 /*
8342 * Get any NFSv4 ACL on the MDS file, so that it can be set
8343 * on the new DS file.
8344 */
8345 aclp = acl_alloc(M_WAITOK | M_ZERO);
8346 retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
8347 if (retacl != 0 && retacl != ENOATTR)
8348 NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl);
8349 dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK);
8350 /* Malloc a block of 0s used to check for holes. */
/* This block is allocated on first use and is not freed here. */
8351 if (nfsrv_zeropnfsdat == NULL)
8352 nfsrv_zeropnfsdat = malloc(PNFSDS_COPYSIZ, M_TEMP,
8353 M_WAITOK | M_ZERO);
8354 rdpos = wrpos = 0;
8355 ret = VOP_GETATTR(fvp, &va, cred);
8356 aresid = 0;
/* Copy in PNFSDS_COPYSIZ chunks until a short read or an error. */
8357 while (ret == 0 && aresid == 0) {
8358 ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ,
8359 rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL,
8360 &aresid, p);
8361 xfer = PNFSDS_COPYSIZ - aresid;
8362 if (ret == 0 && xfer > 0) {
8363 rdpos += xfer;
8364 /*
8365 * Skip the write for holes, except for the
8366 * last block.
8367 */
8368 if (xfer < PNFSDS_COPYSIZ || rdpos ==
8369 va.va_size || NFSBCMP(dat,
8370 nfsrv_zeropnfsdat, PNFSDS_COPYSIZ) != 0)
8371 ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer,
8372 wrpos, UIO_SYSSPACE, IO_NODELOCKED,
8373 cred, NULL, NULL, p);
/* wrpos also advances over skipped all-zero blocks. */
8374 if (ret == 0)
8375 wrpos += xfer;
8376 }
8377 }
8378
8379 /* If there is an ACL and the copy succeeded, set the ACL. */
8380 if (ret == 0 && retacl == 0) {
8381 ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p);
8382 /*
8383 * Don't consider these as errors, since VOP_GETACL()
8384 * can return an ACL when they are not actually
8385 * supported. For example, for UFS, VOP_GETACL()
8386 * will return a trivial ACL based on the uid/gid/mode
8387 * when there is no ACL on the file.
8388 * This case should be recognized as a trivial ACL
8389 * by UFS's VOP_SETACL() and succeed, but...
8390 */
8391 if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM)
8392 ret = 0;
8393 }
8394
8395 if (ret == 0)
8396 ret = VOP_FSYNC(tvp, MNT_WAIT, p);
8397
8398 /* Set the DS data file's modify time that of the MDS file. */
8399 if (ret == 0)
8400 ret = VOP_GETATTR(vp, &va, cred);
8401 if (ret == 0) {
/* va is reset so that va_mtime is the only field set for the setattr. */
8402 mtime = va.va_mtime;
8403 VATTR_NULL(&va);
8404 va.va_mtime = mtime;
8405 ret = VOP_SETATTR(tvp, &va, cred);
8406 }
8407
/* Release the new DS vnode and the buffers whether or not the copy worked. */
8408 vput(tvp);
8409 acl_free(aclp);
8410 free(dat, M_TEMP);
8411 }
8412 if (tvmp != NULL)
8413 vn_finished_write(tvmp);
8414
8415 /* Update the extended attributes for the newly created DS file. */
8416 if (ret == 0)
8417 ret = vn_extattr_set(vp, IO_NODELOCKED,
8418 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
8419 sizeof(*wpf) * mirrorcnt, (char *)wpf, p);
8420 if (mp != NULL)
8421 vn_finished_write(mp);
8422
8423 /* Get rid of the dontlist entry, so that Layouts can be issued. */
8424 NFSDDONTLISTLOCK();
8425 LIST_REMOVE(mrp, nfsmr_list);
8426 NFSDDONTLISTUNLOCK();
8427 free(mrp, M_NFSDSTATE);
8428 return (ret);
8429 }
8430
8431 /*
8432 * Create a data storage file on the recovered DS.
8433 */
8434 static int
nfsrv_createdsfile(vnode_t vp,fhandle_t * fhp,struct pnfsdsfile * pf,vnode_t dvp,struct nfsdevice * ds,struct ucred * cred,NFSPROC_T * p,vnode_t * tvpp)8435 nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
8436 vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
8437 vnode_t *tvpp)
8438 {
8439 struct vattr va, nva;
8440 int error;
8441
8442 /* Make data file name based on FH. */
8443 error = VOP_GETATTR(vp, &va, cred);
8444 if (error == 0) {
8445 /* Set the attributes for "vp" to Setattr the DS vp. */
8446 VATTR_NULL(&nva);
8447 nva.va_uid = va.va_uid;
8448 nva.va_gid = va.va_gid;
8449 nva.va_mode = va.va_mode;
8450 nva.va_size = 0;
8451 VATTR_NULL(&va);
8452 va.va_type = VREG;
8453 va.va_mode = nva.va_mode;
8454 NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf);
8455 error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL,
8456 pf->dsf_filename, cred, p, tvpp);
8457 }
8458 return (error);
8459 }
8460
8461 /*
8462 * Look up the MDS file shared locked, and then get the extended attribute
8463 * to find the extant DS file to be copied to the new mirror.
8464 * If successful, *vpp is set to the MDS file's vp and *nvpp is
8465 * set to a DS data file for the MDS file, both exclusively locked.
8466 * The "buf" argument has the pnfsdsfile structure from the MDS file
8467 * in it and buflen is set to its length.
8468 */
/*
 * NOTE(review): the comment above says "exclusively locked", but every
 * namei() in this function uses LOCKSHARED and the MDS vnode is returned
 * without a lock upgrade here; confirm which is intended.
 *
 * Arguments:
 *   mdspathp   - path of the MDS file; must name a regular file (EISDIR
 *                otherwise).
 *   dspathp    - path of a directory on the DS to copy to, or NULL.  It
 *                must be a directory (ENOTDIR) on an "nfs" mount (ENXIO)
 *                and must not be the same vnode as curdspathp (EPERM).
 *   curdspathp - path of a directory on the current DS, or NULL; only
 *                allowed when dspathp is non-NULL (EPERM otherwise).
 *   buf, buflenp - handed to nfsrv_dsgetsockmnt(); on success buf is
 *                treated as an array of struct pnfsdsfile, and *buflenp
 *                grows by one entry if an entry is appended.
 *   fname      - handed to nfsrv_dsgetsockmnt(); copied into the
 *                appended entry's dsf_filename.
 *   p          - thread, handed to nfsrv_dsgetsockmnt().
 *   vpp        - on success, set to the MDS file's vnode.
 *   nvpp       - handed to nfsrv_dsgetsockmnt(); released and cleared
 *                here on the local ENXIO failures.
 *   pfp        - on success, set to the selected entry within buf.
 *   dsp, fdsp  - set to NULL on entry; *fdsp receives the device found
 *                for curdspathp, *dsp the device found for dspathp or,
 *                when dspathp is NULL, for the nmp value left by
 *                nfsrv_dsgetsockmnt().
 * Returns 0 on success or an errno value.  On failure the vnodes looked
 * up here have been released.
 */
8469 int
nfsrv_mdscopymr(char * mdspathp,char * dspathp,char * curdspathp,char * buf,int * buflenp,char * fname,NFSPROC_T * p,struct vnode ** vpp,struct vnode ** nvpp,struct pnfsdsfile ** pfp,struct nfsdevice ** dsp,struct nfsdevice ** fdsp)8470 nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf,
8471 int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp,
8472 struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp,
8473 struct nfsdevice **fdsp)
8474 {
8475 struct nameidata nd;
8476 struct vnode *vp, *curvp;
8477 struct pnfsdsfile *pf;
8478 struct nfsmount *nmp, *curnmp;
8479 int dsdir, error, ippos;
8480
8481 vp = NULL;
8482 curvp = NULL;
8483 curnmp = NULL;
8484 *dsp = NULL;
8485 *fdsp = NULL;
/* A current DS path without a destination DS path is rejected. */
8486 if (dspathp == NULL && curdspathp != NULL)
8487 return (EPERM);
8488
8489 /*
8490 * Look up the MDS file shared locked. The lock will be upgraded
8491 * to an exclusive lock after any rw layouts have been returned.
8492 */
8493 NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp);
8494 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
8495 mdspathp);
8496 error = namei(&nd);
8497 NFSD_DEBUG(4, "lookup=%d\n", error);
8498 if (error != 0)
8499 return (error);
8500 NDFREE_PNBUF(&nd);
8501 if (nd.ni_vp->v_type != VREG) {
8502 vput(nd.ni_vp);
8503 NFSD_DEBUG(4, "mdspath not reg\n");
8504 return (EISDIR);
8505 }
8506 vp = nd.ni_vp;
8507
8508 if (curdspathp != NULL) {
8509 /*
8510 * Look up the current DS path and find the nfsdev structure for
8511 * it.
8512 */
8513 NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp);
8514 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
8515 UIO_SYSSPACE, curdspathp);
8516 error = namei(&nd);
8517 NFSD_DEBUG(4, "ds lookup=%d\n", error);
8518 if (error != 0) {
8519 vput(vp);
8520 return (error);
8521 }
8522 NDFREE_PNBUF(&nd);
8523 if (nd.ni_vp->v_type != VDIR) {
8524 vput(nd.ni_vp);
8525 vput(vp);
8526 NFSD_DEBUG(4, "curdspath not dir\n");
8527 return (ENOTDIR);
8528 }
/* Check the file system type name before using VFSTONFS() on the mount. */
8529 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
8530 vput(nd.ni_vp);
8531 vput(vp);
8532 NFSD_DEBUG(4, "curdspath not an NFS mount\n");
8533 return (ENXIO);
8534 }
8535 curnmp = VFSTONFS(nd.ni_vp->v_mount);
8536
8537 /* Search the nfsdev list for a match. */
8538 NFSDDSLOCK();
8539 *fdsp = nfsv4_findmirror(curnmp);
8540 NFSDDSUNLOCK();
8541 if (*fdsp == NULL)
8542 curnmp = NULL;
8543 if (curnmp == NULL) {
8544 vput(nd.ni_vp);
8545 vput(vp);
8546 NFSD_DEBUG(4, "mdscopymr: no current ds\n");
8547 return (ENXIO);
8548 }
/* Keep the current DS directory vnode until after nfsrv_dsgetsockmnt(). */
8549 curvp = nd.ni_vp;
8550 }
8551
8552 if (dspathp != NULL) {
8553 /* Look up the nfsdev path and find the nfsdev structure. */
8554 NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp);
8555 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
8556 UIO_SYSSPACE, dspathp);
8557 error = namei(&nd);
8558 NFSD_DEBUG(4, "ds lookup=%d\n", error);
8559 if (error != 0) {
8560 vput(vp);
8561 if (curvp != NULL)
8562 vput(curvp);
8563 return (error);
8564 }
8565 NDFREE_PNBUF(&nd);
/* The destination must be a directory and differ from the current DS. */
8566 if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) {
8567 vput(nd.ni_vp);
8568 vput(vp);
8569 if (curvp != NULL)
8570 vput(curvp);
8571 NFSD_DEBUG(4, "dspath not dir\n");
/* Only the pointer values are compared after the vput() calls. */
8572 if (nd.ni_vp == curvp)
8573 return (EPERM);
8574 return (ENOTDIR);
8575 }
8576 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
8577 vput(nd.ni_vp);
8578 vput(vp);
8579 if (curvp != NULL)
8580 vput(curvp);
8581 NFSD_DEBUG(4, "dspath not an NFS mount\n");
8582 return (ENXIO);
8583 }
8584 nmp = VFSTONFS(nd.ni_vp->v_mount);
8585
8586 /*
8587 * Search the nfsdevice list for a match. If curnmp == NULL,
8588 * this is a recovery and there must be a mirror.
8589 */
8590 NFSDDSLOCK();
8591 if (curnmp == NULL)
8592 *dsp = nfsrv_findmirroredds(nmp);
8593 else
8594 *dsp = nfsv4_findmirror(nmp);
8595 NFSDDSUNLOCK();
8596 if (*dsp == NULL) {
8597 vput(nd.ni_vp);
8598 vput(vp);
8599 if (curvp != NULL)
8600 vput(curvp);
8601 NFSD_DEBUG(4, "mdscopymr: no ds\n");
8602 return (ENXIO);
8603 }
8604 } else {
/* nd.ni_vp == NULL records that no destination DS path was looked up. */
8605 nd.ni_vp = NULL;
8606 nmp = NULL;
8607 }
8608
8609 /*
8610 * Get a vp for an available DS data file using the extended
8611 * attribute on the MDS file.
8612 * If there is a valid entry for the new DS in the extended attribute
8613 * on the MDS file (as checked via the nmp argument),
8614 * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur.
8615 */
8616 error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, NULL, NULL, NULL, p,
8617 NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir);
8618 if (curvp != NULL)
8619 vput(curvp);
8620 if (nd.ni_vp == NULL) {
/* No destination given: use the nmp handed back by nfsrv_dsgetsockmnt(). */
8621 if (error == 0 && nmp != NULL) {
8622 /* Search the nfsdev list for a match. */
8623 NFSDDSLOCK();
8624 *dsp = nfsrv_findmirroredds(nmp);
8625 NFSDDSUNLOCK();
8626 }
8627 if (error == 0 && (nmp == NULL || *dsp == NULL)) {
8628 if (nvpp != NULL && *nvpp != NULL) {
8629 vput(*nvpp);
8630 *nvpp = NULL;
8631 }
8632 error = ENXIO;
8633 }
8634 } else
8635 vput(nd.ni_vp);
8636
8637 /*
8638 * When dspathp != NULL and curdspathp == NULL, this is a recovery
8639 * and is only allowed if there is a 0.0.0.0 IP address entry.
8640 * When curdspathp != NULL, the ippos will be set to that entry.
8641 */
/* ippos and dsdir are only read when nfsrv_dsgetsockmnt() returned 0. */
8642 if (error == 0 && dspathp != NULL && ippos == -1) {
8643 if (nvpp != NULL && *nvpp != NULL) {
8644 vput(*nvpp);
8645 *nvpp = NULL;
8646 }
8647 error = ENXIO;
8648 }
8649 if (error == 0) {
8650 *vpp = vp;
8651
8652 pf = (struct pnfsdsfile *)buf;
/* Given the check above, ippos == -1 here implies dspathp == NULL. */
8653 if (ippos == -1) {
8654 /* If no zeroip pnfsdsfile, add one. */
/*
 * The new entry is appended after the existing ones.
 * NOTE(review): assumes buf has room for one more entry; confirm
 * against the caller.
 */
8655 ippos = *buflenp / sizeof(*pf);
8656 *buflenp += sizeof(*pf);
8657 pf += ippos;
8658 pf->dsf_dir = dsdir;
8659 strlcpy(pf->dsf_filename, fname,
8660 sizeof(pf->dsf_filename));
8661 } else
8662 pf += ippos;
8663 *pfp = pf;
8664 } else
8665 vput(vp);
8666 return (error);
8667 }
8668
8669 /*
8670 * Search for a matching pnfsd mirror device structure, base on the nmp arg.
8671 * Return one if found, NULL otherwise.
8672 */
8673 static struct nfsdevice *
nfsrv_findmirroredds(struct nfsmount * nmp)8674 nfsrv_findmirroredds(struct nfsmount *nmp)
8675 {
8676 struct nfsdevice *ds, *fndds;
8677 int fndmirror;
8678
8679 mtx_assert(NFSDDSMUTEXPTR, MA_OWNED);
8680 /*
8681 * Search the DS server list for a match with nmp.
8682 * Remove the DS entry if found and there is a mirror.
8683 */
8684 fndds = NULL;
8685 fndmirror = 0;
8686 if (nfsrv_devidcnt == 0)
8687 return (fndds);
8688 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8689 if (ds->nfsdev_nmp == nmp) {
8690 NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n");
8691 fndds = ds;
8692 break;
8693 }
8694 }
8695 if (fndds == NULL)
8696 return (fndds);
8697 if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
8698 fndmirror = 1;
8699 else if (fndds->nfsdev_mdsisset != 0) {
8700 /* For the fsid is set case, search for a mirror. */
8701 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8702 if (ds != fndds && ds->nfsdev_nmp != NULL &&
8703 ds->nfsdev_mdsisset != 0 &&
8704 fsidcmp(&ds->nfsdev_mdsfsid,
8705 &fndds->nfsdev_mdsfsid) == 0) {
8706 fndmirror = 1;
8707 break;
8708 }
8709 }
8710 }
8711 if (fndmirror == 0) {
8712 NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n");
8713 return (NULL);
8714 }
8715 return (fndds);
8716 }
8717
8718 /*
8719 * Mark the appropriate devid and all associated layout as "out of space".
8720 */
8721 void
nfsrv_marknospc(char * devid,bool setit)8722 nfsrv_marknospc(char *devid, bool setit)
8723 {
8724 struct nfsdevice *ds;
8725 struct nfslayout *lyp;
8726 struct nfslayouthash *lhyp;
8727 int i;
8728
8729 NFSDDSLOCK();
8730 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8731 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0) {
8732 NFSD_DEBUG(1, "nfsrv_marknospc: devid %d\n", setit);
8733 ds->nfsdev_nospc = setit;
8734 }
8735 }
8736 NFSDDSUNLOCK();
8737
8738 for (i = 0; i < nfsrv_layouthashsize; i++) {
8739 lhyp = &nfslayouthash[i];
8740 NFSLOCKLAYOUT(lhyp);
8741 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
8742 if (NFSBCMP(lyp->lay_deviceid, devid,
8743 NFSX_V4DEVICEID) == 0) {
8744 NFSD_DEBUG(1, "nfsrv_marknospc: layout %d\n",
8745 setit);
8746 if (setit)
8747 lyp->lay_flags |= NFSLAY_NOSPC;
8748 else
8749 lyp->lay_flags &= ~NFSLAY_NOSPC;
8750 }
8751 }
8752 NFSUNLOCKLAYOUT(lhyp);
8753 }
8754 }
8755
8756 /*
8757 * Check to see if SP4_MACH_CRED is in use and, if it is, check that the
8758 * correct machine credential is being used.
8759 */
8760 static int
nfsrv_checkmachcred(int op,struct nfsrv_descript * nd,struct nfsclient * clp)8761 nfsrv_checkmachcred(int op, struct nfsrv_descript *nd, struct nfsclient *clp)
8762 {
8763
8764 if ((clp->lc_flags & LCL_MACHCRED) == 0 ||
8765 !NFSISSET_OPBIT(&clp->lc_mustops, op))
8766 return (0);
8767 KASSERT((nd->nd_flag & ND_NFSV41) != 0,
8768 ("nfsrv_checkmachcred: MachCred for NFSv4.0"));
8769 if ((nd->nd_flag & (ND_GSSINTEGRITY | ND_GSSPRIVACY)) != 0 &&
8770 nd->nd_princlen == clp->lc_namelen &&
8771 !NFSBCMP(nd->nd_principal, clp->lc_name, nd->nd_princlen))
8772 return (0);
8773 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
8774 }
8775
8776 /*
8777 * Issue a delegation and, optionally set rflagsp for why not.
8778 */
8779 static void
nfsrv_issuedelegation(struct vnode * vp,struct nfsclient * clp,struct nfsrv_descript * nd,int delegate,int writedeleg,int readonly,u_quad_t filerev,uint64_t rdonly,struct nfsstate ** new_delegp,struct nfsstate * new_stp,struct nfslockfile * lfp,uint32_t * rflagsp,nfsv4stateid_t * delegstateidp)8780 nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp,
8781 struct nfsrv_descript *nd, int delegate, int writedeleg, int readonly,
8782 u_quad_t filerev, uint64_t rdonly, struct nfsstate **new_delegp,
8783 struct nfsstate *new_stp, struct nfslockfile *lfp, uint32_t *rflagsp,
8784 nfsv4stateid_t *delegstateidp)
8785 {
8786 struct nfsstate *up_deleg, *new_deleg;
8787
8788 new_deleg = *new_delegp;
8789 up_deleg = LIST_FIRST(&lfp->lf_deleg);
8790 if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
8791 *rflagsp |= NFSV4OPEN_WDNOTWANTED;
8792 else if (nfsrv_issuedelegs == 0)
8793 *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
8794 else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
8795 *rflagsp |= NFSV4OPEN_WDRESOURCE;
8796 else if (delegate == 0 || !NFSVNO_DELEGOK(vp) ||
8797 (writedeleg == 0 && (readonly == 0 ||
8798 (new_stp->ls_flags & NFSLCK_WANTWDELEG) != 0)) ||
8799 (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
8800 LCL_CALLBACKSON) {
8801 /* Is this a downgrade attempt? */
8802 if (up_deleg != NULL && up_deleg->ls_clp == clp &&
8803 (up_deleg->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
8804 (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0)
8805 *rflagsp |= NFSV4OPEN_WDNOTSUPPDOWNGRADE;
8806 else
8807 *rflagsp |= NFSV4OPEN_WDCONTENTION;
8808 } else if (up_deleg != NULL &&
8809 (up_deleg->ls_flags & NFSLCK_DELEGREAD) != 0 &&
8810 (new_stp->ls_flags & NFSLCK_WANTWDELEG) != 0) {
8811 /* This is an atomic upgrade. */
8812 up_deleg->ls_stateid.seqid++;
8813 delegstateidp->seqid = up_deleg->ls_stateid.seqid;
8814 delegstateidp->other[0] = up_deleg->ls_stateid.other[0];
8815 delegstateidp->other[1] = up_deleg->ls_stateid.other[1];
8816 delegstateidp->other[2] = up_deleg->ls_stateid.other[2];
8817 up_deleg->ls_flags = (NFSLCK_DELEGWRITE |
8818 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
8819 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
8820 nfsrv_writedelegcnt++;
8821 } else {
8822 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
8823 new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
8824 = clp->lc_clientid.lval[0];
8825 new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
8826 = clp->lc_clientid.lval[1];
8827 new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
8828 = nfsrv_nextstateindex(clp);
8829 if (writedeleg && !rdonly &&
8830 (nfsrv_writedelegifpos || !readonly) &&
8831 (new_stp->ls_flags & (NFSLCK_WANTRDELEG |
8832 NFSLCK_WANTWDELEG)) != NFSLCK_WANTRDELEG) {
8833 new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
8834 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
8835 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
8836 nfsrv_writedelegcnt++;
8837 } else {
8838 new_deleg->ls_flags = (NFSLCK_DELEGREAD |
8839 NFSLCK_READACCESS);
8840 *rflagsp |= NFSV4OPEN_READDELEGATE;
8841 }
8842 new_deleg->ls_uid = new_stp->ls_uid;
8843 new_deleg->ls_lfp = lfp;
8844 new_deleg->ls_clp = clp;
8845 new_deleg->ls_filerev = filerev;
8846 new_deleg->ls_compref = nd->nd_compref;
8847 new_deleg->ls_lastrecall = 0;
8848 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
8849 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid),
8850 new_deleg, ls_hash);
8851 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
8852 *new_delegp = NULL;
8853 VNET(nfsstatsv1_p)->srvdelegates++;
8854 nfsrv_openpluslock++;
8855 nfsrv_delegatecnt++;
8856 }
8857 }
8858
8859 /*
8860 * Find and remove any delegations for the fh.
8861 */
8862 void
nfsrv_removedeleg(fhandle_t * fhp,struct nfsrv_descript * nd,NFSPROC_T * p)8863 nfsrv_removedeleg(fhandle_t *fhp, struct nfsrv_descript *nd, NFSPROC_T *p)
8864 {
8865 struct nfsclient *clp;
8866 struct nfsstate *stp, *nstp;
8867 struct nfslockfile *lfp;
8868 int error;
8869
8870 NFSLOCKSTATE();
8871 error = nfsrv_getclient(nd->nd_clientid, CLOPS_RENEW, &clp, NULL,
8872 (nfsquad_t)((u_quad_t)0), 0, nd, p);
8873 if (error == 0)
8874 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, fhp, 0);
8875 /*
8876 * Now we must free any delegations.
8877 */
8878 if (error == 0) {
8879 LIST_FOREACH_SAFE(stp, &lfp->lf_deleg, ls_file, nstp)
8880 nfsrv_freedeleg(stp);
8881 }
8882 NFSUNLOCKSTATE();
8883 }
8884