/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Rick Macklem, University of Guelph
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
29
30 #include <sys/cdefs.h>
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 #include <sys/extattr.h>
34 #include <fs/nfs/nfsport.h>
35
36 int nfsrv_issuedelegs = 0;
37 int nfsrv_dolocallocks = 0;
38 struct nfsv4lock nfsv4rootfs_lock;
39 time_t nfsdev_time = 0;
40 int nfsrv_layouthashsize;
41 volatile int nfsrv_layoutcnt = 0;
42
43 NFSD_VNET_DEFINE(struct nfsrv_stablefirst, nfsrv_stablefirst);
44
45 NFSD_VNET_DECLARE(int, nfsrv_numnfsd);
46 NFSD_VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p);
47
48 extern uint32_t nfs_srvmaxio;
49 extern int nfsrv_lease;
50 extern struct timeval nfsboottime;
51 extern u_int32_t newnfs_true, newnfs_false;
52 extern struct mtx nfsrv_dslock_mtx;
53 extern struct mtx nfsrv_recalllock_mtx;
54 extern struct mtx nfsrv_dontlistlock_mtx;
55 extern int nfsd_debuglevel;
56 extern u_int nfsrv_dsdirsize;
57 extern struct nfsdevicehead nfsrv_devidhead;
58 extern int nfsrv_doflexfile;
59 extern int nfsrv_maxpnfsmirror;
60 NFSV4ROOTLOCKMUTEX;
61 NFSSTATESPINLOCK;
62 extern struct nfsdontlisthead nfsrv_dontlisthead;
63 extern volatile int nfsrv_devidcnt;
64 extern struct nfslayouthead nfsrv_recalllisthead;
65 extern char *nfsrv_zeropnfsdat;
66
67 SYSCTL_DECL(_vfs_nfsd);
68 int nfsrv_statehashsize = NFSSTATEHASHSIZE;
69 SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
70 &nfsrv_statehashsize, 0,
71 "Size of state hash table set via loader.conf");
72
73 int nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
74 SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
75 &nfsrv_clienthashsize, 0,
76 "Size of client hash table set via loader.conf");
77
78 int nfsrv_lockhashsize = NFSLOCKHASHSIZE;
79 SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
80 &nfsrv_lockhashsize, 0,
81 "Size of file handle hash table set via loader.conf");
82
83 int nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
84 SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
85 &nfsrv_sessionhashsize, 0,
86 "Size of session hash table set via loader.conf");
87
88 int nfsrv_layouthighwater = NFSLAYOUTHIGHWATER;
89 SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN,
90 &nfsrv_layouthighwater, 0,
91 "High water mark for number of layouts set via loader.conf");
92
93 static int nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
94 SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
95 &nfsrv_v4statelimit, 0,
96 "High water limit for NFSv4 opens+locks+delegations");
97
98 static int nfsrv_writedelegifpos = 0;
99 SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
100 &nfsrv_writedelegifpos, 0,
101 "Issue a write delegation for read opens if possible");
102
103 static int nfsrv_allowreadforwriteopen = 1;
104 SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW,
105 &nfsrv_allowreadforwriteopen, 0,
106 "Allow Reads to be done with Write Access StateIDs");
107
108 int nfsrv_pnfsatime = 0;
109 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW,
110 &nfsrv_pnfsatime, 0,
111 "For pNFS service, do Getattr ops to keep atime up-to-date");
112
113 int nfsrv_flexlinuxhack = 0;
114 SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW,
115 &nfsrv_flexlinuxhack, 0,
116 "For Linux clients, hack around Flex File Layout bug");
117
118 NFSD_VNET_DEFINE_STATIC(bool, nfsd_disable_grace) = false;
119 SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, testing_disable_grace,
120 CTLFLAG_NFSD_VNET | CTLFLAG_RW, &NFSD_VNET_NAME(nfsd_disable_grace),
121 0, "Disable grace for testing");
122
123 /*
124 * Hash lists for nfs V4.
125 */
126 NFSD_VNET_DEFINE(struct nfsclienthashhead *, nfsclienthash);
127 NFSD_VNET_DEFINE(struct nfslockhashhead *, nfslockhash);
128 NFSD_VNET_DEFINE(struct nfssessionhash *, nfssessionhash);
129
130 struct nfslayouthash *nfslayouthash;
131 volatile int nfsrv_dontlistlen = 0;
132
133 static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
134 static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
135 static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
136 static int nfsrv_nogsscallback = 0;
137 static volatile int nfsrv_writedelegcnt = 0;
138 static int nfsrv_faildscnt;
139
140 NFSD_VNET_DEFINE_STATIC(time_t, nfsrvboottime);
141
142 /* local functions */
143 static void nfsrv_dumpaclient(struct nfsclient *clp,
144 struct nfsd_dumpclients *dumpp);
145 static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
146 NFSPROC_T *p);
147 static void nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
148 NFSPROC_T *p);
149 static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
150 NFSPROC_T *p);
151 static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
152 int cansleep, NFSPROC_T *p);
153 static void nfsrv_freenfslock(struct nfslock *lop);
154 static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
155 static void nfsrv_freedeleg(struct nfsstate *);
156 static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
157 u_int32_t flags, struct nfsstate **stpp);
158 static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
159 struct nfsstate **stpp);
160 static int nfsrv_getlockfh(vnode_t vp, u_short flags,
161 struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
162 static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
163 struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
164 static void nfsrv_insertlock(struct nfslock *new_lop,
165 struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
166 static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
167 struct nfslock **other_lopp, struct nfslockfile *lfp);
168 static int nfsrv_getipnumber(u_char *cp);
169 static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
170 nfsv4stateid_t *stateidp, int specialid);
171 static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
172 u_int32_t flags);
173 static int nfsrv_docallback(struct nfsclient *clp, int procnum,
174 nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
175 struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p);
176 static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
177 uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
178 int *slotposp);
179 static u_int32_t nfsrv_nextclientindex(void);
180 static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
181 static void nfsrv_markstable(struct nfsclient *clp);
182 static void nfsrv_markreclaim(struct nfsclient *clp);
183 static int nfsrv_checkstable(struct nfsclient *clp);
184 static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct
185 vnode *vp, NFSPROC_T *p);
186 static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
187 NFSPROC_T *p, vnode_t vp);
188 static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
189 struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
190 static int nfsrv_notsamecredname(int op, struct nfsrv_descript *nd,
191 struct nfsclient *clp);
192 static time_t nfsrv_leaseexpiry(void);
193 static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
194 static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
195 struct nfsstate *stp, struct nfsrvcache *op);
196 static int nfsrv_nootherstate(struct nfsstate *stp);
197 static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
198 uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
199 static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
200 uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
201 static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
202 int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
203 NFSPROC_T *p);
204 static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
205 NFSPROC_T *p);
206 static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
207 uint64_t first, uint64_t end);
208 static void nfsrv_locklf(struct nfslockfile *lfp);
209 static void nfsrv_unlocklf(struct nfslockfile *lfp);
210 static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
211 static int nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
212 uint8_t *sessionid, bool locked, SVCXPRT **old_xprtp);
213 static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
214 int dont_replycache, struct nfsdsession **sepp, int *slotposp);
215 static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
216 static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
217 nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p);
218 static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp);
219 static void nfsrv_freelayoutlist(nfsquad_t clientid);
220 static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype,
221 int iomode);
222 static void nfsrv_freealllayouts(void);
223 static void nfsrv_freedevid(struct nfsdevice *ds);
224 static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
225 struct nfsdevice **dsp);
226 static void nfsrv_deleteds(struct nfsdevice *fndds);
227 static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost);
228 static void nfsrv_freealldevids(void);
229 static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp,
230 int maxcnt, NFSPROC_T *p);
231 static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp,
232 fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype,
233 NFSPROC_T *p);
234 static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
235 NFSPROC_T *, struct nfslayout **lypp);
236 static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt);
237 static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode,
238 fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
239 static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode,
240 int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
241 static int nfsrv_dontlayout(fhandle_t *fhp);
242 static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
243 vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
244 vnode_t *tvpp);
245 static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp);
246 static int nfsrv_checkmachcred(int op, struct nfsrv_descript *nd,
247 struct nfsclient *clp);
248 static void nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp,
249 struct nfsrv_descript *nd, int delegate, int writedeleg, int readonly,
250 u_quad_t filerev, uint64_t rdonly, struct nfsstate **new_delegp,
251 struct nfsstate *new_stp, struct nfslockfile *lfp, uint32_t *rflagsp,
252 nfsv4stateid_t *delegstateidp);
253 static void nfsrv_clientlock(bool mlocked);
254 static void nfsrv_clientunlock(bool mlocked);
255
256 /*
257 * Lock the client structure, either with the mutex or the exclusive nfsd lock.
258 */
259 static void
nfsrv_clientlock(bool mlocked)260 nfsrv_clientlock(bool mlocked)
261 {
262 int igotlock;
263
264 if (mlocked) {
265 NFSLOCKSTATE();
266 } else {
267 NFSLOCKV4ROOTMUTEX();
268 nfsv4_relref(&nfsv4rootfs_lock);
269 do {
270 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
271 NFSV4ROOTLOCKMUTEXPTR, NULL);
272 } while (!igotlock);
273 NFSUNLOCKV4ROOTMUTEX();
274 }
275 }
276
277 /*
278 * Unlock the client structure.
279 */
280 static void
nfsrv_clientunlock(bool mlocked)281 nfsrv_clientunlock(bool mlocked)
282 {
283
284 if (mlocked) {
285 NFSUNLOCKSTATE();
286 } else {
287 NFSLOCKV4ROOTMUTEX();
288 nfsv4_unlock(&nfsv4rootfs_lock, 1);
289 NFSUNLOCKV4ROOTMUTEX();
290 }
291 }
292
293 /*
294 * Scan the client list for a match and either return the current one,
295 * create a new entry or return an error.
296 * If returning a non-error, the clp structure must either be linked into
297 * the client list or free'd.
298 */
299 int
nfsrv_setclient(struct nfsrv_descript * nd,struct nfsclient ** new_clpp,nfsquad_t * clientidp,nfsquad_t * confirmp,NFSPROC_T * p)300 nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
301 nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
302 {
303 struct nfsclient *clp = NULL, *new_clp = *new_clpp;
304 int i, error = 0, ret;
305 struct nfsstate *stp, *tstp;
306 #ifdef INET
307 struct sockaddr_in *sin, *rin;
308 #endif
309 #ifdef INET6
310 struct sockaddr_in6 *sin6, *rin6;
311 #endif
312 struct nfsdsession *sep, *nsep;
313 SVCXPRT *old_xprt;
314 struct nfssessionhead old_sess;
315 int zapit = 0, gotit, hasstate = 0;
316 bool mlocked;
317 static u_int64_t confirm_index = 0;
318
319 /*
320 * Check for state resource limit exceeded.
321 */
322 if (nfsrv_openpluslock > nfsrv_v4statelimit) {
323 error = NFSERR_RESOURCE;
324 goto out;
325 }
326
327 if (nfsrv_issuedelegs == 0 ||
328 ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
329 /*
330 * Don't do callbacks when delegations are disabled or
331 * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
332 * If establishing a callback connection is attempted
333 * when a firewall is blocking the callback path, the
334 * server may wait too long for the connect attempt to
335 * succeed during the Open. Some clients, such as Linux,
336 * may timeout and give up on the Open before the server
337 * replies. Also, since AUTH_GSS callbacks are not
338 * yet interoperability tested, they might cause the
339 * server to crap out, if they get past the Init call to
340 * the client.
341 */
342 new_clp->lc_program = 0;
343
344 mlocked = true;
345 if (nfsrv_dolocallocks != 0)
346 mlocked = false;
347 /* Lock out other nfsd threads */
348 nfsrv_clientlock(mlocked);
349
350 /*
351 * Search for a match in the client list.
352 */
353 gotit = i = 0;
354 while (i < nfsrv_clienthashsize && !gotit) {
355 LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) {
356 if (new_clp->lc_idlen == clp->lc_idlen &&
357 !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
358 gotit = 1;
359 break;
360 }
361 }
362 if (gotit == 0)
363 i++;
364 }
365 old_xprt = NULL;
366 if (!gotit ||
367 (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
368 if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
369 /*
370 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
371 * client is trying to update a confirmed clientid.
372 */
373 nfsrv_clientunlock(mlocked);
374 confirmp->lval[1] = 0;
375 error = NFSERR_NOENT;
376 goto out;
377 }
378 /*
379 * Get rid of the old one.
380 */
381 if (i != nfsrv_clienthashsize) {
382 LIST_REMOVE(clp, lc_hash);
383 if (mlocked)
384 nfsrv_cleanclient(clp, p, true, &old_xprt);
385 else
386 nfsrv_cleanclient(clp, p, false, NULL);
387 nfsrv_freedeleglist(&clp->lc_deleg);
388 nfsrv_freedeleglist(&clp->lc_olddeleg);
389 zapit = 1;
390 }
391 /*
392 * Add it after assigning a client id to it.
393 */
394 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
395 if ((nd->nd_flag & ND_NFSV41) != 0) {
396 confirmp->lval[0] = ++confirm_index;
397 new_clp->lc_confirm.lval[0] = confirmp->lval[0] - 1;
398 } else
399 confirmp->qval = new_clp->lc_confirm.qval =
400 ++confirm_index;
401 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
402 NFSD_VNET(nfsrvboottime);
403 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
404 nfsrv_nextclientindex();
405 new_clp->lc_stateindex = 0;
406 new_clp->lc_statemaxindex = 0;
407 new_clp->lc_prevsess = 0;
408 new_clp->lc_cbref = 0;
409 new_clp->lc_expiry = nfsrv_leaseexpiry();
410 LIST_INIT(&new_clp->lc_open);
411 LIST_INIT(&new_clp->lc_deleg);
412 LIST_INIT(&new_clp->lc_olddeleg);
413 LIST_INIT(&new_clp->lc_session);
414 for (i = 0; i < nfsrv_statehashsize; i++)
415 LIST_INIT(&new_clp->lc_stateid[i]);
416 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
417 lc_hash);
418 NFSD_VNET(nfsstatsv1_p)->srvclients++;
419 nfsrv_openpluslock++;
420 nfsrv_clients++;
421 nfsrv_clientunlock(mlocked);
422 if (zapit != 0) {
423 if (old_xprt != NULL)
424 SVC_RELEASE(old_xprt);
425 nfsrv_zapclient(clp, p);
426 }
427 *new_clpp = NULL;
428 goto out;
429 }
430
431 /*
432 * Now, handle the cases where the id is already issued.
433 */
434 if (nfsrv_notsamecredname(NFSV4OP_EXCHANGEID, nd, clp)) {
435 /*
436 * Check to see if there is expired state that should go away.
437 */
438 if (clp->lc_expiry < NFSD_MONOSEC &&
439 (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
440 if (mlocked)
441 nfsrv_cleanclient(clp, p, true, &old_xprt);
442 else
443 nfsrv_cleanclient(clp, p, false, NULL);
444 nfsrv_freedeleglist(&clp->lc_deleg);
445 }
446
447 /*
448 * If there is outstanding state, then reply NFSERR_CLIDINUSE per
449 * RFC3530 Sec. 8.1.2 last para.
450 */
451 if (!LIST_EMPTY(&clp->lc_deleg)) {
452 hasstate = 1;
453 } else if (LIST_EMPTY(&clp->lc_open)) {
454 hasstate = 0;
455 } else {
456 hasstate = 0;
457 /* Look for an Open on the OpenOwner */
458 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
459 if (!LIST_EMPTY(&stp->ls_open)) {
460 hasstate = 1;
461 break;
462 }
463 }
464 }
465 if (hasstate) {
466 /*
467 * If the uid doesn't match, return NFSERR_CLIDINUSE after
468 * filling out the correct ipaddr and portnum.
469 */
470 switch (clp->lc_req.nr_nam->sa_family) {
471 #ifdef INET
472 case AF_INET:
473 sin = (struct sockaddr_in *)new_clp->lc_req.nr_nam;
474 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
475 sin->sin_addr.s_addr = rin->sin_addr.s_addr;
476 sin->sin_port = rin->sin_port;
477 break;
478 #endif
479 #ifdef INET6
480 case AF_INET6:
481 sin6 = (struct sockaddr_in6 *)new_clp->lc_req.nr_nam;
482 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
483 sin6->sin6_addr = rin6->sin6_addr;
484 sin6->sin6_port = rin6->sin6_port;
485 break;
486 #endif
487 }
488 nfsrv_clientunlock(mlocked);
489 if (old_xprt != NULL)
490 SVC_RELEASE(old_xprt);
491 error = NFSERR_CLIDINUSE;
492 goto out;
493 }
494 }
495
496 if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
497 /*
498 * If the verifier has changed, the client has rebooted
499 * and a new client id is issued. The old state info
500 * can be thrown away once the SetClientID_Confirm or
501 * Create_Session that confirms the clientid occurs.
502 */
503 LIST_REMOVE(clp, lc_hash);
504
505 LIST_NEWHEAD(&old_sess, &clp->lc_session, sess_list);
506
507 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
508 if ((nd->nd_flag & ND_NFSV41) != 0) {
509 confirmp->lval[0] = ++confirm_index;
510 new_clp->lc_confirm.lval[0] = confirmp->lval[0] - 1;
511 } else
512 confirmp->qval = new_clp->lc_confirm.qval =
513 ++confirm_index;
514 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
515 NFSD_VNET(nfsrvboottime);
516 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
517 nfsrv_nextclientindex();
518 new_clp->lc_stateindex = 0;
519 new_clp->lc_statemaxindex = 0;
520 new_clp->lc_prevsess = 0;
521 new_clp->lc_cbref = 0;
522 new_clp->lc_expiry = nfsrv_leaseexpiry();
523
524 /*
525 * Save the state until confirmed.
526 */
527 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
528 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
529 tstp->ls_clp = new_clp;
530 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
531 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
532 tstp->ls_clp = new_clp;
533 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
534 ls_list);
535 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
536 tstp->ls_clp = new_clp;
537 for (i = 0; i < nfsrv_statehashsize; i++) {
538 LIST_NEWHEAD(&new_clp->lc_stateid[i],
539 &clp->lc_stateid[i], ls_hash);
540 LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
541 tstp->ls_clp = new_clp;
542 }
543 LIST_INIT(&new_clp->lc_session);
544 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
545 lc_hash);
546 NFSD_VNET(nfsstatsv1_p)->srvclients++;
547 nfsrv_openpluslock++;
548 nfsrv_clients++;
549 if (!mlocked) {
550 nfsrv_clientunlock(mlocked);
551 NFSLOCKSTATE();
552 }
553
554 /*
555 * Must wait until any outstanding callback on the old clp
556 * completes.
557 */
558 while (clp->lc_cbref) {
559 clp->lc_flags |= LCL_WAKEUPWANTED;
560 (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
561 "nfsd clp", 10 * hz);
562 }
563 NFSUNLOCKSTATE();
564 if (old_xprt != NULL)
565 SVC_RELEASE(old_xprt);
566 /* Get rid of all sessions on this clientid. */
567 LIST_FOREACH_SAFE(sep, &old_sess, sess_list, nsep) {
568 ret = nfsrv_freesession(NULL, sep, NULL, false, NULL);
569 if (ret != 0)
570 printf("nfsrv_setclient: verifier changed free"
571 " session failed=%d\n", ret);
572 }
573
574 nfsrv_zapclient(clp, p);
575 *new_clpp = NULL;
576 goto out;
577 }
578
579 /* For NFSv4.1, mark that we found a confirmed clientid. */
580 if ((nd->nd_flag & ND_NFSV41) != 0) {
581 clientidp->lval[0] = clp->lc_clientid.lval[0];
582 clientidp->lval[1] = clp->lc_clientid.lval[1];
583 confirmp->lval[0] = 0; /* Ignored by client */
584 confirmp->lval[1] = 1;
585 } else {
586 /*
587 * id and verifier match, so update the net address info
588 * and get rid of any existing callback authentication
589 * handle, so a new one will be acquired.
590 */
591 LIST_REMOVE(clp, lc_hash);
592 new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
593 new_clp->lc_expiry = nfsrv_leaseexpiry();
594 confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
595 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
596 clp->lc_clientid.lval[0];
597 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
598 clp->lc_clientid.lval[1];
599 new_clp->lc_delegtime = clp->lc_delegtime;
600 new_clp->lc_stateindex = clp->lc_stateindex;
601 new_clp->lc_statemaxindex = clp->lc_statemaxindex;
602 new_clp->lc_cbref = 0;
603 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
604 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
605 tstp->ls_clp = new_clp;
606 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
607 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
608 tstp->ls_clp = new_clp;
609 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
610 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
611 tstp->ls_clp = new_clp;
612 for (i = 0; i < nfsrv_statehashsize; i++) {
613 LIST_NEWHEAD(&new_clp->lc_stateid[i],
614 &clp->lc_stateid[i], ls_hash);
615 LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
616 tstp->ls_clp = new_clp;
617 }
618 LIST_INIT(&new_clp->lc_session);
619 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
620 lc_hash);
621 NFSD_VNET(nfsstatsv1_p)->srvclients++;
622 nfsrv_openpluslock++;
623 nfsrv_clients++;
624 }
625 if (!mlocked)
626 nfsrv_clientunlock(mlocked);
627
628 if ((nd->nd_flag & ND_NFSV41) == 0) {
629 /*
630 * Must wait until any outstanding callback on the old clp
631 * completes.
632 */
633 if (!mlocked)
634 NFSLOCKSTATE();
635 while (clp->lc_cbref) {
636 clp->lc_flags |= LCL_WAKEUPWANTED;
637 (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
638 "nfsdclp", 10 * hz);
639 }
640 NFSUNLOCKSTATE();
641 if (old_xprt != NULL)
642 SVC_RELEASE(old_xprt);
643 nfsrv_zapclient(clp, p);
644 *new_clpp = NULL;
645 } else {
646 if (mlocked)
647 NFSUNLOCKSTATE();
648 if (old_xprt != NULL)
649 SVC_RELEASE(old_xprt);
650 }
651
652 out:
653 NFSEXITCODE2(error, nd);
654 return (error);
655 }
656
657 /*
658 * Check to see if the client id exists and optionally confirm it.
659 */
660 int
nfsrv_getclient(nfsquad_t clientid,int opflags,struct nfsclient ** clpp,struct nfsdsession * nsep,nfsquad_t confirm,uint32_t cbprogram,struct nfsrv_descript * nd,NFSPROC_T * p)661 nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
662 struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
663 struct nfsrv_descript *nd, NFSPROC_T *p)
664 {
665 struct nfsclient *clp;
666 struct nfsstate *stp;
667 int i;
668 struct nfsclienthashhead *hp;
669 int error = 0, doneok, igotlock;
670 struct nfssessionhash *shp;
671 struct nfsdsession *sep;
672 uint64_t sessid[2];
673 CLIENT *client;
674 SVCXPRT *old_xprt;
675 bool mlocked, sess_replay;
676 static uint64_t next_sess = 0;
677
678 if (clpp)
679 *clpp = NULL;
680 if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
681 opflags != CLOPS_RENEW) && NFSD_VNET(nfsrvboottime) !=
682 clientid.lval[0]) {
683 error = NFSERR_STALECLIENTID;
684 goto out;
685 }
686
687 /*
688 * If called with opflags == CLOPS_RENEW, the State Lock is
689 * already held. Otherwise, we need to get either that or,
690 * for the case of Confirm, lock out the nfsd threads.
691 */
692 client = NULL;
693 old_xprt = NULL;
694 mlocked = true;
695 if (nfsrv_dolocallocks != 0)
696 mlocked = false;
697 if (opflags & CLOPS_CONFIRM) {
698 if (nsep != NULL &&
699 (nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
700 client = (struct __rpc_client *)
701 clnt_bck_create(nd->nd_xprt->xp_socket,
702 cbprogram, NFSV4_CBVERS);
703 if (mlocked) {
704 nfsrv_clientlock(mlocked);
705 } else {
706 NFSLOCKV4ROOTMUTEX();
707 nfsv4_relref(&nfsv4rootfs_lock);
708 do {
709 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1,
710 NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
711 } while (!igotlock);
712 }
713 /*
714 * Create a new sessionid here, since we need to do it where
715 * there is a mutex held to serialize update of next_sess.
716 */
717 if ((nd->nd_flag & ND_NFSV41) != 0) {
718 sessid[0] = ++next_sess;
719 sessid[1] = clientid.qval;
720 }
721 if (!mlocked)
722 NFSUNLOCKV4ROOTMUTEX();
723 } else if (opflags != CLOPS_RENEW) {
724 NFSLOCKSTATE();
725 }
726
727 /* For NFSv4.1, the clp is acquired from the associated session. */
728 if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
729 opflags == CLOPS_RENEW) {
730 clp = NULL;
731 if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
732 shp = NFSSESSIONHASH(nd->nd_sessionid);
733 NFSLOCKSESSION(shp);
734 sep = nfsrv_findsession(nd->nd_sessionid);
735 if (sep != NULL)
736 clp = sep->sess_clp;
737 NFSUNLOCKSESSION(shp);
738 }
739 } else {
740 hp = NFSCLIENTHASH(clientid);
741 LIST_FOREACH(clp, hp, lc_hash) {
742 if (clp->lc_clientid.lval[1] == clientid.lval[1])
743 break;
744 }
745 }
746 if (clp == NULL) {
747 if (opflags & CLOPS_CONFIRM)
748 error = NFSERR_STALECLIENTID;
749 else
750 error = NFSERR_EXPIRED;
751 } else if (clp->lc_flags & LCL_ADMINREVOKED) {
752 /*
753 * If marked admin revoked, just return the error.
754 */
755 error = NFSERR_ADMINREVOKED;
756 }
757 if (error) {
758 if (opflags & CLOPS_CONFIRM) {
759 nfsrv_clientunlock(mlocked);
760 if (client != NULL)
761 CLNT_RELEASE(client);
762 } else if (opflags != CLOPS_RENEW) {
763 NFSUNLOCKSTATE();
764 }
765 goto out;
766 }
767
768 /*
769 * Perform any operations specified by the opflags.
770 */
771 if (opflags & CLOPS_CONFIRM) {
772 sess_replay = false;
773 if ((nd->nd_flag & ND_NFSV41) != 0) {
774 /*
775 * For the case where lc_confirm.lval[0] == confirm.lval[0],
776 * use the new session, but with the previous sessionid.
777 * This is not exactly what the RFC describes, but should
778 * result in the same reply as the previous CreateSession.
779 */
780 if (clp->lc_confirm.lval[0] + 1 == confirm.lval[0]) {
781 clp->lc_confirm.lval[0] = confirm.lval[0];
782 clp->lc_prevsess = sessid[0];
783 } else if (clp->lc_confirm.lval[0] == confirm.lval[0]) {
784 if (clp->lc_prevsess == 0)
785 error = NFSERR_SEQMISORDERED;
786 else
787 sessid[0] = clp->lc_prevsess;
788 sess_replay = true;
789 } else
790 error = NFSERR_SEQMISORDERED;
791 } else if ((nd->nd_flag & ND_NFSV41) == 0 &&
792 clp->lc_confirm.qval != confirm.qval)
793 error = NFSERR_STALECLIENTID;
794 if (error == 0 && nfsrv_notsamecredname(NFSV4OP_CREATESESSION,
795 nd, clp))
796 error = NFSERR_CLIDINUSE;
797
798 if (!error) {
799 if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
800 LCL_NEEDSCONFIRM) {
801 /*
802 * Hang onto the delegations (as old delegations)
803 * for an Open with CLAIM_DELEGATE_PREV unless in
804 * grace, but get rid of the rest of the state.
805 */
806 if (mlocked)
807 nfsrv_cleanclient(clp, p, true, &old_xprt);
808 else
809 nfsrv_cleanclient(clp, p, false, NULL);
810 nfsrv_freedeleglist(&clp->lc_olddeleg);
811 if (nfsrv_checkgrace(nd, clp, 0)) {
812 /* In grace, so just delete delegations */
813 nfsrv_freedeleglist(&clp->lc_deleg);
814 } else {
815 LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
816 stp->ls_flags |= NFSLCK_OLDDELEG;
817 clp->lc_delegtime = NFSD_MONOSEC +
818 nfsrv_lease + NFSRV_LEASEDELTA;
819 LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
820 ls_list);
821 }
822 if ((nd->nd_flag & ND_NFSV41) != 0)
823 clp->lc_program = cbprogram;
824 }
825 clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
826 if (clp->lc_program)
827 clp->lc_flags |= LCL_NEEDSCBNULL;
828 /* For NFSv4.1, link the session onto the client. */
829 if (nsep != NULL) {
830 /* Hold a reference on the xprt for a backchannel. */
831 if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
832 != 0 && !sess_replay) {
833 if (clp->lc_req.nr_client == NULL) {
834 clp->lc_req.nr_client = client;
835 client = NULL;
836 }
837 if (clp->lc_req.nr_client != NULL) {
838 SVC_ACQUIRE(nd->nd_xprt);
839 CLNT_ACQUIRE(clp->lc_req.nr_client);
840 nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
841 /* Disable idle timeout. */
842 nd->nd_xprt->xp_idletimeout = 0;
843 nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
844 } else
845 nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
846 }
847 NFSBCOPY(sessid, nsep->sess_sessionid,
848 NFSX_V4SESSIONID);
849 NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
850 NFSX_V4SESSIONID);
851 if (!sess_replay) {
852 shp = NFSSESSIONHASH(nsep->sess_sessionid);
853 if (!mlocked)
854 NFSLOCKSTATE();
855 NFSLOCKSESSION(shp);
856 LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
857 LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
858 nsep->sess_clp = clp;
859 NFSUNLOCKSESSION(shp);
860 if (!mlocked)
861 NFSUNLOCKSTATE();
862 }
863 }
864 }
865 } else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
866 error = NFSERR_EXPIRED;
867 }
868
869 /*
870 * If called by the Renew Op, we must check the principal.
871 */
872 if (!error && (opflags & CLOPS_RENEWOP)) {
873 if (nfsrv_notsamecredname(0, nd, clp)) {
874 doneok = 0;
875 for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
876 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
877 if ((stp->ls_flags & NFSLCK_OPEN) &&
878 stp->ls_uid == nd->nd_cred->cr_uid) {
879 doneok = 1;
880 break;
881 }
882 }
883 }
884 if (!doneok)
885 error = NFSERR_ACCES;
886 }
887 if (!error && (clp->lc_flags & LCL_CBDOWN))
888 error = NFSERR_CBPATHDOWN;
889 }
890 if ((!error || error == NFSERR_CBPATHDOWN) &&
891 (opflags & CLOPS_RENEW)) {
892 clp->lc_expiry = nfsrv_leaseexpiry();
893 }
894 if (opflags & CLOPS_CONFIRM) {
895 nfsrv_clientunlock(mlocked);
896 if (client != NULL)
897 CLNT_RELEASE(client);
898 if (old_xprt != NULL)
899 SVC_RELEASE(old_xprt);
900 } else if (opflags != CLOPS_RENEW) {
901 NFSUNLOCKSTATE();
902 }
903 if (clpp)
904 *clpp = clp;
905
906 out:
907 NFSEXITCODE2(error, nd);
908 return (error);
909 }
910
911 /*
912 * Perform the NFSv4.1 destroy clientid.
913 */
914 int
nfsrv_destroyclient(struct nfsrv_descript * nd,nfsquad_t clientid,NFSPROC_T * p)915 nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
916 {
917 struct nfsclient *clp;
918 struct nfsclienthashhead *hp;
919 SVCXPRT *old_xprt;
920 int error = 0, i;
921 bool mlocked;
922
923 if (NFSD_VNET(nfsrvboottime) != clientid.lval[0]) {
924 error = NFSERR_STALECLIENTID;
925 goto out;
926 }
927
928 mlocked = true;
929 if (nfsrv_dolocallocks != 0)
930 mlocked = false;
931 /* Lock out other nfsd threads */
932 nfsrv_clientlock(mlocked);
933
934 hp = NFSCLIENTHASH(clientid);
935 LIST_FOREACH(clp, hp, lc_hash) {
936 if (clp->lc_clientid.lval[1] == clientid.lval[1])
937 break;
938 }
939 if (clp == NULL) {
940 nfsrv_clientunlock(mlocked);
941 /* Just return ok, since it is gone. */
942 goto out;
943 }
944
945 /* Check for the SP4_MACH_CRED case. */
946 error = nfsrv_checkmachcred(NFSV4OP_DESTROYCLIENTID, nd, clp);
947 if (error != 0) {
948 nfsrv_clientunlock(mlocked);
949 goto out;
950 }
951
952 /*
953 * Free up all layouts on the clientid. Should the client return the
954 * layouts?
955 */
956 nfsrv_freelayoutlist(clientid);
957
958 /* Scan for state on the clientid. */
959 for (i = 0; i < nfsrv_statehashsize; i++)
960 if (!LIST_EMPTY(&clp->lc_stateid[i])) {
961 nfsrv_clientunlock(mlocked);
962 error = NFSERR_CLIENTIDBUSY;
963 goto out;
964 }
965 if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
966 nfsrv_clientunlock(mlocked);
967 error = NFSERR_CLIENTIDBUSY;
968 goto out;
969 }
970
971 /* Destroy the clientid and return ok. */
972 old_xprt = NULL;
973 if (mlocked)
974 nfsrv_cleanclient(clp, p, true, &old_xprt);
975 else
976 nfsrv_cleanclient(clp, p, false, NULL);
977 nfsrv_freedeleglist(&clp->lc_deleg);
978 nfsrv_freedeleglist(&clp->lc_olddeleg);
979 LIST_REMOVE(clp, lc_hash);
980 nfsrv_clientunlock(mlocked);
981 if (old_xprt != NULL)
982 SVC_RELEASE(old_xprt);
983 nfsrv_zapclient(clp, p);
984 out:
985 NFSEXITCODE2(error, nd);
986 return (error);
987 }
988
989 /*
990 * Called from the new nfssvc syscall to admin revoke a clientid.
991 * Returns 0 for success, error otherwise.
992 */
993 int
nfsrv_adminrevoke(struct nfsd_clid * revokep,NFSPROC_T * p)994 nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
995 {
996 struct nfsclient *clp = NULL;
997 int i, error = 0;
998 int gotit, igotlock;
999
1000 /*
1001 * First, lock out the nfsd so that state won't change while the
1002 * revocation record is being written to the stable storage restart
1003 * file.
1004 */
1005 NFSLOCKV4ROOTMUTEX();
1006 do {
1007 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
1008 NFSV4ROOTLOCKMUTEXPTR, NULL);
1009 } while (!igotlock);
1010 NFSUNLOCKV4ROOTMUTEX();
1011
1012 /*
1013 * Search for a match in the client list.
1014 */
1015 gotit = i = 0;
1016 while (i < nfsrv_clienthashsize && !gotit) {
1017 LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) {
1018 if (revokep->nclid_idlen == clp->lc_idlen &&
1019 !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
1020 gotit = 1;
1021 break;
1022 }
1023 }
1024 i++;
1025 }
1026 if (!gotit) {
1027 NFSLOCKV4ROOTMUTEX();
1028 nfsv4_unlock(&nfsv4rootfs_lock, 0);
1029 NFSUNLOCKV4ROOTMUTEX();
1030 error = EPERM;
1031 goto out;
1032 }
1033
1034 /*
1035 * Now, write out the revocation record
1036 */
1037 nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
1038 nfsrv_backupstable();
1039
1040 /*
1041 * and clear out the state, marking the clientid revoked.
1042 */
1043 clp->lc_flags &= ~LCL_CALLBACKSON;
1044 clp->lc_flags |= LCL_ADMINREVOKED;
1045 nfsrv_cleanclient(clp, p, false, NULL);
1046 nfsrv_freedeleglist(&clp->lc_deleg);
1047 nfsrv_freedeleglist(&clp->lc_olddeleg);
1048 NFSLOCKV4ROOTMUTEX();
1049 nfsv4_unlock(&nfsv4rootfs_lock, 0);
1050 NFSUNLOCKV4ROOTMUTEX();
1051
1052 out:
1053 NFSEXITCODE(error);
1054 return (error);
1055 }
1056
1057 /*
1058 * Dump out stats for all clients. Called from nfssvc(2), that is used
1059 * nfsstatsv1.
1060 */
1061 void
nfsrv_dumpclients(struct nfsd_dumpclients * dumpp,int maxcnt)1062 nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
1063 {
1064 struct nfsclient *clp;
1065 int i = 0, cnt = 0;
1066
1067 /*
1068 * First, get a reference on the nfsv4rootfs_lock so that an
1069 * exclusive lock cannot be acquired while dumping the clients.
1070 */
1071 NFSLOCKV4ROOTMUTEX();
1072 nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
1073 NFSUNLOCKV4ROOTMUTEX();
1074 NFSLOCKSTATE();
1075 /*
1076 * Rattle through the client lists until done.
1077 */
1078 while (i < nfsrv_clienthashsize && cnt < maxcnt) {
1079 clp = LIST_FIRST(&NFSD_VNET(nfsclienthash)[i]);
1080 while (clp != LIST_END(&NFSD_VNET(nfsclienthash)[i]) && cnt <
1081 maxcnt) {
1082 nfsrv_dumpaclient(clp, &dumpp[cnt]);
1083 cnt++;
1084 clp = LIST_NEXT(clp, lc_hash);
1085 }
1086 i++;
1087 }
1088 if (cnt < maxcnt)
1089 dumpp[cnt].ndcl_clid.nclid_idlen = 0;
1090 NFSUNLOCKSTATE();
1091 NFSLOCKV4ROOTMUTEX();
1092 nfsv4_relref(&nfsv4rootfs_lock);
1093 NFSUNLOCKV4ROOTMUTEX();
1094 }
1095
1096 /*
1097 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
1098 */
1099 static void
nfsrv_dumpaclient(struct nfsclient * clp,struct nfsd_dumpclients * dumpp)1100 nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
1101 {
1102 struct nfsstate *stp, *openstp, *lckownstp;
1103 struct nfslock *lop;
1104 sa_family_t af;
1105 #ifdef INET
1106 struct sockaddr_in *rin;
1107 #endif
1108 #ifdef INET6
1109 struct sockaddr_in6 *rin6;
1110 #endif
1111
1112 dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
1113 dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
1114 dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
1115 dumpp->ndcl_flags = clp->lc_flags;
1116 dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
1117 NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
1118 af = clp->lc_req.nr_nam->sa_family;
1119 dumpp->ndcl_addrfam = af;
1120 switch (af) {
1121 #ifdef INET
1122 case AF_INET:
1123 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
1124 dumpp->ndcl_cbaddr.sin_addr = rin->sin_addr;
1125 break;
1126 #endif
1127 #ifdef INET6
1128 case AF_INET6:
1129 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
1130 dumpp->ndcl_cbaddr.sin6_addr = rin6->sin6_addr;
1131 break;
1132 #endif
1133 }
1134
1135 /*
1136 * Now, scan the state lists and total up the opens and locks.
1137 */
1138 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
1139 dumpp->ndcl_nopenowners++;
1140 LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
1141 dumpp->ndcl_nopens++;
1142 LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
1143 dumpp->ndcl_nlockowners++;
1144 LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
1145 dumpp->ndcl_nlocks++;
1146 }
1147 }
1148 }
1149 }
1150
1151 /*
1152 * and the delegation lists.
1153 */
1154 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
1155 dumpp->ndcl_ndelegs++;
1156 }
1157 LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
1158 dumpp->ndcl_nolddelegs++;
1159 }
1160 }
1161
1162 /*
1163 * Dump out lock stats for a file.
1164 */
1165 void
nfsrv_dumplocks(vnode_t vp,struct nfsd_dumplocks * ldumpp,int maxcnt,NFSPROC_T * p)1166 nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
1167 NFSPROC_T *p)
1168 {
1169 struct nfsstate *stp;
1170 struct nfslock *lop;
1171 int cnt = 0;
1172 struct nfslockfile *lfp;
1173 sa_family_t af;
1174 #ifdef INET
1175 struct sockaddr_in *rin;
1176 #endif
1177 #ifdef INET6
1178 struct sockaddr_in6 *rin6;
1179 #endif
1180 int ret;
1181 fhandle_t nfh;
1182
1183 ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
1184 /*
1185 * First, get a reference on the nfsv4rootfs_lock so that an
1186 * exclusive lock on it cannot be acquired while dumping the locks.
1187 */
1188 NFSLOCKV4ROOTMUTEX();
1189 nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
1190 NFSUNLOCKV4ROOTMUTEX();
1191 NFSLOCKSTATE();
1192 if (!ret)
1193 ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
1194 if (ret) {
1195 ldumpp[0].ndlck_clid.nclid_idlen = 0;
1196 NFSUNLOCKSTATE();
1197 NFSLOCKV4ROOTMUTEX();
1198 nfsv4_relref(&nfsv4rootfs_lock);
1199 NFSUNLOCKV4ROOTMUTEX();
1200 return;
1201 }
1202
1203 /*
1204 * For each open share on file, dump it out.
1205 */
1206 stp = LIST_FIRST(&lfp->lf_open);
1207 while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
1208 ldumpp[cnt].ndlck_flags = stp->ls_flags;
1209 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1210 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1211 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1212 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1213 ldumpp[cnt].ndlck_owner.nclid_idlen =
1214 stp->ls_openowner->ls_ownerlen;
1215 NFSBCOPY(stp->ls_openowner->ls_owner,
1216 ldumpp[cnt].ndlck_owner.nclid_id,
1217 stp->ls_openowner->ls_ownerlen);
1218 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1219 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1220 stp->ls_clp->lc_idlen);
1221 af = stp->ls_clp->lc_req.nr_nam->sa_family;
1222 ldumpp[cnt].ndlck_addrfam = af;
1223 switch (af) {
1224 #ifdef INET
1225 case AF_INET:
1226 rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1227 ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1228 break;
1229 #endif
1230 #ifdef INET6
1231 case AF_INET6:
1232 rin6 = (struct sockaddr_in6 *)
1233 stp->ls_clp->lc_req.nr_nam;
1234 ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1235 break;
1236 #endif
1237 }
1238 stp = LIST_NEXT(stp, ls_file);
1239 cnt++;
1240 }
1241
1242 /*
1243 * and all locks.
1244 */
1245 lop = LIST_FIRST(&lfp->lf_lock);
1246 while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
1247 stp = lop->lo_stp;
1248 ldumpp[cnt].ndlck_flags = lop->lo_flags;
1249 ldumpp[cnt].ndlck_first = lop->lo_first;
1250 ldumpp[cnt].ndlck_end = lop->lo_end;
1251 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1252 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1253 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1254 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1255 ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
1256 NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
1257 stp->ls_ownerlen);
1258 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1259 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1260 stp->ls_clp->lc_idlen);
1261 af = stp->ls_clp->lc_req.nr_nam->sa_family;
1262 ldumpp[cnt].ndlck_addrfam = af;
1263 switch (af) {
1264 #ifdef INET
1265 case AF_INET:
1266 rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1267 ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1268 break;
1269 #endif
1270 #ifdef INET6
1271 case AF_INET6:
1272 rin6 = (struct sockaddr_in6 *)
1273 stp->ls_clp->lc_req.nr_nam;
1274 ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1275 break;
1276 #endif
1277 }
1278 lop = LIST_NEXT(lop, lo_lckfile);
1279 cnt++;
1280 }
1281
1282 /*
1283 * and the delegations.
1284 */
1285 stp = LIST_FIRST(&lfp->lf_deleg);
1286 while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
1287 ldumpp[cnt].ndlck_flags = stp->ls_flags;
1288 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1289 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1290 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1291 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1292 ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
1293 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1294 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1295 stp->ls_clp->lc_idlen);
1296 af = stp->ls_clp->lc_req.nr_nam->sa_family;
1297 ldumpp[cnt].ndlck_addrfam = af;
1298 switch (af) {
1299 #ifdef INET
1300 case AF_INET:
1301 rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1302 ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1303 break;
1304 #endif
1305 #ifdef INET6
1306 case AF_INET6:
1307 rin6 = (struct sockaddr_in6 *)
1308 stp->ls_clp->lc_req.nr_nam;
1309 ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1310 break;
1311 #endif
1312 }
1313 stp = LIST_NEXT(stp, ls_file);
1314 cnt++;
1315 }
1316
1317 /*
1318 * If list isn't full, mark end of list by setting the client name
1319 * to zero length.
1320 */
1321 if (cnt < maxcnt)
1322 ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
1323 NFSUNLOCKSTATE();
1324 NFSLOCKV4ROOTMUTEX();
1325 nfsv4_relref(&nfsv4rootfs_lock);
1326 NFSUNLOCKV4ROOTMUTEX();
1327 }
1328
1329 /*
1330 * Server timer routine. It can scan any linked list, so long
1331 * as it holds the spin/mutex lock and there is no exclusive lock on
1332 * nfsv4rootfs_lock.
1333 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
1334 * to do this from a callout, since the spin locks work. For
1335 * Darwin, I'm not sure what will work correctly yet.)
1336 * Should be called once per second.
1337 */
1338 void
nfsrv_servertimer(void * arg __unused)1339 nfsrv_servertimer(void *arg __unused)
1340 {
1341 struct nfsclient *clp, *nclp;
1342 struct nfsstate *stp, *nstp;
1343 int got_ref, i;
1344
1345 /*
1346 * Make sure nfsboottime is set. This is used by V3 as well
1347 * as V4. Note that nfsboottime is not nfsrvboottime, which is
1348 * only used by the V4 server for leases.
1349 */
1350 if (nfsboottime.tv_sec == 0)
1351 NFSSETBOOTTIME(nfsboottime);
1352
1353 /*
1354 * If server hasn't started yet, just return.
1355 */
1356 NFSLOCKSTATE();
1357 if (NFSD_VNET(nfsrv_stablefirst).nsf_eograce == 0) {
1358 NFSUNLOCKSTATE();
1359 return;
1360 }
1361 if (!(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE)) {
1362 if (!(NFSD_VNET(nfsrv_stablefirst).nsf_flags &
1363 NFSNSF_GRACEOVER) &&
1364 NFSD_MONOSEC > NFSD_VNET(nfsrv_stablefirst).nsf_eograce)
1365 NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
1366 (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
1367 NFSUNLOCKSTATE();
1368 return;
1369 }
1370
1371 /*
1372 * Try and get a reference count on the nfsv4rootfs_lock so that
1373 * no nfsd thread can acquire an exclusive lock on it before this
1374 * call is done. If it is already exclusively locked, just return.
1375 */
1376 NFSLOCKV4ROOTMUTEX();
1377 got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
1378 NFSUNLOCKV4ROOTMUTEX();
1379 if (got_ref == 0) {
1380 NFSUNLOCKSTATE();
1381 return;
1382 }
1383
1384 /*
1385 * For each client...
1386 */
1387 for (i = 0; i < nfsrv_clienthashsize; i++) {
1388 clp = LIST_FIRST(&NFSD_VNET(nfsclienthash)[i]);
1389 while (clp != LIST_END(&NFSD_VNET(nfsclienthash)[i])) {
1390 nclp = LIST_NEXT(clp, lc_hash);
1391 if (!(clp->lc_flags & LCL_EXPIREIT)) {
1392 if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
1393 && ((LIST_EMPTY(&clp->lc_deleg)
1394 && LIST_EMPTY(&clp->lc_open)) ||
1395 nfsrv_clients > nfsrv_clienthighwater)) ||
1396 (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
1397 (clp->lc_expiry < NFSD_MONOSEC &&
1398 (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
1399 /*
1400 * Lease has expired several nfsrv_lease times ago:
1401 * PLUS
1402 * - no state is associated with it
1403 * OR
1404 * - above high water mark for number of clients
1405 * (nfsrv_clienthighwater should be large enough
1406 * that this only occurs when clients fail to
1407 * use the same nfs_client_id4.id. Maybe somewhat
1408 * higher that the maximum number of clients that
1409 * will mount this server?)
1410 * OR
1411 * Lease has expired a very long time ago
1412 * OR
1413 * Lease has expired PLUS the number of opens + locks
1414 * has exceeded 90% of capacity
1415 *
1416 * --> Mark for expiry. The actual expiry will be done
1417 * by an nfsd sometime soon.
1418 */
1419 clp->lc_flags |= LCL_EXPIREIT;
1420 NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
1421 (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
1422 } else {
1423 /*
1424 * If there are no opens, increment no open tick cnt
1425 * If time exceeds NFSNOOPEN, mark it to be thrown away
1426 * otherwise, if there is an open, reset no open time
1427 * Hopefully, this will avoid excessive re-creation
1428 * of open owners and subsequent open confirms.
1429 */
1430 stp = LIST_FIRST(&clp->lc_open);
1431 while (stp != LIST_END(&clp->lc_open)) {
1432 nstp = LIST_NEXT(stp, ls_list);
1433 if (LIST_EMPTY(&stp->ls_open)) {
1434 stp->ls_noopens++;
1435 if (stp->ls_noopens > NFSNOOPEN ||
1436 (nfsrv_openpluslock * 2) >
1437 nfsrv_v4statelimit)
1438 NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
1439 NFSNSF_NOOPENS;
1440 } else {
1441 stp->ls_noopens = 0;
1442 }
1443 stp = nstp;
1444 }
1445 }
1446 }
1447 clp = nclp;
1448 }
1449 }
1450 NFSUNLOCKSTATE();
1451 NFSLOCKV4ROOTMUTEX();
1452 nfsv4_relref(&nfsv4rootfs_lock);
1453 NFSUNLOCKV4ROOTMUTEX();
1454 }
1455
1456 /*
1457 * The following set of functions free up the various data structures.
1458 */
1459 /*
1460 * Clear out all open/lock state related to this nfsclient.
1461 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
1462 * there are no other active nfsd threads.
1463 */
1464 void
nfsrv_cleanclient(struct nfsclient * clp,NFSPROC_T * p,bool locked,SVCXPRT ** old_xprtp)1465 nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p, bool locked,
1466 SVCXPRT **old_xprtp)
1467 {
1468 struct nfsstate *stp, *nstp;
1469 struct nfsdsession *sep, *nsep;
1470
1471 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
1472 if (locked)
1473 nfsrv_freeopenowner(stp, 0, p);
1474 else
1475 nfsrv_freeopenowner(stp, 1, p);
1476 }
1477 if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
1478 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
1479 (void)nfsrv_freesession(NULL, sep, NULL, locked,
1480 old_xprtp);
1481 }
1482
1483 /*
1484 * Free a client that has been cleaned. It should also already have been
1485 * removed from the lists.
1486 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
1487 * softclock interrupts are enabled.)
1488 */
1489 void
nfsrv_zapclient(struct nfsclient * clp,NFSPROC_T * p)1490 nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
1491 {
1492
1493 #ifdef notyet
1494 if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
1495 (LCL_GSS | LCL_CALLBACKSON) &&
1496 (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
1497 clp->lc_handlelen > 0) {
1498 clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
1499 clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
1500 (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
1501 NULL, 0, NULL, NULL, NULL, 0, p);
1502 }
1503 #endif
1504 newnfs_disconnect(NULL, &clp->lc_req);
1505 free(clp->lc_req.nr_nam, M_SONAME);
1506 NFSFREEMUTEX(&clp->lc_req.nr_mtx);
1507 free(clp->lc_stateid, M_NFSDCLIENT);
1508 free(clp, M_NFSDCLIENT);
1509 NFSLOCKSTATE();
1510 NFSD_VNET(nfsstatsv1_p)->srvclients--;
1511 nfsrv_openpluslock--;
1512 nfsrv_clients--;
1513 NFSUNLOCKSTATE();
1514 }
1515
1516 /*
1517 * Free a list of delegation state structures.
1518 * (This function will also free all nfslockfile structures that no
1519 * longer have associated state.)
1520 */
1521 void
nfsrv_freedeleglist(struct nfsstatehead * sthp)1522 nfsrv_freedeleglist(struct nfsstatehead *sthp)
1523 {
1524 struct nfsstate *stp, *nstp;
1525
1526 LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1527 nfsrv_freedeleg(stp);
1528 }
1529 LIST_INIT(sthp);
1530 }
1531
1532 /*
1533 * Free up a delegation.
1534 */
1535 static void
nfsrv_freedeleg(struct nfsstate * stp)1536 nfsrv_freedeleg(struct nfsstate *stp)
1537 {
1538 struct nfslockfile *lfp;
1539
1540 LIST_REMOVE(stp, ls_hash);
1541 LIST_REMOVE(stp, ls_list);
1542 LIST_REMOVE(stp, ls_file);
1543 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
1544 nfsrv_writedelegcnt--;
1545 lfp = stp->ls_lfp;
1546 if (LIST_EMPTY(&lfp->lf_open) &&
1547 LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1548 LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1549 lfp->lf_usecount == 0 &&
1550 nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1551 nfsrv_freenfslockfile(lfp);
1552 free(stp, M_NFSDSTATE);
1553 NFSD_VNET(nfsstatsv1_p)->srvdelegates--;
1554 nfsrv_openpluslock--;
1555 nfsrv_delegatecnt--;
1556 }
1557
1558 /*
1559 * This function frees an open owner and all associated opens.
1560 */
1561 static void
nfsrv_freeopenowner(struct nfsstate * stp,int cansleep,NFSPROC_T * p)1562 nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1563 {
1564 struct nfsstate *nstp, *tstp;
1565
1566 LIST_REMOVE(stp, ls_list);
1567 /*
1568 * Now, free all associated opens.
1569 */
1570 nstp = LIST_FIRST(&stp->ls_open);
1571 while (nstp != LIST_END(&stp->ls_open)) {
1572 tstp = nstp;
1573 nstp = LIST_NEXT(nstp, ls_list);
1574 nfsrv_freeopen(tstp, NULL, cansleep, p);
1575 }
1576 if (stp->ls_op)
1577 nfsrvd_derefcache(stp->ls_op);
1578 free(stp, M_NFSDSTATE);
1579 NFSD_VNET(nfsstatsv1_p)->srvopenowners--;
1580 nfsrv_openpluslock--;
1581 }
1582
1583 /*
1584 * This function frees an open (nfsstate open structure) with all associated
1585 * lock_owners and locks. It also frees the nfslockfile structure iff there
1586 * are no other opens on the file.
1587 * Returns 1 if it free'd the nfslockfile, 0 otherwise.
1588 */
1589 static void
nfsrv_freeopen(struct nfsstate * stp,vnode_t vp,int cansleep,NFSPROC_T * p)1590 nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
1591 {
1592 struct nfsstate *nstp, *tstp;
1593 struct nfslockfile *lfp;
1594
1595 LIST_REMOVE(stp, ls_hash);
1596 LIST_REMOVE(stp, ls_list);
1597 LIST_REMOVE(stp, ls_file);
1598
1599 lfp = stp->ls_lfp;
1600 /*
1601 * Now, free all lockowners associated with this open.
1602 * Note that, if vp != NULL, nfsrv_freelockowner() will
1603 * not call nfsrv_freeallnfslocks(), so it needs to be called, below.
1604 */
1605 LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
1606 nfsrv_freelockowner(tstp, vp, cansleep, p);
1607
1608 if (vp != NULL) {
1609 KASSERT(cansleep != 0, ("nfsrv_freeopen: cansleep == 0"));
1610 mtx_assert(NFSSTATEMUTEXPTR, MA_OWNED);
1611 /*
1612 * Only called with vp != NULL for Close when
1613 * vfs.nfsd.enable_locallocks != 0.
1614 * Lock the lfp so that it will not go away and do the
1615 * nfsrv_freeallnfslocks() call that was not done by
1616 * nfsrv_freelockowner().
1617 */
1618 nfsrv_locklf(lfp);
1619 NFSUNLOCKSTATE();
1620 NFSVOPUNLOCK(vp);
1621 nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1622 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
1623 NFSLOCKSTATE();
1624 nfsrv_unlocklf(lfp);
1625 }
1626
1627 /*
1628 * The nfslockfile is freed here if there are no locks
1629 * associated with the open.
1630 * If there are locks associated with the open, the
1631 * nfslockfile structure can be freed via nfsrv_freelockowner().
1632 */
1633 if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
1634 LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
1635 LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1636 lfp->lf_usecount == 0 &&
1637 nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1638 nfsrv_freenfslockfile(lfp);
1639 free(stp, M_NFSDSTATE);
1640 NFSD_VNET(nfsstatsv1_p)->srvopens--;
1641 nfsrv_openpluslock--;
1642 }
1643
1644 /*
1645 * Frees a lockowner and all associated locks.
1646 */
1647 static void
nfsrv_freelockowner(struct nfsstate * stp,vnode_t vp,int cansleep,NFSPROC_T * p)1648 nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1649 NFSPROC_T *p)
1650 {
1651
1652 LIST_REMOVE(stp, ls_hash);
1653 LIST_REMOVE(stp, ls_list);
1654 if (vp == NULL)
1655 nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1656 if (stp->ls_op)
1657 nfsrvd_derefcache(stp->ls_op);
1658 free(stp, M_NFSDSTATE);
1659 NFSD_VNET(nfsstatsv1_p)->srvlockowners--;
1660 nfsrv_openpluslock--;
1661 }
1662
1663 /*
1664 * Free all the nfs locks on a lockowner.
1665 */
1666 static void
nfsrv_freeallnfslocks(struct nfsstate * stp,vnode_t vp,int cansleep,NFSPROC_T * p)1667 nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1668 NFSPROC_T *p)
1669 {
1670 struct nfslock *lop, *nlop;
1671 struct nfsrollback *rlp, *nrlp;
1672 struct nfslockfile *lfp = NULL;
1673 int gottvp = 0;
1674 vnode_t tvp = NULL;
1675 uint64_t first, end;
1676
1677 if (vp != NULL)
1678 ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
1679 lop = LIST_FIRST(&stp->ls_lock);
1680 while (lop != LIST_END(&stp->ls_lock)) {
1681 nlop = LIST_NEXT(lop, lo_lckowner);
1682 /*
1683 * Since all locks should be for the same file, lfp should
1684 * not change.
1685 */
1686 if (lfp == NULL)
1687 lfp = lop->lo_lfp;
1688 else if (lfp != lop->lo_lfp)
1689 panic("allnfslocks");
1690 /*
1691 * If vp is NULL and cansleep != 0, a vnode must be acquired
1692 * from the file handle. This only occurs when called from
1693 * nfsrv_cleanclient().
1694 */
1695 if (gottvp == 0) {
1696 if (nfsrv_dolocallocks == 0)
1697 tvp = NULL;
1698 else if (vp == NULL && cansleep != 0) {
1699 tvp = nfsvno_getvp(&lfp->lf_fh);
1700 if (tvp != NULL)
1701 NFSVOPUNLOCK(tvp);
1702 } else
1703 tvp = vp;
1704 gottvp = 1;
1705 }
1706
1707 if (tvp != NULL) {
1708 if (cansleep == 0)
1709 panic("allnfs2");
1710 first = lop->lo_first;
1711 end = lop->lo_end;
1712 nfsrv_freenfslock(lop);
1713 nfsrv_localunlock(tvp, lfp, first, end, p);
1714 LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1715 nrlp)
1716 free(rlp, M_NFSDROLLBACK);
1717 LIST_INIT(&lfp->lf_rollback);
1718 } else
1719 nfsrv_freenfslock(lop);
1720 lop = nlop;
1721 }
1722 if (vp == NULL && tvp != NULL)
1723 vrele(tvp);
1724 }
1725
1726 /*
1727 * Free an nfslock structure.
1728 */
1729 static void
nfsrv_freenfslock(struct nfslock * lop)1730 nfsrv_freenfslock(struct nfslock *lop)
1731 {
1732
1733 if (lop->lo_lckfile.le_prev != NULL) {
1734 LIST_REMOVE(lop, lo_lckfile);
1735 NFSD_VNET(nfsstatsv1_p)->srvlocks--;
1736 nfsrv_openpluslock--;
1737 }
1738 LIST_REMOVE(lop, lo_lckowner);
1739 free(lop, M_NFSDLOCK);
1740 }
1741
1742 /*
1743 * This function frees an nfslockfile structure.
1744 */
1745 static void
nfsrv_freenfslockfile(struct nfslockfile * lfp)1746 nfsrv_freenfslockfile(struct nfslockfile *lfp)
1747 {
1748
1749 LIST_REMOVE(lfp, lf_hash);
1750 free(lfp, M_NFSDLOCKFILE);
1751 }
1752
1753 /*
1754 * This function looks up an nfsstate structure via stateid.
1755 */
1756 static int
nfsrv_getstate(struct nfsclient * clp,nfsv4stateid_t * stateidp,__unused u_int32_t flags,struct nfsstate ** stpp)1757 nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1758 struct nfsstate **stpp)
1759 {
1760 struct nfsstate *stp;
1761 struct nfsstatehead *hp;
1762 int error = 0;
1763
1764 *stpp = NULL;
1765 hp = NFSSTATEHASH(clp, *stateidp);
1766 LIST_FOREACH(stp, hp, ls_hash) {
1767 if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1768 NFSX_STATEIDOTHER))
1769 break;
1770 }
1771
1772 /*
1773 * If no state id in list, return NFSERR_BADSTATEID.
1774 */
1775 if (stp == LIST_END(hp)) {
1776 error = NFSERR_BADSTATEID;
1777 goto out;
1778 }
1779 *stpp = stp;
1780
1781 out:
1782 NFSEXITCODE(error);
1783 return (error);
1784 }
1785
1786 /*
1787 * This function gets an nfsstate structure via owner string.
1788 */
1789 static void
nfsrv_getowner(struct nfsstatehead * hp,struct nfsstate * new_stp,struct nfsstate ** stpp)1790 nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1791 struct nfsstate **stpp)
1792 {
1793 struct nfsstate *stp;
1794
1795 *stpp = NULL;
1796 LIST_FOREACH(stp, hp, ls_list) {
1797 if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1798 !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1799 *stpp = stp;
1800 return;
1801 }
1802 }
1803 }
1804
1805 /*
1806 * Lock control function called to update lock status.
1807 * Returns 0 upon success, -1 if there is no lock and the flags indicate
1808 * that one isn't to be created and an NFSERR_xxx for other errors.
1809 * The structures new_stp and new_lop are passed in as pointers that should
1810 * be set to NULL if the structure is used and shouldn't be free'd.
1811 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
1812 * never used and can safely be allocated on the stack. For all other
1813 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
1814 * in case they are used.
1815 */
1816 int
nfsrv_lockctrl(vnode_t vp,struct nfsstate ** new_stpp,struct nfslock ** new_lopp,struct nfslockconflict * cfp,nfsquad_t clientid,nfsv4stateid_t * stateidp,__unused struct nfsexstuff * exp,struct nfsrv_descript * nd,NFSPROC_T * p)1817 nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
1818 struct nfslock **new_lopp, struct nfslockconflict *cfp,
1819 nfsquad_t clientid, nfsv4stateid_t *stateidp,
1820 __unused struct nfsexstuff *exp,
1821 struct nfsrv_descript *nd, NFSPROC_T *p)
1822 {
1823 struct nfslock *lop;
1824 struct nfsstate *new_stp = *new_stpp;
1825 struct nfslock *new_lop = *new_lopp;
1826 struct nfsstate *tstp, *mystp, *nstp;
1827 int specialid = 0;
1828 struct nfslockfile *lfp;
1829 struct nfslock *other_lop = NULL;
1830 struct nfsstate *stp, *lckstp = NULL;
1831 struct nfsclient *clp = NULL;
1832 u_int32_t bits;
1833 int error = 0, haslock = 0, ret, reterr;
1834 int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
1835 fhandle_t nfh;
1836 uint64_t first, end;
1837 uint32_t lock_flags;
1838
1839 if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1840 /*
1841 * Note the special cases of "all 1s" or "all 0s" stateids and
1842 * let reads with all 1s go ahead.
1843 */
1844 if (new_stp->ls_stateid.seqid == 0x0 &&
1845 new_stp->ls_stateid.other[0] == 0x0 &&
1846 new_stp->ls_stateid.other[1] == 0x0 &&
1847 new_stp->ls_stateid.other[2] == 0x0)
1848 specialid = 1;
1849 else if (new_stp->ls_stateid.seqid == 0xffffffff &&
1850 new_stp->ls_stateid.other[0] == 0xffffffff &&
1851 new_stp->ls_stateid.other[1] == 0xffffffff &&
1852 new_stp->ls_stateid.other[2] == 0xffffffff)
1853 specialid = 2;
1854 }
1855
1856 /*
1857 * Check for restart conditions (client and server).
1858 */
1859 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1860 &new_stp->ls_stateid, specialid);
1861 if (error)
1862 goto out;
1863
1864 /*
1865 * Check for state resource limit exceeded.
1866 */
1867 if ((new_stp->ls_flags & NFSLCK_LOCK) &&
1868 nfsrv_openpluslock > nfsrv_v4statelimit) {
1869 error = NFSERR_RESOURCE;
1870 goto out;
1871 }
1872
1873 /*
1874 * For the lock case, get another nfslock structure,
1875 * just in case we need it.
1876 * Malloc now, before we start sifting through the linked lists,
1877 * in case we have to wait for memory.
1878 */
1879 tryagain:
1880 if (new_stp->ls_flags & NFSLCK_LOCK)
1881 other_lop = malloc(sizeof (struct nfslock),
1882 M_NFSDLOCK, M_WAITOK);
1883 filestruct_locked = 0;
1884 reterr = 0;
1885 lfp = NULL;
1886
1887 /*
1888 * Get the lockfile structure for CFH now, so we can do a sanity
1889 * check against the stateid, before incrementing the seqid#, since
1890 * we want to return NFSERR_BADSTATEID on failure and the seqid#
1891 * shouldn't be incremented for this case.
1892 * If nfsrv_getlockfile() returns -1, it means "not found", which
1893 * will be handled later.
1894 * If we are doing Lock/LockU and local locking is enabled, sleep
1895 * lock the nfslockfile structure.
1896 */
1897 getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
1898 NFSLOCKSTATE();
1899 if (getlckret == 0) {
1900 if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
1901 nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
1902 getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1903 &lfp, &nfh, 1);
1904 if (getlckret == 0)
1905 filestruct_locked = 1;
1906 } else
1907 getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1908 &lfp, &nfh, 0);
1909 }
1910 if (getlckret != 0 && getlckret != -1)
1911 reterr = getlckret;
1912
1913 if (filestruct_locked != 0) {
1914 LIST_INIT(&lfp->lf_rollback);
1915 if ((new_stp->ls_flags & NFSLCK_LOCK)) {
1916 /*
1917 * For local locking, do the advisory locking now, so
1918 * that any conflict can be detected. A failure later
1919 * can be rolled back locally. If an error is returned,
1920 * struct nfslockfile has been unlocked and any local
1921 * locking rolled back.
1922 */
1923 NFSUNLOCKSTATE();
1924 if (vnode_unlocked == 0) {
1925 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
1926 vnode_unlocked = 1;
1927 NFSVOPUNLOCK(vp);
1928 }
1929 reterr = nfsrv_locallock(vp, lfp,
1930 (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
1931 new_lop->lo_first, new_lop->lo_end, cfp, p);
1932 NFSLOCKSTATE();
1933 }
1934 }
1935
1936 if (specialid == 0) {
1937 if (new_stp->ls_flags & NFSLCK_TEST) {
1938 /*
1939 * RFC 3530 does not list LockT as an op that renews a
1940 * lease, but the consensus seems to be that it is ok
1941 * for a server to do so.
1942 */
1943 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1944 (nfsquad_t)((u_quad_t)0), 0, nd, p);
1945
1946 /*
1947 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
1948 * error returns for LockT, just go ahead and test for a lock,
1949 * since there are no locks for this client, but other locks
1950 * can conflict. (ie. same client will always be false)
1951 */
1952 if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
1953 error = 0;
1954 lckstp = new_stp;
1955 } else {
1956 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1957 (nfsquad_t)((u_quad_t)0), 0, nd, p);
1958 if (error == 0)
1959 /*
1960 * Look up the stateid
1961 */
1962 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
1963 new_stp->ls_flags, &stp);
1964 /*
1965 * do some sanity checks for an unconfirmed open or a
1966 * stateid that refers to the wrong file, for an open stateid
1967 */
1968 if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
1969 ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
1970 (getlckret == 0 && stp->ls_lfp != lfp))){
1971 /*
1972 * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID
1973 * The only exception is using SETATTR with SIZE.
1974 * */
1975 if ((new_stp->ls_flags &
1976 (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR)
1977 error = NFSERR_BADSTATEID;
1978 }
1979
1980 if (error == 0 &&
1981 (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
1982 getlckret == 0 && stp->ls_lfp != lfp)
1983 error = NFSERR_BADSTATEID;
1984
1985 /*
1986 * If the lockowner stateid doesn't refer to the same file,
1987 * I believe that is considered ok, since some clients will
1988 * only create a single lockowner and use that for all locks
1989 * on all files.
1990 * For now, log it as a diagnostic, instead of considering it
1991 * a BadStateid.
1992 */
1993 if (error == 0 && (stp->ls_flags &
1994 (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
1995 getlckret == 0 && stp->ls_lfp != lfp) {
1996 #ifdef DIAGNOSTIC
1997 printf("Got a lock statid for different file open\n");
1998 #endif
1999 /*
2000 error = NFSERR_BADSTATEID;
2001 */
2002 }
2003
2004 if (error == 0) {
2005 if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
2006 /*
2007 * If haslock set, we've already checked the seqid.
2008 */
2009 if (!haslock) {
2010 if (stp->ls_flags & NFSLCK_OPEN)
2011 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
2012 stp->ls_openowner, new_stp->ls_op);
2013 else
2014 error = NFSERR_BADSTATEID;
2015 }
2016 if (!error)
2017 nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
2018 if (lckstp) {
2019 /*
2020 * For NFSv4.1 and NFSv4.2 allow an
2021 * open_to_lock_owner when the lock_owner already
2022 * exists. Just clear NFSLCK_OPENTOLOCK so that
2023 * a new lock_owner will not be created.
2024 * RFC7530 states that the error for NFSv4.0
2025 * is NFS4ERR_BAD_SEQID.
2026 */
2027 if ((nd->nd_flag & ND_NFSV41) != 0)
2028 new_stp->ls_flags &= ~NFSLCK_OPENTOLOCK;
2029 else
2030 error = NFSERR_BADSEQID;
2031 } else
2032 lckstp = new_stp;
2033 } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
2034 /*
2035 * If haslock set, ditto above.
2036 */
2037 if (!haslock) {
2038 if (stp->ls_flags & NFSLCK_OPEN)
2039 error = NFSERR_BADSTATEID;
2040 else
2041 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
2042 stp, new_stp->ls_op);
2043 }
2044 lckstp = stp;
2045 } else {
2046 lckstp = stp;
2047 }
2048 }
2049 /*
2050 * If the seqid part of the stateid isn't the same, return
2051 * NFSERR_OLDSTATEID for cases other than I/O Ops.
2052 * For I/O Ops, only return NFSERR_OLDSTATEID if
2053 * nfsrv_returnoldstateid is set. (The consensus on the email
2054 * list was that most clients would prefer to not receive
2055 * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
2056 * is what will happen, so I use the nfsrv_returnoldstateid to
2057 * allow for either server configuration.)
2058 */
2059 if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
2060 (((nd->nd_flag & ND_NFSV41) == 0 &&
2061 (!(new_stp->ls_flags & NFSLCK_CHECK) ||
2062 nfsrv_returnoldstateid)) ||
2063 ((nd->nd_flag & ND_NFSV41) != 0 &&
2064 new_stp->ls_stateid.seqid != 0)))
2065 error = NFSERR_OLDSTATEID;
2066 }
2067 }
2068
2069 /*
2070 * Now we can check for grace.
2071 */
2072 if (!error)
2073 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2074 if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2075 nfsrv_checkstable(clp))
2076 error = NFSERR_NOGRACE;
2077 /*
2078 * If we successfully Reclaimed state, note that.
2079 */
2080 if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
2081 nfsrv_markstable(clp);
2082
2083 /*
2084 * At this point, either error == NFSERR_BADSTATEID or the
2085 * seqid# has been updated, so we can return any error.
2086 * If error == 0, there may be an error in:
2087 * nd_repstat - Set by the calling function.
2088 * reterr - Set above, if getting the nfslockfile structure
2089 * or acquiring the local lock failed.
2090 * (If both of these are set, nd_repstat should probably be
2091 * returned, since that error was detected before this
2092 * function call.)
2093 */
2094 if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
2095 if (error == 0) {
2096 if (nd->nd_repstat != 0)
2097 error = nd->nd_repstat;
2098 else
2099 error = reterr;
2100 }
2101 if (filestruct_locked != 0) {
2102 /* Roll back local locks. */
2103 NFSUNLOCKSTATE();
2104 if (vnode_unlocked == 0) {
2105 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
2106 vnode_unlocked = 1;
2107 NFSVOPUNLOCK(vp);
2108 }
2109 nfsrv_locallock_rollback(vp, lfp, p);
2110 NFSLOCKSTATE();
2111 nfsrv_unlocklf(lfp);
2112 }
2113 NFSUNLOCKSTATE();
2114 goto out;
2115 }
2116
2117 /*
2118 * Check the nfsrv_getlockfile return.
2119 * Returned -1 if no structure found.
2120 */
2121 if (getlckret == -1) {
2122 error = NFSERR_EXPIRED;
2123 /*
2124 * Called from lockt, so no lock is OK.
2125 */
2126 if (new_stp->ls_flags & NFSLCK_TEST) {
2127 error = 0;
2128 } else if (new_stp->ls_flags &
2129 (NFSLCK_CHECK | NFSLCK_SETATTR)) {
2130 /*
2131 * Called to check for a lock, OK if the stateid is all
2132 * 1s or all 0s, but there should be an nfsstate
2133 * otherwise.
2134 * (ie. If there is no open, I'll assume no share
2135 * deny bits.)
2136 */
2137 if (specialid)
2138 error = 0;
2139 else
2140 error = NFSERR_BADSTATEID;
2141 }
2142 NFSUNLOCKSTATE();
2143 goto out;
2144 }
2145
2146 /*
2147 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
2148 * For NFSLCK_CHECK, allow a read if write access is granted,
2149 * but check for a deny. For NFSLCK_LOCK, require correct access,
2150 * which implies a conflicting deny can't exist.
2151 */
2152 if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
2153 /*
2154 * Four kinds of state id:
2155 * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
2156 * - stateid for an open
2157 * - stateid for a delegation
2158 * - stateid for a lock owner
2159 */
2160 if (!specialid) {
2161 if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
2162 delegation = 1;
2163 mystp = stp;
2164 nfsrv_delaydelegtimeout(stp);
2165 } else if (stp->ls_flags & NFSLCK_OPEN) {
2166 mystp = stp;
2167 } else {
2168 mystp = stp->ls_openstp;
2169 }
2170 /*
2171 * If locking or checking, require correct access
2172 * bit set.
2173 */
2174 if (((new_stp->ls_flags & NFSLCK_LOCK) &&
2175 !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
2176 mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
2177 ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
2178 (NFSLCK_CHECK | NFSLCK_READACCESS) &&
2179 !(mystp->ls_flags & NFSLCK_READACCESS) &&
2180 nfsrv_allowreadforwriteopen == 0) ||
2181 ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
2182 (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
2183 !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
2184 if (filestruct_locked != 0) {
2185 /* Roll back local locks. */
2186 NFSUNLOCKSTATE();
2187 if (vnode_unlocked == 0) {
2188 ASSERT_VOP_ELOCKED(vp,
2189 "nfsrv_lockctrl3");
2190 vnode_unlocked = 1;
2191 NFSVOPUNLOCK(vp);
2192 }
2193 nfsrv_locallock_rollback(vp, lfp, p);
2194 NFSLOCKSTATE();
2195 nfsrv_unlocklf(lfp);
2196 }
2197 NFSUNLOCKSTATE();
2198 error = NFSERR_OPENMODE;
2199 goto out;
2200 }
2201 } else
2202 mystp = NULL;
2203 if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
2204 /*
2205 * Check for a conflicting deny bit.
2206 */
2207 LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
2208 if (tstp != mystp) {
2209 bits = tstp->ls_flags;
2210 bits >>= NFSLCK_SHIFT;
2211 if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
2212 KASSERT(vnode_unlocked == 0,
2213 ("nfsrv_lockctrl: vnode unlocked1"));
2214 ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
2215 vp, p);
2216 if (ret == 1) {
2217 /*
2218 * nfsrv_clientconflict unlocks state
2219 * when it returns non-zero.
2220 */
2221 lckstp = NULL;
2222 goto tryagain;
2223 }
2224 if (ret == 0)
2225 NFSUNLOCKSTATE();
2226 if (ret == 2)
2227 error = NFSERR_PERM;
2228 else
2229 error = NFSERR_OPENMODE;
2230 goto out;
2231 }
2232 }
2233 }
2234
2235 /* We're outta here */
2236 NFSUNLOCKSTATE();
2237 goto out;
2238 }
2239 }
2240
2241 /*
2242 * For setattr, just get rid of all the Delegations for other clients.
2243 */
2244 if (new_stp->ls_flags & NFSLCK_SETATTR) {
2245 KASSERT(vnode_unlocked == 0,
2246 ("nfsrv_lockctrl: vnode unlocked2"));
2247 ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
2248 if (ret) {
2249 /*
2250 * nfsrv_cleandeleg() unlocks state when it
2251 * returns non-zero.
2252 */
2253 if (ret == -1) {
2254 lckstp = NULL;
2255 goto tryagain;
2256 }
2257 error = ret;
2258 goto out;
2259 }
2260 if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
2261 (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
2262 LIST_EMPTY(&lfp->lf_deleg))) {
2263 NFSUNLOCKSTATE();
2264 goto out;
2265 }
2266 }
2267
2268 /*
2269 * Check for a conflicting delegation. If one is found, call
2270 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2271 * been set yet, it will get the lock. Otherwise, it will recall
2272 * the delegation. Then, we try try again...
2273 * I currently believe the conflict algorithm to be:
2274 * For Lock Ops (Lock/LockT/LockU)
2275 * - there is a conflict iff a different client has a write delegation
2276 * For Reading (Read Op)
2277 * - there is a conflict iff a different client has a write delegation
2278 * (the specialids are always a different client)
2279 * For Writing (Write/Setattr of size)
2280 * - there is a conflict if a different client has any delegation
2281 * - there is a conflict if the same client has a read delegation
2282 * (I don't understand why this isn't allowed, but that seems to be
2283 * the current consensus?)
2284 */
2285 tstp = LIST_FIRST(&lfp->lf_deleg);
2286 while (tstp != LIST_END(&lfp->lf_deleg)) {
2287 nstp = LIST_NEXT(tstp, ls_file);
2288 if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
2289 ((new_stp->ls_flags & NFSLCK_CHECK) &&
2290 (new_lop->lo_flags & NFSLCK_READ))) &&
2291 clp != tstp->ls_clp &&
2292 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
2293 ((new_stp->ls_flags & NFSLCK_CHECK) &&
2294 (new_lop->lo_flags & NFSLCK_WRITE) &&
2295 (clp != tstp->ls_clp ||
2296 (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
2297 ret = 0;
2298 if (filestruct_locked != 0) {
2299 /* Roll back local locks. */
2300 NFSUNLOCKSTATE();
2301 if (vnode_unlocked == 0) {
2302 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
2303 NFSVOPUNLOCK(vp);
2304 }
2305 nfsrv_locallock_rollback(vp, lfp, p);
2306 NFSLOCKSTATE();
2307 nfsrv_unlocklf(lfp);
2308 NFSUNLOCKSTATE();
2309 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2310 vnode_unlocked = 0;
2311 if (VN_IS_DOOMED(vp))
2312 ret = NFSERR_SERVERFAULT;
2313 NFSLOCKSTATE();
2314 }
2315 if (ret == 0)
2316 ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
2317 if (ret) {
2318 /*
2319 * nfsrv_delegconflict unlocks state when it
2320 * returns non-zero, which it always does.
2321 */
2322 if (other_lop) {
2323 free(other_lop, M_NFSDLOCK);
2324 other_lop = NULL;
2325 }
2326 if (ret == -1) {
2327 lckstp = NULL;
2328 goto tryagain;
2329 }
2330 error = ret;
2331 goto out;
2332 }
2333 /* Never gets here. */
2334 }
2335 tstp = nstp;
2336 }
2337
2338 /*
2339 * Handle the unlock case by calling nfsrv_updatelock().
2340 * (Should I have done some access checking above for unlock? For now,
2341 * just let it happen.)
2342 */
2343 if (new_stp->ls_flags & NFSLCK_UNLOCK) {
2344 first = new_lop->lo_first;
2345 end = new_lop->lo_end;
2346 nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
2347 stateidp->seqid = ++(stp->ls_stateid.seqid);
2348 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2349 stateidp->seqid = stp->ls_stateid.seqid = 1;
2350 stateidp->other[0] = stp->ls_stateid.other[0];
2351 stateidp->other[1] = stp->ls_stateid.other[1];
2352 stateidp->other[2] = stp->ls_stateid.other[2];
2353 if (filestruct_locked != 0) {
2354 NFSUNLOCKSTATE();
2355 if (vnode_unlocked == 0) {
2356 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
2357 vnode_unlocked = 1;
2358 NFSVOPUNLOCK(vp);
2359 }
2360 /* Update the local locks. */
2361 nfsrv_localunlock(vp, lfp, first, end, p);
2362 NFSLOCKSTATE();
2363 nfsrv_unlocklf(lfp);
2364 }
2365 NFSUNLOCKSTATE();
2366 goto out;
2367 }
2368
2369 /*
2370 * Search for a conflicting lock. A lock conflicts if:
2371 * - the lock range overlaps and
2372 * - at least one lock is a write lock and
2373 * - it is not owned by the same lock owner
2374 */
2375 if (!delegation) {
2376 LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
2377 if (new_lop->lo_end > lop->lo_first &&
2378 new_lop->lo_first < lop->lo_end &&
2379 (new_lop->lo_flags == NFSLCK_WRITE ||
2380 lop->lo_flags == NFSLCK_WRITE) &&
2381 lckstp != lop->lo_stp &&
2382 (clp != lop->lo_stp->ls_clp ||
2383 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
2384 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
2385 lckstp->ls_ownerlen))) {
2386 if (other_lop) {
2387 free(other_lop, M_NFSDLOCK);
2388 other_lop = NULL;
2389 }
2390 if (vnode_unlocked != 0)
2391 ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2392 NULL, p);
2393 else
2394 ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2395 vp, p);
2396 if (ret == 1) {
2397 if (filestruct_locked != 0) {
2398 if (vnode_unlocked == 0) {
2399 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
2400 NFSVOPUNLOCK(vp);
2401 }
2402 /* Roll back local locks. */
2403 nfsrv_locallock_rollback(vp, lfp, p);
2404 NFSLOCKSTATE();
2405 nfsrv_unlocklf(lfp);
2406 NFSUNLOCKSTATE();
2407 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2408 vnode_unlocked = 0;
2409 if (VN_IS_DOOMED(vp)) {
2410 error = NFSERR_SERVERFAULT;
2411 goto out;
2412 }
2413 }
2414 /*
2415 * nfsrv_clientconflict() unlocks state when it
2416 * returns non-zero.
2417 */
2418 lckstp = NULL;
2419 goto tryagain;
2420 }
2421 /*
2422 * Found a conflicting lock, so record the conflict and
2423 * return the error.
2424 */
2425 if (cfp != NULL && ret == 0) {
2426 cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
2427 cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
2428 cfp->cl_first = lop->lo_first;
2429 cfp->cl_end = lop->lo_end;
2430 cfp->cl_flags = lop->lo_flags;
2431 cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
2432 NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
2433 cfp->cl_ownerlen);
2434 }
2435 if (ret == 2)
2436 error = NFSERR_PERM;
2437 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2438 error = NFSERR_RECLAIMCONFLICT;
2439 else if (new_stp->ls_flags & NFSLCK_CHECK)
2440 error = NFSERR_LOCKED;
2441 else
2442 error = NFSERR_DENIED;
2443 if (filestruct_locked != 0 && ret == 0) {
2444 /* Roll back local locks. */
2445 NFSUNLOCKSTATE();
2446 if (vnode_unlocked == 0) {
2447 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
2448 vnode_unlocked = 1;
2449 NFSVOPUNLOCK(vp);
2450 }
2451 nfsrv_locallock_rollback(vp, lfp, p);
2452 NFSLOCKSTATE();
2453 nfsrv_unlocklf(lfp);
2454 }
2455 if (ret == 0)
2456 NFSUNLOCKSTATE();
2457 goto out;
2458 }
2459 }
2460 }
2461
2462 /*
2463 * We only get here if there was no lock that conflicted.
2464 */
2465 if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
2466 NFSUNLOCKSTATE();
2467 goto out;
2468 }
2469
2470 /*
2471 * We only get here when we are creating or modifying a lock.
2472 * There are two variants:
2473 * - exist_lock_owner where lock_owner exists
2474 * - open_to_lock_owner with new lock_owner
2475 */
2476 first = new_lop->lo_first;
2477 end = new_lop->lo_end;
2478 lock_flags = new_lop->lo_flags;
2479 if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
2480 nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
2481 stateidp->seqid = ++(lckstp->ls_stateid.seqid);
2482 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2483 stateidp->seqid = lckstp->ls_stateid.seqid = 1;
2484 stateidp->other[0] = lckstp->ls_stateid.other[0];
2485 stateidp->other[1] = lckstp->ls_stateid.other[1];
2486 stateidp->other[2] = lckstp->ls_stateid.other[2];
2487 } else {
2488 /*
2489 * The new open_to_lock_owner case.
2490 * Link the new nfsstate into the lists.
2491 */
2492 new_stp->ls_seq = new_stp->ls_opentolockseq;
2493 nfsrvd_refcache(new_stp->ls_op);
2494 stateidp->seqid = new_stp->ls_stateid.seqid = 1;
2495 stateidp->other[0] = new_stp->ls_stateid.other[0] =
2496 clp->lc_clientid.lval[0];
2497 stateidp->other[1] = new_stp->ls_stateid.other[1] =
2498 clp->lc_clientid.lval[1];
2499 stateidp->other[2] = new_stp->ls_stateid.other[2] =
2500 nfsrv_nextstateindex(clp);
2501 new_stp->ls_clp = clp;
2502 LIST_INIT(&new_stp->ls_lock);
2503 new_stp->ls_openstp = stp;
2504 new_stp->ls_lfp = lfp;
2505 nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
2506 lfp);
2507 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
2508 new_stp, ls_hash);
2509 LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
2510 *new_lopp = NULL;
2511 *new_stpp = NULL;
2512 NFSD_VNET(nfsstatsv1_p)->srvlockowners++;
2513 nfsrv_openpluslock++;
2514 }
2515 if (filestruct_locked != 0) {
2516 NFSUNLOCKSTATE();
2517 nfsrv_locallock_commit(lfp, lock_flags, first, end);
2518 NFSLOCKSTATE();
2519 nfsrv_unlocklf(lfp);
2520 }
2521 NFSUNLOCKSTATE();
2522
2523 out:
2524 if (haslock) {
2525 NFSLOCKV4ROOTMUTEX();
2526 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2527 NFSUNLOCKV4ROOTMUTEX();
2528 }
2529 if (vnode_unlocked != 0) {
2530 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2531 if (error == 0 && VN_IS_DOOMED(vp))
2532 error = NFSERR_SERVERFAULT;
2533 }
2534 if (other_lop)
2535 free(other_lop, M_NFSDLOCK);
2536 NFSEXITCODE2(error, nd);
2537 return (error);
2538 }
2539
2540 /*
2541 * Check for state errors for Open.
2542 * repstat is passed back out as an error if more critical errors
2543 * are not detected.
 *
 * Runs, in order, the restart check, the state resource limit check, the
 * client lookup, the open owner seqid check, the grace check and then the
 * per-file share and delegation conflict checks. The code in this function
 * does not itself link any nfsstate into a list.
 *
 * Arguments, as used by the code below:
 * clientid - handed to nfsrv_checkrestart() and nfsrv_getclient()
 * stateidp - only read for NFSLCK_DELEGCUR, where it is matched against
 * the stateids of the delegations on the file
 * new_stp - describes the requested open; ls_flags, ls_stateid, ls_seq
 * and ls_op are read
 * vp - vnode being opened, or NULL when the file does not exist yet, in
 * which case the per-file conflict checks are skipped
 * nd - request descriptor; nd_flag is tested for ND_NFSV41 and nd is
 * passed to the helpers
 * p - passed through to the helper functions
 * repstat - error supplied by the caller, returned only when the client
 * lookup, seqid and grace checks all succeed
 *
 * Returns 0 or an NFSERR_xxx value. Values set directly here are
 * NFSERR_RESOURCE, NFSERR_NOGRACE, NFSERR_EXPIRED, NFSERR_PERM,
 * NFSERR_RECLAIMCONFLICT and NFSERR_SHAREDENIED; other values come from
 * the helpers or from repstat.
 *
 * NFSLOCKSTATE() is acquired once per pass, after the nfslockfile has been
 * allocated. Every exit path either calls NFSUNLOCKSTATE() itself or depends
 * on the conflict helper having already unlocked state (see the comments at
 * the nfsrv_clientconflict() and nfsrv_delegconflict() calls).
2544 */
2545 int
nfsrv_opencheck(nfsquad_t clientid,nfsv4stateid_t * stateidp,struct nfsstate * new_stp,vnode_t vp,struct nfsrv_descript * nd,NFSPROC_T * p,int repstat)2546 nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
2547 struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
2548 NFSPROC_T *p, int repstat)
2549 {
2550 struct nfsstate *stp, *nstp;
2551 struct nfsclient *clp;
2552 struct nfsstate *ownerstp;
2553 struct nfslockfile *lfp, *new_lfp;
/*
 * haslock is only changed through the &haslock argument given to the
 * conflict helpers. Where it is tested below, a non-zero value causes
 * nfsv4rootfs_lock to be released with nfsv4_unlock().
 */
2554 int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
2555
/*
 * readonly is set when the share bits of the request are exactly read
 * access. It selects which delegation conflict rule is applied further
 * down (the rule described there as Read Access and Deny None).
 */
2556 if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2557 readonly = 1;
2558 /*
2559 * Check for restart conditions (client and server).
 * No lock is held and nothing has been allocated yet, so a failure
 * can go straight to out.
2560 */
2561 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2562 &new_stp->ls_stateid, 0);
2563 if (error)
2564 goto out;
2565
2566 /*
2567 * Check for state resource limit exceeded.
2568 * Technically this should be SMP protected, but the worst
2569 * case error is "out by one or two" on the count when it
2570 * returns NFSERR_RESOURCE and the limit is just a rather
2571 * arbitrary high water mark, so no harm is done.
2572 */
2573 if (nfsrv_openpluslock > nfsrv_v4statelimit) {
2574 error = NFSERR_RESOURCE;
2575 goto out;
2576 }
2577
/*
 * Restart point. Control comes back here when nfsrv_clientconflict()
 * returns 1 or nfsrv_delegconflict() returns -1. A new nfslockfile is
 * allocated with M_WAITOK on every pass, before NFSLOCKSTATE() is taken.
 * When vp is NULL getfhret keeps its previous value; it is only examined
 * after the vp == NULL return below.
 */
2578 tryagain:
2579 new_lfp = malloc(sizeof (struct nfslockfile),
2580 M_NFSDLOCKFILE, M_WAITOK);
2581 if (vp)
2582 getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2583 NULL, p);
2584 NFSLOCKSTATE();
2585 /*
2586 * Get the nfsclient structure.
 * The result is not acted on immediately; error is carried through
 * the guarded checks that follow and handled in one place.
2587 */
2588 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2589 (nfsquad_t)((u_quad_t)0), 0, nd, p);
2590
2591 /*
2592 * Look up the open owner. See if it needs confirmation and
2593 * check the seq#, as required.
 * ownerstp has no initializer and is only written by
 * nfsrv_getowner(), hence the !error guard on every read of it.
2594 */
2595 if (!error)
2596 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2597
2598 if (!error && ownerstp) {
2599 error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
2600 new_stp->ls_op);
2601 /*
2602 * If the OpenOwner hasn't been confirmed, assume the
2603 * old one was a replay and this one is ok.
2604 * See: RFC3530 Sec. 14.2.18.
2605 */
2606 if (error == NFSERR_BADSEQID &&
2607 (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
2608 error = 0;
2609 }
2610
2611 /*
2612 * Check for grace.
 * For a reclaim that passed the grace check, a non-zero return from
 * nfsrv_checkstable() is turned into NFSERR_NOGRACE.
2613 */
2614 if (!error)
2615 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2616 if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2617 nfsrv_checkstable(clp))
2618 error = NFSERR_NOGRACE;
2619
2620 /*
2621 * If none of the above errors occurred, let repstat be
2622 * returned.
 * On any error the state lock and, if held, nfsv4rootfs_lock are
 * released and the unused nfslockfile is freed.
2623 */
2624 if (repstat && !error)
2625 error = repstat;
2626 if (error) {
2627 NFSUNLOCKSTATE();
2628 if (haslock) {
2629 NFSLOCKV4ROOTMUTEX();
2630 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2631 NFSUNLOCKV4ROOTMUTEX();
2632 }
2633 free(new_lfp, M_NFSDLOCKFILE);
2634 goto out;
2635 }
2636
2637 /*
2638 * If vp == NULL, the file doesn't exist yet, so return ok.
2639 * (This always happens on the first pass, so haslock must be 0.)
 * error is 0 at this point, so 0 is what gets returned.
2640 */
2641 if (vp == NULL) {
2642 NFSUNLOCKSTATE();
2643 free(new_lfp, M_NFSDLOCKFILE);
2644 goto out;
2645 }
2646
2647 /*
2648 * Get the structure for the underlying file.
 * A failure recorded earlier by nfsrv_getlockfh() is reported here
 * instead of doing the lookup.
 * new_lfp is passed by reference to nfsrv_getlockfile() and is freed
 * afterwards only if it is still non-NULL.
 * NOTE(review): presumably nfsrv_getlockfile() clears the pointer when
 * it keeps the structure - confirm against that function.
2649 */
2650 if (getfhret)
2651 error = getfhret;
2652 else
2653 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2654 NULL, 0);
2655 if (new_lfp)
2656 free(new_lfp, M_NFSDLOCKFILE);
2657 if (error) {
2658 NFSUNLOCKSTATE();
2659 if (haslock) {
2660 NFSLOCKV4ROOTMUTEX();
2661 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2662 NFSUNLOCKV4ROOTMUTEX();
2663 }
2664 goto out;
2665 }
2666
2667 /*
2668 * Search for a conflicting open/share.
2669 */
2670 if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2671 /*
2672 * For Delegate_Cur, search for the matching Delegation,
2673 * which indicates no conflict.
2674 * An old delegation should have been recovered by the
2675 * client doing a Claim_DELEGATE_Prev, so I won't let
2676 * it match and return NFSERR_EXPIRED. Should I let it
2677 * match?
 * A delegation matches when it is not flagged NFSLCK_OLDDELEG,
 * the other field of the stateid is equal, and the seqid is
 * equal; for ND_NFSV41 requests a seqid of 0 in stateidp is
 * accepted against any delegation seqid.
 * The loop leaves stp at the match, or at LIST_END when there
 * is none.
2678 */
2679 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2680 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2681 (((nd->nd_flag & ND_NFSV41) != 0 &&
2682 stateidp->seqid == 0) ||
2683 stateidp->seqid == stp->ls_stateid.seqid) &&
2684 !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2685 NFSX_STATEIDOTHER))
2686 break;
2687 }
/*
 * NFSERR_EXPIRED is returned when no delegation matched, and also when
 * the match is a read delegation but the open asks for write access.
 */
2688 if (stp == LIST_END(&lfp->lf_deleg) ||
2689 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2690 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2691 NFSUNLOCKSTATE();
2692 if (haslock) {
2693 NFSLOCKV4ROOTMUTEX();
2694 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2695 NFSUNLOCKV4ROOTMUTEX();
2696 }
2697 error = NFSERR_EXPIRED;
2698 goto out;
2699 }
2700 }
2701
2702 /*
2703 * Check for access/deny bit conflicts. I check for the same
2704 * owner as well, in case the client didn't bother.
 * Skipped entirely for NFSLCK_DELEGCUR. Two tests are made for
 * each existing open on the file: the requested access bits against
 * the existing flags shifted right by NFSLCK_SHIFT, and the existing
 * access bits against the requested flags shifted the same way.
 * Return values of nfsrv_clientconflict() as handled here:
 * 1 - retry from tryagain
 * 2 - NFSERR_PERM
 * 0 - the state lock is still held, so it is dropped here
2705 */
2706 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2707 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
2708 (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2709 ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2710 ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2711 ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
2712 ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2713 if (ret == 1) {
2714 /*
2715 * nfsrv_clientconflict() unlocks
2716 * state when it returns non-zero.
2717 */
2718 goto tryagain;
2719 }
2720 if (ret == 2)
2721 error = NFSERR_PERM;
2722 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2723 error = NFSERR_RECLAIMCONFLICT;
2724 else
2725 error = NFSERR_SHAREDENIED;
2726 if (ret == 0)
2727 NFSUNLOCKSTATE();
2728 if (haslock) {
2729 NFSLOCKV4ROOTMUTEX();
2730 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2731 NFSUNLOCKV4ROOTMUTEX();
2732 }
2733 goto out;
2734 }
2735 }
2736
2737 /*
2738 * Check for a conflicting delegation. If one is found, call
2739 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2740 * been set yet, it will get the lock. Otherwise, it will recall
2741 * the delegation. Then, we try try again...
2742 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2743 * isn't a conflict.)
2744 * I currently believe the conflict algorithm to be:
2745 * For Open with Read Access and Deny None
2746 * - there is a conflict iff a different client has a write delegation
2747 * For Open with other Write Access or any Deny except None
2748 * - there is a conflict if a different client has any delegation
2749 * - there is a conflict if the same client has a read delegation
2750 * (The current consensus is that this last case should be
2751 * considered a conflict since the client with a read delegation
2752 * could have done an Open with ReadAccess and WriteDeny
2753 * locally and then not have checked for the WriteDeny.)
2754 * The exception is a NFSv4.1/4.2 client that has requested
2755 * an atomic upgrade to a write delegation.
2756 * Don't check for a Reclaim, since that will be dealt with
2757 * by nfsrv_openctrl().
 * In the code, the exception above is the NFSLCK_WANTWDELEG test.
 * The successor nstp is fetched before nfsrv_delegconflict() is
 * called and is what the loop continues with.
 * NOTE(review): presumably because stp may no longer be usable after
 * the call - confirm against nfsrv_delegconflict().
 * NOTE(review): the error path below does not test haslock; this
 * assumes nfsrv_delegconflict() has already released nfsv4rootfs_lock
 * when it returns an error other than -1 - confirm.
2758 */
2759 if (!(new_stp->ls_flags &
2760 (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
2761 stp = LIST_FIRST(&lfp->lf_deleg);
2762 while (stp != LIST_END(&lfp->lf_deleg)) {
2763 nstp = LIST_NEXT(stp, ls_file);
2764 if ((readonly && stp->ls_clp != clp &&
2765 (stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ||
2766 (!readonly && (stp->ls_clp != clp ||
2767 ((stp->ls_flags & NFSLCK_DELEGREAD) != 0 &&
2768 (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)))) {
2769 ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2770 if (ret) {
2771 /*
2772 * nfsrv_delegconflict() unlocks state
2773 * when it returns non-zero.
2774 */
2775 if (ret == -1)
2776 goto tryagain;
2777 error = ret;
2778 goto out;
2779 }
2780 }
2781 stp = nstp;
2782 }
2783 }
/*
 * No conflict was found. Drop the state lock and, if it is held,
 * nfsv4rootfs_lock. error is still 0 here.
 */
2784 NFSUNLOCKSTATE();
2785 if (haslock) {
2786 NFSLOCKV4ROOTMUTEX();
2787 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2788 NFSUNLOCKV4ROOTMUTEX();
2789 }
2790
/*
 * Common exit. All lock releases and frees are done by the paths that
 * jump here; this label only reports error through NFSEXITCODE2() and
 * returns it.
 */
2791 out:
2792 NFSEXITCODE2(error, nd);
2793 return (error);
2794 }
2795
2796 /*
2797 * Open control function to create/update open state for an open.
2798 */
2799 int
nfsrv_openctrl(struct nfsrv_descript * nd,vnode_t vp,struct nfsstate ** new_stpp,nfsquad_t clientid,nfsv4stateid_t * stateidp,nfsv4stateid_t * delegstateidp,u_int32_t * rflagsp,struct nfsexstuff * exp,NFSPROC_T * p,u_quad_t filerev)2800 nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
2801 struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
2802 nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
2803 NFSPROC_T *p, u_quad_t filerev)
2804 {
2805 struct nfsstate *new_stp = *new_stpp;
2806 struct nfsstate *stp, *nstp;
2807 struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
2808 struct nfslockfile *lfp, *new_lfp;
2809 struct nfsclient *clp;
2810 int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
2811 int readonly = 0, cbret = 1, getfhret = 0;
2812 int gotstate = 0, len = 0;
2813 u_char *clidp = NULL;
2814
2815 if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2816 readonly = 1;
2817 /*
2818 * Check for restart conditions (client and server).
2819 * (Paranoia, should have been detected by nfsrv_opencheck().)
2820 * If an error does show up, return NFSERR_EXPIRED, since the
2821 * seqid# has already been incremented.
2822 */
2823 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2824 &new_stp->ls_stateid, 0);
2825 if (error) {
2826 printf("Nfsd: openctrl unexpected restart err=%d\n",
2827 error);
2828 error = NFSERR_EXPIRED;
2829 goto out;
2830 }
2831
2832 clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
2833 tryagain:
2834 new_lfp = malloc(sizeof (struct nfslockfile),
2835 M_NFSDLOCKFILE, M_WAITOK);
2836 new_open = malloc(sizeof (struct nfsstate),
2837 M_NFSDSTATE, M_WAITOK);
2838 new_deleg = malloc(sizeof (struct nfsstate),
2839 M_NFSDSTATE, M_WAITOK);
2840 getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2841 NULL, p);
2842 NFSLOCKSTATE();
2843 /*
2844 * Get the client structure. Since the linked lists could be changed
2845 * by other nfsd processes if this process does a tsleep(), one of
2846 * two things must be done.
2847 * 1 - don't tsleep()
2848 * or
2849 * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
2850 * before using the lists, since this lock stops the other
2851 * nfsd. This should only be used for rare cases, since it
2852 * essentially single threads the nfsd.
2853 * At this time, it is only done for cases where the stable
2854 * storage file must be written prior to completion of state
2855 * expiration.
2856 */
2857 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2858 (nfsquad_t)((u_quad_t)0), 0, nd, p);
2859 if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
2860 clp->lc_program) {
2861 /*
2862 * This happens on the first open for a client
2863 * that supports callbacks.
2864 */
2865 NFSUNLOCKSTATE();
2866 /*
2867 * Although nfsrv_docallback() will sleep, clp won't
2868 * go away, since they are only removed when the
2869 * nfsv4_lock() has blocked the nfsd threads. The
2870 * fields in clp can change, but having multiple
2871 * threads do this Null callback RPC should be
2872 * harmless.
2873 */
2874 cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
2875 NULL, 0, NULL, NULL, NULL, 0, p);
2876 NFSLOCKSTATE();
2877 clp->lc_flags &= ~LCL_NEEDSCBNULL;
2878 if (!cbret)
2879 clp->lc_flags |= LCL_CALLBACKSON;
2880 }
2881
2882 /*
2883 * Look up the open owner. See if it needs confirmation and
2884 * check the seq#, as required.
2885 */
2886 if (!error)
2887 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2888
2889 if (error) {
2890 NFSUNLOCKSTATE();
2891 printf("Nfsd: openctrl unexpected state err=%d\n",
2892 error);
2893 free(new_lfp, M_NFSDLOCKFILE);
2894 free(new_open, M_NFSDSTATE);
2895 free(new_deleg, M_NFSDSTATE);
2896 if (haslock) {
2897 NFSLOCKV4ROOTMUTEX();
2898 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2899 NFSUNLOCKV4ROOTMUTEX();
2900 }
2901 error = NFSERR_EXPIRED;
2902 goto out;
2903 }
2904
2905 if (new_stp->ls_flags & NFSLCK_RECLAIM)
2906 nfsrv_markstable(clp);
2907
2908 /*
2909 * Get the structure for the underlying file.
2910 */
2911 if (getfhret)
2912 error = getfhret;
2913 else
2914 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2915 NULL, 0);
2916 if (new_lfp)
2917 free(new_lfp, M_NFSDLOCKFILE);
2918 if (error) {
2919 NFSUNLOCKSTATE();
2920 printf("Nfsd openctrl unexpected getlockfile err=%d\n",
2921 error);
2922 free(new_open, M_NFSDSTATE);
2923 free(new_deleg, M_NFSDSTATE);
2924 if (haslock) {
2925 NFSLOCKV4ROOTMUTEX();
2926 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2927 NFSUNLOCKV4ROOTMUTEX();
2928 }
2929 goto out;
2930 }
2931
2932 /*
2933 * Search for a conflicting open/share.
2934 */
2935 if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2936 /*
2937 * For Delegate_Cur, search for the matching Delegation,
2938 * which indicates no conflict.
2939 * An old delegation should have been recovered by the
2940 * client doing a Claim_DELEGATE_Prev, so I won't let
2941 * it match and return NFSERR_EXPIRED. Should I let it
2942 * match?
2943 */
2944 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2945 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2946 (((nd->nd_flag & ND_NFSV41) != 0 &&
2947 stateidp->seqid == 0) ||
2948 stateidp->seqid == stp->ls_stateid.seqid) &&
2949 !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2950 NFSX_STATEIDOTHER))
2951 break;
2952 }
2953 if (stp == LIST_END(&lfp->lf_deleg) ||
2954 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2955 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2956 NFSUNLOCKSTATE();
2957 printf("Nfsd openctrl unexpected expiry\n");
2958 free(new_open, M_NFSDSTATE);
2959 free(new_deleg, M_NFSDSTATE);
2960 if (haslock) {
2961 NFSLOCKV4ROOTMUTEX();
2962 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2963 NFSUNLOCKV4ROOTMUTEX();
2964 }
2965 error = NFSERR_EXPIRED;
2966 goto out;
2967 }
2968
2969 /*
2970 * Don't issue a Delegation, since one already exists and
2971 * delay delegation timeout, as required.
2972 */
2973 delegate = 0;
2974 nfsrv_delaydelegtimeout(stp);
2975 }
2976
2977 /*
2978 * Check for access/deny bit conflicts. I also check for the
2979 * same owner, since the client might not have bothered to check.
2980 * Also, note an open for the same file and owner, if found,
2981 * which is all we do here for Delegate_Cur, since conflict
2982 * checking is already done.
2983 */
2984 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2985 if (ownerstp && stp->ls_openowner == ownerstp)
2986 openstp = stp;
2987 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
2988 /*
2989 * If another client has the file open, the only
2990 * delegation that can be issued is a Read delegation
2991 * and only if it is a Read open with Deny none.
2992 */
2993 if (clp != stp->ls_clp) {
2994 if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
2995 NFSLCK_READACCESS)
2996 writedeleg = 0;
2997 else
2998 delegate = 0;
2999 }
3000 if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
3001 ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
3002 ((stp->ls_flags & NFSLCK_ACCESSBITS) &
3003 ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
3004 ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
3005 if (ret == 1) {
3006 /*
3007 * nfsrv_clientconflict() unlocks state
3008 * when it returns non-zero.
3009 */
3010 free(new_open, M_NFSDSTATE);
3011 free(new_deleg, M_NFSDSTATE);
3012 openstp = NULL;
3013 goto tryagain;
3014 }
3015 if (ret == 2)
3016 error = NFSERR_PERM;
3017 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
3018 error = NFSERR_RECLAIMCONFLICT;
3019 else
3020 error = NFSERR_SHAREDENIED;
3021 if (ret == 0)
3022 NFSUNLOCKSTATE();
3023 if (haslock) {
3024 NFSLOCKV4ROOTMUTEX();
3025 nfsv4_unlock(&nfsv4rootfs_lock, 1);
3026 NFSUNLOCKV4ROOTMUTEX();
3027 }
3028 free(new_open, M_NFSDSTATE);
3029 free(new_deleg, M_NFSDSTATE);
3030 printf("nfsd openctrl unexpected client cnfl\n");
3031 goto out;
3032 }
3033 }
3034 }
3035
3036 /*
3037 * Check for a conflicting delegation. If one is found, call
3038 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
3039 * been set yet, it will get the lock. Otherwise, it will recall
3040 * the delegation. Then, we try again...
3041 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
3042 * isn't a conflict.)
3043 * I currently believe the conflict algorithm to be:
3044 * For Open with Read Access and Deny None
3045 * - there is a conflict iff a different client has a write delegation
3046 * For Open with other Write Access or any Deny except None
3047 * - there is a conflict if a different client has any delegation
3048 * - there is a conflict if the same client has a read delegation
3049 * (The current consensus is that this last case should be
3050 * considered a conflict since the client with a read delegation
3051 * could have done an Open with ReadAccess and WriteDeny
3052 * locally and then not have checked for the WriteDeny.)
3053 * The exception is a NFSv4.1/4.2 client that has requested
3054 * an atomic upgrade to a write delegation.
3055 */
3056 if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
3057 stp = LIST_FIRST(&lfp->lf_deleg);
3058 while (stp != LIST_END(&lfp->lf_deleg)) {
3059 nstp = LIST_NEXT(stp, ls_file);
3060 if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
3061 writedeleg = 0;
3062 else if (stp->ls_clp != clp ||
3063 (stp->ls_flags & NFSLCK_DELEGWRITE) != 0 ||
3064 (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)
3065 delegate = 0;
3066 if ((readonly && stp->ls_clp != clp &&
3067 (stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ||
3068 (!readonly && (stp->ls_clp != clp ||
3069 ((stp->ls_flags & NFSLCK_DELEGREAD) != 0 &&
3070 (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)))) {
3071 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3072 delegate = 2;
3073 } else {
3074 ret = nfsrv_delegconflict(stp, &haslock, p, vp);
3075 if (ret) {
3076 /*
3077 * nfsrv_delegconflict() unlocks state
3078 * when it returns non-zero.
3079 */
3080 printf("Nfsd openctrl unexpected deleg cnfl\n");
3081 free(new_open, M_NFSDSTATE);
3082 free(new_deleg, M_NFSDSTATE);
3083 if (ret == -1) {
3084 openstp = NULL;
3085 goto tryagain;
3086 }
3087 error = ret;
3088 goto out;
3089 }
3090 }
3091 }
3092 stp = nstp;
3093 }
3094 }
3095
3096 /*
3097 * We only get here if there was no open that conflicted.
3098 * If an open for the owner exists, OR the new access/deny bits into it.
3099 * Otherwise it is a new open. If the open_owner hasn't been
3100 * confirmed, replace the open with the new one needing confirmation,
3101 * otherwise add the open.
3102 */
3103 if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
3104 /*
3105 * Handle NFSLCK_DELEGPREV by searching the old delegations for
3106 * a match. If found, just move the old delegation to the current
3107 * delegation list and issue open. If not found, return
3108 * NFSERR_EXPIRED.
3109 */
3110 LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
3111 if (stp->ls_lfp == lfp) {
3112 /* Found it */
3113 if (stp->ls_clp != clp)
3114 panic("olddeleg clp");
3115 LIST_REMOVE(stp, ls_list);
3116 LIST_REMOVE(stp, ls_hash);
3117 stp->ls_flags &= ~NFSLCK_OLDDELEG;
3118 stp->ls_stateid.seqid = delegstateidp->seqid = 1;
3119 stp->ls_stateid.other[0] = delegstateidp->other[0] =
3120 clp->lc_clientid.lval[0];
3121 stp->ls_stateid.other[1] = delegstateidp->other[1] =
3122 clp->lc_clientid.lval[1];
3123 stp->ls_stateid.other[2] = delegstateidp->other[2] =
3124 nfsrv_nextstateindex(clp);
3125 stp->ls_compref = nd->nd_compref;
3126 LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
3127 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3128 stp->ls_stateid), stp, ls_hash);
3129 if (stp->ls_flags & NFSLCK_DELEGWRITE)
3130 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3131 else
3132 *rflagsp |= NFSV4OPEN_READDELEGATE;
3133 clp->lc_delegtime = NFSD_MONOSEC +
3134 nfsrv_lease + NFSRV_LEASEDELTA;
3135
3136 /*
3137 * Now, do the associated open.
3138 */
3139 new_open->ls_stateid.seqid = 1;
3140 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3141 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3142 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3143 new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
3144 NFSLCK_OPEN;
3145 if (stp->ls_flags & NFSLCK_DELEGWRITE)
3146 new_open->ls_flags |= (NFSLCK_READACCESS |
3147 NFSLCK_WRITEACCESS);
3148 else
3149 new_open->ls_flags |= NFSLCK_READACCESS;
3150 new_open->ls_uid = new_stp->ls_uid;
3151 new_open->ls_lfp = lfp;
3152 new_open->ls_clp = clp;
3153 LIST_INIT(&new_open->ls_open);
3154 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3155 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3156 new_open, ls_hash);
3157 /*
3158 * and handle the open owner
3159 */
3160 if (ownerstp) {
3161 new_open->ls_openowner = ownerstp;
3162 LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
3163 } else {
3164 new_open->ls_openowner = new_stp;
3165 new_stp->ls_flags = 0;
3166 nfsrvd_refcache(new_stp->ls_op);
3167 new_stp->ls_noopens = 0;
3168 LIST_INIT(&new_stp->ls_open);
3169 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3170 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3171 *new_stpp = NULL;
3172 NFSD_VNET(nfsstatsv1_p)->srvopenowners++;
3173 nfsrv_openpluslock++;
3174 }
3175 openstp = new_open;
3176 new_open = NULL;
3177 NFSD_VNET(nfsstatsv1_p)->srvopens++;
3178 nfsrv_openpluslock++;
3179 break;
3180 }
3181 }
3182 if (stp == LIST_END(&clp->lc_olddeleg))
3183 error = NFSERR_EXPIRED;
3184 } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
3185 /*
3186 * Scan to check that a delegation for this client and file
3187 * doesn't already exist.
3188 * There also shouldn't yet be an Open for this file and
3189 * openowner.
3190 */
3191 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
3192 if (stp->ls_clp == clp)
3193 break;
3194 }
3195 if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
3196 /*
3197 * This is the Claim_Previous case with a delegation
3198 * type != Delegate_None.
3199 */
3200 /*
3201 * First, add the delegation. (Although we must issue the
3202 * delegation, we can also ask for an immediate return.)
3203 */
3204 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3205 new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
3206 clp->lc_clientid.lval[0];
3207 new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
3208 clp->lc_clientid.lval[1];
3209 new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
3210 nfsrv_nextstateindex(clp);
3211 if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
3212 new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3213 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3214 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3215 nfsrv_writedelegcnt++;
3216 } else {
3217 new_deleg->ls_flags = (NFSLCK_DELEGREAD |
3218 NFSLCK_READACCESS);
3219 *rflagsp |= NFSV4OPEN_READDELEGATE;
3220 }
3221 new_deleg->ls_uid = new_stp->ls_uid;
3222 new_deleg->ls_lfp = lfp;
3223 new_deleg->ls_clp = clp;
3224 new_deleg->ls_filerev = filerev;
3225 new_deleg->ls_compref = nd->nd_compref;
3226 new_deleg->ls_lastrecall = 0;
3227 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3228 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3229 new_deleg->ls_stateid), new_deleg, ls_hash);
3230 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3231 new_deleg = NULL;
3232 if (delegate == 2 || nfsrv_issuedelegs == 0 ||
3233 (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3234 LCL_CALLBACKSON ||
3235 NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
3236 !NFSVNO_DELEGOK(vp))
3237 *rflagsp |= NFSV4OPEN_RECALL;
3238 NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
3239 nfsrv_openpluslock++;
3240 nfsrv_delegatecnt++;
3241
3242 /*
3243 * Now, do the associated open.
3244 */
3245 new_open->ls_stateid.seqid = 1;
3246 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3247 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3248 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3249 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
3250 NFSLCK_OPEN;
3251 if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
3252 new_open->ls_flags |= (NFSLCK_READACCESS |
3253 NFSLCK_WRITEACCESS);
3254 else
3255 new_open->ls_flags |= NFSLCK_READACCESS;
3256 new_open->ls_uid = new_stp->ls_uid;
3257 new_open->ls_lfp = lfp;
3258 new_open->ls_clp = clp;
3259 LIST_INIT(&new_open->ls_open);
3260 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3261 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3262 new_open, ls_hash);
3263 /*
3264 * and handle the open owner
3265 */
3266 if (ownerstp) {
3267 new_open->ls_openowner = ownerstp;
3268 LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3269 } else {
3270 new_open->ls_openowner = new_stp;
3271 new_stp->ls_flags = 0;
3272 nfsrvd_refcache(new_stp->ls_op);
3273 new_stp->ls_noopens = 0;
3274 LIST_INIT(&new_stp->ls_open);
3275 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3276 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3277 *new_stpp = NULL;
3278 NFSD_VNET(nfsstatsv1_p)->srvopenowners++;
3279 nfsrv_openpluslock++;
3280 }
3281 openstp = new_open;
3282 new_open = NULL;
3283 NFSD_VNET(nfsstatsv1_p)->srvopens++;
3284 nfsrv_openpluslock++;
3285 } else {
3286 error = NFSERR_RECLAIMCONFLICT;
3287 }
3288 } else if (ownerstp) {
3289 if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
3290 /* Replace the open */
3291 if (ownerstp->ls_op)
3292 nfsrvd_derefcache(ownerstp->ls_op);
3293 ownerstp->ls_op = new_stp->ls_op;
3294 nfsrvd_refcache(ownerstp->ls_op);
3295 ownerstp->ls_seq = new_stp->ls_seq;
3296 *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3297 stp = LIST_FIRST(&ownerstp->ls_open);
3298 stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3299 NFSLCK_OPEN;
3300 stp->ls_stateid.seqid = 1;
3301 stp->ls_uid = new_stp->ls_uid;
3302 if (lfp != stp->ls_lfp) {
3303 LIST_REMOVE(stp, ls_file);
3304 LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
3305 stp->ls_lfp = lfp;
3306 }
3307 openstp = stp;
3308 } else if (openstp) {
3309 openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
3310 openstp->ls_stateid.seqid++;
3311 if ((nd->nd_flag & ND_NFSV41) != 0 &&
3312 openstp->ls_stateid.seqid == 0)
3313 openstp->ls_stateid.seqid = 1;
3314
3315 /*
3316 * This is where we can choose to issue a delegation.
3317 */
3318 nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
3319 readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
3320 new_stp, lfp, rflagsp, delegstateidp);
3321 } else {
3322 new_open->ls_stateid.seqid = 1;
3323 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3324 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3325 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3326 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
3327 NFSLCK_OPEN;
3328 new_open->ls_uid = new_stp->ls_uid;
3329 new_open->ls_openowner = ownerstp;
3330 new_open->ls_lfp = lfp;
3331 new_open->ls_clp = clp;
3332 LIST_INIT(&new_open->ls_open);
3333 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3334 LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3335 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3336 new_open, ls_hash);
3337 openstp = new_open;
3338 new_open = NULL;
3339 NFSD_VNET(nfsstatsv1_p)->srvopens++;
3340 nfsrv_openpluslock++;
3341
3342 /*
3343 * This is where we can choose to issue a delegation.
3344 */
3345 nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
3346 readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
3347 new_stp, lfp, rflagsp, delegstateidp);
3348 }
3349 } else {
3350 /*
3351 * New owner case. Start the open_owner sequence with a
3352 * Needs confirmation (unless a reclaim) and hang the
3353 * new open off it.
3354 */
3355 new_open->ls_stateid.seqid = 1;
3356 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3357 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3358 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3359 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3360 NFSLCK_OPEN;
3361 new_open->ls_uid = new_stp->ls_uid;
3362 LIST_INIT(&new_open->ls_open);
3363 new_open->ls_openowner = new_stp;
3364 new_open->ls_lfp = lfp;
3365 new_open->ls_clp = clp;
3366 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3367 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3368 new_stp->ls_flags = 0;
3369 } else if ((nd->nd_flag & ND_NFSV41) != 0) {
3370 /*
3371 * This is where we can choose to issue a delegation.
3372 */
3373 nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
3374 readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
3375 new_stp, lfp, rflagsp, delegstateidp);
3376 /* NFSv4.1 never needs confirmation. */
3377 new_stp->ls_flags = 0;
3378
3379 /*
3380 * Since NFSv4.1 never does an OpenConfirm, the first
3381 * open state will be acquired here.
3382 */
3383 if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3384 clp->lc_flags |= LCL_STAMPEDSTABLE;
3385 len = clp->lc_idlen;
3386 NFSBCOPY(clp->lc_id, clidp, len);
3387 gotstate = 1;
3388 }
3389 } else {
3390 *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3391 new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
3392 }
3393 nfsrvd_refcache(new_stp->ls_op);
3394 new_stp->ls_noopens = 0;
3395 LIST_INIT(&new_stp->ls_open);
3396 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3397 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3398 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3399 new_open, ls_hash);
3400 openstp = new_open;
3401 new_open = NULL;
3402 *new_stpp = NULL;
3403 NFSD_VNET(nfsstatsv1_p)->srvopens++;
3404 nfsrv_openpluslock++;
3405 NFSD_VNET(nfsstatsv1_p)->srvopenowners++;
3406 nfsrv_openpluslock++;
3407 }
3408 if (!error) {
3409 stateidp->seqid = openstp->ls_stateid.seqid;
3410 stateidp->other[0] = openstp->ls_stateid.other[0];
3411 stateidp->other[1] = openstp->ls_stateid.other[1];
3412 stateidp->other[2] = openstp->ls_stateid.other[2];
3413 }
3414 NFSUNLOCKSTATE();
3415 if (haslock) {
3416 NFSLOCKV4ROOTMUTEX();
3417 nfsv4_unlock(&nfsv4rootfs_lock, 1);
3418 NFSUNLOCKV4ROOTMUTEX();
3419 }
3420 if (new_open)
3421 free(new_open, M_NFSDSTATE);
3422 if (new_deleg)
3423 free(new_deleg, M_NFSDSTATE);
3424
3425 /*
3426 * If the NFSv4.1 client just acquired its first open, write a timestamp
3427 * to the stable storage file.
3428 */
3429 if (gotstate != 0) {
3430 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3431 nfsrv_backupstable();
3432 }
3433
3434 out:
3435 free(clidp, M_TEMP);
3436 NFSEXITCODE2(error, nd);
3437 return (error);
3438 }
3439
3440 /*
3441 * Open update. Does the confirm, downgrade and close.
3442 */
3443 int
nfsrv_openupdate(vnode_t vp,struct nfsstate * new_stp,nfsquad_t clientid,nfsv4stateid_t * stateidp,struct nfsrv_descript * nd,NFSPROC_T * p,int * retwriteaccessp)3444 nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
3445 nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p,
3446 int *retwriteaccessp)
3447 {
3448 struct nfsstate *stp;
3449 struct nfsclient *clp;
3450 u_int32_t bits;
3451 int error = 0, gotstate = 0, len = 0;
3452 u_char *clidp = NULL;
3453
3454 /*
3455 * Check for restart conditions (client and server).
3456 */
3457 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3458 &new_stp->ls_stateid, 0);
3459 if (error)
3460 goto out;
3461
3462 clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
3463 NFSLOCKSTATE();
3464 /*
3465 * Get the open structure via clientid and stateid.
3466 */
3467 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3468 (nfsquad_t)((u_quad_t)0), 0, nd, p);
3469 if (!error)
3470 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
3471 new_stp->ls_flags, &stp);
3472
3473 /*
3474 * Sanity check the open.
3475 */
3476 if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
3477 (!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
3478 (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
3479 ((new_stp->ls_flags & NFSLCK_CONFIRM) &&
3480 (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
3481 error = NFSERR_BADSTATEID;
3482
3483 if (!error)
3484 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
3485 stp->ls_openowner, new_stp->ls_op);
3486 if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
3487 (((nd->nd_flag & ND_NFSV41) == 0 &&
3488 !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
3489 ((nd->nd_flag & ND_NFSV41) != 0 &&
3490 new_stp->ls_stateid.seqid != 0)))
3491 error = NFSERR_OLDSTATEID;
3492 if (!error && vp->v_type != VREG) {
3493 if (vp->v_type == VDIR)
3494 error = NFSERR_ISDIR;
3495 else
3496 error = NFSERR_INVAL;
3497 }
3498
3499 if (error) {
3500 /*
3501 * If a client tries to confirm an Open with a bad
3502 * seqid# and there are no byte range locks or other Opens
3503 * on the openowner, just throw it away, so the next use of the
3504 * openowner will start a fresh seq#.
3505 */
3506 if (error == NFSERR_BADSEQID &&
3507 (new_stp->ls_flags & NFSLCK_CONFIRM) &&
3508 nfsrv_nootherstate(stp))
3509 nfsrv_freeopenowner(stp->ls_openowner, 0, p);
3510 NFSUNLOCKSTATE();
3511 goto out;
3512 }
3513
3514 /*
3515 * Set the return stateid.
3516 */
3517 stateidp->seqid = stp->ls_stateid.seqid + 1;
3518 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
3519 stateidp->seqid = 1;
3520 stateidp->other[0] = stp->ls_stateid.other[0];
3521 stateidp->other[1] = stp->ls_stateid.other[1];
3522 stateidp->other[2] = stp->ls_stateid.other[2];
3523 /*
3524 * Now, handle the three cases.
3525 */
3526 if (new_stp->ls_flags & NFSLCK_CONFIRM) {
3527 /*
3528 * If the open doesn't need confirmation, it seems to me that
3529 * there is a client error, but I'll just log it and keep going?
3530 */
3531 if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
3532 printf("Nfsv4d: stray open confirm\n");
3533 stp->ls_openowner->ls_flags = 0;
3534 stp->ls_stateid.seqid++;
3535 if ((nd->nd_flag & ND_NFSV41) != 0 &&
3536 stp->ls_stateid.seqid == 0)
3537 stp->ls_stateid.seqid = 1;
3538 if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3539 clp->lc_flags |= LCL_STAMPEDSTABLE;
3540 len = clp->lc_idlen;
3541 NFSBCOPY(clp->lc_id, clidp, len);
3542 gotstate = 1;
3543 }
3544 } else if (new_stp->ls_flags & NFSLCK_CLOSE) {
3545 if (retwriteaccessp != NULL) {
3546 if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0)
3547 *retwriteaccessp = 1;
3548 else
3549 *retwriteaccessp = 0;
3550 }
3551 if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
3552 ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
3553 nfsrv_freeopen(stp, vp, 1, p);
3554 } else {
3555 nfsrv_freeopen(stp, NULL, 0, p);
3556 }
3557 } else {
3558 /*
3559 * Update the share bits, making sure that the new set are a
3560 * subset of the old ones.
3561 */
3562 bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
3563 if (~(stp->ls_flags) & bits) {
3564 NFSUNLOCKSTATE();
3565 error = NFSERR_INVAL;
3566 goto out;
3567 }
3568 stp->ls_flags = (bits | NFSLCK_OPEN);
3569 stp->ls_stateid.seqid++;
3570 if ((nd->nd_flag & ND_NFSV41) != 0 &&
3571 stp->ls_stateid.seqid == 0)
3572 stp->ls_stateid.seqid = 1;
3573 }
3574 NFSUNLOCKSTATE();
3575
3576 /*
3577 * If the client just confirmed its first open, write a timestamp
3578 * to the stable storage file.
3579 */
3580 if (gotstate != 0) {
3581 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3582 nfsrv_backupstable();
3583 }
3584
3585 out:
3586 free(clidp, M_TEMP);
3587 NFSEXITCODE2(error, nd);
3588 return (error);
3589 }
3590
3591 /*
3592 * Delegation update. Does the purge and return.
3593 */
3594 int
nfsrv_delegupdate(struct nfsrv_descript * nd,nfsquad_t clientid,nfsv4stateid_t * stateidp,vnode_t vp,int op,struct ucred * cred,NFSPROC_T * p,int * retwriteaccessp)3595 nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
3596 nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
3597 NFSPROC_T *p, int *retwriteaccessp)
3598 {
3599 struct nfsstate *stp;
3600 struct nfsclient *clp;
3601 int error = 0;
3602 fhandle_t fh;
3603
3604 /*
3605 * Do a sanity check against the file handle for DelegReturn.
3606 */
3607 if (vp) {
3608 error = nfsvno_getfh(vp, &fh, p);
3609 if (error)
3610 goto out;
3611 }
3612 /*
3613 * Check for restart conditions (client and server).
3614 */
3615 if (op == NFSV4OP_DELEGRETURN)
3616 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3617 stateidp, 0);
3618 else
3619 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3620 stateidp, 0);
3621
3622 NFSLOCKSTATE();
3623 /*
3624 * Get the open structure via clientid and stateid.
3625 */
3626 if (!error)
3627 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3628 (nfsquad_t)((u_quad_t)0), 0, nd, p);
3629 if (error) {
3630 if (error == NFSERR_CBPATHDOWN)
3631 error = 0;
3632 if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3633 error = NFSERR_STALESTATEID;
3634 }
3635 if (!error && op == NFSV4OP_DELEGRETURN) {
3636 error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3637 if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
3638 ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
3639 error = NFSERR_OLDSTATEID;
3640 }
3641 /*
3642 * NFSERR_EXPIRED means that the state has gone away,
3643 * so Delegations have been purged. Just return ok.
3644 */
3645 if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3646 NFSUNLOCKSTATE();
3647 error = 0;
3648 goto out;
3649 }
3650 if (error) {
3651 NFSUNLOCKSTATE();
3652 goto out;
3653 }
3654
3655 if (op == NFSV4OP_DELEGRETURN) {
3656 if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3657 sizeof (fhandle_t))) {
3658 NFSUNLOCKSTATE();
3659 error = NFSERR_BADSTATEID;
3660 goto out;
3661 }
3662 if (retwriteaccessp != NULL) {
3663 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
3664 *retwriteaccessp = 1;
3665 else
3666 *retwriteaccessp = 0;
3667 }
3668 nfsrv_freedeleg(stp);
3669 } else {
3670 nfsrv_freedeleglist(&clp->lc_olddeleg);
3671 }
3672 NFSUNLOCKSTATE();
3673 error = 0;
3674
3675 out:
3676 NFSEXITCODE(error);
3677 return (error);
3678 }
3679
3680 /*
3681 * Release lock owner.
3682 */
3683 int
nfsrv_releaselckown(struct nfsstate * new_stp,nfsquad_t clientid,NFSPROC_T * p)3684 nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3685 NFSPROC_T *p)
3686 {
3687 struct nfsstate *stp, *nstp, *openstp, *ownstp;
3688 struct nfsclient *clp;
3689 int error = 0;
3690
3691 /*
3692 * Check for restart conditions (client and server).
3693 */
3694 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3695 &new_stp->ls_stateid, 0);
3696 if (error)
3697 goto out;
3698
3699 NFSLOCKSTATE();
3700 /*
3701 * Get the lock owner by name.
3702 */
3703 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3704 (nfsquad_t)((u_quad_t)0), 0, NULL, p);
3705 if (error) {
3706 NFSUNLOCKSTATE();
3707 goto out;
3708 }
3709 LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3710 LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3711 stp = LIST_FIRST(&openstp->ls_open);
3712 while (stp != LIST_END(&openstp->ls_open)) {
3713 nstp = LIST_NEXT(stp, ls_list);
3714 /*
3715 * If the owner matches, check for locks and
3716 * then free or return an error.
3717 */
3718 if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3719 !NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3720 stp->ls_ownerlen)){
3721 if (LIST_EMPTY(&stp->ls_lock)) {
3722 nfsrv_freelockowner(stp, NULL, 0, p);
3723 } else {
3724 NFSUNLOCKSTATE();
3725 error = NFSERR_LOCKSHELD;
3726 goto out;
3727 }
3728 }
3729 stp = nstp;
3730 }
3731 }
3732 }
3733 NFSUNLOCKSTATE();
3734
3735 out:
3736 NFSEXITCODE(error);
3737 return (error);
3738 }
3739
3740 /*
3741 * Get the file handle for a lock structure.
3742 */
3743 static int
nfsrv_getlockfh(vnode_t vp,u_short flags,struct nfslockfile * new_lfp,fhandle_t * nfhp,NFSPROC_T * p)3744 nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
3745 fhandle_t *nfhp, NFSPROC_T *p)
3746 {
3747 fhandle_t *fhp = NULL;
3748 int error;
3749
3750 /*
3751 * For lock, use the new nfslock structure, otherwise just
3752 * a fhandle_t on the stack.
3753 */
3754 if (flags & NFSLCK_OPEN) {
3755 KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
3756 fhp = &new_lfp->lf_fh;
3757 } else if (nfhp) {
3758 fhp = nfhp;
3759 } else {
3760 panic("nfsrv_getlockfh");
3761 }
3762 error = nfsvno_getfh(vp, fhp, p);
3763 NFSEXITCODE(error);
3764 return (error);
3765 }
3766
3767 /*
3768 * Get an nfs lock structure. Allocate one, as required, and return a
3769 * pointer to it.
3770 * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3771 */
3772 static int
nfsrv_getlockfile(u_short flags,struct nfslockfile ** new_lfpp,struct nfslockfile ** lfpp,fhandle_t * nfhp,int lockit)3773 nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3774 struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3775 {
3776 struct nfslockfile *lfp;
3777 fhandle_t *fhp = NULL, *tfhp;
3778 struct nfslockhashhead *hp;
3779 struct nfslockfile *new_lfp = NULL;
3780
3781 /*
3782 * For lock, use the new nfslock structure, otherwise just
3783 * a fhandle_t on the stack.
3784 */
3785 if (flags & NFSLCK_OPEN) {
3786 new_lfp = *new_lfpp;
3787 fhp = &new_lfp->lf_fh;
3788 } else if (nfhp) {
3789 fhp = nfhp;
3790 } else {
3791 panic("nfsrv_getlockfile");
3792 }
3793
3794 hp = NFSLOCKHASH(fhp);
3795 LIST_FOREACH(lfp, hp, lf_hash) {
3796 tfhp = &lfp->lf_fh;
3797 if (NFSVNO_CMPFH(fhp, tfhp)) {
3798 if (lockit)
3799 nfsrv_locklf(lfp);
3800 *lfpp = lfp;
3801 return (0);
3802 }
3803 }
3804 if (!(flags & NFSLCK_OPEN))
3805 return (-1);
3806
3807 /*
3808 * No match, so chain the new one into the list.
3809 */
3810 LIST_INIT(&new_lfp->lf_open);
3811 LIST_INIT(&new_lfp->lf_lock);
3812 LIST_INIT(&new_lfp->lf_deleg);
3813 LIST_INIT(&new_lfp->lf_locallock);
3814 LIST_INIT(&new_lfp->lf_rollback);
3815 new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3816 new_lfp->lf_locallock_lck.nfslock_lock = 0;
3817 new_lfp->lf_usecount = 0;
3818 LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3819 *lfpp = new_lfp;
3820 *new_lfpp = NULL;
3821 return (0);
3822 }
3823
3824 /*
3825 * This function adds a nfslock lock structure to the list for the associated
3826 * nfsstate and nfslockfile structures. It will be inserted after the
3827 * entry pointed at by insert_lop.
3828 */
3829 static void
nfsrv_insertlock(struct nfslock * new_lop,struct nfslock * insert_lop,struct nfsstate * stp,struct nfslockfile * lfp)3830 nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3831 struct nfsstate *stp, struct nfslockfile *lfp)
3832 {
3833 struct nfslock *lop, *nlop;
3834
3835 new_lop->lo_stp = stp;
3836 new_lop->lo_lfp = lfp;
3837
3838 if (stp != NULL) {
3839 /* Insert in increasing lo_first order */
3840 lop = LIST_FIRST(&lfp->lf_lock);
3841 if (lop == LIST_END(&lfp->lf_lock) ||
3842 new_lop->lo_first <= lop->lo_first) {
3843 LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3844 } else {
3845 nlop = LIST_NEXT(lop, lo_lckfile);
3846 while (nlop != LIST_END(&lfp->lf_lock) &&
3847 nlop->lo_first < new_lop->lo_first) {
3848 lop = nlop;
3849 nlop = LIST_NEXT(lop, lo_lckfile);
3850 }
3851 LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3852 }
3853 } else {
3854 new_lop->lo_lckfile.le_prev = NULL; /* list not used */
3855 }
3856
3857 /*
3858 * Insert after insert_lop, which is overloaded as stp or lfp for
3859 * an empty list.
3860 */
3861 if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3862 LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3863 else if ((struct nfsstate *)insert_lop == stp)
3864 LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3865 else
3866 LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3867 if (stp != NULL) {
3868 NFSD_VNET(nfsstatsv1_p)->srvlocks++;
3869 nfsrv_openpluslock++;
3870 }
3871 }
3872
/*
 * This function updates the locking for a lock owner and given file. It
 * maintains a list of lock ranges ordered on increasing file offset that
 * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
 * For a lock request new_lop is always inserted in the list.  For an
 * unlock request new_lop is only inserted when it is reused as the upper
 * half of a split because no spare structure was supplied.
 *
 * stp - lock owner state whose ls_lock list is updated or NULL, in which
 *	case the lf_locallock list of lfp is updated instead.
 * new_lopp - *new_lopp is the new lock/unlock range.  It is set to NULL
 *	whenever that structure has been linked into the list.
 * other_lopp - *other_lopp is a spare structure used for the upper half
 *	when an existing lock is split.  It may be NULL only for an unlock
 *	and *other_lopp is set to NULL whenever a split happens.
 * lfp - the file the range applies to.
 */
static void
nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
    struct nfslock **other_lopp, struct nfslockfile *lfp)
{
	struct nfslock *new_lop = *new_lopp;
	struct nfslock *lop, *tlop, *ilop;
	struct nfslock *other_lop = *other_lopp;
	int unlock = 0, myfile = 0;
	u_int64_t tmp;

	/*
	 * Work down the list until the lock is merged.
	 */
	if (new_lop->lo_flags & NFSLCK_UNLOCK)
		unlock = 1;
	/*
	 * ilop is the entry the new lock will be inserted after.  It starts
	 * out as the list owner (stp or lfp cast to a lock pointer), which
	 * nfsrv_insertlock() treats as "insert at the head of the list".
	 */
	if (stp != NULL) {
		ilop = (struct nfslock *)stp;
		lop = LIST_FIRST(&stp->ls_lock);
	} else {
		ilop = (struct nfslock *)lfp;
		lop = LIST_FIRST(&lfp->lf_locallock);
	}
	while (lop != NULL) {
		/*
		 * Only check locks for this file that aren't before the
		 * start of new lock's range.
		 */
		if (lop->lo_lfp == lfp) {
			/* Note that locks for this file have been reached. */
			myfile = 1;
			if (lop->lo_end >= new_lop->lo_first) {
				if (new_lop->lo_end < lop->lo_first) {
					/*
					 * If the new lock ends before the
					 * start of the current lock's range,
					 * no merge, just insert the new lock.
					 */
					break;
				}
				if (new_lop->lo_flags == lop->lo_flags ||
				    (new_lop->lo_first <= lop->lo_first &&
				     new_lop->lo_end >= lop->lo_end)) {
					/*
					 * This lock can be absorbed by the
					 * new lock/unlock.  This happens when
					 * it covers the entire range of the
					 * old lock or is contiguous with the
					 * old lock and is of the same type or
					 * an unlock.
					 * Grow the new range to the union of
					 * both ranges and free the old entry.
					 * ilop is left alone, since the entry
					 * it refers to is still in the list.
					 */
					if (lop->lo_first < new_lop->lo_first)
						new_lop->lo_first = lop->lo_first;
					if (lop->lo_end > new_lop->lo_end)
						new_lop->lo_end = lop->lo_end;
					/* Step past lop before it is freed. */
					tlop = lop;
					lop = LIST_NEXT(lop, lo_lckowner);
					nfsrv_freenfslock(tlop);
					continue;
				}

				/*
				 * All these cases are for contiguous locks
				 * that are not the same type, so they can't
				 * be merged.
				 */
				if (new_lop->lo_first <= lop->lo_first) {
					/*
					 * This case is where the new lock
					 * overlaps with the first part of the
					 * old lock.  Move the start of the
					 * old lock to just past the end of
					 * the new lock.  The new lock will be
					 * inserted in front of the old, since
					 * ilop hasn't been updated.
					 * (We are done now.)
					 */
					lop->lo_first = new_lop->lo_end;
					break;
				}
				if (new_lop->lo_end >= lop->lo_end) {
					/*
					 * This case is where the new lock
					 * overlaps with the end of the old
					 * lock's range.  Move the old lock's
					 * end to just before the new lock's
					 * first and insert the new lock after
					 * the old lock.
					 * Might not be done yet, since the
					 * new lock could overlap further
					 * locks with higher ranges.
					 */
					lop->lo_end = new_lop->lo_first;
					ilop = lop;
					lop = LIST_NEXT(lop, lo_lckowner);
					continue;
				}
				/*
				 * The final case is where the new lock's
				 * range is in the middle of the current
				 * lock's and splits the current lock up.
				 * Use *other_lopp to handle the second part
				 * of the split old lock range.
				 * (We are done now.)
				 * For unlock, we use new_lop as other_lop and
				 * tmp, since other_lop and new_lop are the
				 * same for this case.
				 * We noted the unlock case above, so we don't
				 * need new_lop->lo_flags any longer.
				 */
				/*
				 * Save the start of the new range first,
				 * since new_lop may be overwritten below when
				 * it doubles as other_lop.
				 */
				tmp = new_lop->lo_first;
				if (other_lop == NULL) {
					if (!unlock)
						panic("nfsd srv update unlock");
					other_lop = new_lop;
					*new_lopp = NULL;
				}
				/*
				 * other_lop becomes the upper remainder of
				 * the old lock and lop is cut back to the
				 * lower remainder.
				 */
				other_lop->lo_first = new_lop->lo_end;
				other_lop->lo_end = lop->lo_end;
				other_lop->lo_flags = lop->lo_flags;
				other_lop->lo_stp = stp;
				other_lop->lo_lfp = lfp;
				lop->lo_end = tmp;
				nfsrv_insertlock(other_lop, lop, stp, lfp);
				*other_lopp = NULL;
				/* A new lock goes between the two halves. */
				ilop = lop;
				break;
			}
		}
		/* No overlap with this entry, so step to the next one. */
		ilop = lop;
		lop = LIST_NEXT(lop, lo_lckowner);
		/*
		 * Stop once the entries for this file have been passed.
		 */
		if (myfile && (lop == NULL || lop->lo_lfp != lfp))
			break;
	}

	/*
	 * Insert the new lock in the list at the appropriate place.
	 * (An unlock only removes ranges, so nothing is inserted for it.)
	 */
	if (!unlock) {
		nfsrv_insertlock(new_lop, ilop, stp, lfp);
		*new_lopp = NULL;
	}
}
4010
4011 /*
4012 * This function handles sequencing of locks, etc.
4013 * It returns an error that indicates what the caller should do.
4014 */
4015 static int
nfsrv_checkseqid(struct nfsrv_descript * nd,u_int32_t seqid,struct nfsstate * stp,struct nfsrvcache * op)4016 nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
4017 struct nfsstate *stp, struct nfsrvcache *op)
4018 {
4019 int error = 0;
4020
4021 if ((nd->nd_flag & ND_NFSV41) != 0)
4022 /* NFSv4.1 ignores the open_seqid and lock_seqid. */
4023 goto out;
4024 if (op != nd->nd_rp)
4025 panic("nfsrvstate checkseqid");
4026 if (!(op->rc_flag & RC_INPROG))
4027 panic("nfsrvstate not inprog");
4028 if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
4029 printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
4030 panic("nfsrvstate op refcnt");
4031 }
4032
4033 /* If ND_ERELOOKUP is set, the seqid has already been handled. */
4034 if ((nd->nd_flag & ND_ERELOOKUP) != 0)
4035 goto out;
4036
4037 if ((stp->ls_seq + 1) == seqid) {
4038 if (stp->ls_op)
4039 nfsrvd_derefcache(stp->ls_op);
4040 stp->ls_op = op;
4041 nfsrvd_refcache(op);
4042 stp->ls_seq = seqid;
4043 goto out;
4044 } else if (stp->ls_seq == seqid && stp->ls_op &&
4045 op->rc_xid == stp->ls_op->rc_xid &&
4046 op->rc_refcnt == 0 &&
4047 op->rc_reqlen == stp->ls_op->rc_reqlen &&
4048 op->rc_cksum == stp->ls_op->rc_cksum) {
4049 if (stp->ls_op->rc_flag & RC_INPROG) {
4050 error = NFSERR_DONTREPLY;
4051 goto out;
4052 }
4053 nd->nd_rp = stp->ls_op;
4054 nd->nd_rp->rc_flag |= RC_INPROG;
4055 nfsrvd_delcache(op);
4056 error = NFSERR_REPLYFROMCACHE;
4057 goto out;
4058 }
4059 error = NFSERR_BADSEQID;
4060
4061 out:
4062 NFSEXITCODE2(error, nd);
4063 return (error);
4064 }
4065
4066 /*
4067 * Get the client ip address for callbacks. If the strings can't be parsed,
4068 * just set lc_program to 0 to indicate no callbacks are possible.
4069 * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
4070 * the address to the client's transport address. This won't be used
4071 * for callbacks, but can be printed out by nfsstats for info.)
4072 * Return error if the xdr can't be parsed, 0 otherwise.
4073 */
4074 int
nfsrv_getclientipaddr(struct nfsrv_descript * nd,struct nfsclient * clp)4075 nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
4076 {
4077 u_int32_t *tl;
4078 u_char *cp, *cp2;
4079 int i, j, maxalen = 0, minalen = 0;
4080 sa_family_t af;
4081 #ifdef INET
4082 struct sockaddr_in *rin = NULL, *sin;
4083 #endif
4084 #ifdef INET6
4085 struct sockaddr_in6 *rin6 = NULL, *sin6;
4086 #endif
4087 u_char *addr;
4088 int error = 0, cantparse = 0;
4089 union {
4090 in_addr_t ival;
4091 u_char cval[4];
4092 } ip;
4093 union {
4094 in_port_t sval;
4095 u_char cval[2];
4096 } port;
4097
4098 /* 8 is the maximum length of the port# string. */
4099 addr = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK);
4100 clp->lc_req.nr_client = NULL;
4101 clp->lc_req.nr_lock = 0;
4102 af = AF_UNSPEC;
4103 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4104 i = fxdr_unsigned(int, *tl);
4105 if (i >= 3 && i <= 4) {
4106 error = nfsrv_mtostr(nd, addr, i);
4107 if (error)
4108 goto nfsmout;
4109 #ifdef INET
4110 if (!strcmp(addr, "tcp")) {
4111 clp->lc_flags |= LCL_TCPCALLBACK;
4112 clp->lc_req.nr_sotype = SOCK_STREAM;
4113 clp->lc_req.nr_soproto = IPPROTO_TCP;
4114 af = AF_INET;
4115 } else if (!strcmp(addr, "udp")) {
4116 clp->lc_req.nr_sotype = SOCK_DGRAM;
4117 clp->lc_req.nr_soproto = IPPROTO_UDP;
4118 af = AF_INET;
4119 }
4120 #endif
4121 #ifdef INET6
4122 if (af == AF_UNSPEC) {
4123 if (!strcmp(addr, "tcp6")) {
4124 clp->lc_flags |= LCL_TCPCALLBACK;
4125 clp->lc_req.nr_sotype = SOCK_STREAM;
4126 clp->lc_req.nr_soproto = IPPROTO_TCP;
4127 af = AF_INET6;
4128 } else if (!strcmp(addr, "udp6")) {
4129 clp->lc_req.nr_sotype = SOCK_DGRAM;
4130 clp->lc_req.nr_soproto = IPPROTO_UDP;
4131 af = AF_INET6;
4132 }
4133 }
4134 #endif
4135 if (af == AF_UNSPEC) {
4136 cantparse = 1;
4137 }
4138 } else {
4139 cantparse = 1;
4140 if (i > 0) {
4141 error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
4142 if (error)
4143 goto nfsmout;
4144 }
4145 }
4146 /*
4147 * The caller has allocated clp->lc_req.nr_nam to be large enough
4148 * for either AF_INET or AF_INET6 and zeroed out the contents.
4149 * maxalen is set to the maximum length of the host IP address string
4150 * plus 8 for the maximum length of the port#.
4151 * minalen is set to the minimum length of the host IP address string
4152 * plus 4 for the minimum length of the port#.
4153 * These lengths do not include NULL termination,
4154 * so INET[6]_ADDRSTRLEN - 1 is used in the calculations.
4155 */
4156 switch (af) {
4157 #ifdef INET
4158 case AF_INET:
4159 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
4160 rin->sin_family = AF_INET;
4161 rin->sin_len = sizeof(struct sockaddr_in);
4162 maxalen = INET_ADDRSTRLEN - 1 + 8;
4163 minalen = 7 + 4;
4164 break;
4165 #endif
4166 #ifdef INET6
4167 case AF_INET6:
4168 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
4169 rin6->sin6_family = AF_INET6;
4170 rin6->sin6_len = sizeof(struct sockaddr_in6);
4171 maxalen = INET6_ADDRSTRLEN - 1 + 8;
4172 minalen = 3 + 4;
4173 break;
4174 #endif
4175 }
4176 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4177 i = fxdr_unsigned(int, *tl);
4178 if (i < 0) {
4179 error = NFSERR_BADXDR;
4180 goto nfsmout;
4181 } else if (i == 0) {
4182 cantparse = 1;
4183 } else if (!cantparse && i <= maxalen && i >= minalen) {
4184 error = nfsrv_mtostr(nd, addr, i);
4185 if (error)
4186 goto nfsmout;
4187
4188 /*
4189 * Parse out the address fields. We expect 6 decimal numbers
4190 * separated by '.'s for AF_INET and two decimal numbers
4191 * preceeded by '.'s for AF_INET6.
4192 */
4193 cp = NULL;
4194 switch (af) {
4195 #ifdef INET6
4196 /*
4197 * For AF_INET6, first parse the host address.
4198 */
4199 case AF_INET6:
4200 cp = strchr(addr, '.');
4201 if (cp != NULL) {
4202 *cp++ = '\0';
4203 if (inet_pton(af, addr, &rin6->sin6_addr) == 1)
4204 i = 4;
4205 else {
4206 cp = NULL;
4207 cantparse = 1;
4208 }
4209 }
4210 break;
4211 #endif
4212 #ifdef INET
4213 case AF_INET:
4214 cp = addr;
4215 i = 0;
4216 break;
4217 #endif
4218 }
4219 while (cp != NULL && *cp && i < 6) {
4220 cp2 = cp;
4221 while (*cp2 && *cp2 != '.')
4222 cp2++;
4223 if (*cp2)
4224 *cp2++ = '\0';
4225 else if (i != 5) {
4226 cantparse = 1;
4227 break;
4228 }
4229 j = nfsrv_getipnumber(cp);
4230 if (j >= 0) {
4231 if (i < 4)
4232 ip.cval[3 - i] = j;
4233 else
4234 port.cval[5 - i] = j;
4235 } else {
4236 cantparse = 1;
4237 break;
4238 }
4239 cp = cp2;
4240 i++;
4241 }
4242 if (!cantparse) {
4243 /*
4244 * The host address INADDR_ANY is (mis)used to indicate
4245 * "there is no valid callback address".
4246 */
4247 switch (af) {
4248 #ifdef INET6
4249 case AF_INET6:
4250 if (!IN6_ARE_ADDR_EQUAL(&rin6->sin6_addr,
4251 &in6addr_any))
4252 rin6->sin6_port = htons(port.sval);
4253 else
4254 cantparse = 1;
4255 break;
4256 #endif
4257 #ifdef INET
4258 case AF_INET:
4259 if (ip.ival != INADDR_ANY) {
4260 rin->sin_addr.s_addr = htonl(ip.ival);
4261 rin->sin_port = htons(port.sval);
4262 } else {
4263 cantparse = 1;
4264 }
4265 break;
4266 #endif
4267 }
4268 }
4269 } else {
4270 cantparse = 1;
4271 if (i > 0) {
4272 error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
4273 if (error)
4274 goto nfsmout;
4275 }
4276 }
4277 if (cantparse) {
4278 switch (nd->nd_nam->sa_family) {
4279 #ifdef INET
4280 case AF_INET:
4281 sin = (struct sockaddr_in *)nd->nd_nam;
4282 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
4283 rin->sin_family = AF_INET;
4284 rin->sin_len = sizeof(struct sockaddr_in);
4285 rin->sin_addr.s_addr = sin->sin_addr.s_addr;
4286 rin->sin_port = 0x0;
4287 break;
4288 #endif
4289 #ifdef INET6
4290 case AF_INET6:
4291 sin6 = (struct sockaddr_in6 *)nd->nd_nam;
4292 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
4293 rin6->sin6_family = AF_INET6;
4294 rin6->sin6_len = sizeof(struct sockaddr_in6);
4295 rin6->sin6_addr = sin6->sin6_addr;
4296 rin6->sin6_port = 0x0;
4297 break;
4298 #endif
4299 }
4300 clp->lc_program = 0;
4301 }
4302 nfsmout:
4303 free(addr, M_TEMP);
4304 NFSEXITCODE2(error, nd);
4305 return (error);
4306 }
4307
/*
 * Turn a string of one to three decimal digits into a number in the
 * range 0-255.
 *
 * cp - the NUL terminated string to convert.
 *
 * Returns the value, or -1 upon error.  An empty string, a string with
 * more than three characters, any character that is not a decimal digit
 * and a value above 255 are all errors.
 */
static int
nfsrv_getipnumber(u_char *cp)
{
	int val = 0, ndigits = 0;

	/* An empty field is not a number. */
	if (*cp == '\0')
		return (-1);
	for (; *cp != '\0'; cp++) {
		if (ndigits >= 3 || *cp < '0' || *cp > '9')
			return (-1);
		val = val * 10 + (*cp - '0');
		ndigits++;
	}
	if (val < 256)
		return (val);
	return (-1);
}
4329
4330 /*
4331 * This function checks for restart conditions.
4332 */
4333 static int
nfsrv_checkrestart(nfsquad_t clientid,u_int32_t flags,nfsv4stateid_t * stateidp,int specialid)4334 nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
4335 nfsv4stateid_t *stateidp, int specialid)
4336 {
4337 int ret = 0;
4338
4339 /*
4340 * First check for a server restart. Open, LockT, ReleaseLockOwner
4341 * and DelegPurge have a clientid, the rest a stateid.
4342 */
4343 if (flags &
4344 (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
4345 if (clientid.lval[0] != NFSD_VNET(nfsrvboottime)) {
4346 ret = NFSERR_STALECLIENTID;
4347 goto out;
4348 }
4349 } else if (stateidp->other[0] != NFSD_VNET(nfsrvboottime) &&
4350 specialid == 0) {
4351 ret = NFSERR_STALESTATEID;
4352 goto out;
4353 }
4354
4355 /*
4356 * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
4357 * not use a lock/open owner seqid#, so the check can be done now.
4358 * (The others will be checked, as required, later.)
4359 */
4360 if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
4361 goto out;
4362
4363 NFSLOCKSTATE();
4364 ret = nfsrv_checkgrace(NULL, NULL, flags);
4365 NFSUNLOCKSTATE();
4366
4367 out:
4368 NFSEXITCODE(ret);
4369 return (ret);
4370 }
4371
4372 /*
4373 * Check for grace.
4374 */
4375 static int
nfsrv_checkgrace(struct nfsrv_descript * nd,struct nfsclient * clp,u_int32_t flags)4376 nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
4377 u_int32_t flags)
4378 {
4379 int error = 0, notreclaimed;
4380 struct nfsrv_stable *sp;
4381
4382 if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags & (NFSNSF_UPDATEDONE |
4383 NFSNSF_GRACEOVER)) == 0) {
4384 /*
4385 * First, check to see if all of the clients have done a
4386 * ReclaimComplete. If so, grace can end now.
4387 */
4388 notreclaimed = 0;
4389 if (!NFSD_VNET(nfsd_disable_grace)) {
4390 LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head,
4391 nst_list) {
4392 if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
4393 notreclaimed = 1;
4394 break;
4395 }
4396 }
4397 }
4398 if (notreclaimed == 0)
4399 NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
4400 (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4401 }
4402
4403 if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_GRACEOVER) != 0) {
4404 if (flags & NFSLCK_RECLAIM) {
4405 error = NFSERR_NOGRACE;
4406 goto out;
4407 }
4408 } else {
4409 if (!(flags & NFSLCK_RECLAIM)) {
4410 error = NFSERR_GRACE;
4411 goto out;
4412 }
4413 if (nd != NULL && clp != NULL &&
4414 (nd->nd_flag & ND_NFSV41) != 0 &&
4415 (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
4416 error = NFSERR_NOGRACE;
4417 goto out;
4418 }
4419
4420 /*
4421 * If grace is almost over and we are still getting Reclaims,
4422 * extend grace a bit.
4423 */
4424 if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
4425 NFSD_VNET(nfsrv_stablefirst).nsf_eograce)
4426 NFSD_VNET(nfsrv_stablefirst).nsf_eograce =
4427 NFSD_MONOSEC + NFSRV_LEASEDELTA;
4428 }
4429
4430 out:
4431 NFSEXITCODE(error);
4432 return (error);
4433 }
4434
/*
 * Do a server callback.
 * The "trunc" argument is slightly overloaded and refers to different
 * boolean arguments for CBRECALL and CBLAYOUTRECALL.
 *
 * clp - the client to call back.  It must not have LCL_NEEDSCONFIRM set.
 * procnum - NFSV4OP_CBGETATTR, NFSV4OP_CBRECALL, NFSV4OP_CBLAYOUTRECALL or
 *	NFSV4PROC_CBNULL.  Anything else fails with NFSERR_SERVERFAULT.
 * stateidp - stateid put in the CBRECALL and CBLAYOUTRECALL arguments.
 * trunc - boolean put in the CBRECALL and CBLAYOUTRECALL arguments.
 * fhp - file handle put in the arguments of all but CBNULL.
 * nap - filled in from the reply for CBGETATTR.
 * attrbitp - attribute bits requested by CBGETATTR.
 * laytype - layout type for CBLAYOUTRECALL.
 * p - passed through to nfsv4_loadattr().
 *
 * Returns 0 upon success.  Otherwise it is the error from setting up or
 * sending the request, the status the client replied with (nd_repstat) or
 * the error from parsing the CBGETATTR reply.
 */
static int
nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
    int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp,
    int laytype, NFSPROC_T *p)
{
	struct mbuf *m;
	u_int32_t *tl;
	struct nfsrv_descript *nd;
	struct ucred *cred;
	int error = 0, slotpos;
	u_int32_t callback;
	struct nfsdsession *sep = NULL;
	uint64_t tval;
	bool dotls;

	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	cred = newnfs_getcred();
	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
		NFSUNLOCKSTATE();
		panic("docallb");
	}
	/*
	 * Count this callback as in progress.  The count is dropped again
	 * at the end of the function, where a waiter is woken up if it has
	 * set LCL_WAKEUPWANTED.
	 */
	clp->lc_cbref++;

	/*
	 * Fill the callback program# and version into the request
	 * structure for newnfs_connect() to use.
	 */
	clp->lc_req.nr_prog = clp->lc_program;
#ifdef notnow
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		clp->lc_req.nr_vers = NFSV41_CBVERS;
	else
#endif
		clp->lc_req.nr_vers = NFSV4_CBVERS;

	/*
	 * First, fill in some of the fields of nd and cr.
	 */
	nd->nd_flag = ND_NFSV4;
	if (clp->lc_flags & LCL_GSS)
		nd->nd_flag |= ND_KERBV;
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		nd->nd_flag |= ND_NFSV41;
	if ((clp->lc_flags & LCL_NFSV42) != 0)
		nd->nd_flag |= ND_NFSV42;
	nd->nd_repstat = 0;
	cred->cr_uid = clp->lc_uid;
	cred->cr_gid = clp->lc_gid;
	callback = clp->lc_callback;
	NFSUNLOCKSTATE();
	cred->cr_ngroups = 1;

	/*
	 * Get the first mbuf for the request.
	 */
	MGET(m, M_WAITOK, MT_DATA);
	m->m_len = 0;
	nd->nd_mreq = nd->nd_mb = m;
	nd->nd_bpos = mtod(m, caddr_t);

	/*
	 * and build the callback request.
	 * Every failure below frees the request mbuf chain before
	 * jumping to errout.
	 */
	if (procnum == NFSV4OP_CBGETATTR) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
		    "CB Getattr", &sep, &slotpos);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		/* Arguments: file handle, then the attribute bits wanted. */
		(void)nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
		(void)nfsrv_putattrbit(nd, attrbitp);
	} else if (procnum == NFSV4OP_CBRECALL) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
		    "CB Recall", &sep, &slotpos);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		/* Arguments: stateid, the "trunc" boolean, file handle. */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
		    NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		if (trunc)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
		(void)nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
	} else if (procnum == NFSV4OP_CBLAYOUTRECALL) {
		NFSD_DEBUG(4, "docallback layout recall\n");
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback,
		    NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep, &slotpos);
		NFSD_DEBUG(4, "aft cbcallargs=%d\n", error);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		/*
		 * Arguments: layout type, iomode, the "trunc" boolean and
		 * the recall type, followed by the file handle, an
		 * offset/length pair of 0/UINT64_MAX and the stateid.
		 */
		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(laytype);
		*tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY);
		if (trunc)
			*tl++ = newnfs_true;
		else
			*tl++ = newnfs_false;
		*tl = txdr_unsigned(NFSV4LAYOUTRET_FILE);
		(void)nfsm_fhtom(NULL, nd, (uint8_t *)fhp, NFSX_MYFH, 0);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID);
		tval = 0;
		txdr_hyper(tval, tl); tl += 2;
		tval = UINT64_MAX;
		txdr_hyper(tval, tl); tl += 2;
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		NFSD_DEBUG(4, "aft args\n");
	} else if (procnum == NFSV4PROC_CBNULL) {
		/*
		 * A Null RPC has no arguments, but for NFSv4.1 the session
		 * is still needed to send it.
		 */
		nd->nd_procnum = NFSV4PROC_CBNULL;
		if ((clp->lc_flags & LCL_NFSV41) != 0) {
			error = nfsv4_getcbsession(clp, &sep);
			if (error != 0) {
				m_freem(nd->nd_mreq);
				goto errout;
			}
		}
	} else {
		error = NFSERR_SERVERFAULT;
		m_freem(nd->nd_mreq);
		goto errout;
	}

	/*
	 * Call newnfs_connect(), as required, and then newnfs_request().
	 */
	dotls = false;
	if ((clp->lc_flags & LCL_TLSCB) != 0)
		dotls = true;
	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
	if (clp->lc_req.nr_client == NULL) {
		if ((clp->lc_flags & LCL_NFSV41) != 0) {
			/*
			 * No connection is made here for NFSv4.1.  Give the
			 * session slot (if one was allocated) and the
			 * session reference back and fail.
			 */
			error = ECONNREFUSED;
			if (procnum != NFSV4PROC_CBNULL)
				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
				    true);
			nfsrv_freesession(NULL, sep, NULL, false, NULL);
		} else if (nd->nd_procnum == NFSV4PROC_CBNULL)
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 1, dotls, &clp->lc_req.nr_client);
		else
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 3, dotls, &clp->lc_req.nr_client);
	}
	newnfs_sndunlock(&clp->lc_req.nr_lock);
	NFSD_DEBUG(4, "aft sndunlock=%d\n", error);
	/*
	 * NOTE(review): when error is set at this point the request mbuf
	 * chain built above is never handed to newnfs_request() and is not
	 * freed anywhere in this function -- confirm whether it is leaked.
	 */
	if (!error) {
		if ((nd->nd_flag & ND_NFSV41) != 0) {
			KASSERT(sep != NULL, ("sep NULL"));
			if (sep->sess_cbsess.nfsess_xprt != NULL)
				error = newnfs_request(nd, NULL, clp,
				    &clp->lc_req, NULL, NULL, cred,
				    clp->lc_program, clp->lc_req.nr_vers, NULL,
				    1, NULL, &sep->sess_cbsess);
			else {
				/*
				 * This should probably never occur, but if a
				 * client somehow does an RPC without a
				 * SequenceID Op that causes a callback just
				 * after the nfsd threads have been terminated
				 * and restarted we could conceivably get here
				 * without a backchannel xprt.
				 */
				printf("nfsrv_docallback: no xprt\n");
				error = ECONNREFUSED;
			}
			NFSD_DEBUG(4, "aft newnfs_request=%d\n", error);
			if (error != 0 && procnum != NFSV4PROC_CBNULL) {
				/*
				 * It is likely that the callback was never
				 * processed by the client and, as such,
				 * the sequence# for the session slot needs
				 * to be backed up by one to avoid a
				 * NFSERR_SEQMISORDERED error reply.
				 * For the unlikely case where the callback
				 * was processed by the client, this will
				 * make the next callback on the slot
				 * appear to be a retry.
				 * Since callbacks never specify that the
				 * reply be cached, this "apparent retry"
				 * should not be a problem.
				 */
				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
				    true);
			}
			nfsrv_freesession(NULL, sep, NULL, false, NULL);
		} else
			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
			    NULL, NULL, cred, clp->lc_program,
			    clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
	}
errout:
	NFSFREECRED(cred);

	/*
	 * If error is set here, the Callback path isn't working
	 * properly, so twiddle the appropriate LCL_ flags.
	 * (nd_repstat != 0 indicates the Callback path is working,
	 *  but the callback failed on the client.)
	 */
	if (error) {
		/*
		 * Mark the callback pathway down, which disabled issuing
		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
		 */
		NFSLOCKSTATE();
		clp->lc_flags |= LCL_CBDOWN;
		NFSUNLOCKSTATE();
	} else {
		/*
		 * Callback worked. If the callback path was down, disable
		 * callbacks, so no more delegations will be issued. (This
		 * is done on the assumption that the callback pathway is
		 * flakey.)
		 */
		NFSLOCKSTATE();
		if (clp->lc_flags & LCL_CBDOWN)
			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
		NFSUNLOCKSTATE();
		if (nd->nd_repstat) {
			/* The client replied, but with an error status. */
			error = nd->nd_repstat;
			NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n",
			    procnum, error);
		} else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
			/* Parse the attributes in the reply into *nap. */
			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
			    p, NULL);
		m_freem(nd->nd_mrep);
	}
	/*
	 * Drop the in progress count taken at the top and wake up anyone
	 * waiting for the last callback on this client to finish.
	 */
	NFSLOCKSTATE();
	clp->lc_cbref--;
	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
		clp->lc_flags &= ~LCL_WAKEUPWANTED;
		wakeup(clp);
	}
	NFSUNLOCKSTATE();

	free(nd, M_TEMP);
	NFSEXITCODE(error);
	return (error);
}
4693
4694 /*
4695 * Set up the compound RPC for the callback.
4696 */
4697 static int
nfsrv_cbcallargs(struct nfsrv_descript * nd,struct nfsclient * clp,uint32_t callback,int op,const char * optag,struct nfsdsession ** sepp,int * slotposp)4698 nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
4699 uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
4700 int *slotposp)
4701 {
4702 uint32_t *tl;
4703 int error, len;
4704
4705 len = strlen(optag);
4706 (void)nfsm_strtom(nd, optag, len);
4707 NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4708 if ((nd->nd_flag & ND_NFSV41) != 0) {
4709 if ((nd->nd_flag & ND_NFSV42) != 0)
4710 *tl++ = txdr_unsigned(NFSV42_MINORVERSION);
4711 else
4712 *tl++ = txdr_unsigned(NFSV41_MINORVERSION);
4713 *tl++ = txdr_unsigned(callback);
4714 *tl++ = txdr_unsigned(2);
4715 *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
4716 error = nfsv4_setcbsequence(nd, clp, 1, sepp, slotposp);
4717 if (error != 0)
4718 return (error);
4719 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4720 *tl = txdr_unsigned(op);
4721 } else {
4722 *tl++ = txdr_unsigned(NFSV4_MINORVERSION);
4723 *tl++ = txdr_unsigned(callback);
4724 *tl++ = txdr_unsigned(1);
4725 *tl = txdr_unsigned(op);
4726 }
4727 return (0);
4728 }
4729
/*
 * Return the next index# for a clientid.  This is a simple counter that
 * starts at 1.  Should the 32bit unsigned ever wrap around to 0, a
 * message is printed and 0 is returned, at which point the server
 * should be rebooted.
 * At an average rate of one new client per second, it will wrap around in
 * approximately 136 years. (I think the server will have been shut
 * down or rebooted before then.)
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t client_index = 0;

	if (++client_index == 0)
		printf("%s: out of clientids\n", __func__);
	return (client_index);
}
4750
4751 /*
4752 * Return the next index# for a stateid. Mostly just increment and return
4753 * the next one, but... if the 32bit unsigned does actually wrap around
4754 * (will a BSD server stay up that long?), find
4755 * new start and end values.
4756 */
4757 static u_int32_t
nfsrv_nextstateindex(struct nfsclient * clp)4758 nfsrv_nextstateindex(struct nfsclient *clp)
4759 {
4760 struct nfsstate *stp;
4761 int i;
4762 u_int32_t canuse, min_index, max_index;
4763
4764 if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
4765 clp->lc_stateindex++;
4766 if (clp->lc_stateindex != clp->lc_statemaxindex)
4767 return (clp->lc_stateindex);
4768 }
4769
4770 /*
4771 * Yuck, we've hit the end.
4772 * Look for a new min and max.
4773 */
4774 min_index = 0;
4775 max_index = 0xffffffff;
4776 for (i = 0; i < nfsrv_statehashsize; i++) {
4777 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4778 if (stp->ls_stateid.other[2] > 0x80000000) {
4779 if (stp->ls_stateid.other[2] < max_index)
4780 max_index = stp->ls_stateid.other[2];
4781 } else {
4782 if (stp->ls_stateid.other[2] > min_index)
4783 min_index = stp->ls_stateid.other[2];
4784 }
4785 }
4786 }
4787
4788 /*
4789 * Yikes, highly unlikely, but I'll handle it anyhow.
4790 */
4791 if (min_index == 0x80000000 && max_index == 0x80000001) {
4792 canuse = 0;
4793 /*
4794 * Loop around until we find an unused entry. Return that
4795 * and set LCL_INDEXNOTOK, so the search will continue next time.
4796 * (This is one of those rare cases where a goto is the
4797 * cleanest way to code the loop.)
4798 */
4799 tryagain:
4800 for (i = 0; i < nfsrv_statehashsize; i++) {
4801 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4802 if (stp->ls_stateid.other[2] == canuse) {
4803 canuse++;
4804 goto tryagain;
4805 }
4806 }
4807 }
4808 clp->lc_flags |= LCL_INDEXNOTOK;
4809 return (canuse);
4810 }
4811
4812 /*
4813 * Ok to start again from min + 1.
4814 */
4815 clp->lc_stateindex = min_index + 1;
4816 clp->lc_statemaxindex = max_index;
4817 clp->lc_flags &= ~LCL_INDEXNOTOK;
4818 return (clp->lc_stateindex);
4819 }
4820
4821 /*
4822 * The following functions handle the stable storage file that deals with
4823 * the edge conditions described in RFC3530 Sec. 8.6.3.
4824 * The file is as follows:
4825 * - a single record at the beginning that has the lease time of the
4826 * previous server instance (before the last reboot) and the nfsrvboottime
4827 * values for the previous server boots.
4828 * These previous boot times are used to ensure that the current
4829 * nfsrvboottime does not, somehow, get set to a previous one.
4830 * (This is important so that Stale ClientIDs and StateIDs can
4831 * be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
4833 * - followed by some number of appended records with:
4834 * - client id string
4835 * - flag that indicates it is a record revoking state via lease
4836 * expiration or similar
4837 * OR has successfully acquired state.
4838 * These structures vary in length, with the client string at the end, up
4839 * to NFSV4_OPAQUELIMIT in size.
4840 *
4841 * At the end of the grace period, the file is truncated, the first
4842 * record is rewritten with updated information and any acquired state
4843 * records for successful reclaims of state are written.
4844 *
4845 * Subsequent records are appended when the first state is issued to
4846 * a client and when state is revoked for a client.
4847 *
 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological order.
4850 * If, for some reason, the file can't be read, the grace period is
4851 * immediately terminated and all reclaims get NFSERR_NOGRACE.
4852 */
4853
4854 /*
4855 * Read in the stable storage file. Called by nfssvc() before the nfsd
4856 * processes start servicing requests.
4857 */
4858 void
nfsrv_setupstable(NFSPROC_T * p)4859 nfsrv_setupstable(NFSPROC_T *p)
4860 {
4861 struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst);
4862 struct nfsrv_stable *sp, *nsp;
4863 struct nfst_rec *tsp;
4864 int error, i, tryagain;
4865 off_t off = 0;
4866 ssize_t aresid, len;
4867
4868 /*
4869 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
4870 * a reboot, so state has not been lost.
4871 */
4872 if (sf->nsf_flags & NFSNSF_UPDATEDONE)
4873 return;
4874 /*
4875 * Set Grace over just until the file reads successfully.
4876 */
4877 NFSD_VNET(nfsrvboottime) = time_second;
4878 LIST_INIT(&sf->nsf_head);
4879 sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4880 sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
4881 if (sf->nsf_fp == NULL)
4882 return;
4883 error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4884 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
4885 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4886 if (error || aresid || sf->nsf_numboots == 0 ||
4887 sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
4888 return;
4889
4890 /*
4891 * Now, read in the boottimes.
4892 */
4893 sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
4894 sizeof(time_t), M_TEMP, M_WAITOK);
4895 off = sizeof (struct nfsf_rec);
4896 error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4897 (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
4898 UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4899 if (error || aresid) {
4900 free(sf->nsf_bootvals, M_TEMP);
4901 sf->nsf_bootvals = NULL;
4902 return;
4903 }
4904
4905 /*
4906 * Make sure this nfsrvboottime is different from all recorded
4907 * previous ones.
4908 */
4909 do {
4910 tryagain = 0;
4911 for (i = 0; i < sf->nsf_numboots; i++) {
4912 if (NFSD_VNET(nfsrvboottime) == sf->nsf_bootvals[i]) {
4913 NFSD_VNET(nfsrvboottime)++;
4914 tryagain = 1;
4915 break;
4916 }
4917 }
4918 } while (tryagain);
4919
4920 sf->nsf_flags |= NFSNSF_OK;
4921 off += (sf->nsf_numboots * sizeof (time_t));
4922
4923 /*
4924 * Read through the file, building a list of records for grace
4925 * checking.
4926 * Each record is between sizeof (struct nfst_rec) and
4927 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
4928 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
4929 */
4930 tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4931 NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
4932 do {
4933 error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4934 (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
4935 off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4936 len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
4937 if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
4938 len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
4939 /*
4940 * Yuck, the file has been corrupted, so just return
4941 * after clearing out any restart state, so the grace period
4942 * is over.
4943 */
4944 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4945 LIST_REMOVE(sp, nst_list);
4946 free(sp, M_TEMP);
4947 }
4948 free(tsp, M_TEMP);
4949 sf->nsf_flags &= ~NFSNSF_OK;
4950 free(sf->nsf_bootvals, M_TEMP);
4951 sf->nsf_bootvals = NULL;
4952 return;
4953 }
4954 if (len > 0) {
4955 off += sizeof (struct nfst_rec) + tsp->len - 1;
4956 /*
4957 * Search the list for a matching client.
4958 */
4959 LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
4960 if (tsp->len == sp->nst_len &&
4961 !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
4962 break;
4963 }
4964 if (sp == LIST_END(&sf->nsf_head)) {
4965 sp = (struct nfsrv_stable *)malloc(tsp->len +
4966 sizeof (struct nfsrv_stable) - 1, M_TEMP,
4967 M_WAITOK);
4968 NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
4969 sizeof (struct nfst_rec) + tsp->len - 1);
4970 LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
4971 } else {
4972 if (tsp->flag == NFSNST_REVOKE)
4973 sp->nst_flag |= NFSNST_REVOKE;
4974 else
4975 /*
4976 * A subsequent timestamp indicates the client
4977 * did a setclientid/confirm and any previous
4978 * revoke is no longer relevant.
4979 */
4980 sp->nst_flag &= ~NFSNST_REVOKE;
4981 }
4982 }
4983 } while (len > 0);
4984 free(tsp, M_TEMP);
4985 sf->nsf_flags = NFSNSF_OK;
4986 sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
4987 NFSRV_LEASEDELTA;
4988 }
4989
4990 /*
4991 * Update the stable storage file, now that the grace period is over.
4992 */
4993 void
nfsrv_updatestable(NFSPROC_T * p)4994 nfsrv_updatestable(NFSPROC_T *p)
4995 {
4996 struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst);
4997 struct nfsrv_stable *sp, *nsp;
4998 int i;
4999 struct nfsvattr nva;
5000 vnode_t vp;
5001 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
5002 mount_t mp = NULL;
5003 #endif
5004 int error;
5005
5006 if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
5007 return;
5008 sf->nsf_flags |= NFSNSF_UPDATEDONE;
5009 /*
5010 * Ok, we need to rewrite the stable storage file.
5011 * - truncate to 0 length
5012 * - write the new first structure
5013 * - loop through the data structures, writing out any that
5014 * have timestamps older than the old boot
5015 */
5016 if (sf->nsf_bootvals) {
5017 sf->nsf_numboots++;
5018 for (i = sf->nsf_numboots - 2; i >= 0; i--)
5019 sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
5020 } else {
5021 sf->nsf_numboots = 1;
5022 sf->nsf_bootvals = (time_t *)malloc(sizeof(time_t),
5023 M_TEMP, M_WAITOK);
5024 }
5025 sf->nsf_bootvals[0] = NFSD_VNET(nfsrvboottime);
5026 sf->nsf_lease = nfsrv_lease;
5027 NFSVNO_ATTRINIT(&nva);
5028 NFSVNO_SETATTRVAL(&nva, size, 0);
5029 vp = NFSFPVNODE(sf->nsf_fp);
5030 vn_start_write(vp, &mp, V_WAIT);
5031 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5032 error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
5033 NULL);
5034 NFSVOPUNLOCK(vp);
5035 } else
5036 error = EPERM;
5037 vn_finished_write(mp);
5038 if (!error)
5039 error = NFSD_RDWR(UIO_WRITE, vp,
5040 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
5041 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
5042 if (!error)
5043 error = NFSD_RDWR(UIO_WRITE, vp,
5044 (caddr_t)sf->nsf_bootvals,
5045 sf->nsf_numboots * sizeof (time_t),
5046 (off_t)(sizeof (struct nfsf_rec)),
5047 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
5048 free(sf->nsf_bootvals, M_TEMP);
5049 sf->nsf_bootvals = NULL;
5050 if (error) {
5051 sf->nsf_flags &= ~NFSNSF_OK;
5052 printf("EEK! Can't write NfsV4 stable storage file\n");
5053 return;
5054 }
5055 sf->nsf_flags |= NFSNSF_OK;
5056
5057 /*
5058 * Loop through the list and write out timestamp records for
5059 * any clients that successfully reclaimed state.
5060 */
5061 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
5062 if (sp->nst_flag & NFSNST_GOTSTATE) {
5063 nfsrv_writestable(sp->nst_client, sp->nst_len,
5064 NFSNST_NEWSTATE, p);
5065 sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
5066 }
5067 LIST_REMOVE(sp, nst_list);
5068 free(sp, M_TEMP);
5069 }
5070 nfsrv_backupstable();
5071 }
5072
5073 /*
5074 * Append a record to the stable storage file.
5075 */
5076 void
nfsrv_writestable(u_char * client,int len,int flag,NFSPROC_T * p)5077 nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
5078 {
5079 struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst);
5080 struct nfst_rec *sp;
5081 int error;
5082
5083 if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
5084 return;
5085 sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
5086 len - 1, M_TEMP, M_WAITOK);
5087 sp->len = len;
5088 NFSBCOPY(client, sp->client, len);
5089 sp->flag = flag;
5090 error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
5091 (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
5092 UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
5093 free(sp, M_TEMP);
5094 if (error) {
5095 sf->nsf_flags &= ~NFSNSF_OK;
5096 printf("EEK! Can't write NfsV4 stable storage file\n");
5097 }
5098 }
5099
5100 /*
5101 * This function is called during the grace period to mark a client
5102 * that successfully reclaimed state.
5103 */
5104 static void
nfsrv_markstable(struct nfsclient * clp)5105 nfsrv_markstable(struct nfsclient *clp)
5106 {
5107 struct nfsrv_stable *sp;
5108
5109 /*
5110 * First find the client structure.
5111 */
5112 LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) {
5113 if (sp->nst_len == clp->lc_idlen &&
5114 !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
5115 break;
5116 }
5117 if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head))
5118 return;
5119
5120 /*
5121 * Now, just mark it and set the nfsclient back pointer.
5122 */
5123 sp->nst_flag |= NFSNST_GOTSTATE;
5124 sp->nst_clp = clp;
5125 }
5126
5127 /*
5128 * This function is called when a NFSv4.1 client does a ReclaimComplete.
5129 * Very similar to nfsrv_markstable(), except for the flag being set.
5130 */
5131 static void
nfsrv_markreclaim(struct nfsclient * clp)5132 nfsrv_markreclaim(struct nfsclient *clp)
5133 {
5134 struct nfsrv_stable *sp;
5135
5136 /*
5137 * First find the client structure.
5138 */
5139 LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) {
5140 if (sp->nst_len == clp->lc_idlen &&
5141 !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
5142 break;
5143 }
5144 if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head))
5145 return;
5146
5147 /*
5148 * Now, just set the flag.
5149 */
5150 sp->nst_flag |= NFSNST_RECLAIMED;
5151
5152 /*
5153 * Free up any old delegations.
5154 */
5155 nfsrv_freedeleglist(&clp->lc_olddeleg);
5156 }
5157
5158 /*
5159 * This function is called for a reclaim, to see if it gets grace.
5160 * It returns 0 if a reclaim is allowed, 1 otherwise.
5161 */
5162 static int
nfsrv_checkstable(struct nfsclient * clp)5163 nfsrv_checkstable(struct nfsclient *clp)
5164 {
5165 struct nfsrv_stable *sp;
5166
5167 /*
5168 * First, find the entry for the client.
5169 */
5170 LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) {
5171 if (sp->nst_len == clp->lc_idlen &&
5172 !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
5173 break;
5174 }
5175
5176 /*
5177 * If not in the list, state was revoked or no state was issued
5178 * since the previous reboot, a reclaim is denied.
5179 */
5180 if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head) ||
5181 (sp->nst_flag & NFSNST_REVOKE) ||
5182 !(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_OK))
5183 return (1);
5184 return (0);
5185 }
5186
/*
 * Test for and try to clear out a conflicting client. This is called by
 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
 * are found.
 * The trick here is that it can't revoke a conflicting client with an
 * expired lease unless it holds the v4root lock, so...
 * If no v4root lock, get the lock and return 1 to indicate "try again".
 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
 * the revocation worked and the conflicting client is "bye, bye", so it
 * can be tried again.
 * Return 2 to indicate that the vnode is VIRF_DOOMED after NFSVOPLOCK().
 * Unlocks State before a non-zero value is returned.
 *
 * clp      - the conflicting client
 * haslockp - in/out flag; non-zero when the caller already holds the
 *            v4root lock.  It is set to 1 here once the lock has been
 *            acquired and is left at 1 for the return of 2 as well.
 * vp       - vnode to unlock while waiting for the v4root lock, or NULL
 * p        - passed through to the stable storage and client cleanup calls
 */
static int
nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
    NFSPROC_T *p)
{
	int gotlock, lktype = 0;

	/*
	 * If lease hasn't expired, we can't fix it.
	 * (The same applies while NFSNSF_UPDATEDONE is not yet set.
	 * State is still locked for this return of 0.)
	 */
	if (clp->lc_expiry >= NFSD_MONOSEC ||
	    !(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE))
		return (0);
	if (*haslockp == 0) {
		NFSUNLOCKSTATE();
		/*
		 * Remember how the vnode was locked and unlock it while
		 * waiting for the v4root lock, so that it can be relocked
		 * the same way afterwards.
		 */
		if (vp != NULL) {
			lktype = NFSVOPISLOCKED(vp);
			NFSVOPUNLOCK(vp);
		}
		NFSLOCKV4ROOTMUTEX();
		nfsv4_relref(&nfsv4rootfs_lock);
		/* Keep trying until nfsv4_lock() reports success. */
		do {
			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
			    NFSV4ROOTLOCKMUTEXPTR, NULL);
		} while (!gotlock);
		NFSUNLOCKV4ROOTMUTEX();
		*haslockp = 1;
		if (vp != NULL) {
			NFSVOPLOCK(vp, lktype | LK_RETRY);
			if (VN_IS_DOOMED(vp))
				return (2);
		}
		/* Nothing was revoked; the caller must try again. */
		return (1);
	}
	NFSUNLOCKSTATE();

	/*
	 * Ok, we can expire the conflicting client.
	 * The revoke record is written to stable storage before the
	 * client's state is thrown away.
	 */
	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
	nfsrv_backupstable();
	nfsrv_cleanclient(clp, p, false, NULL);
	nfsrv_freedeleglist(&clp->lc_deleg);
	nfsrv_freedeleglist(&clp->lc_olddeleg);
	LIST_REMOVE(clp, lc_hash);
	nfsrv_zapclient(clp, p);
	return (1);
}
5247
/*
 * Resolve a delegation conflict.
 * Returns 0 to indicate the conflict was resolved without sleeping.
 * Return -1 to indicate that the caller should check for conflicts again.
 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
 * (NFSERR_PERM is returned if the vnode is found to be doomed after it
 * has been relocked.)
 * NOTE(review): every path visible in this function sets the return value
 * to -1, NFSERR_DELAY or NFSERR_PERM; none of them returns 0.
 *
 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
 * for a return of 0, since there was no sleep and it could be required
 * later. It is released for a return of NFSERR_DELAY, since the caller
 * will return that error. It is released when a sleep was done waiting
 * for the delegation to be returned or expire (so that other nfsds can
 * handle ops). Then, it must be acquired for the write to stable storage.
 * (This function is somewhat similar to nfsrv_clientconflict(), but
 * the semantics differ in a couple of subtle ways. The return of 0
 * indicates the conflict was resolved without sleeping here, not
 * that the conflict can't be resolved and the handling of nfsv4root_lock
 * differs, as noted above.)
 * Unlocks State before returning a non-zero value.
 *
 * stp      - the conflicting delegation
 * haslockp - in/out flag; non-zero when the v4root lock is held.  It is
 *            cleared whenever the lock is released here and set when the
 *            lock is acquired here.
 * p        - passed through to the callback and stable storage calls
 * vp       - vnode to unlock while waiting for the v4root lock, or NULL
 */
static int
nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
    vnode_t vp)
{
	struct nfsclient *clp = stp->ls_clp;
	int gotlock, error, lktype = 0, retrycnt, zapped_clp;
	nfsv4stateid_t tstateid;
	fhandle_t tfh;

	/*
	 * If the conflict is with an old delegation...
	 */
	if (stp->ls_flags & NFSLCK_OLDDELEG) {
		/*
		 * You can delete it, if it has expired.
		 */
		if (clp->lc_delegtime < NFSD_MONOSEC) {
			nfsrv_freedeleg(stp);
			NFSUNLOCKSTATE();
			error = -1;
			goto out;
		}
		NFSUNLOCKSTATE();
		/*
		 * During this delay, the old delegation could expire or it
		 * could be recovered by the client via an Open with
		 * CLAIM_DELEGATE_PREV.
		 * Release the nfsv4root_lock, if held.
		 */
		if (*haslockp) {
			*haslockp = 0;
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		error = NFSERR_DELAY;
		goto out;
	}

	/*
	 * It's a current delegation, so:
	 * - check to see if the delegation has expired
	 *   - if so, get the v4root lock and then expire it
	 *
	 * A recall callback is done when none has been started yet or
	 * when the time recorded in ls_lastrecall has passed and neither
	 * the client's lease nor the delegation has expired.
	 */
	if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0 || (stp->ls_lastrecall <
	    NFSD_MONOSEC && clp->lc_expiry >= NFSD_MONOSEC &&
	    stp->ls_delegtime >= NFSD_MONOSEC)) {
		/*
		 * - do a recall callback, since not yet done
		 * For now, never allow truncate to be set. To use
		 * truncate safely, it must be guaranteed that the
		 * Remove, Rename or Setattr with size of 0 will
		 * succeed and that would require major changes to
		 * the VFS/Vnode OPs.
		 * Set the expiry time large enough so that it won't expire
		 * until after the callback, then set it correctly, once
		 * the callback is done. (The delegation will now time
		 * out whether or not the Recall worked ok. The timeout
		 * will be extended when ops are done on the delegation
		 * stateid, up to the timelimit.)
		 */
		if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) {
			stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
			    NFSRV_LEASEDELTA;
			stp->ls_delegtimelimit = NFSD_MONOSEC + (6 *
			    nfsrv_lease) + NFSRV_LEASEDELTA;
			stp->ls_flags |= NFSLCK_DELEGRECALL;
		}
		stp->ls_lastrecall = time_uptime + 1;

		/*
		 * Loop NFSV4_CBRETRYCNT times while the CBRecall replies
		 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
		 * in order to try and avoid a race that could happen
		 * when a CBRecall request passed the Open reply with
		 * the delegation in it when transiting the network.
		 * Since nfsrv_docallback will sleep, don't use stp after
		 * the call.
		 * (That is why the stateid and file handle are copied to
		 * local variables before State is unlocked.)
		 */
		NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
		    sizeof (tstateid));
		NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
		    sizeof (tfh));
		NFSUNLOCKSTATE();
		if (*haslockp) {
			*haslockp = 0;
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		retrycnt = 0;
		do {
			error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
			    &tstateid, 0, &tfh, NULL, NULL, 0, p);
			retrycnt++;
		} while ((error == NFSERR_BADSTATEID ||
		    error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
		/*
		 * The callback result is only used to decide on a retry;
		 * the caller always gets NFSERR_DELAY.
		 */
		error = NFSERR_DELAY;
		goto out;
	}

	if (clp->lc_expiry >= NFSD_MONOSEC &&
	    stp->ls_delegtime >= NFSD_MONOSEC) {
		NFSUNLOCKSTATE();
		/*
		 * A recall has been done, but it has not yet expired.
		 * So, RETURN_DELAY.
		 */
		if (*haslockp) {
			*haslockp = 0;
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		error = NFSERR_DELAY;
		goto out;
	}

	/*
	 * If we don't yet have the lock, just get it and then return,
	 * since we need that before deleting expired state, such as
	 * this delegation.
	 * When getting the lock, unlock the vnode, so other nfsds that
	 * are in progress, won't get stuck waiting for the vnode lock.
	 */
	if (*haslockp == 0) {
		NFSUNLOCKSTATE();
		if (vp != NULL) {
			/* Remember the lock type, so it can be relocked. */
			lktype = NFSVOPISLOCKED(vp);
			NFSVOPUNLOCK(vp);
		}
		NFSLOCKV4ROOTMUTEX();
		nfsv4_relref(&nfsv4rootfs_lock);
		/* Keep trying until nfsv4_lock() reports success. */
		do {
			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
			    NFSV4ROOTLOCKMUTEXPTR, NULL);
		} while (!gotlock);
		NFSUNLOCKV4ROOTMUTEX();
		*haslockp = 1;
		if (vp != NULL) {
			NFSVOPLOCK(vp, lktype | LK_RETRY);
			if (VN_IS_DOOMED(vp)) {
				/*
				 * The vnode went away while it was
				 * unlocked, so give the lock back and
				 * fail.
				 */
				*haslockp = 0;
				NFSLOCKV4ROOTMUTEX();
				nfsv4_unlock(&nfsv4rootfs_lock, 1);
				NFSUNLOCKV4ROOTMUTEX();
				error = NFSERR_PERM;
				goto out;
			}
		}
		error = -1;
		goto out;
	}

	NFSUNLOCKSTATE();
	/*
	 * Ok, we can delete the expired delegation.
	 * First, write the Revoke record to stable storage and then
	 * clear out the conflict.
	 * Since all other nfsd threads are now blocked, we can safely
	 * sleep without the state changing.
	 */
	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
	nfsrv_backupstable();
	if (clp->lc_expiry < NFSD_MONOSEC) {
		/* The whole client has expired, so get rid of all of it. */
		nfsrv_cleanclient(clp, p, false, NULL);
		nfsrv_freedeleglist(&clp->lc_deleg);
		nfsrv_freedeleglist(&clp->lc_olddeleg);
		LIST_REMOVE(clp, lc_hash);
		zapped_clp = 1;
	} else {
		/* Only this delegation has expired. */
		nfsrv_freedeleg(stp);
		zapped_clp = 0;
	}
	if (zapped_clp)
		nfsrv_zapclient(clp, p);
	error = -1;

out:
	NFSEXITCODE(error);
	return (error);
}
5449
5450 /*
5451 * Check for a remove allowed, if remove is set to 1 and get rid of
5452 * delegations.
5453 */
5454 int
nfsrv_checkremove(vnode_t vp,int remove,struct nfsrv_descript * nd,nfsquad_t clientid,NFSPROC_T * p)5455 nfsrv_checkremove(vnode_t vp, int remove, struct nfsrv_descript *nd,
5456 nfsquad_t clientid, NFSPROC_T *p)
5457 {
5458 struct nfsclient *clp;
5459 struct nfsstate *stp;
5460 struct nfslockfile *lfp;
5461 int error, haslock = 0;
5462 fhandle_t nfh;
5463
5464 clp = NULL;
5465 /*
5466 * First, get the lock file structure.
5467 * (A return of -1 means no associated state, so remove ok.)
5468 */
5469 error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5470 tryagain:
5471 NFSLOCKSTATE();
5472 if (error == 0 && clientid.qval != 0)
5473 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
5474 (nfsquad_t)((u_quad_t)0), 0, nd, p);
5475 if (!error)
5476 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5477 if (error) {
5478 NFSUNLOCKSTATE();
5479 if (haslock) {
5480 NFSLOCKV4ROOTMUTEX();
5481 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5482 NFSUNLOCKV4ROOTMUTEX();
5483 }
5484 if (error == -1)
5485 error = 0;
5486 goto out;
5487 }
5488
5489 /*
5490 * Now, we must Recall any delegations.
5491 */
5492 error = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
5493 if (error) {
5494 /*
5495 * nfsrv_cleandeleg() unlocks state for non-zero
5496 * return.
5497 */
5498 if (error == -1)
5499 goto tryagain;
5500 if (haslock) {
5501 NFSLOCKV4ROOTMUTEX();
5502 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5503 NFSUNLOCKV4ROOTMUTEX();
5504 }
5505 goto out;
5506 }
5507
5508 /*
5509 * Now, look for a conflicting open share.
5510 */
5511 if (remove) {
5512 /*
5513 * If the entry in the directory was the last reference to the
5514 * corresponding filesystem object, the object can be destroyed
5515 * */
5516 if(lfp->lf_usecount>1)
5517 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
5518 if (stp->ls_flags & NFSLCK_WRITEDENY) {
5519 error = NFSERR_FILEOPEN;
5520 break;
5521 }
5522 }
5523 }
5524
5525 NFSUNLOCKSTATE();
5526 if (haslock) {
5527 NFSLOCKV4ROOTMUTEX();
5528 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5529 NFSUNLOCKV4ROOTMUTEX();
5530 }
5531
5532 out:
5533 NFSEXITCODE(error);
5534 return (error);
5535 }
5536
5537 /*
5538 * Clear out all delegations for the file referred to by lfp.
5539 * May return NFSERR_DELAY, if there will be a delay waiting for
5540 * delegations to expire.
5541 * Returns -1 to indicate it slept while recalling a delegation.
5542 * This function has the side effect of deleting the nfslockfile structure,
5543 * if it no longer has associated state and didn't have to sleep.
5544 * Unlocks State before a non-zero value is returned.
5545 */
5546 static int
nfsrv_cleandeleg(vnode_t vp,struct nfslockfile * lfp,struct nfsclient * clp,int * haslockp,NFSPROC_T * p)5547 nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
5548 struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
5549 {
5550 struct nfsstate *stp, *nstp;
5551 int ret = 0;
5552
5553 stp = LIST_FIRST(&lfp->lf_deleg);
5554 while (stp != LIST_END(&lfp->lf_deleg)) {
5555 nstp = LIST_NEXT(stp, ls_file);
5556 if (stp->ls_clp != clp) {
5557 ret = nfsrv_delegconflict(stp, haslockp, p, vp);
5558 if (ret) {
5559 /*
5560 * nfsrv_delegconflict() unlocks state
5561 * when it returns non-zero.
5562 */
5563 goto out;
5564 }
5565 }
5566 stp = nstp;
5567 }
5568 out:
5569 NFSEXITCODE(ret);
5570 return (ret);
5571 }
5572
5573 /*
5574 * There are certain operations that, when being done outside of NFSv4,
5575 * require that any NFSv4 delegation for the file be recalled.
5576 * This function is to be called for those cases:
5577 * VOP_RENAME() - When a delegation is being recalled for any reason,
5578 * the client may have to do Opens against the server, using the file's
5579 * final component name. If the file has been renamed on the server,
5580 * that component name will be incorrect and the Open will fail.
5581 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
5582 * been removed on the server, if there is a delegation issued to
5583 * that client for the file. I say "theoretically" since clients
5584 * normally do an Access Op before the Open and that Access Op will
5585 * fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
5586 * they will detect the file's removal in the same manner. (There is
5587 * one case where RFC3530 allows a client to do an Open without first
5588 * doing an Access Op, which is passage of a check against the ACE
5589 * returned with a Write delegation, but current practice is to ignore
5590 * the ACE and always do an Access Op.)
5591 * Since the functions can only be called with an unlocked vnode, this
5592 * can't be done at this time.
5593 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
5594 * locks locally in the client, which are not visible to the server. To
5595 * deal with this, issuing of delegations for a vnode must be disabled
5596 * and all delegations for the vnode recalled. This is done via the
5597 * second function, using the VV_DISABLEDELEG vflag on the vnode.
5598 */
5599 void
nfsd_recalldelegation(vnode_t vp,NFSPROC_T * p)5600 nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
5601 {
5602 time_t starttime;
5603 int error;
5604
5605 /*
5606 * First, check to see if the server is currently running and it has
5607 * been called for a regular file when issuing delegations.
5608 */
5609 if (NFSD_VNET(nfsrv_numnfsd) == 0 || vp->v_type != VREG ||
5610 nfsrv_issuedelegs == 0)
5611 return;
5612
5613 KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
5614 /*
5615 * First, get a reference on the nfsv4rootfs_lock so that an
5616 * exclusive lock cannot be acquired by another thread.
5617 */
5618 NFSLOCKV4ROOTMUTEX();
5619 nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
5620 NFSUNLOCKV4ROOTMUTEX();
5621
5622 /*
5623 * Now, call nfsrv_checkremove() in a loop while it returns
5624 * NFSERR_DELAY. Return upon any other error or when timed out.
5625 */
5626 starttime = NFSD_MONOSEC;
5627 do {
5628 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5629 error = nfsrv_checkremove(vp, 0, NULL,
5630 (nfsquad_t)((u_quad_t)0), p);
5631 NFSVOPUNLOCK(vp);
5632 } else
5633 error = EPERM;
5634 if (error == NFSERR_DELAY) {
5635 if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
5636 break;
5637 /* Sleep for a short period of time */
5638 (void) nfs_catnap(PZERO, 0, "nfsremove");
5639 }
5640 } while (error == NFSERR_DELAY);
5641 NFSLOCKV4ROOTMUTEX();
5642 nfsv4_relref(&nfsv4rootfs_lock);
5643 NFSUNLOCKV4ROOTMUTEX();
5644 }
5645
/*
 * Disable the issuing of delegations for vp and recall the ones that
 * already exist.
 * When VV_DISABLEDELEG is defined, that flag is set atomically in the
 * vnode's v_vflag; in either case nfsd_recalldelegation() is then called.
 */
void
nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
{

#ifdef VV_DISABLEDELEG
	/*
	 * First, flag issuance of delegations disabled.
	 */
	atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
#endif

	/*
	 * Then call nfsd_recalldelegation() to get rid of all extant
	 * delegations.
	 */
	nfsd_recalldelegation(vp, p);
}
5663
5664 /*
5665 * Check for conflicting locks, etc. and then get rid of delegations.
5666 * (At one point I thought that I should get rid of delegations for any
5667 * Setattr, since it could potentially disallow the I/O op (read or write)
5668 * allowed by the delegation. However, Setattr Ops that aren't changing
5669 * the size get a stateid of all 0s, so you can't tell if it is a delegation
5670 * for the same client or a different one, so I decided to only get rid
5671 * of delegations for other clients when the size is being changed.)
5672 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
5673 * as Write backs, even if there is no delegation, so it really isn't any
5674 * different?)
5675 */
5676 int
nfsrv_checksetattr(vnode_t vp,struct nfsrv_descript * nd,nfsv4stateid_t * stateidp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,struct nfsexstuff * exp,NFSPROC_T * p)5677 nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
5678 nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
5679 struct nfsexstuff *exp, NFSPROC_T *p)
5680 {
5681 struct nfsstate st, *stp = &st;
5682 struct nfslock lo, *lop = &lo;
5683 int error = 0;
5684 nfsquad_t clientid;
5685
5686 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
5687 stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
5688 lop->lo_first = nvap->na_size;
5689 } else {
5690 stp->ls_flags = 0;
5691 lop->lo_first = 0;
5692 }
5693 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
5694 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
5695 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
5696 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
5697 stp->ls_flags |= NFSLCK_SETATTR;
5698 if (stp->ls_flags == 0)
5699 goto out;
5700 lop->lo_end = NFS64BITSSET;
5701 lop->lo_flags = NFSLCK_WRITE;
5702 stp->ls_ownerlen = 0;
5703 stp->ls_op = NULL;
5704 stp->ls_uid = nd->nd_cred->cr_uid;
5705 stp->ls_stateid.seqid = stateidp->seqid;
5706 clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
5707 clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
5708 stp->ls_stateid.other[2] = stateidp->other[2];
5709 error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
5710 stateidp, exp, nd, p);
5711
5712 out:
5713 NFSEXITCODE2(error, nd);
5714 return (error);
5715 }
5716
/*
 * Check for a write delegation and do a CBGETATTR if there is one, updating
 * the attributes, as required.
 * Should I return an error if I can't get the attributes? (For now, I'll
 * just return ok.)
 *
 * nd       - request descriptor for the Getattr
 * vp       - vnode of the file
 * nvap     - attributes being returned; na_size is replaced by the size
 *            from the callback when the callback reply shows a change
 * attrbitp - attributes requested by the Getattr
 * p        - passed through to the helper functions
 *
 * Returns 0, the error from nfsrv_getlockfh()/nfsrv_getlockfile() (other
 * than -1) or the return value of nfsvno_updfilerev().  Errors from the
 * callback itself are ignored.
 */
int
nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
    struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
{
	struct nfsstate *stp;
	struct nfslockfile *lfp;
	struct nfsclient *clp;
	struct nfsvattr nva;
	fhandle_t nfh;
	int error = 0;
	nfsattrbit_t cbbits;
	u_quad_t delegfilerev;

	/*
	 * Nothing to do if none of the requested attributes can be
	 * acquired via a CB Getattr or if there are no write delegations
	 * at all.
	 */
	NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
	if (!NFSNONZERO_ATTRBIT(&cbbits))
		goto out;
	if (nfsrv_writedelegcnt == 0)
		goto out;

	/*
	 * Get the lock file structure.
	 * (A return of -1 means no associated state, so return ok.)
	 */
	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
	NFSLOCKSTATE();
	if (!error)
		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
	if (error) {
		NFSUNLOCKSTATE();
		if (error == -1)
			error = 0;
		goto out;
	}

	/*
	 * Now, look for a write delegation.
	 */
	LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
		if (stp->ls_flags & NFSLCK_DELEGWRITE)
			break;
	}
	if (stp == LIST_END(&lfp->lf_deleg)) {
		NFSUNLOCKSTATE();
		goto out;
	}
	clp = stp->ls_clp;

	/* If the clientid is not confirmed, ignore the delegation. */
	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
		NFSUNLOCKSTATE();
		goto out;
	}

	/* Save the file revision, since stp isn't used after the unlock. */
	delegfilerev = stp->ls_filerev;
	/*
	 * If the Write delegation was issued as a part of this Compound RPC
	 * or if we have an Implied Clientid (used in a previous Op in this
	 * compound) and it is the client the delegation was issued to,
	 * just return ok.
	 * I also assume that it is from the same client iff the network
	 * host IP address is the same as the callback address. (Not
	 * exactly correct by the RFC, but avoids a lot of Getattr
	 * callbacks.)
	 */
	if (nd->nd_compref == stp->ls_compref ||
	    ((nd->nd_flag & ND_IMPLIEDCLID) &&
	    clp->lc_clientid.qval == nd->nd_clientid.qval) ||
	    nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
		NFSUNLOCKSTATE();
		goto out;
	}

	/*
	 * We are now done with the delegation state structure,
	 * so the statelock can be released and we can now tsleep().
	 */

	/*
	 * Now, we must do the CB Getattr callback, to see if Change or Size
	 * has changed.
	 * (The callback is only done if the client's lease has not
	 * expired.)
	 */
	if (clp->lc_expiry >= NFSD_MONOSEC) {
		NFSUNLOCKSTATE();
		NFSVNO_ATTRINIT(&nva);
		/*
		 * NFS64BITSSET marks the file revision as "not returned
		 * by the callback".
		 */
		nva.na_filerev = NFS64BITSSET;
		error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
		    0, &nfh, &nva, &cbbits, 0, p);
		if (!error) {
			/*
			 * Update the file revision if the callback
			 * returned a newer one than the delegation has
			 * or a size that differs from the one in nvap.
			 */
			if ((nva.na_filerev != NFS64BITSSET &&
			    nva.na_filerev > delegfilerev) ||
			    (NFSVNO_ISSETSIZE(&nva) &&
			    nva.na_size != nvap->na_size)) {
				error = nfsvno_updfilerev(vp, nvap, nd, p);
				if (NFSVNO_ISSETSIZE(&nva))
					nvap->na_size = nva.na_size;
			}
		} else
			error = 0;	/* Ignore callback errors for now. */
	} else {
		NFSUNLOCKSTATE();
	}

out:
	NFSEXITCODE2(error, nd);
	return (error);
}
5829
5830 /*
5831 * This function looks for openowners that haven't had any opens for
5832 * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
5833 * is set.
5834 */
5835 void
nfsrv_throwawayopens(NFSPROC_T * p)5836 nfsrv_throwawayopens(NFSPROC_T *p)
5837 {
5838 struct nfsclient *clp, *nclp;
5839 struct nfsstate *stp, *nstp;
5840 int i;
5841
5842 NFSLOCKSTATE();
5843 NFSD_VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_NOOPENS;
5844 /*
5845 * For each client...
5846 */
5847 for (i = 0; i < nfsrv_clienthashsize; i++) {
5848 LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash,
5849 nclp) {
5850 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
5851 if (LIST_EMPTY(&stp->ls_open) &&
5852 (stp->ls_noopens > NFSNOOPEN ||
5853 (nfsrv_openpluslock * 2) >
5854 nfsrv_v4statelimit))
5855 nfsrv_freeopenowner(stp, 0, p);
5856 }
5857 }
5858 }
5859 NFSUNLOCKSTATE();
5860 }
5861
5862 /*
5863 * This function checks to see if the credentials are the same.
5864 * The check for same credentials is needed for state management operations
5865 * for NFSv4.0 or NFSv4.1/4.2 when SP4_MACH_CRED is configured via
5866 * ExchangeID.
5867 * Returns 1 for not same, 0 otherwise.
5868 */
5869 static int
nfsrv_notsamecredname(int op,struct nfsrv_descript * nd,struct nfsclient * clp)5870 nfsrv_notsamecredname(int op, struct nfsrv_descript *nd, struct nfsclient *clp)
5871 {
5872
5873 /* Check for the SP4_MACH_CRED case. */
5874 if (op != 0 && nfsrv_checkmachcred(op, nd, clp) != 0)
5875 return (1);
5876
5877 /* For NFSv4.1/4.2, SP4_NONE always allows this. */
5878 if ((nd->nd_flag & ND_NFSV41) != 0)
5879 return (0);
5880
5881 if (nd->nd_flag & ND_GSS) {
5882 if (!(clp->lc_flags & LCL_GSS))
5883 return (1);
5884 if (clp->lc_flags & LCL_NAME) {
5885 if (nd->nd_princlen != clp->lc_namelen ||
5886 NFSBCMP(nd->nd_principal, clp->lc_name,
5887 clp->lc_namelen))
5888 return (1);
5889 else
5890 return (0);
5891 }
5892 if (nd->nd_cred->cr_uid == clp->lc_uid)
5893 return (0);
5894 else
5895 return (1);
5896 } else if (clp->lc_flags & LCL_GSS)
5897 return (1);
5898 /*
5899 * For AUTH_SYS, allow the same uid or root. (This is underspecified
5900 * in RFC3530, which talks about principals, but doesn't say anything
5901 * about uids for AUTH_SYS.)
5902 */
5903 if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
5904 return (0);
5905 else
5906 return (1);
5907 }
5908
5909 /*
5910 * Calculate the lease expiry time.
5911 */
5912 static time_t
nfsrv_leaseexpiry(void)5913 nfsrv_leaseexpiry(void)
5914 {
5915
5916 if (NFSD_VNET(nfsrv_stablefirst).nsf_eograce > NFSD_MONOSEC)
5917 return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
5918 return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
5919 }
5920
5921 /*
5922 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
5923 */
5924 static void
nfsrv_delaydelegtimeout(struct nfsstate * stp)5925 nfsrv_delaydelegtimeout(struct nfsstate *stp)
5926 {
5927
5928 if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
5929 return;
5930
5931 if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
5932 stp->ls_delegtime < stp->ls_delegtimelimit) {
5933 stp->ls_delegtime += nfsrv_lease;
5934 if (stp->ls_delegtime > stp->ls_delegtimelimit)
5935 stp->ls_delegtime = stp->ls_delegtimelimit;
5936 }
5937 }
5938
5939 /*
5940 * This function checks to see if there is any other state associated
5941 * with the openowner for this Open.
5942 * It returns 1 if there is no other state, 0 otherwise.
5943 */
5944 static int
nfsrv_nootherstate(struct nfsstate * stp)5945 nfsrv_nootherstate(struct nfsstate *stp)
5946 {
5947 struct nfsstate *tstp;
5948
5949 LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
5950 if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
5951 return (0);
5952 }
5953 return (1);
5954 }
5955
5956 /*
5957 * Create a list of lock deltas (changes to local byte range locking
5958 * that can be rolled back using the list) and apply the changes via
5959 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
5960 * the rollback or update function will be called after this.
5961 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
5962 * call fails. If it returns an error, it will unlock the list.
5963 */
5964 static int
nfsrv_locallock(vnode_t vp,struct nfslockfile * lfp,int flags,uint64_t first,uint64_t end,struct nfslockconflict * cfp,NFSPROC_T * p)5965 nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
5966 uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5967 {
5968 struct nfslock *lop, *nlop;
5969 int error = 0;
5970
5971 /* Loop through the list of locks. */
5972 lop = LIST_FIRST(&lfp->lf_locallock);
5973 while (first < end && lop != NULL) {
5974 nlop = LIST_NEXT(lop, lo_lckowner);
5975 if (first >= lop->lo_end) {
5976 /* not there yet */
5977 lop = nlop;
5978 } else if (first < lop->lo_first) {
5979 /* new one starts before entry in list */
5980 if (end <= lop->lo_first) {
5981 /* no overlap between old and new */
5982 error = nfsrv_dolocal(vp, lfp, flags,
5983 NFSLCK_UNLOCK, first, end, cfp, p);
5984 if (error != 0)
5985 break;
5986 first = end;
5987 } else {
5988 /* handle fragment overlapped with new one */
5989 error = nfsrv_dolocal(vp, lfp, flags,
5990 NFSLCK_UNLOCK, first, lop->lo_first, cfp,
5991 p);
5992 if (error != 0)
5993 break;
5994 first = lop->lo_first;
5995 }
5996 } else {
5997 /* new one overlaps this entry in list */
5998 if (end <= lop->lo_end) {
5999 /* overlaps all of new one */
6000 error = nfsrv_dolocal(vp, lfp, flags,
6001 lop->lo_flags, first, end, cfp, p);
6002 if (error != 0)
6003 break;
6004 first = end;
6005 } else {
6006 /* handle fragment overlapped with new one */
6007 error = nfsrv_dolocal(vp, lfp, flags,
6008 lop->lo_flags, first, lop->lo_end, cfp, p);
6009 if (error != 0)
6010 break;
6011 first = lop->lo_end;
6012 lop = nlop;
6013 }
6014 }
6015 }
6016 if (first < end && error == 0)
6017 /* handle fragment past end of list */
6018 error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
6019 end, cfp, p);
6020
6021 NFSEXITCODE(error);
6022 return (error);
6023 }
6024
6025 /*
6026 * Local lock unlock. Unlock all byte ranges that are no longer locked
6027 * by NFSv4. To do this, unlock any subranges of first-->end that
6028 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
6029 * list. This list has all locks for the file held by other
6030 * <clientid, lockowner> tuples. The list is ordered by increasing
6031 * lo_first value, but may have entries that overlap each other, for
6032 * the case of read locks.
6033 */
6034 static void
nfsrv_localunlock(vnode_t vp,struct nfslockfile * lfp,uint64_t init_first,uint64_t init_end,NFSPROC_T * p)6035 nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
6036 uint64_t init_end, NFSPROC_T *p)
6037 {
6038 struct nfslock *lop;
6039 uint64_t first, end, prevfirst __unused;
6040
6041 first = init_first;
6042 end = init_end;
6043 while (first < init_end) {
6044 /* Loop through all nfs locks, adjusting first and end */
6045 prevfirst = 0;
6046 LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
6047 KASSERT(prevfirst <= lop->lo_first,
6048 ("nfsv4 locks out of order"));
6049 KASSERT(lop->lo_first < lop->lo_end,
6050 ("nfsv4 bogus lock"));
6051 prevfirst = lop->lo_first;
6052 if (first >= lop->lo_first &&
6053 first < lop->lo_end)
6054 /*
6055 * Overlaps with initial part, so trim
6056 * off that initial part by moving first past
6057 * it.
6058 */
6059 first = lop->lo_end;
6060 else if (end > lop->lo_first &&
6061 lop->lo_first > first) {
6062 /*
6063 * This lock defines the end of the
6064 * segment to unlock, so set end to the
6065 * start of it and break out of the loop.
6066 */
6067 end = lop->lo_first;
6068 break;
6069 }
6070 if (first >= end)
6071 /*
6072 * There is no segment left to do, so
6073 * break out of this loop and then exit
6074 * the outer while() since first will be set
6075 * to end, which must equal init_end here.
6076 */
6077 break;
6078 }
6079 if (first < end) {
6080 /* Unlock this segment */
6081 (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
6082 NFSLCK_READ, first, end, NULL, p);
6083 nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
6084 first, end);
6085 }
6086 /*
6087 * Now move past this segment and look for any further
6088 * segment in the range, if there is one.
6089 */
6090 first = end;
6091 end = init_end;
6092 }
6093 }
6094
6095 /*
6096 * Do the local lock operation and update the rollback list, as required.
6097 * Perform the rollback and return the error if nfsvno_advlock() fails.
6098 */
6099 static int
nfsrv_dolocal(vnode_t vp,struct nfslockfile * lfp,int flags,int oldflags,uint64_t first,uint64_t end,struct nfslockconflict * cfp,NFSPROC_T * p)6100 nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
6101 uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
6102 {
6103 struct nfsrollback *rlp;
6104 int error = 0, ltype, oldltype;
6105
6106 if (flags & NFSLCK_WRITE)
6107 ltype = F_WRLCK;
6108 else if (flags & NFSLCK_READ)
6109 ltype = F_RDLCK;
6110 else
6111 ltype = F_UNLCK;
6112 if (oldflags & NFSLCK_WRITE)
6113 oldltype = F_WRLCK;
6114 else if (oldflags & NFSLCK_READ)
6115 oldltype = F_RDLCK;
6116 else
6117 oldltype = F_UNLCK;
6118 if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
6119 /* nothing to do */
6120 goto out;
6121 error = nfsvno_advlock(vp, ltype, first, end, p);
6122 if (error != 0) {
6123 if (cfp != NULL) {
6124 cfp->cl_clientid.lval[0] = 0;
6125 cfp->cl_clientid.lval[1] = 0;
6126 cfp->cl_first = 0;
6127 cfp->cl_end = NFS64BITSSET;
6128 cfp->cl_flags = NFSLCK_WRITE;
6129 cfp->cl_ownerlen = 5;
6130 NFSBCOPY("LOCAL", cfp->cl_owner, 5);
6131 }
6132 nfsrv_locallock_rollback(vp, lfp, p);
6133 } else if (ltype != F_UNLCK) {
6134 rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
6135 M_WAITOK);
6136 rlp->rlck_first = first;
6137 rlp->rlck_end = end;
6138 rlp->rlck_type = oldltype;
6139 LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
6140 }
6141
6142 out:
6143 NFSEXITCODE(error);
6144 return (error);
6145 }
6146
6147 /*
6148 * Roll back local lock changes and free up the rollback list.
6149 */
6150 static void
nfsrv_locallock_rollback(vnode_t vp,struct nfslockfile * lfp,NFSPROC_T * p)6151 nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
6152 {
6153 struct nfsrollback *rlp, *nrlp;
6154
6155 LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
6156 (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
6157 rlp->rlck_end, p);
6158 free(rlp, M_NFSDROLLBACK);
6159 }
6160 LIST_INIT(&lfp->lf_rollback);
6161 }
6162
6163 /*
6164 * Update local lock list and delete rollback list (ie now committed to the
6165 * local locks). Most of the work is done by the internal function.
6166 */
6167 static void
nfsrv_locallock_commit(struct nfslockfile * lfp,int flags,uint64_t first,uint64_t end)6168 nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
6169 uint64_t end)
6170 {
6171 struct nfsrollback *rlp, *nrlp;
6172 struct nfslock *new_lop, *other_lop;
6173
6174 new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
6175 if (flags & (NFSLCK_READ | NFSLCK_WRITE))
6176 other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
6177 M_WAITOK);
6178 else
6179 other_lop = NULL;
6180 new_lop->lo_flags = flags;
6181 new_lop->lo_first = first;
6182 new_lop->lo_end = end;
6183 nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
6184 if (new_lop != NULL)
6185 free(new_lop, M_NFSDLOCK);
6186 if (other_lop != NULL)
6187 free(other_lop, M_NFSDLOCK);
6188
6189 /* and get rid of the rollback list */
6190 LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
6191 free(rlp, M_NFSDROLLBACK);
6192 LIST_INIT(&lfp->lf_rollback);
6193 }
6194
6195 /*
6196 * Lock the struct nfslockfile for local lock updating.
6197 */
6198 static void
nfsrv_locklf(struct nfslockfile * lfp)6199 nfsrv_locklf(struct nfslockfile *lfp)
6200 {
6201 int gotlock;
6202
6203 /* lf_usecount ensures *lfp won't be free'd */
6204 lfp->lf_usecount++;
6205 do {
6206 gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
6207 NFSSTATEMUTEXPTR, NULL);
6208 } while (gotlock == 0);
6209 lfp->lf_usecount--;
6210 }
6211
6212 /*
6213 * Unlock the struct nfslockfile after local lock updating.
6214 */
6215 static void
nfsrv_unlocklf(struct nfslockfile * lfp)6216 nfsrv_unlocklf(struct nfslockfile *lfp)
6217 {
6218
6219 nfsv4_unlock(&lfp->lf_locallock_lck, 0);
6220 }
6221
6222 /*
6223 * Clear out all state for the NFSv4 server.
6224 * Must be called by a thread that can sleep when no nfsds are running.
6225 */
6226 void
nfsrv_throwawayallstate(NFSPROC_T * p)6227 nfsrv_throwawayallstate(NFSPROC_T *p)
6228 {
6229 struct nfsclient *clp, *nclp;
6230 struct nfslockfile *lfp, *nlfp;
6231 int i;
6232
6233 /*
6234 * For each client, clean out the state and then free the structure.
6235 */
6236 for (i = 0; i < nfsrv_clienthashsize; i++) {
6237 LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash,
6238 nclp) {
6239 nfsrv_cleanclient(clp, p, false, NULL);
6240 nfsrv_freedeleglist(&clp->lc_deleg);
6241 nfsrv_freedeleglist(&clp->lc_olddeleg);
6242 free(clp->lc_stateid, M_NFSDCLIENT);
6243 free(clp, M_NFSDCLIENT);
6244 }
6245 }
6246
6247 /*
6248 * Also, free up any remaining lock file structures.
6249 */
6250 for (i = 0; i < nfsrv_lockhashsize; i++) {
6251 LIST_FOREACH_SAFE(lfp, &NFSD_VNET(nfslockhash)[i], lf_hash,
6252 nlfp) {
6253 printf("nfsd unload: fnd a lock file struct\n");
6254 nfsrv_freenfslockfile(lfp);
6255 }
6256 }
6257
6258 /* And get rid of the deviceid structures and layouts. */
6259 nfsrv_freealllayoutsanddevids();
6260 }
6261
6262 /*
6263 * Check the sequence# for the session and slot provided as an argument.
6264 * Also, renew the lease if the session will return NFS_OK.
6265 */
6266 int
nfsrv_checksequence(struct nfsrv_descript * nd,uint32_t sequenceid,uint32_t * highest_slotidp,uint32_t * target_highest_slotidp,int cache_this,uint32_t * sflagsp,NFSPROC_T * p)6267 nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
6268 uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
6269 uint32_t *sflagsp, NFSPROC_T *p)
6270 {
6271 struct nfsdsession *sep;
6272 struct nfssessionhash *shp;
6273 int error;
6274
6275 shp = NFSSESSIONHASH(nd->nd_sessionid);
6276 NFSLOCKSESSION(shp);
6277 sep = nfsrv_findsession(nd->nd_sessionid);
6278 if (sep == NULL) {
6279 NFSUNLOCKSESSION(shp);
6280 return (NFSERR_BADSESSION);
6281 }
6282 error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
6283 sep->sess_slots, NULL, NFSV4_SLOTS - 1);
6284 if (error != 0) {
6285 NFSUNLOCKSESSION(shp);
6286 return (error);
6287 }
6288 if (cache_this != 0)
6289 nd->nd_flag |= ND_SAVEREPLY;
6290 /* Renew the lease. */
6291 sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
6292 nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
6293 nd->nd_flag |= ND_IMPLIEDCLID;
6294
6295 /* Handle the SP4_MECH_CRED case for NFSv4.1/4.2. */
6296 if ((sep->sess_clp->lc_flags & LCL_MACHCRED) != 0 &&
6297 (nd->nd_flag & (ND_GSSINTEGRITY | ND_GSSPRIVACY)) != 0 &&
6298 nd->nd_princlen == sep->sess_clp->lc_namelen &&
6299 !NFSBCMP(sep->sess_clp->lc_name, nd->nd_principal,
6300 nd->nd_princlen)) {
6301 nd->nd_flag |= ND_MACHCRED;
6302 NFSSET_OPBIT(&nd->nd_allowops, &sep->sess_clp->lc_allowops);
6303 }
6304
6305 /* Save maximum request and reply sizes. */
6306 nd->nd_maxreq = sep->sess_maxreq;
6307 nd->nd_maxresp = sep->sess_maxresp;
6308
6309 *sflagsp = 0;
6310 if (sep->sess_clp->lc_req.nr_client == NULL ||
6311 (sep->sess_clp->lc_flags & LCL_CBDOWN) != 0)
6312 *sflagsp |= NFSV4SEQ_CBPATHDOWN;
6313 NFSUNLOCKSESSION(shp);
6314 if (error == NFSERR_EXPIRED) {
6315 *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
6316 error = 0;
6317 } else if (error == NFSERR_ADMINREVOKED) {
6318 *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
6319 error = 0;
6320 }
6321 *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
6322 return (0);
6323 }
6324
6325 /*
6326 * Check/set reclaim complete for this session/clientid.
6327 */
6328 int
nfsrv_checkreclaimcomplete(struct nfsrv_descript * nd,int onefs)6329 nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs)
6330 {
6331 struct nfsdsession *sep;
6332 struct nfssessionhash *shp;
6333 int error = 0;
6334
6335 shp = NFSSESSIONHASH(nd->nd_sessionid);
6336 NFSLOCKSTATE();
6337 NFSLOCKSESSION(shp);
6338 sep = nfsrv_findsession(nd->nd_sessionid);
6339 if (sep == NULL) {
6340 NFSUNLOCKSESSION(shp);
6341 NFSUNLOCKSTATE();
6342 return (NFSERR_BADSESSION);
6343 }
6344
6345 if (onefs != 0)
6346 sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS;
6347 /* Check to see if reclaim complete has already happened. */
6348 else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
6349 error = NFSERR_COMPLETEALREADY;
6350 else {
6351 sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
6352 nfsrv_markreclaim(sep->sess_clp);
6353 }
6354 NFSUNLOCKSESSION(shp);
6355 NFSUNLOCKSTATE();
6356 return (error);
6357 }
6358
6359 /*
6360 * Cache the reply in a session slot.
6361 */
6362 void
nfsrv_cache_session(struct nfsrv_descript * nd,struct mbuf ** m)6363 nfsrv_cache_session(struct nfsrv_descript *nd, struct mbuf **m)
6364 {
6365 struct nfsdsession *sep;
6366 struct nfssessionhash *shp;
6367 char *buf, *cp;
6368 #ifdef INET
6369 struct sockaddr_in *sin;
6370 #endif
6371 #ifdef INET6
6372 struct sockaddr_in6 *sin6;
6373 #endif
6374
6375 shp = NFSSESSIONHASH(nd->nd_sessionid);
6376 NFSLOCKSESSION(shp);
6377 sep = nfsrv_findsession(nd->nd_sessionid);
6378 if (sep == NULL) {
6379 NFSUNLOCKSESSION(shp);
6380 if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags &
6381 NFSNSF_GRACEOVER) != 0) {
6382 buf = malloc(INET6_ADDRSTRLEN, M_TEMP, M_WAITOK);
6383 switch (nd->nd_nam->sa_family) {
6384 #ifdef INET
6385 case AF_INET:
6386 sin = (struct sockaddr_in *)nd->nd_nam;
6387 cp = inet_ntop(sin->sin_family,
6388 &sin->sin_addr.s_addr, buf,
6389 INET6_ADDRSTRLEN);
6390 break;
6391 #endif
6392 #ifdef INET6
6393 case AF_INET6:
6394 sin6 = (struct sockaddr_in6 *)nd->nd_nam;
6395 cp = inet_ntop(sin6->sin6_family,
6396 &sin6->sin6_addr, buf, INET6_ADDRSTRLEN);
6397 break;
6398 #endif
6399 default:
6400 cp = NULL;
6401 }
6402 if (cp != NULL)
6403 printf("nfsrv_cache_session: no session "
6404 "IPaddr=%s, check NFS clients for unique "
6405 "/etc/hostid's\n", cp);
6406 else
6407 printf("nfsrv_cache_session: no session, "
6408 "check NFS clients for unique "
6409 "/etc/hostid's\n");
6410 free(buf, M_TEMP);
6411 }
6412 m_freem(*m);
6413 return;
6414 }
6415 nfsv4_seqsess_cacherep(nd->nd_slotid, sep->sess_slots, nd->nd_repstat,
6416 m);
6417 NFSUNLOCKSESSION(shp);
6418 }
6419
6420 /*
6421 * Search for a session that matches the sessionid.
6422 */
6423 static struct nfsdsession *
nfsrv_findsession(uint8_t * sessionid)6424 nfsrv_findsession(uint8_t *sessionid)
6425 {
6426 struct nfsdsession *sep;
6427 struct nfssessionhash *shp;
6428
6429 shp = NFSSESSIONHASH(sessionid);
6430 LIST_FOREACH(sep, &shp->list, sess_hash) {
6431 if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
6432 break;
6433 }
6434 return (sep);
6435 }
6436
6437 /*
6438 * Destroy a session.
6439 */
6440 int
nfsrv_destroysession(struct nfsrv_descript * nd,uint8_t * sessionid)6441 nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
6442 {
6443 int error, igotlock, samesess;
6444
6445 samesess = 0;
6446 if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) &&
6447 (nd->nd_flag & ND_HASSEQUENCE) != 0) {
6448 samesess = 1;
6449 if ((nd->nd_flag & ND_LASTOP) == 0)
6450 return (NFSERR_BADSESSION);
6451 }
6452
6453 /* Lock out other nfsd threads */
6454 NFSLOCKV4ROOTMUTEX();
6455 nfsv4_relref(&nfsv4rootfs_lock);
6456 do {
6457 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
6458 NFSV4ROOTLOCKMUTEXPTR, NULL);
6459 } while (igotlock == 0);
6460 NFSUNLOCKV4ROOTMUTEX();
6461
6462 error = nfsrv_freesession(nd, NULL, sessionid, false, NULL);
6463 if (error == 0 && samesess != 0)
6464 nd->nd_flag &= ~ND_HASSEQUENCE;
6465
6466 NFSLOCKV4ROOTMUTEX();
6467 nfsv4_unlock(&nfsv4rootfs_lock, 1);
6468 NFSUNLOCKV4ROOTMUTEX();
6469 return (error);
6470 }
6471
6472 /*
6473 * Bind a connection to a session.
6474 * For now, only certain variants are supported, since the current session
6475 * structure can only handle a single backchannel entry, which will be
6476 * applied to all connections if it is set.
6477 */
6478 int
nfsrv_bindconnsess(struct nfsrv_descript * nd,uint8_t * sessionid,int * foreaftp)6479 nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp)
6480 {
6481 struct nfssessionhash *shp;
6482 struct nfsdsession *sep;
6483 struct nfsclient *clp;
6484 SVCXPRT *savxprt;
6485 int error;
6486
6487 error = 0;
6488 savxprt = NULL;
6489 shp = NFSSESSIONHASH(sessionid);
6490 NFSLOCKSTATE();
6491 NFSLOCKSESSION(shp);
6492 sep = nfsrv_findsession(sessionid);
6493 if (sep != NULL) {
6494 clp = sep->sess_clp;
6495 error = nfsrv_checkmachcred(NFSV4OP_BINDCONNTOSESS, nd, clp);
6496 if (error != 0)
6497 goto out;
6498 if (*foreaftp == NFSCDFC4_BACK ||
6499 *foreaftp == NFSCDFC4_BACK_OR_BOTH ||
6500 *foreaftp == NFSCDFC4_FORE_OR_BOTH) {
6501 /* Try to set up a backchannel. */
6502 if (clp->lc_req.nr_client == NULL) {
6503 NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire "
6504 "backchannel\n");
6505 clp->lc_req.nr_client = (struct __rpc_client *)
6506 clnt_bck_create(nd->nd_xprt->xp_socket,
6507 sep->sess_cbprogram, NFSV4_CBVERS);
6508 }
6509 if (clp->lc_req.nr_client != NULL) {
6510 NFSD_DEBUG(2, "nfsrv_bindconnsess: set up "
6511 "backchannel\n");
6512 savxprt = sep->sess_cbsess.nfsess_xprt;
6513 SVC_ACQUIRE(nd->nd_xprt);
6514 CLNT_ACQUIRE(clp->lc_req.nr_client);
6515 nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
6516 /* Disable idle timeout. */
6517 nd->nd_xprt->xp_idletimeout = 0;
6518 sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
6519 sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN;
6520 clp->lc_flags |= LCL_DONEBINDCONN |
6521 LCL_NEEDSCBNULL;
6522 clp->lc_flags &= ~LCL_CBDOWN;
6523 if (*foreaftp == NFSCDFS4_BACK)
6524 *foreaftp = NFSCDFS4_BACK;
6525 else
6526 *foreaftp = NFSCDFS4_BOTH;
6527 } else if (*foreaftp != NFSCDFC4_BACK) {
6528 NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set "
6529 "up backchannel\n");
6530 sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
6531 clp->lc_flags |= LCL_DONEBINDCONN;
6532 *foreaftp = NFSCDFS4_FORE;
6533 } else {
6534 error = NFSERR_NOTSUPP;
6535 printf("nfsrv_bindconnsess: Can't add "
6536 "backchannel\n");
6537 }
6538 } else {
6539 NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n");
6540 clp->lc_flags |= LCL_DONEBINDCONN;
6541 *foreaftp = NFSCDFS4_FORE;
6542 }
6543 } else
6544 error = NFSERR_BADSESSION;
6545 out:
6546 NFSUNLOCKSESSION(shp);
6547 NFSUNLOCKSTATE();
6548 if (savxprt != NULL)
6549 SVC_RELEASE(savxprt);
6550 return (error);
6551 }
6552
6553 /*
6554 * Free up a session structure.
6555 */
6556 static int
nfsrv_freesession(struct nfsrv_descript * nd,struct nfsdsession * sep,uint8_t * sessionid,bool locked,SVCXPRT ** old_xprtp)6557 nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
6558 uint8_t *sessionid, bool locked, SVCXPRT **old_xprtp)
6559 {
6560 struct nfssessionhash *shp;
6561 int i;
6562
6563 if (!locked)
6564 NFSLOCKSTATE();
6565 if (sep == NULL) {
6566 shp = NFSSESSIONHASH(sessionid);
6567 NFSLOCKSESSION(shp);
6568 sep = nfsrv_findsession(sessionid);
6569 } else {
6570 shp = NFSSESSIONHASH(sep->sess_sessionid);
6571 NFSLOCKSESSION(shp);
6572 }
6573 if (sep != NULL) {
6574 /* Check for the SP4_MACH_CRED case. */
6575 if (nd != NULL && nfsrv_checkmachcred(NFSV4OP_DESTROYSESSION,
6576 nd, sep->sess_clp) != 0) {
6577 NFSUNLOCKSESSION(shp);
6578 if (!locked)
6579 NFSUNLOCKSTATE();
6580 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
6581 }
6582
6583 sep->sess_refcnt--;
6584 if (sep->sess_refcnt > 0) {
6585 NFSUNLOCKSESSION(shp);
6586 if (!locked)
6587 NFSUNLOCKSTATE();
6588 return (NFSERR_BACKCHANBUSY);
6589 }
6590 LIST_REMOVE(sep, sess_hash);
6591 LIST_REMOVE(sep, sess_list);
6592 }
6593 NFSUNLOCKSESSION(shp);
6594 if (!locked)
6595 NFSUNLOCKSTATE();
6596 if (sep == NULL)
6597 return (NFSERR_BADSESSION);
6598 for (i = 0; i < NFSV4_SLOTS; i++)
6599 if (sep->sess_slots[i].nfssl_reply != NULL)
6600 m_freem(sep->sess_slots[i].nfssl_reply);
6601 if (!locked) {
6602 if (sep->sess_cbsess.nfsess_xprt != NULL)
6603 SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
6604 if (old_xprtp != NULL)
6605 *old_xprtp = NULL;
6606 } else if (old_xprtp != NULL)
6607 *old_xprtp = sep->sess_cbsess.nfsess_xprt;
6608 free(sep, M_NFSDSESSION);
6609 return (0);
6610 }
6611
6612 /*
6613 * Free a stateid.
6614 * RFC5661 says that it should fail when there are associated opens, locks
6615 * or delegations. Since stateids represent opens, I don't see how you can
6616 * free an open stateid (it will be free'd when closed), so this function
6617 * only works for lock stateids (freeing the lock_owner) or delegations.
6618 */
6619 int
nfsrv_freestateid(struct nfsrv_descript * nd,nfsv4stateid_t * stateidp,NFSPROC_T * p)6620 nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6621 NFSPROC_T *p)
6622 {
6623 struct nfsclient *clp;
6624 struct nfsstate *stp;
6625 int error;
6626
6627 NFSLOCKSTATE();
6628 /*
6629 * Look up the stateid
6630 */
6631 error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
6632 NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6633 if (error == 0) {
6634 /* First, check for a delegation. */
6635 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
6636 if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
6637 NFSX_STATEIDOTHER))
6638 break;
6639 }
6640 if (stp != NULL) {
6641 nfsrv_freedeleg(stp);
6642 NFSUNLOCKSTATE();
6643 return (error);
6644 }
6645 }
6646 /* Not a delegation, try for a lock_owner. */
6647 if (error == 0)
6648 error = nfsrv_getstate(clp, stateidp, 0, &stp);
6649 if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
6650 NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
6651 /* Not a lock_owner stateid. */
6652 error = NFSERR_LOCKSHELD;
6653 if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
6654 error = NFSERR_LOCKSHELD;
6655 if (error == 0)
6656 nfsrv_freelockowner(stp, NULL, 0, p);
6657 NFSUNLOCKSTATE();
6658 return (error);
6659 }
6660
6661 /*
6662 * Test a stateid.
6663 */
6664 int
nfsrv_teststateid(struct nfsrv_descript * nd,nfsv4stateid_t * stateidp,NFSPROC_T * p)6665 nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6666 NFSPROC_T *p)
6667 {
6668 struct nfsclient *clp;
6669 struct nfsstate *stp;
6670 int error;
6671
6672 NFSLOCKSTATE();
6673 /*
6674 * Look up the stateid
6675 */
6676 error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
6677 NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6678 if (error == 0)
6679 error = nfsrv_getstate(clp, stateidp, 0, &stp);
6680 if (error == 0 && stateidp->seqid != 0 &&
6681 SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid))
6682 error = NFSERR_OLDSTATEID;
6683 NFSUNLOCKSTATE();
6684 return (error);
6685 }
6686
6687 /*
6688 * Generate the xdr for an NFSv4.1 CBSequence Operation.
6689 */
6690 static int
nfsv4_setcbsequence(struct nfsrv_descript * nd,struct nfsclient * clp,int dont_replycache,struct nfsdsession ** sepp,int * slotposp)6691 nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
6692 int dont_replycache, struct nfsdsession **sepp, int *slotposp)
6693 {
6694 struct nfsdsession *sep;
6695 uint32_t *tl, slotseq = 0;
6696 int maxslot;
6697 uint8_t sessionid[NFSX_V4SESSIONID];
6698 int error;
6699
6700 error = nfsv4_getcbsession(clp, sepp);
6701 if (error != 0)
6702 return (error);
6703 sep = *sepp;
6704 nfsv4_sequencelookup(NULL, &sep->sess_cbsess, slotposp, &maxslot,
6705 &slotseq, sessionid, true);
6706 KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
6707
6708 /* Build the Sequence arguments. */
6709 NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
6710 bcopy(sessionid, tl, NFSX_V4SESSIONID);
6711 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
6712 nd->nd_slotseq = tl;
6713 nd->nd_slotid = *slotposp;
6714 nd->nd_flag |= ND_HASSLOTID;
6715 *tl++ = txdr_unsigned(slotseq);
6716 *tl++ = txdr_unsigned(*slotposp);
6717 *tl++ = txdr_unsigned(maxslot);
6718 if (dont_replycache == 0)
6719 *tl++ = newnfs_true;
6720 else
6721 *tl++ = newnfs_false;
6722 *tl = 0; /* No referring call list, for now. */
6723 nd->nd_flag |= ND_HASSEQUENCE;
6724 return (0);
6725 }
6726
6727 /*
6728 * Get a session for the callback.
6729 */
6730 static int
nfsv4_getcbsession(struct nfsclient * clp,struct nfsdsession ** sepp)6731 nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
6732 {
6733 struct nfsdsession *sep;
6734
6735 NFSLOCKSTATE();
6736 LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6737 if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
6738 break;
6739 }
6740 if (sep == NULL) {
6741 NFSUNLOCKSTATE();
6742 return (NFSERR_BADSESSION);
6743 }
6744 sep->sess_refcnt++;
6745 *sepp = sep;
6746 NFSUNLOCKSTATE();
6747 return (0);
6748 }
6749
6750 /*
6751 * Free up all backchannel xprts. This needs to be done when the nfsd threads
6752 * exit, since those transports will all be going away.
6753 * This is only called after all the nfsd threads are done performing RPCs,
6754 * so locking shouldn't be an issue.
6755 */
6756 void
nfsrv_freeallbackchannel_xprts(void)6757 nfsrv_freeallbackchannel_xprts(void)
6758 {
6759 struct nfsdsession *sep;
6760 struct nfsclient *clp;
6761 SVCXPRT *xprt;
6762 int i;
6763
6764 for (i = 0; i < nfsrv_clienthashsize; i++) {
6765 LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) {
6766 LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6767 xprt = sep->sess_cbsess.nfsess_xprt;
6768 sep->sess_cbsess.nfsess_xprt = NULL;
6769 if (xprt != NULL)
6770 SVC_RELEASE(xprt);
6771 }
6772 }
6773 }
6774 }
6775
6776 /*
6777 * Do a layout commit. Actually just call nfsrv_updatemdsattr().
6778 * I have no idea if the rest of these arguments will ever be useful?
6779 */
6780 int
nfsrv_layoutcommit(struct nfsrv_descript * nd,vnode_t vp,int layouttype,int hasnewoff,uint64_t newoff,uint64_t offset,uint64_t len,int hasnewmtime,struct timespec * newmtimep,int reclaim,nfsv4stateid_t * stateidp,int maxcnt,char * layp,int * hasnewsizep,uint64_t * newsizep,struct ucred * cred,NFSPROC_T * p)6781 nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype,
6782 int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len,
6783 int hasnewmtime, struct timespec *newmtimep, int reclaim,
6784 nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep,
6785 uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p)
6786 {
6787 struct nfsvattr na;
6788 int error;
6789
6790 error = nfsrv_updatemdsattr(vp, &na, p);
6791 if (error == 0) {
6792 *hasnewsizep = 1;
6793 *newsizep = na.na_size;
6794 }
6795 return (error);
6796 }
6797
6798 /*
6799 * Try and get a layout.
6800 */
6801 int
nfsrv_layoutget(struct nfsrv_descript * nd,vnode_t vp,struct nfsexstuff * exp,int layouttype,int * iomode,uint64_t * offset,uint64_t * len,uint64_t minlen,nfsv4stateid_t * stateidp,int maxcnt,int * retonclose,int * layoutlenp,char * layp,struct ucred * cred,NFSPROC_T * p)6802 nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp,
6803 int layouttype, int *iomode, uint64_t *offset, uint64_t *len,
6804 uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose,
6805 int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p)
6806 {
6807 struct nfslayouthash *lhyp;
6808 struct nfslayout *lyp;
6809 char *devid;
6810 fhandle_t fh, *dsfhp;
6811 int error, mirrorcnt;
6812
6813 if (nfsrv_devidcnt == 0)
6814 return (NFSERR_UNKNLAYOUTTYPE);
6815
6816 if (*offset != 0)
6817 printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset,
6818 (uintmax_t)*len);
6819 error = nfsvno_getfh(vp, &fh, p);
6820 NFSD_DEBUG(4, "layoutget getfh=%d\n", error);
6821 if (error != 0)
6822 return (error);
6823
6824 /*
6825 * For now, all layouts are for entire files.
6826 * Only issue Read/Write layouts if requested for a non-readonly fs.
6827 */
6828 if (NFSVNO_EXRDONLY(exp)) {
6829 if (*iomode == NFSLAYOUTIOMODE_RW)
6830 return (NFSERR_LAYOUTTRYLATER);
6831 *iomode = NFSLAYOUTIOMODE_READ;
6832 }
6833 if (*iomode != NFSLAYOUTIOMODE_RW)
6834 *iomode = NFSLAYOUTIOMODE_READ;
6835
6836 /*
6837 * Check to see if a write layout can be issued for this file.
6838 * This is used during mirror recovery to avoid RW layouts being
6839 * issued for a file while it is being copied to the recovered
6840 * mirror.
6841 */
6842 if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0)
6843 return (NFSERR_LAYOUTTRYLATER);
6844
6845 *retonclose = 0;
6846 *offset = 0;
6847 *len = UINT64_MAX;
6848
6849 /* First, see if a layout already exists and return if found. */
6850 lhyp = NFSLAYOUTHASH(&fh);
6851 NFSLOCKLAYOUT(lhyp);
6852 error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp);
6853 NFSD_DEBUG(4, "layoutget findlay=%d\n", error);
6854 /*
6855 * Not sure if the seqid must be the same, so I won't check it.
6856 */
6857 if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] ||
6858 stateidp->other[1] != lyp->lay_stateid.other[1] ||
6859 stateidp->other[2] != lyp->lay_stateid.other[2])) {
6860 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
6861 NFSUNLOCKLAYOUT(lhyp);
6862 NFSD_DEBUG(1, "ret bad stateid\n");
6863 return (NFSERR_BADSTATEID);
6864 }
6865 /*
6866 * I believe we get here because there is a race between
6867 * the client processing the CBLAYOUTRECALL and the layout
6868 * being deleted here on the server.
6869 * The client has now done a LayoutGet with a non-layout
6870 * stateid, as it would when there is no layout.
6871 * As such, free this layout and set error == NFSERR_BADSTATEID
6872 * so the code below will create a new layout structure as
6873 * would happen if no layout was found.
6874 * "lyp" will be set before being used below, but set it NULL
6875 * as a safety belt.
6876 */
6877 nfsrv_freelayout(&lhyp->list, lyp);
6878 lyp = NULL;
6879 error = NFSERR_BADSTATEID;
6880 }
6881 if (error == 0) {
6882 if (lyp->lay_layoutlen > maxcnt) {
6883 NFSUNLOCKLAYOUT(lhyp);
6884 NFSD_DEBUG(1, "ret layout too small\n");
6885 return (NFSERR_TOOSMALL);
6886 }
6887 if (*iomode == NFSLAYOUTIOMODE_RW) {
6888 if ((lyp->lay_flags & NFSLAY_NOSPC) != 0) {
6889 NFSUNLOCKLAYOUT(lhyp);
6890 NFSD_DEBUG(1, "ret layout nospace\n");
6891 return (NFSERR_NOSPC);
6892 }
6893 lyp->lay_flags |= NFSLAY_RW;
6894 } else
6895 lyp->lay_flags |= NFSLAY_READ;
6896 NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
6897 *layoutlenp = lyp->lay_layoutlen;
6898 if (++lyp->lay_stateid.seqid == 0)
6899 lyp->lay_stateid.seqid = 1;
6900 stateidp->seqid = lyp->lay_stateid.seqid;
6901 NFSUNLOCKLAYOUT(lhyp);
6902 NFSD_DEBUG(4, "ret fnd layout\n");
6903 return (0);
6904 }
6905 NFSUNLOCKLAYOUT(lhyp);
6906
6907 /* Find the device id and file handle. */
6908 dsfhp = malloc(sizeof(fhandle_t) * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
6909 devid = malloc(NFSX_V4DEVICEID * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
6910 error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, dsfhp, devid);
6911 NFSD_DEBUG(4, "layoutget devandfh=%d\n", error);
6912 if (error == 0) {
6913 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
6914 if (NFSX_V4FILELAYOUT > maxcnt)
6915 error = NFSERR_TOOSMALL;
6916 else
6917 lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp,
6918 devid, vp->v_mount->mnt_stat.f_fsid);
6919 } else {
6920 if (NFSX_V4FLEXLAYOUT(mirrorcnt) > maxcnt)
6921 error = NFSERR_TOOSMALL;
6922 else
6923 lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt,
6924 &fh, dsfhp, devid,
6925 vp->v_mount->mnt_stat.f_fsid);
6926 }
6927 }
6928 free(dsfhp, M_TEMP);
6929 free(devid, M_TEMP);
6930 if (error != 0)
6931 return (error);
6932
6933 /*
6934 * Now, add this layout to the list.
6935 */
6936 error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p);
6937 NFSD_DEBUG(4, "layoutget addl=%d\n", error);
6938 /*
6939 * The lyp will be set to NULL by nfsrv_addlayout() if it
6940 * linked the new structure into the lists.
6941 */
6942 free(lyp, M_NFSDSTATE);
6943 return (error);
6944 }
6945
6946 /*
6947 * Generate a File Layout.
6948 */
6949 static struct nfslayout *
nfsrv_filelayout(struct nfsrv_descript * nd,int iomode,fhandle_t * fhp,fhandle_t * dsfhp,char * devid,fsid_t fs)6950 nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
6951 fhandle_t *dsfhp, char *devid, fsid_t fs)
6952 {
6953 uint32_t *tl;
6954 struct nfslayout *lyp;
6955 uint64_t pattern_offset;
6956
6957 lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE,
6958 M_WAITOK | M_ZERO);
6959 lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES;
6960 if (iomode == NFSLAYOUTIOMODE_RW)
6961 lyp->lay_flags = NFSLAY_RW;
6962 else
6963 lyp->lay_flags = NFSLAY_READ;
6964 NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
6965 lyp->lay_clientid.qval = nd->nd_clientid.qval;
6966 lyp->lay_fsid = fs;
6967 NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
6968
6969 /* Fill in the xdr for the files layout. */
6970 tl = (uint32_t *)lyp->lay_xdr;
6971 NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */
6972 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6973
6974 /* Set the stripe size to the maximum I/O size. */
6975 *tl++ = txdr_unsigned(nfs_srvmaxio & NFSFLAYUTIL_STRIPE_MASK);
6976 *tl++ = 0; /* 1st stripe index. */
6977 pattern_offset = 0;
6978 txdr_hyper(pattern_offset, tl); tl += 2; /* Pattern offset. */
6979 *tl++ = txdr_unsigned(1); /* 1 file handle. */
6980 *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
6981 NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
6982 lyp->lay_layoutlen = NFSX_V4FILELAYOUT;
6983 return (lyp);
6984 }
6985
6986 #define FLEX_OWNERID "999"
6987 #define FLEX_UID0 "0"
6988 /*
6989 * Generate a Flex File Layout.
6990 * The FLEX_OWNERID can be any string of 3 decimal digits. Although this
6991 * string goes on the wire, it isn't supposed to be used by the client,
6992 * since this server uses tight coupling.
6993 * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use
6994 * a string of "0". This works around the Linux Flex File Layout driver bug
6995 * which uses the synthetic uid/gid strings for the "tightly coupled" case.
6996 */
6997 static struct nfslayout *
nfsrv_flexlayout(struct nfsrv_descript * nd,int iomode,int mirrorcnt,fhandle_t * fhp,fhandle_t * dsfhp,char * devid,fsid_t fs)6998 nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt,
6999 fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs)
7000 {
7001 uint32_t *tl;
7002 struct nfslayout *lyp;
7003 uint64_t lenval;
7004 int i;
7005
7006 lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FLEXLAYOUT(mirrorcnt),
7007 M_NFSDSTATE, M_WAITOK | M_ZERO);
7008 lyp->lay_type = NFSLAYOUT_FLEXFILE;
7009 if (iomode == NFSLAYOUTIOMODE_RW)
7010 lyp->lay_flags = NFSLAY_RW;
7011 else
7012 lyp->lay_flags = NFSLAY_READ;
7013 NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
7014 lyp->lay_clientid.qval = nd->nd_clientid.qval;
7015 lyp->lay_fsid = fs;
7016 lyp->lay_mirrorcnt = mirrorcnt;
7017 NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
7018
7019 /* Fill in the xdr for the files layout. */
7020 tl = (uint32_t *)lyp->lay_xdr;
7021 lenval = 0;
7022 txdr_hyper(lenval, tl); tl += 2; /* Stripe unit. */
7023 *tl++ = txdr_unsigned(mirrorcnt); /* # of mirrors. */
7024 for (i = 0; i < mirrorcnt; i++) {
7025 *tl++ = txdr_unsigned(1); /* One stripe. */
7026 NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */
7027 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7028 devid += NFSX_V4DEVICEID;
7029 *tl++ = txdr_unsigned(1); /* Efficiency. */
7030 *tl++ = 0; /* Proxy Stateid. */
7031 *tl++ = 0x55555555;
7032 *tl++ = 0x55555555;
7033 *tl++ = 0x55555555;
7034 *tl++ = txdr_unsigned(1); /* 1 file handle. */
7035 *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
7036 NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
7037 tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED);
7038 dsfhp++;
7039 if (nfsrv_flexlinuxhack != 0) {
7040 *tl++ = txdr_unsigned(strlen(FLEX_UID0));
7041 *tl = 0; /* 0 pad string. */
7042 NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
7043 *tl++ = txdr_unsigned(strlen(FLEX_UID0));
7044 *tl = 0; /* 0 pad string. */
7045 NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
7046 } else {
7047 *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
7048 NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
7049 *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
7050 NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
7051 }
7052 }
7053 *tl++ = txdr_unsigned(0); /* ff_flags. */
7054 *tl = txdr_unsigned(60); /* Status interval hint. */
7055 lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt);
7056 return (lyp);
7057 }
7058
7059 /*
7060 * Parse and process Flex File errors returned via LayoutReturn.
7061 */
7062 static void
nfsrv_flexlayouterr(struct nfsrv_descript * nd,uint32_t * layp,int maxcnt,NFSPROC_T * p)7063 nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt,
7064 NFSPROC_T *p)
7065 {
7066 uint32_t *tl;
7067 int cnt, errcnt, i, j, opnum, stat;
7068 char devid[NFSX_V4DEVICEID];
7069
7070 tl = layp;
7071 maxcnt -= NFSX_UNSIGNED;
7072 if (maxcnt > 0)
7073 cnt = fxdr_unsigned(int, *tl++);
7074 else
7075 cnt = 0;
7076 NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt);
7077 for (i = 0; i < cnt; i++) {
7078 maxcnt -= NFSX_STATEID + 2 * NFSX_HYPER +
7079 NFSX_UNSIGNED;
7080 if (maxcnt <= 0)
7081 break;
7082 /* Skip offset, length and stateid for now. */
7083 tl += (4 + NFSX_STATEID / NFSX_UNSIGNED);
7084 errcnt = fxdr_unsigned(int, *tl++);
7085 NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt);
7086 for (j = 0; j < errcnt; j++) {
7087 maxcnt -= NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED;
7088 if (maxcnt < 0)
7089 break;
7090 NFSBCOPY(tl, devid, NFSX_V4DEVICEID);
7091 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7092 stat = fxdr_unsigned(int, *tl++);
7093 opnum = fxdr_unsigned(int, *tl++);
7094 NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum,
7095 stat);
7096 /*
7097 * Except for NFSERR_ACCES, NFSERR_STALE and
7098 * NFSERR_NOSPC errors, disable the mirror.
7099 */
7100 if (stat != NFSERR_ACCES && stat != NFSERR_STALE &&
7101 stat != NFSERR_NOSPC)
7102 nfsrv_delds(devid, p);
7103
7104 /* For NFSERR_NOSPC, mark all devids and layouts. */
7105 if (stat == NFSERR_NOSPC)
7106 nfsrv_marknospc(devid, true);
7107 }
7108 }
7109 }
7110
7111 /*
7112 * This function removes all flex file layouts which has a mirror with
7113 * a device id that matches the argument.
7114 * Called when the DS represented by the device id has failed.
7115 */
7116 void
nfsrv_flexmirrordel(char * devid,NFSPROC_T * p)7117 nfsrv_flexmirrordel(char *devid, NFSPROC_T *p)
7118 {
7119 uint32_t *tl;
7120 struct nfslayout *lyp, *nlyp;
7121 struct nfslayouthash *lhyp;
7122 struct nfslayouthead loclyp;
7123 int i, j;
7124
7125 NFSD_DEBUG(4, "flexmirrordel\n");
7126 /* Move all layouts found onto a local list. */
7127 TAILQ_INIT(&loclyp);
7128 for (i = 0; i < nfsrv_layouthashsize; i++) {
7129 lhyp = &nfslayouthash[i];
7130 NFSLOCKLAYOUT(lhyp);
7131 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7132 if (lyp->lay_type == NFSLAYOUT_FLEXFILE &&
7133 lyp->lay_mirrorcnt > 1) {
7134 NFSD_DEBUG(4, "possible match\n");
7135 tl = lyp->lay_xdr;
7136 tl += 3;
7137 for (j = 0; j < lyp->lay_mirrorcnt; j++) {
7138 tl++;
7139 if (NFSBCMP(devid, tl, NFSX_V4DEVICEID)
7140 == 0) {
7141 /* Found one. */
7142 NFSD_DEBUG(4, "fnd one\n");
7143 TAILQ_REMOVE(&lhyp->list, lyp,
7144 lay_list);
7145 TAILQ_INSERT_HEAD(&loclyp, lyp,
7146 lay_list);
7147 break;
7148 }
7149 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED +
7150 NFSM_RNDUP(NFSX_V4PNFSFH) /
7151 NFSX_UNSIGNED + 11 * NFSX_UNSIGNED);
7152 }
7153 }
7154 }
7155 NFSUNLOCKLAYOUT(lhyp);
7156 }
7157
7158 /* Now, try to do a Layout recall for each one found. */
7159 TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) {
7160 NFSD_DEBUG(4, "do layout recall\n");
7161 /*
7162 * The layout stateid.seqid needs to be incremented
7163 * before doing a LAYOUT_RECALL callback.
7164 */
7165 if (++lyp->lay_stateid.seqid == 0)
7166 lyp->lay_stateid.seqid = 1;
7167 nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
7168 &lyp->lay_fh, lyp, 1, lyp->lay_type, p);
7169 nfsrv_freelayout(&loclyp, lyp);
7170 }
7171 }
7172
7173 /*
7174 * Do a recall callback to the client for this layout.
7175 */
7176 static int
nfsrv_recalllayout(nfsquad_t clid,nfsv4stateid_t * stateidp,fhandle_t * fhp,struct nfslayout * lyp,int changed,int laytype,NFSPROC_T * p)7177 nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp,
7178 struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p)
7179 {
7180 struct nfsclient *clp;
7181 int error;
7182
7183 NFSD_DEBUG(4, "nfsrv_recalllayout\n");
7184 error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0),
7185 0, NULL, p);
7186 NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error);
7187 if (error != 0) {
7188 printf("nfsrv_recalllayout: getclient err=%d\n", error);
7189 return (error);
7190 }
7191 if ((clp->lc_flags & LCL_NFSV41) != 0) {
7192 error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL,
7193 stateidp, changed, fhp, NULL, NULL, laytype, p);
7194 /* If lyp != NULL, handle an error return here. */
7195 if (error != 0 && lyp != NULL) {
7196 NFSDRECALLLOCK();
7197 /*
7198 * Mark it returned, since no layout recall
7199 * has been done.
7200 * All errors seem to be non-recoverable, although
7201 * NFSERR_NOMATCHLAYOUT is a normal event.
7202 */
7203 if ((lyp->lay_flags & NFSLAY_RECALL) != 0) {
7204 lyp->lay_flags |= NFSLAY_RETURNED;
7205 wakeup(lyp);
7206 }
7207 NFSDRECALLUNLOCK();
7208 if (error != NFSERR_NOMATCHLAYOUT)
7209 printf("nfsrv_recalllayout: err=%d\n", error);
7210 }
7211 } else
7212 printf("nfsrv_recalllayout: clp not NFSv4.1\n");
7213 return (error);
7214 }
7215
7216 /*
7217 * Find a layout to recall when we exceed our high water mark.
7218 */
7219 void
nfsrv_recalloldlayout(NFSPROC_T * p)7220 nfsrv_recalloldlayout(NFSPROC_T *p)
7221 {
7222 struct nfslayouthash *lhyp;
7223 struct nfslayout *lyp;
7224 nfsquad_t clientid;
7225 nfsv4stateid_t stateid;
7226 fhandle_t fh;
7227 int error, laytype = 0, ret;
7228
7229 lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize];
7230 NFSLOCKLAYOUT(lhyp);
7231 TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) {
7232 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
7233 lyp->lay_flags |= NFSLAY_CALLB;
7234 /*
7235 * The layout stateid.seqid needs to be incremented
7236 * before doing a LAYOUT_RECALL callback.
7237 */
7238 if (++lyp->lay_stateid.seqid == 0)
7239 lyp->lay_stateid.seqid = 1;
7240 clientid = lyp->lay_clientid;
7241 stateid = lyp->lay_stateid;
7242 NFSBCOPY(&lyp->lay_fh, &fh, sizeof(fh));
7243 laytype = lyp->lay_type;
7244 break;
7245 }
7246 }
7247 NFSUNLOCKLAYOUT(lhyp);
7248 if (lyp != NULL) {
7249 error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, 0,
7250 laytype, p);
7251 if (error != 0 && error != NFSERR_NOMATCHLAYOUT)
7252 NFSD_DEBUG(4, "recallold=%d\n", error);
7253 if (error != 0) {
7254 NFSLOCKLAYOUT(lhyp);
7255 /*
7256 * Since the hash list was unlocked, we need to
7257 * find it again.
7258 */
7259 ret = nfsrv_findlayout(&clientid, &fh, laytype, p,
7260 &lyp);
7261 if (ret == 0 &&
7262 (lyp->lay_flags & NFSLAY_CALLB) != 0 &&
7263 lyp->lay_stateid.other[0] == stateid.other[0] &&
7264 lyp->lay_stateid.other[1] == stateid.other[1] &&
7265 lyp->lay_stateid.other[2] == stateid.other[2]) {
7266 /*
7267 * The client no longer knows this layout, so
7268 * it can be free'd now.
7269 */
7270 if (error == NFSERR_NOMATCHLAYOUT)
7271 nfsrv_freelayout(&lhyp->list, lyp);
7272 else {
7273 /*
7274 * Leave it to be tried later by
7275 * clearing NFSLAY_CALLB and moving
7276 * it to the head of the list, so it
7277 * won't be tried again for a while.
7278 */
7279 lyp->lay_flags &= ~NFSLAY_CALLB;
7280 TAILQ_REMOVE(&lhyp->list, lyp,
7281 lay_list);
7282 TAILQ_INSERT_HEAD(&lhyp->list, lyp,
7283 lay_list);
7284 }
7285 }
7286 NFSUNLOCKLAYOUT(lhyp);
7287 }
7288 }
7289 }
7290
7291 /*
7292 * Try and return layout(s).
7293 */
7294 int
nfsrv_layoutreturn(struct nfsrv_descript * nd,vnode_t vp,int layouttype,int iomode,uint64_t offset,uint64_t len,int reclaim,int kind,nfsv4stateid_t * stateidp,int maxcnt,uint32_t * layp,int * fndp,struct ucred * cred,NFSPROC_T * p)7295 nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp,
7296 int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim,
7297 int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp,
7298 struct ucred *cred, NFSPROC_T *p)
7299 {
7300 struct nfsvattr na;
7301 struct nfslayouthash *lhyp;
7302 struct nfslayout *lyp;
7303 fhandle_t fh;
7304 int error = 0;
7305
7306 *fndp = 0;
7307 if (kind == NFSV4LAYOUTRET_FILE) {
7308 error = nfsvno_getfh(vp, &fh, p);
7309 if (error == 0) {
7310 error = nfsrv_updatemdsattr(vp, &na, p);
7311 if (error != 0)
7312 printf("nfsrv_layoutreturn: updatemdsattr"
7313 " failed=%d\n", error);
7314 }
7315 if (error == 0) {
7316 if (reclaim == newnfs_true) {
7317 error = nfsrv_checkgrace(NULL, NULL,
7318 NFSLCK_RECLAIM);
7319 if (error != NFSERR_NOGRACE)
7320 error = 0;
7321 return (error);
7322 }
7323 lhyp = NFSLAYOUTHASH(&fh);
7324 NFSDRECALLLOCK();
7325 NFSLOCKLAYOUT(lhyp);
7326 error = nfsrv_findlayout(&nd->nd_clientid, &fh,
7327 layouttype, p, &lyp);
7328 NFSD_DEBUG(4, "layoutret findlay=%d\n", error);
7329 if (error == 0 &&
7330 stateidp->other[0] == lyp->lay_stateid.other[0] &&
7331 stateidp->other[1] == lyp->lay_stateid.other[1] &&
7332 stateidp->other[2] == lyp->lay_stateid.other[2]) {
7333 NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d"
7334 " %x %x %x laystateid %d %x %x %x"
7335 " off=%ju len=%ju flgs=0x%x\n",
7336 stateidp->seqid, stateidp->other[0],
7337 stateidp->other[1], stateidp->other[2],
7338 lyp->lay_stateid.seqid,
7339 lyp->lay_stateid.other[0],
7340 lyp->lay_stateid.other[1],
7341 lyp->lay_stateid.other[2],
7342 (uintmax_t)offset, (uintmax_t)len,
7343 lyp->lay_flags);
7344 if (++lyp->lay_stateid.seqid == 0)
7345 lyp->lay_stateid.seqid = 1;
7346 stateidp->seqid = lyp->lay_stateid.seqid;
7347 if (offset == 0 && len == UINT64_MAX) {
7348 if ((iomode & NFSLAYOUTIOMODE_READ) !=
7349 0)
7350 lyp->lay_flags &= ~NFSLAY_READ;
7351 if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
7352 lyp->lay_flags &= ~NFSLAY_RW;
7353 if ((lyp->lay_flags & (NFSLAY_READ |
7354 NFSLAY_RW)) == 0)
7355 nfsrv_freelayout(&lhyp->list,
7356 lyp);
7357 else
7358 *fndp = 1;
7359 } else
7360 *fndp = 1;
7361 }
7362 NFSUNLOCKLAYOUT(lhyp);
7363 /* Search the nfsrv_recalllist for a match. */
7364 TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
7365 if (NFSBCMP(&lyp->lay_fh, &fh,
7366 sizeof(fh)) == 0 &&
7367 lyp->lay_clientid.qval ==
7368 nd->nd_clientid.qval &&
7369 stateidp->other[0] ==
7370 lyp->lay_stateid.other[0] &&
7371 stateidp->other[1] ==
7372 lyp->lay_stateid.other[1] &&
7373 stateidp->other[2] ==
7374 lyp->lay_stateid.other[2]) {
7375 lyp->lay_flags |= NFSLAY_RETURNED;
7376 wakeup(lyp);
7377 error = 0;
7378 }
7379 }
7380 NFSDRECALLUNLOCK();
7381 }
7382 if (layouttype == NFSLAYOUT_FLEXFILE && layp != NULL)
7383 nfsrv_flexlayouterr(nd, layp, maxcnt, p);
7384 } else if (kind == NFSV4LAYOUTRET_FSID)
7385 nfsrv_freelayouts(&nd->nd_clientid,
7386 &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode);
7387 else if (kind == NFSV4LAYOUTRET_ALL)
7388 nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode);
7389 else
7390 error = NFSERR_INVAL;
7391 if (error == -1)
7392 error = 0;
7393 return (error);
7394 }
7395
7396 /*
7397 * Look for an existing layout.
7398 */
7399 static int
nfsrv_findlayout(nfsquad_t * clientidp,fhandle_t * fhp,int laytype,NFSPROC_T * p,struct nfslayout ** lypp)7400 nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
7401 NFSPROC_T *p, struct nfslayout **lypp)
7402 {
7403 struct nfslayouthash *lhyp;
7404 struct nfslayout *lyp;
7405 int ret;
7406
7407 *lypp = NULL;
7408 ret = 0;
7409 lhyp = NFSLAYOUTHASH(fhp);
7410 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
7411 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
7412 lyp->lay_clientid.qval == clientidp->qval &&
7413 lyp->lay_type == laytype)
7414 break;
7415 }
7416 if (lyp != NULL)
7417 *lypp = lyp;
7418 else
7419 ret = -1;
7420 return (ret);
7421 }
7422
7423 /*
7424 * Add the new layout, as required.
7425 */
7426 static int
nfsrv_addlayout(struct nfsrv_descript * nd,struct nfslayout ** lypp,nfsv4stateid_t * stateidp,char * layp,int * layoutlenp,NFSPROC_T * p)7427 nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
7428 nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p)
7429 {
7430 struct nfsclient *clp;
7431 struct nfslayouthash *lhyp;
7432 struct nfslayout *lyp, *nlyp;
7433 fhandle_t *fhp;
7434 int error;
7435
7436 KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0,
7437 ("nfsrv_layoutget: no nd_clientid\n"));
7438 lyp = *lypp;
7439 fhp = &lyp->lay_fh;
7440 NFSLOCKSTATE();
7441 error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
7442 NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
7443 if (error != 0) {
7444 NFSUNLOCKSTATE();
7445 return (error);
7446 }
7447 lyp->lay_stateid.seqid = stateidp->seqid = 1;
7448 lyp->lay_stateid.other[0] = stateidp->other[0] =
7449 clp->lc_clientid.lval[0];
7450 lyp->lay_stateid.other[1] = stateidp->other[1] =
7451 clp->lc_clientid.lval[1];
7452 lyp->lay_stateid.other[2] = stateidp->other[2] =
7453 nfsrv_nextstateindex(clp);
7454 NFSUNLOCKSTATE();
7455
7456 lhyp = NFSLAYOUTHASH(fhp);
7457 NFSLOCKLAYOUT(lhyp);
7458 TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) {
7459 if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
7460 nlyp->lay_clientid.qval == nd->nd_clientid.qval)
7461 break;
7462 }
7463 if (nlyp != NULL) {
7464 /* A layout already exists, so use it. */
7465 nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW));
7466 NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen);
7467 *layoutlenp = nlyp->lay_layoutlen;
7468 if (++nlyp->lay_stateid.seqid == 0)
7469 nlyp->lay_stateid.seqid = 1;
7470 stateidp->seqid = nlyp->lay_stateid.seqid;
7471 stateidp->other[0] = nlyp->lay_stateid.other[0];
7472 stateidp->other[1] = nlyp->lay_stateid.other[1];
7473 stateidp->other[2] = nlyp->lay_stateid.other[2];
7474 NFSUNLOCKLAYOUT(lhyp);
7475 return (0);
7476 }
7477
7478 /* Insert the new layout in the lists. */
7479 *lypp = NULL;
7480 atomic_add_int(&nfsrv_layoutcnt, 1);
7481 NFSD_VNET(nfsstatsv1_p)->srvlayouts++;
7482 NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
7483 *layoutlenp = lyp->lay_layoutlen;
7484 TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list);
7485 NFSUNLOCKLAYOUT(lhyp);
7486 return (0);
7487 }
7488
7489 /*
7490 * Get the devinfo for a deviceid.
7491 */
7492 int
nfsrv_getdevinfo(char * devid,int layouttype,uint32_t * maxcnt,uint32_t * notify,int * devaddrlen,char ** devaddr)7493 nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt,
7494 uint32_t *notify, int *devaddrlen, char **devaddr)
7495 {
7496 struct nfsdevice *ds;
7497
7498 if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype !=
7499 NFSLAYOUT_FLEXFILE) ||
7500 (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES))
7501 return (NFSERR_UNKNLAYOUTTYPE);
7502
7503 /*
7504 * Now, search for the device id. Note that the structures won't go
7505 * away, but the order changes in the list. As such, the lock only
7506 * needs to be held during the search through the list.
7507 */
7508 NFSDDSLOCK();
7509 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7510 if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 &&
7511 ds->nfsdev_nmp != NULL)
7512 break;
7513 }
7514 NFSDDSUNLOCK();
7515 if (ds == NULL)
7516 return (NFSERR_NOENT);
7517
7518 /* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. */
7519 *devaddrlen = 0;
7520 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
7521 *devaddrlen = ds->nfsdev_fileaddrlen;
7522 *devaddr = ds->nfsdev_fileaddr;
7523 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
7524 *devaddrlen = ds->nfsdev_flexaddrlen;
7525 *devaddr = ds->nfsdev_flexaddr;
7526 }
7527 if (*devaddrlen == 0)
7528 return (NFSERR_UNKNLAYOUTTYPE);
7529
7530 /*
7531 * The XDR overhead is 3 unsigned values: layout_type,
7532 * length_of_address and notify bitmap.
7533 * If the notify array is changed to not all zeros, the
7534 * count of unsigned values must be increased.
7535 */
7536 if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) +
7537 3 * NFSX_UNSIGNED) {
7538 *maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED;
7539 return (NFSERR_TOOSMALL);
7540 }
7541 return (0);
7542 }
7543
7544 /*
7545 * Free a list of layout state structures.
7546 */
7547 static void
nfsrv_freelayoutlist(nfsquad_t clientid)7548 nfsrv_freelayoutlist(nfsquad_t clientid)
7549 {
7550 struct nfslayouthash *lhyp;
7551 struct nfslayout *lyp, *nlyp;
7552 int i;
7553
7554 for (i = 0; i < nfsrv_layouthashsize; i++) {
7555 lhyp = &nfslayouthash[i];
7556 NFSLOCKLAYOUT(lhyp);
7557 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7558 if (lyp->lay_clientid.qval == clientid.qval)
7559 nfsrv_freelayout(&lhyp->list, lyp);
7560 }
7561 NFSUNLOCKLAYOUT(lhyp);
7562 }
7563 }
7564
7565 /*
7566 * Free up a layout.
7567 */
7568 static void
nfsrv_freelayout(struct nfslayouthead * lhp,struct nfslayout * lyp)7569 nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp)
7570 {
7571
7572 NFSD_DEBUG(4, "Freelayout=%p\n", lyp);
7573 atomic_add_int(&nfsrv_layoutcnt, -1);
7574 NFSD_VNET(nfsstatsv1_p)->srvlayouts--;
7575 TAILQ_REMOVE(lhp, lyp, lay_list);
7576 free(lyp, M_NFSDSTATE);
7577 }
7578
7579 /*
7580 * Free up a device id.
7581 */
7582 void
nfsrv_freeonedevid(struct nfsdevice * ds)7583 nfsrv_freeonedevid(struct nfsdevice *ds)
7584 {
7585 int i;
7586
7587 atomic_add_int(&nfsrv_devidcnt, -1);
7588 vrele(ds->nfsdev_dvp);
7589 for (i = 0; i < nfsrv_dsdirsize; i++)
7590 if (ds->nfsdev_dsdir[i] != NULL)
7591 vrele(ds->nfsdev_dsdir[i]);
7592 free(ds->nfsdev_fileaddr, M_NFSDSTATE);
7593 free(ds->nfsdev_flexaddr, M_NFSDSTATE);
7594 free(ds->nfsdev_host, M_NFSDSTATE);
7595 free(ds, M_NFSDSTATE);
7596 }
7597
7598 /*
7599 * Free up a device id and its mirrors.
7600 */
7601 static void
nfsrv_freedevid(struct nfsdevice * ds)7602 nfsrv_freedevid(struct nfsdevice *ds)
7603 {
7604
7605 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
7606 nfsrv_freeonedevid(ds);
7607 }
7608
7609 /*
7610 * Free all layouts and device ids.
7611 * Done when the nfsd threads are shut down since there may be a new
7612 * modified device id list created when the nfsd is restarted.
7613 */
7614 void
nfsrv_freealllayoutsanddevids(void)7615 nfsrv_freealllayoutsanddevids(void)
7616 {
7617 struct nfsdontlist *mrp, *nmrp;
7618 struct nfslayout *lyp, *nlyp;
7619
7620 /* Get rid of the deviceid structures. */
7621 nfsrv_freealldevids();
7622 TAILQ_INIT(&nfsrv_devidhead);
7623 nfsrv_devidcnt = 0;
7624
7625 /* Get rid of all layouts. */
7626 nfsrv_freealllayouts();
7627
7628 /* Get rid of any nfsdontlist entries. */
7629 LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp)
7630 free(mrp, M_NFSDSTATE);
7631 LIST_INIT(&nfsrv_dontlisthead);
7632 nfsrv_dontlistlen = 0;
7633
7634 /* Free layouts in the recall list. */
7635 TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp)
7636 nfsrv_freelayout(&nfsrv_recalllisthead, lyp);
7637 TAILQ_INIT(&nfsrv_recalllisthead);
7638 }
7639
7640 /*
7641 * Free layouts that match the arguments.
7642 */
7643 static void
nfsrv_freelayouts(nfsquad_t * clid,fsid_t * fs,int laytype,int iomode)7644 nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode)
7645 {
7646 struct nfslayouthash *lhyp;
7647 struct nfslayout *lyp, *nlyp;
7648 int i;
7649
7650 for (i = 0; i < nfsrv_layouthashsize; i++) {
7651 lhyp = &nfslayouthash[i];
7652 NFSLOCKLAYOUT(lhyp);
7653 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7654 if (clid->qval != lyp->lay_clientid.qval)
7655 continue;
7656 if (fs != NULL && fsidcmp(fs, &lyp->lay_fsid) != 0)
7657 continue;
7658 if (laytype != lyp->lay_type)
7659 continue;
7660 if ((iomode & NFSLAYOUTIOMODE_READ) != 0)
7661 lyp->lay_flags &= ~NFSLAY_READ;
7662 if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
7663 lyp->lay_flags &= ~NFSLAY_RW;
7664 if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0)
7665 nfsrv_freelayout(&lhyp->list, lyp);
7666 }
7667 NFSUNLOCKLAYOUT(lhyp);
7668 }
7669 }
7670
7671 /*
7672 * Free all layouts for the argument file.
7673 */
7674 void
nfsrv_freefilelayouts(fhandle_t * fhp)7675 nfsrv_freefilelayouts(fhandle_t *fhp)
7676 {
7677 struct nfslayouthash *lhyp;
7678 struct nfslayout *lyp, *nlyp;
7679
7680 lhyp = NFSLAYOUTHASH(fhp);
7681 NFSLOCKLAYOUT(lhyp);
7682 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7683 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0)
7684 nfsrv_freelayout(&lhyp->list, lyp);
7685 }
7686 NFSUNLOCKLAYOUT(lhyp);
7687 }
7688
7689 /*
7690 * Free all layouts.
7691 */
7692 static void
nfsrv_freealllayouts(void)7693 nfsrv_freealllayouts(void)
7694 {
7695 struct nfslayouthash *lhyp;
7696 struct nfslayout *lyp, *nlyp;
7697 int i;
7698
7699 for (i = 0; i < nfsrv_layouthashsize; i++) {
7700 lhyp = &nfslayouthash[i];
7701 NFSLOCKLAYOUT(lhyp);
7702 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp)
7703 nfsrv_freelayout(&lhyp->list, lyp);
7704 NFSUNLOCKLAYOUT(lhyp);
7705 }
7706 }
7707
7708 /*
7709 * Look up the mount path for the DS server.
7710 */
7711 static int
nfsrv_setdsserver(char * dspathp,char * mdspathp,NFSPROC_T * p,struct nfsdevice ** dsp)7712 nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
7713 struct nfsdevice **dsp)
7714 {
7715 struct nameidata nd;
7716 struct nfsdevice *ds;
7717 struct mount *mp;
7718 int error, i;
7719 char *dsdirpath;
7720 size_t dsdirsize;
7721
7722 NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp);
7723 *dsp = NULL;
7724 if (jailed(p->td_ucred)) {
7725 printf("A pNFS nfsd cannot run in a jail\n");
7726 return (EPERM);
7727 }
7728 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
7729 dspathp);
7730 error = namei(&nd);
7731 NFSD_DEBUG(4, "lookup=%d\n", error);
7732 if (error != 0)
7733 return (error);
7734 if (nd.ni_vp->v_type != VDIR) {
7735 vput(nd.ni_vp);
7736 NFSD_DEBUG(4, "dspath not dir\n");
7737 return (ENOTDIR);
7738 }
7739 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
7740 vput(nd.ni_vp);
7741 NFSD_DEBUG(4, "dspath not an NFS mount\n");
7742 return (ENXIO);
7743 }
7744
7745 /*
7746 * Allocate a DS server structure with the NFS mounted directory
7747 * vnode reference counted, so that a non-forced dismount will
7748 * fail with EBUSY.
7749 * This structure is always linked into the list, even if an error
7750 * is being returned. The caller will free the entire list upon
7751 * an error return.
7752 */
7753 *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t),
7754 M_NFSDSTATE, M_WAITOK | M_ZERO);
7755 ds->nfsdev_dvp = nd.ni_vp;
7756 ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount);
7757 NFSVOPUNLOCK(nd.ni_vp);
7758
7759 dsdirsize = strlen(dspathp) + 16;
7760 dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK);
7761 /* Now, create the DS directory structures. */
7762 for (i = 0; i < nfsrv_dsdirsize; i++) {
7763 snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i);
7764 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
7765 UIO_SYSSPACE, dsdirpath);
7766 error = namei(&nd);
7767 NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error);
7768 if (error != 0)
7769 break;
7770 if (nd.ni_vp->v_type != VDIR) {
7771 vput(nd.ni_vp);
7772 error = ENOTDIR;
7773 NFSD_DEBUG(4, "dsdirpath not a VDIR\n");
7774 break;
7775 }
7776 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
7777 vput(nd.ni_vp);
7778 error = ENXIO;
7779 NFSD_DEBUG(4, "dsdirpath not an NFS mount\n");
7780 break;
7781 }
7782 ds->nfsdev_dsdir[i] = nd.ni_vp;
7783 NFSVOPUNLOCK(nd.ni_vp);
7784 }
7785 free(dsdirpath, M_TEMP);
7786
7787 if (strlen(mdspathp) > 0) {
7788 /*
7789 * This DS stores file for a specific MDS exported file
7790 * system.
7791 */
7792 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
7793 UIO_SYSSPACE, mdspathp);
7794 error = namei(&nd);
7795 NFSD_DEBUG(4, "mds lookup=%d\n", error);
7796 if (error != 0)
7797 goto out;
7798 if (nd.ni_vp->v_type != VDIR) {
7799 vput(nd.ni_vp);
7800 error = ENOTDIR;
7801 NFSD_DEBUG(4, "mdspath not dir\n");
7802 goto out;
7803 }
7804 mp = nd.ni_vp->v_mount;
7805 if ((mp->mnt_flag & MNT_EXPORTED) == 0) {
7806 vput(nd.ni_vp);
7807 error = ENXIO;
7808 NFSD_DEBUG(4, "mdspath not an exported fs\n");
7809 goto out;
7810 }
7811 ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid;
7812 ds->nfsdev_mdsisset = 1;
7813 vput(nd.ni_vp);
7814 }
7815
7816 out:
7817 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
7818 atomic_add_int(&nfsrv_devidcnt, 1);
7819 return (error);
7820 }
7821
7822 /*
7823 * Look up the mount path for the DS server and delete it.
7824 */
7825 int
nfsrv_deldsserver(int op,char * dspathp,NFSPROC_T * p)7826 nfsrv_deldsserver(int op, char *dspathp, NFSPROC_T *p)
7827 {
7828 struct mount *mp;
7829 struct nfsmount *nmp;
7830 struct nfsdevice *ds;
7831 int error;
7832
7833 NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp);
7834 /*
7835 * Search for the path in the mount list. Avoid looking the path
7836 * up, since this mount point may be hung, with associated locked
7837 * vnodes, etc.
7838 * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked
7839 * until this completes.
7840 * As noted in the man page, this should be done before any forced
7841 * dismount on the mount point, but at least the handshake on
7842 * NFSMNTP_CANCELRPCS should make it safe.
7843 */
7844 error = 0;
7845 ds = NULL;
7846 nmp = NULL;
7847 mtx_lock(&mountlist_mtx);
7848 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
7849 if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 &&
7850 strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 &&
7851 mp->mnt_data != NULL) {
7852 nmp = VFSTONFS(mp);
7853 NFSLOCKMNT(nmp);
7854 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
7855 NFSMNTP_CANCELRPCS)) == 0) {
7856 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
7857 NFSUNLOCKMNT(nmp);
7858 } else {
7859 NFSUNLOCKMNT(nmp);
7860 nmp = NULL;
7861 }
7862 break;
7863 }
7864 }
7865 mtx_unlock(&mountlist_mtx);
7866
7867 if (nmp != NULL) {
7868 ds = nfsrv_deldsnmp(op, nmp, p);
7869 NFSD_DEBUG(4, "deldsnmp=%p\n", ds);
7870 if (ds != NULL) {
7871 nfsrv_killrpcs(nmp);
7872 NFSD_DEBUG(4, "aft killrpcs\n");
7873 } else
7874 error = ENXIO;
7875 NFSLOCKMNT(nmp);
7876 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
7877 wakeup(nmp);
7878 NFSUNLOCKMNT(nmp);
7879 } else
7880 error = EINVAL;
7881 return (error);
7882 }
7883
7884 /*
7885 * Search for and remove a DS entry which matches the "nmp" argument.
7886 * The nfsdevice structure pointer is returned so that the caller can
7887 * free it via nfsrv_freeonedevid().
7888 * For the forced case, do not try to do LayoutRecalls, since the server
7889 * must be shut down now anyhow.
7890 */
7891 struct nfsdevice *
nfsrv_deldsnmp(int op,struct nfsmount * nmp,NFSPROC_T * p)7892 nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p)
7893 {
7894 struct nfsdevice *fndds;
7895
7896 NFSD_DEBUG(4, "deldsdvp\n");
7897 NFSDDSLOCK();
7898 if (op == PNFSDOP_FORCEDELDS)
7899 fndds = nfsv4_findmirror(nmp);
7900 else
7901 fndds = nfsrv_findmirroredds(nmp);
7902 if (fndds != NULL)
7903 nfsrv_deleteds(fndds);
7904 NFSDDSUNLOCK();
7905 if (fndds != NULL) {
7906 if (op != PNFSDOP_FORCEDELDS)
7907 nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
7908 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
7909 }
7910 return (fndds);
7911 }
7912
7913 /*
7914 * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid.
7915 * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount
7916 * point.
7917 * Also, returns an error instead of the nfsdevice found.
7918 */
7919 int
nfsrv_delds(char * devid,NFSPROC_T * p)7920 nfsrv_delds(char *devid, NFSPROC_T *p)
7921 {
7922 struct nfsdevice *ds, *fndds;
7923 struct nfsmount *nmp;
7924 int fndmirror;
7925
7926 NFSD_DEBUG(4, "delds\n");
7927 /*
7928 * Search the DS server list for a match with devid.
7929 * Remove the DS entry if found and there is a mirror.
7930 */
7931 fndds = NULL;
7932 nmp = NULL;
7933 fndmirror = 0;
7934 NFSDDSLOCK();
7935 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7936 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 &&
7937 ds->nfsdev_nmp != NULL) {
7938 NFSD_DEBUG(4, "fnd main ds\n");
7939 fndds = ds;
7940 break;
7941 }
7942 }
7943 if (fndds == NULL) {
7944 NFSDDSUNLOCK();
7945 return (ENXIO);
7946 }
7947 if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
7948 fndmirror = 1;
7949 else if (fndds->nfsdev_mdsisset != 0) {
7950 /* For the fsid is set case, search for a mirror. */
7951 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7952 if (ds != fndds && ds->nfsdev_nmp != NULL &&
7953 ds->nfsdev_mdsisset != 0 &&
7954 fsidcmp(&ds->nfsdev_mdsfsid,
7955 &fndds->nfsdev_mdsfsid) == 0) {
7956 fndmirror = 1;
7957 break;
7958 }
7959 }
7960 }
7961 if (fndmirror != 0) {
7962 nmp = fndds->nfsdev_nmp;
7963 NFSLOCKMNT(nmp);
7964 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
7965 NFSMNTP_CANCELRPCS)) == 0) {
7966 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
7967 NFSUNLOCKMNT(nmp);
7968 nfsrv_deleteds(fndds);
7969 } else {
7970 NFSUNLOCKMNT(nmp);
7971 nmp = NULL;
7972 }
7973 }
7974 NFSDDSUNLOCK();
7975 if (nmp != NULL) {
7976 nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
7977 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
7978 nfsrv_killrpcs(nmp);
7979 NFSLOCKMNT(nmp);
7980 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
7981 wakeup(nmp);
7982 NFSUNLOCKMNT(nmp);
7983 return (0);
7984 }
7985 return (ENXIO);
7986 }
7987
7988 /*
7989 * Mark a DS as disabled by setting nfsdev_nmp = NULL.
7990 */
7991 static void
nfsrv_deleteds(struct nfsdevice * fndds)7992 nfsrv_deleteds(struct nfsdevice *fndds)
7993 {
7994
7995 NFSD_DEBUG(4, "deleteds: deleting a mirror\n");
7996 fndds->nfsdev_nmp = NULL;
7997 if (fndds->nfsdev_mdsisset == 0)
7998 nfsrv_faildscnt--;
7999 }
8000
8001 /*
8002 * Fill in the addr structures for the File and Flex File layouts.
8003 */
8004 static void
nfsrv_allocdevid(struct nfsdevice * ds,char * addr,char * dnshost)8005 nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost)
8006 {
8007 uint32_t *tl;
8008 char *netprot;
8009 int addrlen;
8010 static uint64_t new_devid = 0;
8011
8012 if (strchr(addr, ':') != NULL)
8013 netprot = "tcp6";
8014 else
8015 netprot = "tcp";
8016
8017 /* Fill in the device id. */
8018 NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time));
8019 new_devid++;
8020 NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)],
8021 sizeof(new_devid));
8022
8023 /*
8024 * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4
8025 * as defined in RFC5661) in XDR.
8026 */
8027 addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
8028 6 * NFSX_UNSIGNED;
8029 NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot);
8030 ds->nfsdev_fileaddrlen = addrlen;
8031 tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
8032 ds->nfsdev_fileaddr = (char *)tl;
8033 *tl++ = txdr_unsigned(1); /* One stripe with index 0. */
8034 *tl++ = 0;
8035 *tl++ = txdr_unsigned(1); /* One multipath list */
8036 *tl++ = txdr_unsigned(1); /* with one entry in it. */
8037 /* The netaddr for this one entry. */
8038 *tl++ = txdr_unsigned(strlen(netprot));
8039 NFSBCOPY(netprot, tl, strlen(netprot));
8040 tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
8041 *tl++ = txdr_unsigned(strlen(addr));
8042 NFSBCOPY(addr, tl, strlen(addr));
8043
8044 /*
8045 * Fill in the flex file addr (actually the ff_device_addr4
8046 * as defined for Flexible File Layout) in XDR.
8047 */
8048 addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
8049 14 * NFSX_UNSIGNED;
8050 ds->nfsdev_flexaddrlen = addrlen;
8051 tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
8052 ds->nfsdev_flexaddr = (char *)tl;
8053 *tl++ = txdr_unsigned(1); /* One multipath entry. */
8054 /* The netaddr for this one entry. */
8055 *tl++ = txdr_unsigned(strlen(netprot));
8056 NFSBCOPY(netprot, tl, strlen(netprot));
8057 tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
8058 *tl++ = txdr_unsigned(strlen(addr));
8059 NFSBCOPY(addr, tl, strlen(addr));
8060 tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED);
8061 *tl++ = txdr_unsigned(2); /* Two NFS Versions. */
8062 *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */
8063 *tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */
8064 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */
8065 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */
8066 *tl++ = newnfs_true; /* Tightly coupled. */
8067 *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */
8068 *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */
8069 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */
8070 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */
8071 *tl = newnfs_true; /* Tightly coupled. */
8072
8073 ds->nfsdev_hostnamelen = strlen(dnshost);
8074 ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE,
8075 M_WAITOK);
8076 NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1);
8077 }
8078
8079 /*
8080 * Create the device id list.
8081 * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument
8082 * is misconfigured.
8083 */
8084 int
nfsrv_createdevids(struct nfsd_nfsd_args * args,NFSPROC_T * p)8085 nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p)
8086 {
8087 struct nfsdevice *ds;
8088 char *addrp, *dnshostp, *dspathp, *mdspathp;
8089 int error, i;
8090
8091 addrp = args->addr;
8092 dnshostp = args->dnshost;
8093 dspathp = args->dspath;
8094 mdspathp = args->mdspath;
8095 nfsrv_maxpnfsmirror = args->mirrorcnt;
8096 if (addrp == NULL || dnshostp == NULL || dspathp == NULL ||
8097 mdspathp == NULL)
8098 return (0);
8099
8100 /*
8101 * Loop around for each nul-terminated string in args->addr,
8102 * args->dnshost, args->dnspath and args->mdspath.
8103 */
8104 while (addrp < (args->addr + args->addrlen) &&
8105 dnshostp < (args->dnshost + args->dnshostlen) &&
8106 dspathp < (args->dspath + args->dspathlen) &&
8107 mdspathp < (args->mdspath + args->mdspathlen)) {
8108 error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds);
8109 if (error != 0) {
8110 /* Free all DS servers. */
8111 nfsrv_freealldevids();
8112 nfsrv_devidcnt = 0;
8113 return (ENXIO);
8114 }
8115 nfsrv_allocdevid(ds, addrp, dnshostp);
8116 addrp += (strlen(addrp) + 1);
8117 dnshostp += (strlen(dnshostp) + 1);
8118 dspathp += (strlen(dspathp) + 1);
8119 mdspathp += (strlen(mdspathp) + 1);
8120 }
8121 if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) {
8122 /* Free all DS servers. */
8123 nfsrv_freealldevids();
8124 nfsrv_devidcnt = 0;
8125 nfsrv_maxpnfsmirror = 1;
8126 return (ENXIO);
8127 }
8128 /* We can fail at most one less DS than the mirror level. */
8129 nfsrv_faildscnt = nfsrv_maxpnfsmirror - 1;
8130
8131 /*
8132 * Allocate the nfslayout hash table now, since this is a pNFS server.
8133 * Make it 1% of the high water mark and at least 100.
8134 */
8135 if (nfslayouthash == NULL) {
8136 nfsrv_layouthashsize = nfsrv_layouthighwater / 100;
8137 if (nfsrv_layouthashsize < 100)
8138 nfsrv_layouthashsize = 100;
8139 nfslayouthash = mallocarray(nfsrv_layouthashsize,
8140 sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK |
8141 M_ZERO);
8142 for (i = 0; i < nfsrv_layouthashsize; i++) {
8143 mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF);
8144 TAILQ_INIT(&nfslayouthash[i].list);
8145 }
8146 }
8147 return (0);
8148 }
8149
8150 /*
8151 * Free all device ids.
8152 */
8153 static void
nfsrv_freealldevids(void)8154 nfsrv_freealldevids(void)
8155 {
8156 struct nfsdevice *ds, *nds;
8157
8158 TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds)
8159 nfsrv_freedevid(ds);
8160 }
8161
8162 /*
8163 * Check to see if there is a Read/Write Layout plus either:
8164 * - A Write Delegation
8165 * or
8166 * - An Open with Write_access.
8167 * Return 1 if this is the case and 0 otherwise.
8168 * This function is used by nfsrv_proxyds() to decide if doing a Proxy
8169 * Getattr RPC to the Data Server (DS) is necessary.
8170 */
8171 #define NFSCLIDVECSIZE 6
8172 int
nfsrv_checkdsattr(vnode_t vp,NFSPROC_T * p)8173 nfsrv_checkdsattr(vnode_t vp, NFSPROC_T *p)
8174 {
8175 fhandle_t fh, *tfhp;
8176 struct nfsstate *stp;
8177 struct nfslayout *lyp;
8178 struct nfslayouthash *lhyp;
8179 struct nfslockhashhead *hp;
8180 struct nfslockfile *lfp;
8181 nfsquad_t clid[NFSCLIDVECSIZE];
8182 int clidcnt, ret;
8183
8184 ret = nfsvno_getfh(vp, &fh, p);
8185 if (ret != 0)
8186 return (0);
8187
8188 /* First check for a Read/Write Layout. */
8189 clidcnt = 0;
8190 lhyp = NFSLAYOUTHASH(&fh);
8191 NFSLOCKLAYOUT(lhyp);
8192 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
8193 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8194 ((lyp->lay_flags & NFSLAY_RW) != 0 ||
8195 ((lyp->lay_flags & NFSLAY_READ) != 0 &&
8196 nfsrv_pnfsatime != 0))) {
8197 if (clidcnt < NFSCLIDVECSIZE)
8198 clid[clidcnt].qval = lyp->lay_clientid.qval;
8199 clidcnt++;
8200 }
8201 }
8202 NFSUNLOCKLAYOUT(lhyp);
8203 if (clidcnt == 0) {
8204 /* None found, so return 0. */
8205 return (0);
8206 }
8207
8208 /* Get the nfslockfile for this fh. */
8209 NFSLOCKSTATE();
8210 hp = NFSLOCKHASH(&fh);
8211 LIST_FOREACH(lfp, hp, lf_hash) {
8212 tfhp = &lfp->lf_fh;
8213 if (NFSVNO_CMPFH(&fh, tfhp))
8214 break;
8215 }
8216 if (lfp == NULL) {
8217 /* None found, so return 0. */
8218 NFSUNLOCKSTATE();
8219 return (0);
8220 }
8221
8222 /* Now, look for a Write delegation for this clientid. */
8223 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
8224 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
8225 nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
8226 break;
8227 }
8228 if (stp != NULL) {
8229 /* Found one, so return 1. */
8230 NFSUNLOCKSTATE();
8231 return (1);
8232 }
8233
8234 /* No Write delegation, so look for an Open with Write_access. */
8235 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
8236 KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0,
8237 ("nfsrv_checkdsattr: Non-open in Open list\n"));
8238 if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 &&
8239 nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
8240 break;
8241 }
8242 NFSUNLOCKSTATE();
8243 if (stp != NULL)
8244 return (1);
8245 return (0);
8246 }
8247
8248 /*
8249 * Look for a matching clientid in the vector. Return 1 if one might match.
8250 */
8251 static int
nfsrv_fndclid(nfsquad_t * clidvec,nfsquad_t clid,int clidcnt)8252 nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt)
8253 {
8254 int i;
8255
8256 /* If too many for the vector, return 1 since there might be a match. */
8257 if (clidcnt > NFSCLIDVECSIZE)
8258 return (1);
8259
8260 for (i = 0; i < clidcnt; i++)
8261 if (clidvec[i].qval == clid.qval)
8262 return (1);
8263 return (0);
8264 }
8265
8266 /*
8267 * Check the don't list for "vp" and see if issuing an rw layout is allowed.
8268 * Return 1 if issuing an rw layout isn't allowed, 0 otherwise.
8269 */
8270 static int
nfsrv_dontlayout(fhandle_t * fhp)8271 nfsrv_dontlayout(fhandle_t *fhp)
8272 {
8273 struct nfsdontlist *mrp;
8274 int ret;
8275
8276 if (nfsrv_dontlistlen == 0)
8277 return (0);
8278 ret = 0;
8279 NFSDDONTLISTLOCK();
8280 LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
8281 if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 &&
8282 (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) {
8283 ret = 1;
8284 break;
8285 }
8286 }
8287 NFSDDONTLISTUNLOCK();
8288 return (ret);
8289 }
8290
8291 #define PNFSDS_COPYSIZ 65536
8292 /*
8293 * Create a new file on a DS and copy the contents of an extant DS file to it.
8294 * This can be used for recovery of a DS file onto a recovered DS.
8295 * The steps are:
8296 * - When called, the MDS file's vnode is locked, blocking LayoutGet operations.
8297 * - Disable issuing of read/write layouts for the file via the nfsdontlist,
8298 * so that they will be disabled after the MDS file's vnode is unlocked.
8299 * - Set up the nfsrv_recalllist so that recall of read/write layouts can
8300 * be done.
8301 * - Unlock the MDS file's vnode, so that the client(s) can perform proxied
8302 * writes, LayoutCommits and LayoutReturns for the file when completing the
8303 * LayoutReturn requested by the LayoutRecall callback.
8304 * - Issue a LayoutRecall callback for all read/write layouts and wait for
8305 * them to be returned. (If the LayoutRecall callback replies
8306 * NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.)
8307 * - Exclusively lock the MDS file's vnode. This ensures that no proxied
8308 * writes are in progress or can occur during the DS file copy.
8309 * It also blocks Setattr operations.
8310 * - Create the file on the recovered mirror.
8311 * - Copy the file from the operational DS.
8312 * - Copy any ACL from the MDS file to the new DS file.
8313 * - Set the modify time of the new DS file to that of the MDS file.
8314 * - Update the extended attribute for the MDS file.
8315 * - Enable issuing of rw layouts by deleting the nfsdontlist entry.
8316 * - The caller will unlock the MDS file's vnode allowing operations
8317 * to continue normally, since it is now on the mirror again.
8318 */
8319 int
nfsrv_copymr(vnode_t vp,vnode_t fvp,vnode_t dvp,struct nfsdevice * ds,struct pnfsdsfile * pf,struct pnfsdsfile * wpf,int mirrorcnt,struct ucred * cred,NFSPROC_T * p)8320 nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds,
8321 struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt,
8322 struct ucred *cred, NFSPROC_T *p)
8323 {
8324 struct nfsdontlist *mrp, *nmrp;
8325 struct nfslayouthash *lhyp;
8326 struct nfslayout *lyp, *nlyp;
8327 struct nfslayouthead thl;
8328 struct mount *mp, *tvmp;
8329 struct acl *aclp;
8330 struct vattr va;
8331 struct timespec mtime;
8332 fhandle_t fh;
8333 vnode_t tvp;
8334 off_t rdpos, wrpos;
8335 ssize_t aresid;
8336 char *dat;
8337 int didprintf, ret, retacl, xfer;
8338
8339 ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp");
8340 ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp");
8341 /*
8342 * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag
8343 * so that no more RW layouts will get issued.
8344 */
8345 ret = nfsvno_getfh(vp, &fh, p);
8346 if (ret != 0) {
8347 NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret);
8348 return (ret);
8349 }
8350 nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK);
8351 nmrp->nfsmr_flags = NFSMR_DONTLAYOUT;
8352 NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh));
8353 NFSDDONTLISTLOCK();
8354 LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
8355 if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0)
8356 break;
8357 }
8358 if (mrp == NULL) {
8359 LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list);
8360 mrp = nmrp;
8361 nmrp = NULL;
8362 nfsrv_dontlistlen++;
8363 NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n");
8364 } else {
8365 NFSDDONTLISTUNLOCK();
8366 free(nmrp, M_NFSDSTATE);
8367 NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n");
8368 return (ENXIO);
8369 }
8370 NFSDDONTLISTUNLOCK();
8371
8372 /*
8373 * Search for all RW layouts for this file. Move them to the
8374 * recall list, so they can be recalled and their return noted.
8375 */
8376 lhyp = NFSLAYOUTHASH(&fh);
8377 NFSDRECALLLOCK();
8378 NFSLOCKLAYOUT(lhyp);
8379 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
8380 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8381 (lyp->lay_flags & NFSLAY_RW) != 0) {
8382 TAILQ_REMOVE(&lhyp->list, lyp, lay_list);
8383 TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list);
8384 lyp->lay_trycnt = 0;
8385 }
8386 }
8387 NFSUNLOCKLAYOUT(lhyp);
8388 NFSDRECALLUNLOCK();
8389
8390 ret = 0;
8391 mp = tvmp = NULL;
8392 didprintf = 0;
8393 TAILQ_INIT(&thl);
8394 /* Unlock the MDS vp, so that a LayoutReturn can be done on it. */
8395 NFSVOPUNLOCK(vp);
8396 /* Now, do a recall for all layouts not yet recalled. */
8397 tryagain:
8398 NFSDRECALLLOCK();
8399 TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
8400 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8401 (lyp->lay_flags & NFSLAY_RECALL) == 0) {
8402 lyp->lay_flags |= NFSLAY_RECALL;
8403 /*
8404 * The layout stateid.seqid needs to be incremented
8405 * before doing a LAYOUT_RECALL callback.
8406 */
8407 if (++lyp->lay_stateid.seqid == 0)
8408 lyp->lay_stateid.seqid = 1;
8409 NFSDRECALLUNLOCK();
8410 nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
8411 &lyp->lay_fh, lyp, 0, lyp->lay_type, p);
8412 NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n");
8413 goto tryagain;
8414 }
8415 }
8416
8417 /* Now wait for them to be returned. */
8418 tryagain2:
8419 TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
8420 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) {
8421 if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) {
8422 TAILQ_REMOVE(&nfsrv_recalllisthead, lyp,
8423 lay_list);
8424 TAILQ_INSERT_HEAD(&thl, lyp, lay_list);
8425 NFSD_DEBUG(4,
8426 "nfsrv_copymr: layout returned\n");
8427 } else {
8428 lyp->lay_trycnt++;
8429 ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR,
8430 PVFS | PCATCH, "nfsmrl", hz);
8431 NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n",
8432 ret);
8433 if (ret == EINTR || ret == ERESTART)
8434 break;
8435 if ((lyp->lay_flags & NFSLAY_RETURNED) == 0) {
8436 /*
8437 * Give up after 60sec and return
8438 * ENXIO, failing the copymr.
8439 * This layout will remain on the
8440 * recalllist. It can only be cleared
8441 * by restarting the nfsd.
8442 * This seems the safe way to handle
8443 * it, since it cannot be safely copied
8444 * with an outstanding RW layout.
8445 */
8446 if (lyp->lay_trycnt >= 60) {
8447 ret = ENXIO;
8448 break;
8449 }
8450 if (didprintf == 0) {
8451 printf("nfsrv_copymr: layout "
8452 "not returned\n");
8453 didprintf = 1;
8454 }
8455 }
8456 }
8457 goto tryagain2;
8458 }
8459 }
8460 NFSDRECALLUNLOCK();
8461 /* We can now get rid of the layouts that have been returned. */
8462 TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp)
8463 nfsrv_freelayout(&thl, lyp);
8464
8465 /*
8466 * Do the vn_start_write() calls here, before the MDS vnode is
8467 * locked and the tvp is created (locked) in the NFS file system
8468 * that dvp is in.
8469 * For tvmp, this probably isn't necessary, since it will be an
8470 * NFS mount and they are not suspendable at this time.
8471 */
8472 if (ret == 0)
8473 ret = vn_start_write(vp, &mp, V_WAIT | V_PCATCH);
8474 if (ret == 0) {
8475 tvmp = dvp->v_mount;
8476 ret = vn_start_write(NULL, &tvmp, V_WAIT | V_PCATCH);
8477 }
8478
8479 /*
8480 * LK_EXCLUSIVE lock the MDS vnode, so that any
8481 * proxied writes through the MDS will be blocked until we have
8482 * completed the copy and update of the extended attributes.
8483 * This will also ensure that any attributes and ACL will not be
8484 * changed until the copy is complete.
8485 */
8486 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
8487 if (ret == 0 && VN_IS_DOOMED(vp)) {
8488 NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n");
8489 ret = ESTALE;
8490 }
8491
8492 /* Create the data file on the recovered DS. */
8493 if (ret == 0)
8494 ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp);
8495
8496 /* Copy the DS file, if created successfully. */
8497 if (ret == 0) {
8498 /*
8499 * Get any NFSv4 ACL on the MDS file, so that it can be set
8500 * on the new DS file.
8501 */
8502 aclp = acl_alloc(M_WAITOK | M_ZERO);
8503 retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
8504 if (retacl != 0 && retacl != ENOATTR)
8505 NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl);
8506 dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK);
8507 /* Malloc a block of 0s used to check for holes. */
8508 if (nfsrv_zeropnfsdat == NULL)
8509 nfsrv_zeropnfsdat = malloc(PNFSDS_COPYSIZ, M_TEMP,
8510 M_WAITOK | M_ZERO);
8511 rdpos = wrpos = 0;
8512 ret = VOP_GETATTR(fvp, &va, cred);
8513 aresid = 0;
8514 while (ret == 0 && aresid == 0) {
8515 ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ,
8516 rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL,
8517 &aresid, p);
8518 xfer = PNFSDS_COPYSIZ - aresid;
8519 if (ret == 0 && xfer > 0) {
8520 rdpos += xfer;
8521 /*
8522 * Skip the write for holes, except for the
8523 * last block.
8524 */
8525 if (xfer < PNFSDS_COPYSIZ || rdpos ==
8526 va.va_size || NFSBCMP(dat,
8527 nfsrv_zeropnfsdat, PNFSDS_COPYSIZ) != 0)
8528 ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer,
8529 wrpos, UIO_SYSSPACE, IO_NODELOCKED,
8530 cred, NULL, NULL, p);
8531 if (ret == 0)
8532 wrpos += xfer;
8533 }
8534 }
8535
8536 /* If there is an ACL and the copy succeeded, set the ACL. */
8537 if (ret == 0 && retacl == 0) {
8538 ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p);
8539 /*
8540 * Don't consider these as errors, since VOP_GETACL()
8541 * can return an ACL when they are not actually
8542 * supported. For example, for UFS, VOP_GETACL()
8543 * will return a trivial ACL based on the uid/gid/mode
8544 * when there is no ACL on the file.
8545 * This case should be recognized as a trivial ACL
8546 * by UFS's VOP_SETACL() and succeed, but...
8547 */
8548 if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM)
8549 ret = 0;
8550 }
8551
8552 if (ret == 0)
8553 ret = VOP_FSYNC(tvp, MNT_WAIT, p);
8554
8555 /* Set the DS data file's modify time that of the MDS file. */
8556 if (ret == 0)
8557 ret = VOP_GETATTR(vp, &va, cred);
8558 if (ret == 0) {
8559 mtime = va.va_mtime;
8560 VATTR_NULL(&va);
8561 va.va_mtime = mtime;
8562 ret = VOP_SETATTR(tvp, &va, cred);
8563 }
8564
8565 vput(tvp);
8566 acl_free(aclp);
8567 free(dat, M_TEMP);
8568 }
8569 if (tvmp != NULL)
8570 vn_finished_write(tvmp);
8571
8572 /* Update the extended attributes for the newly created DS file. */
8573 if (ret == 0)
8574 ret = vn_extattr_set(vp, IO_NODELOCKED,
8575 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
8576 sizeof(*wpf) * mirrorcnt, (char *)wpf, p);
8577 if (mp != NULL)
8578 vn_finished_write(mp);
8579
8580 /* Get rid of the dontlist entry, so that Layouts can be issued. */
8581 NFSDDONTLISTLOCK();
8582 LIST_REMOVE(mrp, nfsmr_list);
8583 NFSDDONTLISTUNLOCK();
8584 free(mrp, M_NFSDSTATE);
8585 return (ret);
8586 }
8587
8588 /*
8589 * Create a data storage file on the recovered DS.
8590 */
8591 static int
nfsrv_createdsfile(vnode_t vp,fhandle_t * fhp,struct pnfsdsfile * pf,vnode_t dvp,struct nfsdevice * ds,struct ucred * cred,NFSPROC_T * p,vnode_t * tvpp)8592 nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
8593 vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
8594 vnode_t *tvpp)
8595 {
8596 struct vattr va, nva;
8597 int error;
8598
8599 /* Make data file name based on FH. */
8600 error = VOP_GETATTR(vp, &va, cred);
8601 if (error == 0) {
8602 /* Set the attributes for "vp" to Setattr the DS vp. */
8603 VATTR_NULL(&nva);
8604 nva.va_uid = va.va_uid;
8605 nva.va_gid = va.va_gid;
8606 nva.va_mode = va.va_mode;
8607 nva.va_size = 0;
8608 VATTR_NULL(&va);
8609 va.va_type = VREG;
8610 va.va_mode = nva.va_mode;
8611 NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf);
8612 error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL,
8613 pf->dsf_filename, cred, p, tvpp);
8614 }
8615 return (error);
8616 }
8617
/*
 * Look up the MDS file shared locked, and then get the extended attribute
 * to find the extant DS file to be copied to the new mirror.
 * If successful, *vpp is set to the MDS file's vp and *nvpp is
 * set to a DS data file for the MDS file, both exclusively locked.
 * NOTE(review): the lookup of the MDS file done here uses LOCKSHARED; the
 * lock state of *nvpp is decided by nfsrv_dsgetsockmnt() - confirm the
 * "exclusively locked" statement against that function and the callers.
 * The "buf" argument has the pnfsdsfile structure from the MDS file
 * in it and buflen is set to its length.
 *
 * Arguments:
 *   mdspathp   - path of the MDS file; it must be a regular file.
 *   dspathp    - path of a directory on the NFS mount of the DS to copy
 *                to, or NULL to have nfsrv_dsgetsockmnt() choose the mount.
 *   curdspathp - path of a directory on the NFS mount of the DS currently
 *                holding the file, or NULL.  It may only be non-NULL when
 *                dspathp is non-NULL as well and must differ from it.
 *   buf, buflenp - filled in by nfsrv_dsgetsockmnt().  When no entry
 *                position is reported back (ippos == -1), another
 *                pnfsdsfile entry is appended here and *buflenp grows by
 *                sizeof(struct pnfsdsfile); presumably the caller provides
 *                room for that - TODO confirm.
 *   fname      - passed to nfsrv_dsgetsockmnt() and copied into the
 *                dsf_filename of an appended entry.
 *   vpp, nvpp  - see above; only set upon success.
 *   pfp        - set to the pnfsdsfile entry within buf for the new copy.
 *   dsp        - set to the nfsdevice for the DS being copied to.
 *   fdsp       - set to the nfsdevice for curdspathp, when given.
 *
 * Returns 0 upon success.  Upon failure an errno is returned (EPERM,
 * EISDIR, ENOTDIR, ENXIO or an error from namei()/nfsrv_dsgetsockmnt())
 * and every vnode acquired here has been released via vput().
 */
int
nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf,
    int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp,
    struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp,
    struct nfsdevice **fdsp)
{
	struct nameidata nd;
	struct vnode *vp, *curvp;
	struct pnfsdsfile *pf;
	struct nfsmount *nmp, *curnmp;
	int dsdir, error, mirrorcnt, ippos;

	vp = NULL;
	curvp = NULL;
	curnmp = NULL;
	*dsp = NULL;
	*fdsp = NULL;
	/* A current DS without a destination DS is not allowed. */
	if (dspathp == NULL && curdspathp != NULL)
		return (EPERM);

	/*
	 * Look up the MDS file shared locked.  The lock will be upgraded
	 * to an exclusive lock after any rw layouts have been returned.
	 */
	NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp);
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
	    mdspathp);
	error = namei(&nd);
	NFSD_DEBUG(4, "lookup=%d\n", error);
	if (error != 0)
		return (error);
	if (nd.ni_vp->v_type != VREG) {
		vput(nd.ni_vp);
		NFSD_DEBUG(4, "mdspath not reg\n");
		return (EISDIR);
	}
	vp = nd.ni_vp;

	if (curdspathp != NULL) {
		/*
		 * Look up the current DS path and find the nfsdev structure for
		 * it.
		 */
		NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp);
		NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
		    UIO_SYSSPACE, curdspathp);
		error = namei(&nd);
		NFSD_DEBUG(4, "ds lookup=%d\n", error);
		if (error != 0) {
			vput(vp);
			return (error);
		}
		if (nd.ni_vp->v_type != VDIR) {
			vput(nd.ni_vp);
			vput(vp);
			NFSD_DEBUG(4, "curdspath not dir\n");
			return (ENOTDIR);
		}
		/* The directory must be on an NFS mount. */
		if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
			vput(nd.ni_vp);
			vput(vp);
			NFSD_DEBUG(4, "curdspath not an NFS mount\n");
			return (ENXIO);
		}
		curnmp = VFSTONFS(nd.ni_vp->v_mount);

		/* Search the nfsdev list for a match. */
		NFSDDSLOCK();
		*fdsp = nfsv4_findmirror(curnmp);
		NFSDDSUNLOCK();
		if (*fdsp == NULL)
			curnmp = NULL;
		if (curnmp == NULL) {
			vput(nd.ni_vp);
			vput(vp);
			NFSD_DEBUG(4, "mdscopymr: no current ds\n");
			return (ENXIO);
		}
		/* Kept until nfsrv_dsgetsockmnt() has been called. */
		curvp = nd.ni_vp;
	}

	if (dspathp != NULL) {
		/* Look up the nfsdev path and find the nfsdev structure. */
		NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp);
		NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
		    UIO_SYSSPACE, dspathp);
		error = namei(&nd);
		NFSD_DEBUG(4, "ds lookup=%d\n", error);
		if (error != 0) {
			vput(vp);
			if (curvp != NULL)
				vput(curvp);
			return (error);
		}
		/*
		 * The destination must be a directory and must not be the
		 * same vnode as the current DS directory.
		 */
		if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) {
			vput(nd.ni_vp);
			vput(vp);
			if (curvp != NULL)
				vput(curvp);
			NFSD_DEBUG(4, "dspath not dir\n");
			/* Only the pointer values are compared here. */
			if (nd.ni_vp == curvp)
				return (EPERM);
			return (ENOTDIR);
		}
		if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
			vput(nd.ni_vp);
			vput(vp);
			if (curvp != NULL)
				vput(curvp);
			NFSD_DEBUG(4, "dspath not an NFS mount\n");
			return (ENXIO);
		}
		nmp = VFSTONFS(nd.ni_vp->v_mount);

		/*
		 * Search the nfsdevice list for a match.  If curnmp == NULL,
		 * this is a recovery and there must be a mirror.
		 */
		NFSDDSLOCK();
		if (curnmp == NULL)
			*dsp = nfsrv_findmirroredds(nmp);
		else
			*dsp = nfsv4_findmirror(nmp);
		NFSDDSUNLOCK();
		if (*dsp == NULL) {
			vput(nd.ni_vp);
			vput(vp);
			if (curvp != NULL)
				vput(curvp);
			NFSD_DEBUG(4, "mdscopymr: no ds\n");
			return (ENXIO);
		}
	} else {
		/*
		 * No destination given.  nd.ni_vp == NULL is what marks this
		 * case in the code after the nfsrv_dsgetsockmnt() call.
		 */
		nd.ni_vp = NULL;
		nmp = NULL;
	}

	/*
	 * Get a vp for an available DS data file using the extended
	 * attribute on the MDS file.
	 * If there is a valid entry for the new DS in the extended attribute
	 * on the MDS file (as checked via the nmp argument),
	 * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur.
	 */
	error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, &mirrorcnt, p,
	    NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir);
	if (curvp != NULL)
		vput(curvp);
	if (nd.ni_vp == NULL) {
		/*
		 * No dspathp case: the mount for the destination DS, if any,
		 * comes back in nmp.
		 */
		if (error == 0 && nmp != NULL) {
			/* Search the nfsdev list for a match. */
			NFSDDSLOCK();
			*dsp = nfsrv_findmirroredds(nmp);
			NFSDDSUNLOCK();
		}
		if (error == 0 && (nmp == NULL || *dsp == NULL)) {
			if (nvpp != NULL && *nvpp != NULL) {
				vput(*nvpp);
				*nvpp = NULL;
			}
			error = ENXIO;
		}
	} else
		vput(nd.ni_vp);

	/*
	 * When dspathp != NULL and curdspathp == NULL, this is a recovery
	 * and is only allowed if there is a 0.0.0.0 IP address entry.
	 * When curdspathp != NULL, the ippos will be set to that entry.
	 */
	if (error == 0 && dspathp != NULL && ippos == -1) {
		if (nvpp != NULL && *nvpp != NULL) {
			vput(*nvpp);
			*nvpp = NULL;
		}
		error = ENXIO;
	}
	if (error == 0) {
		/* Success, so vp is handed to the caller still locked. */
		*vpp = vp;

		pf = (struct pnfsdsfile *)buf;
		if (ippos == -1) {
			/* If no zeroip pnfsdsfile, add one. */
			ippos = *buflenp / sizeof(*pf);
			*buflenp += sizeof(*pf);
			pf += ippos;
			pf->dsf_dir = dsdir;
			strlcpy(pf->dsf_filename, fname,
			    sizeof(pf->dsf_filename));
		} else
			pf += ippos;
		*pfp = pf;
	} else
		vput(vp);
	return (error);
}
8822
8823 /*
8824 * Search for a matching pnfsd mirror device structure, base on the nmp arg.
8825 * Return one if found, NULL otherwise.
8826 */
8827 static struct nfsdevice *
nfsrv_findmirroredds(struct nfsmount * nmp)8828 nfsrv_findmirroredds(struct nfsmount *nmp)
8829 {
8830 struct nfsdevice *ds, *fndds;
8831 int fndmirror;
8832
8833 mtx_assert(NFSDDSMUTEXPTR, MA_OWNED);
8834 /*
8835 * Search the DS server list for a match with nmp.
8836 * Remove the DS entry if found and there is a mirror.
8837 */
8838 fndds = NULL;
8839 fndmirror = 0;
8840 if (nfsrv_devidcnt == 0)
8841 return (fndds);
8842 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8843 if (ds->nfsdev_nmp == nmp) {
8844 NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n");
8845 fndds = ds;
8846 break;
8847 }
8848 }
8849 if (fndds == NULL)
8850 return (fndds);
8851 if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
8852 fndmirror = 1;
8853 else if (fndds->nfsdev_mdsisset != 0) {
8854 /* For the fsid is set case, search for a mirror. */
8855 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8856 if (ds != fndds && ds->nfsdev_nmp != NULL &&
8857 ds->nfsdev_mdsisset != 0 &&
8858 fsidcmp(&ds->nfsdev_mdsfsid,
8859 &fndds->nfsdev_mdsfsid) == 0) {
8860 fndmirror = 1;
8861 break;
8862 }
8863 }
8864 }
8865 if (fndmirror == 0) {
8866 NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n");
8867 return (NULL);
8868 }
8869 return (fndds);
8870 }
8871
8872 /*
8873 * Mark the appropriate devid and all associated layout as "out of space".
8874 */
8875 void
nfsrv_marknospc(char * devid,bool setit)8876 nfsrv_marknospc(char *devid, bool setit)
8877 {
8878 struct nfsdevice *ds;
8879 struct nfslayout *lyp;
8880 struct nfslayouthash *lhyp;
8881 int i;
8882
8883 NFSDDSLOCK();
8884 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8885 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0) {
8886 NFSD_DEBUG(1, "nfsrv_marknospc: devid %d\n", setit);
8887 ds->nfsdev_nospc = setit;
8888 }
8889 }
8890 NFSDDSUNLOCK();
8891
8892 for (i = 0; i < nfsrv_layouthashsize; i++) {
8893 lhyp = &nfslayouthash[i];
8894 NFSLOCKLAYOUT(lhyp);
8895 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
8896 if (NFSBCMP(lyp->lay_deviceid, devid,
8897 NFSX_V4DEVICEID) == 0) {
8898 NFSD_DEBUG(1, "nfsrv_marknospc: layout %d\n",
8899 setit);
8900 if (setit)
8901 lyp->lay_flags |= NFSLAY_NOSPC;
8902 else
8903 lyp->lay_flags &= ~NFSLAY_NOSPC;
8904 }
8905 }
8906 NFSUNLOCKLAYOUT(lhyp);
8907 }
8908 }
8909
8910 /*
8911 * Check to see if SP4_MACH_CRED is in use and, if it is, check that the
8912 * correct machine credential is being used.
8913 */
8914 static int
nfsrv_checkmachcred(int op,struct nfsrv_descript * nd,struct nfsclient * clp)8915 nfsrv_checkmachcred(int op, struct nfsrv_descript *nd, struct nfsclient *clp)
8916 {
8917
8918 if ((clp->lc_flags & LCL_MACHCRED) == 0 ||
8919 !NFSISSET_OPBIT(&clp->lc_mustops, op))
8920 return (0);
8921 KASSERT((nd->nd_flag & ND_NFSV41) != 0,
8922 ("nfsrv_checkmachcred: MachCred for NFSv4.0"));
8923 if ((nd->nd_flag & (ND_GSSINTEGRITY | ND_GSSPRIVACY)) != 0 &&
8924 nd->nd_princlen == clp->lc_namelen &&
8925 !NFSBCMP(nd->nd_principal, clp->lc_name, nd->nd_princlen))
8926 return (0);
8927 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
8928 }
8929
8930 /*
8931 * Issue a delegation and, optionally set rflagsp for why not.
8932 */
8933 static void
nfsrv_issuedelegation(struct vnode * vp,struct nfsclient * clp,struct nfsrv_descript * nd,int delegate,int writedeleg,int readonly,u_quad_t filerev,uint64_t rdonly,struct nfsstate ** new_delegp,struct nfsstate * new_stp,struct nfslockfile * lfp,uint32_t * rflagsp,nfsv4stateid_t * delegstateidp)8934 nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp,
8935 struct nfsrv_descript *nd, int delegate, int writedeleg, int readonly,
8936 u_quad_t filerev, uint64_t rdonly, struct nfsstate **new_delegp,
8937 struct nfsstate *new_stp, struct nfslockfile *lfp, uint32_t *rflagsp,
8938 nfsv4stateid_t *delegstateidp)
8939 {
8940 struct nfsstate *up_deleg, *new_deleg;
8941
8942 new_deleg = *new_delegp;
8943 up_deleg = LIST_FIRST(&lfp->lf_deleg);
8944 if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
8945 *rflagsp |= NFSV4OPEN_WDNOTWANTED;
8946 else if (nfsrv_issuedelegs == 0)
8947 *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
8948 else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
8949 *rflagsp |= NFSV4OPEN_WDRESOURCE;
8950 else if (delegate == 0 || !NFSVNO_DELEGOK(vp) ||
8951 (writedeleg == 0 && (readonly == 0 ||
8952 (new_stp->ls_flags & NFSLCK_WANTWDELEG) != 0)) ||
8953 (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
8954 LCL_CALLBACKSON) {
8955 /* Is this a downgrade attempt? */
8956 if (up_deleg != NULL && up_deleg->ls_clp == clp &&
8957 (up_deleg->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
8958 (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0)
8959 *rflagsp |= NFSV4OPEN_WDNOTSUPPDOWNGRADE;
8960 else
8961 *rflagsp |= NFSV4OPEN_WDCONTENTION;
8962 } else if (up_deleg != NULL &&
8963 (up_deleg->ls_flags & NFSLCK_DELEGREAD) != 0 &&
8964 (new_stp->ls_flags & NFSLCK_WANTWDELEG) != 0) {
8965 /* This is an atomic upgrade. */
8966 up_deleg->ls_stateid.seqid++;
8967 delegstateidp->seqid = up_deleg->ls_stateid.seqid;
8968 delegstateidp->other[0] = up_deleg->ls_stateid.other[0];
8969 delegstateidp->other[1] = up_deleg->ls_stateid.other[1];
8970 delegstateidp->other[2] = up_deleg->ls_stateid.other[2];
8971 up_deleg->ls_flags = (NFSLCK_DELEGWRITE |
8972 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
8973 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
8974 nfsrv_writedelegcnt++;
8975 } else {
8976 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
8977 new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
8978 = clp->lc_clientid.lval[0];
8979 new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
8980 = clp->lc_clientid.lval[1];
8981 new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
8982 = nfsrv_nextstateindex(clp);
8983 if (writedeleg && !rdonly &&
8984 (nfsrv_writedelegifpos || !readonly) &&
8985 (new_stp->ls_flags & (NFSLCK_WANTRDELEG |
8986 NFSLCK_WANTWDELEG)) != NFSLCK_WANTRDELEG) {
8987 new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
8988 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
8989 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
8990 nfsrv_writedelegcnt++;
8991 } else {
8992 new_deleg->ls_flags = (NFSLCK_DELEGREAD |
8993 NFSLCK_READACCESS);
8994 *rflagsp |= NFSV4OPEN_READDELEGATE;
8995 }
8996 new_deleg->ls_uid = new_stp->ls_uid;
8997 new_deleg->ls_lfp = lfp;
8998 new_deleg->ls_clp = clp;
8999 new_deleg->ls_filerev = filerev;
9000 new_deleg->ls_compref = nd->nd_compref;
9001 new_deleg->ls_lastrecall = 0;
9002 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
9003 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid),
9004 new_deleg, ls_hash);
9005 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
9006 *new_delegp = NULL;
9007 NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
9008 nfsrv_openpluslock++;
9009 nfsrv_delegatecnt++;
9010 }
9011 }
9012