1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All Rights Reserved
29 */
30
31 /*
32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 * Copyright 2019 Nexenta Systems, Inc.
34 * Copyright 2019 Nexenta by DDN, Inc.
35 * Copyright 2021-2025 Racktop Systems, Inc.
36 */
37
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/buf.h>
43 #include <sys/vfs.h>
44 #include <sys/vfs_opreg.h>
45 #include <sys/vnode.h>
46 #include <sys/uio.h>
47 #include <sys/errno.h>
48 #include <sys/sysmacros.h>
49 #include <sys/statvfs.h>
50 #include <sys/kmem.h>
51 #include <sys/dirent.h>
52 #include <sys/cmn_err.h>
53 #include <sys/debug.h>
54 #include <sys/systeminfo.h>
55 #include <sys/flock.h>
56 #include <sys/pathname.h>
57 #include <sys/nbmlock.h>
58 #include <sys/share.h>
59 #include <sys/atomic.h>
60 #include <sys/policy.h>
61 #include <sys/fem.h>
62 #include <sys/sdt.h>
63 #include <sys/ddi.h>
64 #include <sys/zone.h>
65
66 #include <fs/fs_reparse.h>
67
68 #include <rpc/types.h>
69 #include <rpc/auth.h>
70 #include <rpc/rpcsec_gss.h>
71 #include <rpc/svc.h>
72
73 #include <nfs/nfs.h>
74 #include <nfs/nfssys.h>
75 #include <nfs/export.h>
76 #include <nfs/nfs_cmd.h>
77 #include <nfs/lm.h>
78 #include <nfs/nfs4.h>
79 #include <nfs/nfs4_drc.h>
80
81 #include <sys/strsubr.h>
82 #include <sys/strsun.h>
83
84 #include <inet/common.h>
85 #include <inet/ip.h>
86 #include <inet/ip6.h>
87
88 #include <sys/tsol/label.h>
89 #include <sys/tsol/tndb.h>
90
91 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
93 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
94 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
95 extern struct svc_ops rdma_svc_ops;
96 extern int nfs_loaned_buffers;
97 #define RFS4_LOOKUP_EXP_STATE_MAX 8 /* Limit of loop to clean expired states */
98 static int rfs4_lookup_exp_state_max = RFS4_LOOKUP_EXP_STATE_MAX;
99 /* End of Tunables */
100
101 static int rdma_setup_read_data4(READ4args *, READ4res *);
102
103 /*
104 * Used to bump the stateid4.seqid value and show changes in the stateid
105 */
106 #define next_stateid(sp) (++(sp)->bits.chgseq)
107
108 /*
109 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
110 * This is used to return NFS4ERR_TOOSMALL when clients specify
111 * maxcount that isn't large enough to hold the smallest possible
112 * XDR encoded dirent.
113 *
114 * sizeof cookie (8 bytes) +
115 * sizeof name_len (4 bytes) +
116 * sizeof smallest (padded) name (4 bytes) +
117 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
118 * sizeof attrlist4_len (4 bytes) +
119 * sizeof next boolean (4 bytes)
120 *
121 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
122 * the smallest possible entry4 (assumes no attrs requested).
123 * sizeof nfsstat4 (4 bytes) +
124 * sizeof verifier4 (8 bytes) +
125 * sizeof entry4list bool (4 bytes) +
126 * sizeof entry4 (36 bytes) +
127 * sizeof eof bool (4 bytes)
128 *
129 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
130 * VOP_READDIR. Its value is the size of the maximum possible dirent
131 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
132 * required for a given name length. MAXNAMELEN is the maximum
133 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
134 * macros are to allow for . and .. entries -- just a minor tweak to try
135 * and guarantee that buffer we give to VOP_READDIR will be large enough
136 * to hold ., .., and the largest possible solaris dirent64.
137 */
138 #define RFS4_MINLEN_ENTRY4 36
139 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
140 #define RFS4_MINLEN_RDDIR_BUF \
141 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
142
143 /*
144 * It would be better to pad to 4 bytes since that's what XDR would do,
145 * but the dirents UFS gives us are already padded to 8, so just take
146 * what we're given. Dircount is only a hint anyway. Currently the
147 * solaris kernel is ASCII only, so there's no point in calling the
148 * UTF8 functions.
149 *
 * dirent64: name padded to provide 8 byte struct alignment
151 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
152 *
153 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
154 *
155 */
156 #define DIRENT64_TO_DIRCOUNT(dp) \
157 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
158
159
160 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
161
162 u_longlong_t nfs4_srv_caller_id;
163 uint_t nfs4_srv_vkey = 0;
164
165 void rfs4_init_compound_state(struct compound_state *);
166
167 static void nullfree(caddr_t);
168 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 struct compound_state *);
170 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 struct compound_state *);
172 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 struct compound_state *);
174 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 struct compound_state *);
176 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
177 struct compound_state *);
178 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
179 struct svc_req *, struct compound_state *);
180 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
181 struct svc_req *, struct compound_state *);
182 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
183 struct compound_state *);
184 static void rfs4_op_getattr_free(nfs_resop4 *);
185 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
186 struct compound_state *);
187 static void rfs4_op_getfh_free(nfs_resop4 *);
188 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 struct compound_state *);
190 static void rfs4_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 struct compound_state *);
192 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
193 struct compound_state *);
194 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 struct compound_state *);
196 static void lock_denied_free(nfs_resop4 *);
197 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 struct compound_state *);
199 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
200 struct compound_state *);
201 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
202 struct compound_state *);
203 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
204 struct compound_state *);
205 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
206 struct svc_req *req, struct compound_state *cs);
207 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
208 struct compound_state *);
209 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
210 struct compound_state *);
211 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
212 struct svc_req *, struct compound_state *);
213 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
214 struct svc_req *, struct compound_state *);
215 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 struct compound_state *);
217 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 struct compound_state *);
219 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 struct compound_state *);
221 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 struct compound_state *);
223 static void rfs4_op_read_free(nfs_resop4 *);
224 static void rfs4_op_readdir_free(nfs_resop4 *resop);
225 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
226 struct compound_state *);
227 static void rfs4_op_readlink_free(nfs_resop4 *);
228 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
229 struct svc_req *, struct compound_state *);
230 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 struct compound_state *);
232 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 struct compound_state *);
234 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 struct compound_state *);
236 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 struct compound_state *);
238 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 struct compound_state *);
240 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 struct compound_state *);
242 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
243 struct compound_state *);
244 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
245 struct compound_state *);
246 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
247 struct svc_req *, struct compound_state *);
248 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
249 struct svc_req *req, struct compound_state *);
250 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
251 struct compound_state *);
252 static void rfs4_op_secinfo_free(nfs_resop4 *);
253
254 void rfs4x_op_exchange_id(nfs_argop4 *argop, nfs_resop4 *resop,
255 struct svc_req *req, struct compound_state *cs);
256 void rfs4x_exchange_id_free(nfs_resop4 *);
257
258 void rfs4x_op_create_session(nfs_argop4 *argop, nfs_resop4 *resop,
259 struct svc_req *req, struct compound_state *cs);
260
261 void rfs4x_op_destroy_session(nfs_argop4 *argop, nfs_resop4 *resop,
262 struct svc_req *req, compound_state_t *cs);
263
264 void rfs4x_op_sequence(nfs_argop4 *argop, nfs_resop4 *resop,
265 struct svc_req *req, struct compound_state *cs);
266
267 void rfs4x_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop,
268 struct svc_req *req, compound_state_t *cs);
269
270 void rfs4x_op_destroy_clientid(nfs_argop4 *argop, nfs_resop4 *resop,
271 struct svc_req *req, compound_state_t *cs);
272
273 void rfs4x_op_bind_conn_to_session(nfs_argop4 *argop, nfs_resop4 *resop,
274 struct svc_req *req, compound_state_t *cs);
275
276 void rfs4x_op_secinfo_noname(nfs_argop4 *argop, nfs_resop4 *resop,
277 struct svc_req *req, compound_state_t *cs);
278 void rfs4x_op_free_stateid(nfs_argop4 *argop, nfs_resop4 *resop,
279 struct svc_req *req, compound_state_t *cs);
280
281 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
282 struct svc_req *);
283 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
284 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
285
/*
 * translation table for attrs
 *
 * Set up by nfs4_ntov_table_init() and torn down by nfs4_ntov_table_free()
 * (both declared just below).
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* attribute value storage */
	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* one byte per possible attribute */
	int attrcnt;			/* presumably entries in use - confirm */
	bool_t vfsstat;			/* NOTE(review): looks like a "VFS */
					/* stats needed" flag - confirm */
};
295
296 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
297 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
298 struct nfs4_svgetit_arg *sargp);
299
300 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
301 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
302 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
303
304 static void hanfsv4_failover(nfs4_srv_t *);
305
306 fem_t *deleg_rdops;
307 fem_t *deleg_wrops;
308
309 /*
310 * NFS4 op dispatch table
311 */
312
/*
 * One slot of the NFSv4 operation dispatch table.
 *
 * The handlers declared in this file take
 * (nfs_argop4 *, nfs_resop4 *, struct svc_req *, struct compound_state *),
 * and the result-free routines take an nfs_resop4 * (nullfree() is declared
 * with a caddr_t argument).  Both pointers are declared without a prototype,
 * so the compiler does not check those signatures.
 */
struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* OP_IDEMPOTENT, etc... */
};

/* Bit values that may be OR-ed together in dis_flags. */
#define	OP_IDEMPOTENT		(1 << 0)
#define	OP_CLEAR_STATEID	(1 << 1)
321
322 static struct rfsv4disp rfsv4disptab[] = {
323 /*
324 * NFS VERSION 4
325 */
326
327 /* RFS_NULL = 0 */
328 {rfs4_op_illegal, nullfree, 0},
329
330 /* UNUSED = 1 */
331 {rfs4_op_illegal, nullfree, 0},
332
333 /* UNUSED = 2 */
334 {rfs4_op_illegal, nullfree, 0},
335
336 /* OP_ACCESS = 3 */
337 {rfs4_op_access, nullfree, OP_IDEMPOTENT},
338
339 /* OP_CLOSE = 4 */
340 {rfs4_op_close, nullfree, OP_CLEAR_STATEID},
341
342 /* OP_COMMIT = 5 */
343 {rfs4_op_commit, nullfree, OP_IDEMPOTENT},
344
345 /* OP_CREATE = 6 */
346 {rfs4_op_create, nullfree, OP_CLEAR_STATEID},
347
348 /* OP_DELEGPURGE = 7 */
349 {rfs4_op_delegpurge, nullfree, 0},
350
351 /* OP_DELEGRETURN = 8 */
352 {rfs4_op_delegreturn, nullfree, 0},
353
354 /* OP_GETATTR = 9 */
355 {rfs4_op_getattr, rfs4_op_getattr_free, OP_IDEMPOTENT},
356
357 /* OP_GETFH = 10 */
358 {rfs4_op_getfh, rfs4_op_getfh_free, OP_IDEMPOTENT},
359
360 /* OP_LINK = 11 */
361 {rfs4_op_link, nullfree, 0},
362
363 /* OP_LOCK = 12 */
364 {rfs4_op_lock, lock_denied_free, 0},
365
366 /* OP_LOCKT = 13 */
367 {rfs4_op_lockt, lock_denied_free, 0},
368
369 /* OP_LOCKU = 14 */
370 {rfs4_op_locku, nullfree, 0},
371
372 /* OP_LOOKUP = 15 */
373 {rfs4_op_lookup, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},
374
375 /* OP_LOOKUPP = 16 */
376 {rfs4_op_lookupp, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},
377
378 /* OP_NVERIFY = 17 */
379 {rfs4_op_nverify, nullfree, OP_IDEMPOTENT},
380
381 /* OP_OPEN = 18 */
382 {rfs4_op_open, rfs4_free_reply, 0},
383
384 /* OP_OPENATTR = 19 */
385 {rfs4_op_openattr, nullfree, 0},
386
387 /* OP_OPEN_CONFIRM = 20 */
388 {rfs4_op_open_confirm, nullfree, 0},
389
390 /* OP_OPEN_DOWNGRADE = 21 */
391 {rfs4_op_open_downgrade, nullfree, 0},
392
393 /* OP_OPEN_PUTFH = 22 */
394 {rfs4_op_putfh, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},
395
396 /* OP_PUTPUBFH = 23 */
397 {rfs4_op_putpubfh, nullfree, OP_IDEMPOTENT},
398
399 /* OP_PUTROOTFH = 24 */
400 {rfs4_op_putrootfh, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},
401
402 /* OP_READ = 25 */
403 {rfs4_op_read, rfs4_op_read_free, OP_IDEMPOTENT},
404
405 /* OP_READDIR = 26 */
406 {rfs4_op_readdir, rfs4_op_readdir_free, OP_IDEMPOTENT},
407
408 /* OP_READLINK = 27 */
409 {rfs4_op_readlink, rfs4_op_readlink_free, OP_IDEMPOTENT},
410
411 /* OP_REMOVE = 28 */
412 {rfs4_op_remove, nullfree, 0},
413
414 /* OP_RENAME = 29 */
415 {rfs4_op_rename, nullfree, 0},
416
417 /* OP_RENEW = 30 */
418 {rfs4_op_renew, nullfree, 0},
419
420 /* OP_RESTOREFH = 31 */
421 {rfs4_op_restorefh, nullfree, OP_IDEMPOTENT},
422
423 /* OP_SAVEFH = 32 */
424 {rfs4_op_savefh, nullfree, OP_IDEMPOTENT},
425
426 /* OP_SECINFO = 33 */
427 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
428
429 /* OP_SETATTR = 34 */
430 {rfs4_op_setattr, nullfree, 0},
431
432 /* OP_SETCLIENTID = 35 */
433 {rfs4_op_setclientid, nullfree, 0},
434
435 /* OP_SETCLIENTID_CONFIRM = 36 */
436 {rfs4_op_setclientid_confirm, nullfree, 0},
437
438 /* OP_VERIFY = 37 */
439 {rfs4_op_verify, nullfree, OP_IDEMPOTENT},
440
441 /* OP_WRITE = 38 */
442 {rfs4_op_write, nullfree, 0},
443
444 /* OP_RELEASE_LOCKOWNER = 39 */
445 {rfs4_op_release_lockowner, nullfree, 0},
446
447 /*
448 * NFSv4.1 operations
449 */
450
451 /* OP_BACKCHANNEL_CTL = 40 */
452 {rfs4_op_notsup, nullfree, 0},
453
454 /* OP_BIND_CONN_TO_SESSION = 41 */
455 {rfs4x_op_bind_conn_to_session, nullfree, 0},
456
457 /* OP_EXCHANGE_ID = 42 */
458 {rfs4x_op_exchange_id, rfs4x_exchange_id_free, 0},
459
460 /* OP_CREATE_SESSION = 43 */
461 {rfs4x_op_create_session, nullfree, 0},
462
463 /* OP_DESTROY_SESSION = 44 */
464 {rfs4x_op_destroy_session, nullfree, 0},
465
466 /* OP_FREE_STATEID = 45 */
467 {rfs4x_op_free_stateid, nullfree, 0},
468
469 /* OP_GET_DIR_DELEGATION = 46 */
470 {rfs4_op_notsup, nullfree, 0},
471
472 /* OP_GETDEVICEINFO = 47 */
473 {rfs4_op_notsup, nullfree, 0},
474
475 /* OP_GETDEVICELIST = 48 */
476 {rfs4_op_notsup, nullfree, 0},
477
478 /* OP_LAYOUTCOMMIT = 49 */
479 {rfs4_op_notsup, nullfree, 0},
480
481 /* OP_LAYOUTGET = 50 */
482 {rfs4_op_notsup, nullfree, 0},
483
484 /* OP_LAYOUTRETURN = 51 */
485 {rfs4_op_notsup, nullfree, 0},
486
487 /* OP_SECINFO_NO_NAME = 52 */
488 {rfs4x_op_secinfo_noname, rfs4_op_secinfo_free, 0},
489
490 /* OP_SEQUENCE = 53 */
491 {rfs4x_op_sequence, nullfree, 0},
492
493 /* OP_SET_SSV = 54 */
494 {rfs4_op_notsup, nullfree, 0},
495
496 /* OP_TEST_STATEID = 55 */
497 {rfs4_op_notsup, nullfree, 0},
498
499 /* OP_WANT_DELEGATION = 56 */
500 {rfs4_op_notsup, nullfree, 0},
501
502 /* OP_DESTROY_CLIENTID = 57 */
503 {rfs4x_op_destroy_clientid, nullfree, 0},
504
505 /* OP_RECLAIM_COMPLETE = 58 */
506 {rfs4x_op_reclaim_complete, nullfree, 0},
507
508 /*
509 * NFSv4.2 operations
510 */
511 /* OP_ALLOCATE = 59 */
512 {rfs4_op_notsup, nullfree, 0},
513
514 /* OP_COPY = 60 */
515 {rfs4_op_notsup, nullfree, 0},
516
517 /* OP_COPY_NOTIFY = 61 */
518 {rfs4_op_notsup, nullfree, 0},
519
520 /* OP_DEALLOCATE = 62 */
521 {rfs4_op_notsup, nullfree, 0},
522
523 /* OP_IO_ADVISE = 63 */
524 {rfs4_op_notsup, nullfree, 0},
525
526 /* OP_LAYOUTERROR = 64 */
527 {rfs4_op_notsup, nullfree, 0},
528
529 /* OP_LAYOUTSTATS = 65 */
530 {rfs4_op_notsup, nullfree, 0},
531
532 /* OP_OFFLOAD_CANCEL = 66 */
533 {rfs4_op_notsup, nullfree, 0},
534
535 /* OP_OFFLOAD_STATUS = 67 */
536 {rfs4_op_notsup, nullfree, 0},
537
538 /* OP_READ_PLUS = 68 */
539 {rfs4_op_notsup, nullfree, 0},
540
541 /* OP_SEEK = 69 */
542 {rfs4_op_notsup, nullfree, 0},
543
544 /* OP_WRITE_SAME = 70 */
545 {rfs4_op_notsup, nullfree, 0},
546
547 /* OP_CLONE = 71 */
548 {rfs4_op_notsup, nullfree, 0},
549
550 };
551
552 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
553
554 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
555
556 #ifdef DEBUG
557
558 int rfs4_fillone_debug = 0;
559 int rfs4_no_stub_access = 1;
560 int rfs4_rddir_debug = 0;
561
/*
 * Printable operation names (DEBUG only), laid out in the same order as
 * rfsv4disptab[]; the final entry sits at index OP_ILLEGAL_IDX.
 */
static char *rfs4_op_string[] = {
	/* NFSv4.0 */
	/*  0 */ "rfs4_op_null",
	/*  1 */ "rfs4_op_1 unused",
	/*  2 */ "rfs4_op_2 unused",
	/*  3 */ "rfs4_op_access",
	/*  4 */ "rfs4_op_close",
	/*  5 */ "rfs4_op_commit",
	/*  6 */ "rfs4_op_create",
	/*  7 */ "rfs4_op_delegpurge",
	/*  8 */ "rfs4_op_delegreturn",
	/*  9 */ "rfs4_op_getattr",
	/* 10 */ "rfs4_op_getfh",
	/* 11 */ "rfs4_op_link",
	/* 12 */ "rfs4_op_lock",
	/* 13 */ "rfs4_op_lockt",
	/* 14 */ "rfs4_op_locku",
	/* 15 */ "rfs4_op_lookup",
	/* 16 */ "rfs4_op_lookupp",
	/* 17 */ "rfs4_op_nverify",
	/* 18 */ "rfs4_op_open",
	/* 19 */ "rfs4_op_openattr",
	/* 20 */ "rfs4_op_open_confirm",
	/* 21 */ "rfs4_op_open_downgrade",
	/* 22 */ "rfs4_op_putfh",
	/* 23 */ "rfs4_op_putpubfh",
	/* 24 */ "rfs4_op_putrootfh",
	/* 25 */ "rfs4_op_read",
	/* 26 */ "rfs4_op_readdir",
	/* 27 */ "rfs4_op_readlink",
	/* 28 */ "rfs4_op_remove",
	/* 29 */ "rfs4_op_rename",
	/* 30 */ "rfs4_op_renew",
	/* 31 */ "rfs4_op_restorefh",
	/* 32 */ "rfs4_op_savefh",
	/* 33 */ "rfs4_op_secinfo",
	/* 34 */ "rfs4_op_setattr",
	/* 35 */ "rfs4_op_setclientid",
	/* 36 */ "rfs4_op_setclient_confirm",
	/* 37 */ "rfs4_op_verify",
	/* 38 */ "rfs4_op_write",
	/* 39 */ "rfs4_op_release_lockowner",

	/* NFSv4.1 */
	/* 40 */ "backchannel_ctl",
	/* 41 */ "bind_conn_to_session",
	/* 42 */ "exchange_id",
	/* 43 */ "create_session",
	/* 44 */ "destroy_session",
	/* 45 */ "free_stateid",
	/* 46 */ "get_dir_delegation",
	/* 47 */ "getdeviceinfo",
	/* 48 */ "getdevicelist",
	/* 49 */ "layoutcommit",
	/* 50 */ "layoutget",
	/* 51 */ "layoutreturn",
	/* 52 */ "secinfo_no_name",
	/* 53 */ "sequence",
	/* 54 */ "set_ssv",
	/* 55 */ "test_stateid",
	/* 56 */ "want_delegation",
	/* 57 */ "destroy_clientid",
	/* 58 */ "reclaim_complete",

	/* NFSv4.2 */
	/* 59 */ "allocate",
	/* 60 */ "copy",
	/* 61 */ "copy_notify",
	/* 62 */ "deallocate",
	/* 63 */ "io_advise",
	/* 64 */ "layouterror",
	/* 65 */ "layoutstats",
	/* 66 */ "offload_cancel",
	/* 67 */ "offload_status",
	/* 68 */ "read_plus",
	/* 69 */ "seek",
	/* 70 */ "write_same",
	/* 71 */ "clone",

	/* 72 */ "rfs4_op_illegal"
};
640
641 #endif
642
643 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
644
645 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
646
647 extern void rfs4_free_fs_locations4(fs_locations4 *);
648
649 #ifdef nextdp
650 #undef nextdp
651 #endif
652 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
653
654 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
655 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
656 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
657 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
658 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
659 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
660 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
661 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
662 NULL, NULL
663 };
664 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
665 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
666 VOPNAME_READ, { .femop_read = deleg_wr_read },
667 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
668 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
669 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
670 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
671 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
672 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
673 NULL, NULL
674 };
675
676
677 nfs4_srv_t *
nfs4_get_srv(void)678 nfs4_get_srv(void)
679 {
680 nfs_globals_t *ng = nfs_srv_getzg();
681 nfs4_srv_t *srv = ng->nfs4_srv;
682 ASSERT(srv != NULL);
683 return (srv);
684 }
685
686 void
rfs4_srv_zone_init(nfs_globals_t * ng)687 rfs4_srv_zone_init(nfs_globals_t *ng)
688 {
689 nfs4_srv_t *nsrv4;
690 timespec32_t verf;
691
692 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
693
694 /*
695 * The following algorithm attempts to find a unique verifier
696 * to be used as the write verifier returned from the server
697 * to the client. It is important that this verifier change
698 * whenever the server reboots. Of secondary importance, it
699 * is important for the verifier to be unique between two
700 * different servers.
701 *
702 * Thus, an attempt is made to use the system hostid and the
703 * current time in seconds when the nfssrv kernel module is
704 * loaded. It is assumed that an NFS server will not be able
705 * to boot and then to reboot in less than a second. If the
706 * hostid has not been set, then the current high resolution
707 * time is used. This will ensure different verifiers each
708 * time the server reboots and minimize the chances that two
709 * different servers will have the same verifier.
710 * XXX - this is broken on LP64 kernels.
711 */
712 verf.tv_sec = (time_t)zone_get_hostid(NULL);
713 if (verf.tv_sec != 0) {
714 verf.tv_nsec = gethrestime_sec();
715 } else {
716 timespec_t tverf;
717
718 gethrestime(&tverf);
719 verf.tv_sec = (time_t)tverf.tv_sec;
720 verf.tv_nsec = tverf.tv_nsec;
721 }
722 nsrv4->write4verf = *(uint64_t *)&verf;
723
724 /* Used to manage create/destroy of server state */
725 nsrv4->nfs4_server_state = NULL;
726 nsrv4->nfs4_cur_servinst = NULL;
727 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
728 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
729 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
730 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
731 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
732
733 ng->nfs4_srv = nsrv4;
734 }
735
736 void
rfs4_srv_zone_fini(nfs_globals_t * ng)737 rfs4_srv_zone_fini(nfs_globals_t *ng)
738 {
739 nfs4_srv_t *nsrv4 = ng->nfs4_srv;
740
741 ng->nfs4_srv = NULL;
742
743 mutex_destroy(&nsrv4->deleg_lock);
744 mutex_destroy(&nsrv4->state_lock);
745 mutex_destroy(&nsrv4->servinst_lock);
746 rw_destroy(&nsrv4->deleg_policy_lock);
747
748 kmem_free(nsrv4, sizeof (*nsrv4));
749 }
750
751 void
rfs4_srvrinit(void)752 rfs4_srvrinit(void)
753 {
754 extern void rfs4_attr_init();
755
756 rfs4_attr_init();
757
758 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
759 rfs4_disable_delegation();
760 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
761 &deleg_wrops) != 0) {
762 rfs4_disable_delegation();
763 fem_free(deleg_rdops);
764 }
765
766 nfs4_srv_caller_id = fs_new_caller_id();
767 lockt_sysid = lm_alloc_sysidt();
768 vsd_create(&nfs4_srv_vkey, NULL);
769 rfs4_state_g_init();
770 }
771
772 void
rfs4_srvrfini(void)773 rfs4_srvrfini(void)
774 {
775 if (lockt_sysid != LM_NOSYSID) {
776 lm_free_sysidt(lockt_sysid);
777 lockt_sysid = LM_NOSYSID;
778 }
779
780 rfs4_state_g_fini();
781
782 fem_free(deleg_rdops);
783 fem_free(deleg_wrops);
784 }
785
786 void
rfs4_do_server_start(int server_upordown,int srv_delegation,nfs4_minor_t nfs4_minor_max,int cluster_booted)787 rfs4_do_server_start(int server_upordown, int srv_delegation,
788 nfs4_minor_t nfs4_minor_max, int cluster_booted)
789 {
790 nfs4_srv_t *nsrv4 = nfs4_get_srv();
791
792 /* Is this a warm start? */
793 if (server_upordown == NFS_SERVER_QUIESCED) {
794 cmn_err(CE_NOTE, "nfs4_srv: "
795 "server was previously quiesced; "
796 "existing NFSv4 state will be re-used");
797
798 /*
799 * HA-NFSv4: this is also the signal
800 * that a Resource Group failover has
801 * occurred.
802 */
803 if (cluster_booted)
804 hanfsv4_failover(nsrv4);
805 } else {
806 /* Cold start */
807 nsrv4->rfs4_start_time = 0;
808 rfs4_state_zone_init(nsrv4);
809 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
810 nfs4_drc_hash);
811
812 /*
813 * The nfsd service was started with the -s option
814 * we need to pull in any state from the paths indicated.
815 */
816 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
817 /* read in the stable storage state from these paths */
818 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
819 rfs4_dss_newpaths);
820 }
821 }
822
823 nsrv4->nfs4_minor_max = nfs4_minor_max;
824
825 /* Check if delegation is to be enabled */
826 if (srv_delegation != FALSE)
827 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
828 }
829
830 void
rfs4_init_compound_state(struct compound_state * cs)831 rfs4_init_compound_state(struct compound_state *cs)
832 {
833 bzero(cs, sizeof (*cs));
834 cs->cont = TRUE;
835 cs->access = CS_ACCESS_DENIED;
836 cs->deleg = FALSE;
837 cs->mandlock = FALSE;
838 cs->fh.nfs_fh4_val = cs->fhbuf;
839 }
840
841 /* Do cleanup of the compound_state */
842 void
rfs4_fini_compound_state(struct compound_state * cs)843 rfs4_fini_compound_state(struct compound_state *cs)
844 {
845 if (cs->vp) {
846 VN_RELE(cs->vp);
847 }
848 if (cs->saved_vp) {
849 VN_RELE(cs->saved_vp);
850 }
851 if (cs->cr) {
852 crfree(cs->cr);
853 }
854 if (cs->saved_fh.nfs_fh4_val) {
855 kmem_free(cs->saved_fh.nfs_fh4_val, NFS4_FHSIZE);
856 }
857 if (cs->sp) {
858 rfs4x_session_rele(cs->sp);
859 }
860 }
861
862 void
rfs4_grace_start(rfs4_servinst_t * sip)863 rfs4_grace_start(rfs4_servinst_t *sip)
864 {
865 rw_enter(&sip->rwlock, RW_WRITER);
866 sip->start_time = nfs_sys_uptime();
867 sip->grace_period = rfs4_grace_period;
868 rw_exit(&sip->rwlock);
869 }
870
871 /*
872 * returns true if the instance's grace period has never been started
873 */
874 int
rfs4_servinst_grace_new(rfs4_servinst_t * sip)875 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
876 {
877 time_t start_time;
878
879 rw_enter(&sip->rwlock, RW_READER);
880 start_time = sip->start_time;
881 rw_exit(&sip->rwlock);
882
883 return (start_time == 0);
884 }
885
886 /*
887 * Indicates if server instance is within the
888 * grace period.
889 */
890 int
rfs4_servinst_in_grace(rfs4_servinst_t * sip)891 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
892 {
893 time_t grace_expiry;
894
895 /* All clients called reclaim-complete */
896 if (sip->nreclaim == 0 || sip->grace_period == 0)
897 return (0);
898
899 rw_enter(&sip->rwlock, RW_READER);
900 grace_expiry = sip->start_time + sip->grace_period;
901 rw_exit(&sip->rwlock);
902
903 if (nfs_sys_uptime() < grace_expiry)
904 return (1);
905
906 /* Once grace period ends, optimize next calls */
907 sip->grace_period = 0;
908 return (0);
909 }
910
911 int
rfs4_clnt_in_grace(rfs4_client_t * cp)912 rfs4_clnt_in_grace(rfs4_client_t *cp)
913 {
914 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
915
916 return (rfs4_servinst_in_grace(cp->rc_server_instance));
917 }
918
919 /*
920 * reset all currently active grace periods
921 */
922 void
rfs4_grace_reset_all(nfs4_srv_t * nsrv4)923 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
924 {
925 rfs4_servinst_t *sip;
926
927 mutex_enter(&nsrv4->servinst_lock);
928 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
929 if (rfs4_servinst_in_grace(sip))
930 rfs4_grace_start(sip);
931 mutex_exit(&nsrv4->servinst_lock);
932 }
933
934 /*
935 * start any new instances' grace periods
936 */
937 void
rfs4_grace_start_new(nfs4_srv_t * nsrv4)938 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
939 {
940 rfs4_servinst_t *sip;
941
942 mutex_enter(&nsrv4->servinst_lock);
943 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
944 if (rfs4_servinst_grace_new(sip))
945 rfs4_grace_start(sip);
946 mutex_exit(&nsrv4->servinst_lock);
947 }
948
949 static rfs4_dss_path_t *
rfs4_dss_newpath(nfs4_srv_t * nsrv4,rfs4_servinst_t * sip,char * path,unsigned index)950 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
951 char *path, unsigned index)
952 {
953 size_t len;
954 rfs4_dss_path_t *dss_path;
955
956 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
957
958 /*
959 * Take a copy of the string, since the original may be overwritten.
960 * Sadly, no strdup() in the kernel.
961 */
962 /* allow for NUL */
963 len = strlen(path) + 1;
964 dss_path->path = kmem_alloc(len, KM_SLEEP);
965 (void) strlcpy(dss_path->path, path, len);
966
967 /* associate with servinst */
968 dss_path->sip = sip;
969 dss_path->index = index;
970
971 /*
972 * Add to list of served paths.
973 * No locking required, as we're only ever called at startup.
974 */
975 if (nsrv4->dss_pathlist == NULL) {
976 /* this is the first dss_path_t */
977
978 /* needed for insque/remque */
979 dss_path->next = dss_path->prev = dss_path;
980
981 nsrv4->dss_pathlist = dss_path;
982 } else {
983 insque(dss_path, nsrv4->dss_pathlist);
984 }
985
986 return (dss_path);
987 }
988
989 /*
990 * Create a new server instance, and make it the currently active instance.
991 * Note that starting the grace period too early will reduce the clients'
992 * recovery window.
993 */
994 void
rfs4_servinst_create(nfs4_srv_t * nsrv4,int start_grace,int dss_npaths,char ** dss_paths)995 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
996 int dss_npaths, char **dss_paths)
997 {
998 unsigned i;
999 rfs4_servinst_t *sip;
1000 rfs4_oldstate_t *oldstate;
1001
1002 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
1003 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
1004
1005 sip->nreclaim = 0;
1006 sip->start_time = (time_t)0;
1007 sip->grace_period = (time_t)0;
1008 sip->next = NULL;
1009 sip->prev = NULL;
1010
1011 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
1012 /*
1013 * This initial dummy entry is required to setup for insque/remque.
1014 * It must be skipped over whenever the list is traversed.
1015 */
1016 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
1017 /* insque/remque require initial list entry to be self-terminated */
1018 oldstate->next = oldstate;
1019 oldstate->prev = oldstate;
1020 sip->oldstate = oldstate;
1021
1022
1023 sip->dss_npaths = dss_npaths;
1024 sip->dss_paths = kmem_alloc(dss_npaths *
1025 sizeof (rfs4_dss_path_t *), KM_SLEEP);
1026
1027 for (i = 0; i < dss_npaths; i++) {
1028 sip->dss_paths[i] =
1029 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
1030 }
1031
1032 mutex_enter(&nsrv4->servinst_lock);
1033 if (nsrv4->nfs4_cur_servinst != NULL) {
1034 /* add to linked list */
1035 sip->prev = nsrv4->nfs4_cur_servinst;
1036 nsrv4->nfs4_cur_servinst->next = sip;
1037 }
1038 if (start_grace)
1039 rfs4_grace_start(sip);
1040 /* make the new instance "current" */
1041 nsrv4->nfs4_cur_servinst = sip;
1042
1043 mutex_exit(&nsrv4->servinst_lock);
1044 }
1045
1046 /*
1047 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
1048 * all instances directly.
1049 */
1050 void
rfs4_servinst_destroy_all(nfs4_srv_t * nsrv4)1051 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
1052 {
1053 rfs4_servinst_t *sip, *prev, *current;
1054 #ifdef DEBUG
1055 int n = 0;
1056 #endif
1057
1058 mutex_enter(&nsrv4->servinst_lock);
1059 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
1060 current = nsrv4->nfs4_cur_servinst;
1061 nsrv4->nfs4_cur_servinst = NULL;
1062 for (sip = current; sip != NULL; sip = prev) {
1063 prev = sip->prev;
1064 rw_destroy(&sip->rwlock);
1065 if (sip->oldstate)
1066 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
1067 if (sip->dss_paths) {
1068 int i = sip->dss_npaths;
1069
1070 while (i > 0) {
1071 i--;
1072 if (sip->dss_paths[i] != NULL) {
1073 char *path = sip->dss_paths[i]->path;
1074
1075 if (path != NULL) {
1076 kmem_free(path,
1077 strlen(path) + 1);
1078 }
1079 kmem_free(sip->dss_paths[i],
1080 sizeof (rfs4_dss_path_t));
1081 }
1082 }
1083 kmem_free(sip->dss_paths,
1084 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
1085 }
1086 kmem_free(sip, sizeof (rfs4_servinst_t));
1087 #ifdef DEBUG
1088 n++;
1089 #endif
1090 }
1091 mutex_exit(&nsrv4->servinst_lock);
1092 }
1093
1094 /*
1095 * Assign the current server instance to a client_t.
1096 * Should be called with cp->rc_dbe held.
1097 */
1098 void
rfs4_servinst_assign(nfs4_srv_t * nsrv4,rfs4_client_t * cp,rfs4_servinst_t * sip)1099 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
1100 rfs4_servinst_t *sip)
1101 {
1102 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1103
1104 /*
1105 * The lock ensures that if the current instance is in the process
1106 * of changing, we will see the new one.
1107 */
1108 mutex_enter(&nsrv4->servinst_lock);
1109 cp->rc_server_instance = sip;
1110 mutex_exit(&nsrv4->servinst_lock);
1111 }
1112
1113 rfs4_servinst_t *
rfs4_servinst(rfs4_client_t * cp)1114 rfs4_servinst(rfs4_client_t *cp)
1115 {
1116 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1117
1118 return (cp->rc_server_instance);
1119 }
1120
1121 /* ARGSUSED */
1122 static void
nullfree(caddr_t resop)1123 nullfree(caddr_t resop)
1124 {
1125 }
1126
1127 /*
1128 * This is a fall-through for invalid or not implemented (yet) ops
1129 */
1130 /* ARGSUSED */
1131 static void
rfs4_op_inval(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1132 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1133 struct compound_state *cs)
1134 {
1135 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
1136 }
1137
1138 /*
1139 * Check if the security flavor, nfsnum, is in the flavor_list.
1140 */
1141 bool_t
in_flavor_list(int nfsnum,int * flavor_list,int count)1142 in_flavor_list(int nfsnum, int *flavor_list, int count)
1143 {
1144 int i;
1145
1146 for (i = 0; i < count; i++) {
1147 if (nfsnum == flavor_list[i])
1148 return (TRUE);
1149 }
1150 return (FALSE);
1151 }
1152
1153 /*
1154 * Used by rfs4_op_secinfo to get the security information from the
1155 * export structure associated with the component.
1156 */
1157 /* ARGSUSED */
1158 nfsstat4
do_rfs4_op_secinfo(struct compound_state * cs,char * nm,SECINFO4res * resp)1159 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
1160 {
1161 int error, different_export = 0;
1162 vnode_t *dvp, *vp;
1163 struct exportinfo *exi;
1164 fid_t fid;
1165 uint_t count, i;
1166 secinfo4 *resok_val;
1167 struct secinfo *secp;
1168 seconfig_t *si;
1169 bool_t did_traverse = FALSE;
1170 int dotdot, walk;
1171 nfs_export_t *ne = nfs_get_export();
1172
1173 dvp = cs->vp;
1174 exi = cs->exi;
1175 ASSERT(exi != NULL);
1176 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
1177
1178 /*
1179 * If dotdotting, then need to check whether it's above the
1180 * root of a filesystem, or above an export point.
1181 */
1182 if (dotdot) {
1183 vnode_t *zone_rootvp = ne->exi_root->exi_vp;
1184
1185 ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
1186 /*
1187 * If dotdotting at the root of a filesystem, then
1188 * need to traverse back to the mounted-on filesystem
1189 * and do the dotdot lookup there.
1190 */
1191 if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
1192
1193 /*
1194 * If at the system root, then can
1195 * go up no further.
1196 */
1197 if (VN_CMP(dvp, zone_rootvp))
1198 return (puterrno4(ENOENT));
1199
1200 /*
1201 * Traverse back to the mounted-on filesystem
1202 */
1203 dvp = untraverse(dvp, zone_rootvp);
1204
1205 /*
1206 * Set the different_export flag so we remember
1207 * to pick up a new exportinfo entry for
1208 * this new filesystem.
1209 */
1210 different_export = 1;
1211 } else {
1212
1213 /*
1214 * If dotdotting above an export point then set
1215 * the different_export to get new export info.
1216 */
1217 different_export = nfs_exported(exi, dvp);
1218 }
1219 }
1220
1221 /*
1222 * Get the vnode for the component "nm".
1223 */
1224 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1225 NULL, NULL, NULL);
1226 if (error)
1227 return (puterrno4(error));
1228
1229 /*
1230 * If the vnode is in a pseudo filesystem, or if the security flavor
1231 * used in the request is valid but not an explicitly shared flavor,
1232 * or the access bit indicates that this is a limited access,
1233 * check whether this vnode is visible.
1234 */
1235 if (!different_export &&
1236 (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1237 cs->access & CS_ACCESS_LIMITED)) {
1238 if (! nfs_visible(exi, vp, &different_export)) {
1239 VN_RELE(vp);
1240 return (puterrno4(ENOENT));
1241 }
1242 }
1243
1244 /*
1245 * If it's a mountpoint, then traverse it.
1246 */
1247 if (vn_ismntpt(vp)) {
1248 if ((error = traverse(&vp)) != 0) {
1249 VN_RELE(vp);
1250 return (puterrno4(error));
1251 }
1252 /* remember that we had to traverse mountpoint */
1253 did_traverse = TRUE;
1254 different_export = 1;
1255 } else if (vp->v_vfsp != dvp->v_vfsp) {
1256 /*
1257 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1258 * then vp is probably an LOFS object. We don't need the
1259 * realvp, we just need to know that we might have crossed
1260 * a server fs boundary and need to call checkexport4.
1261 * (LOFS lookup hides server fs mountpoints, and actually calls
1262 * traverse)
1263 */
1264 different_export = 1;
1265 }
1266
1267 /*
1268 * Get the export information for it.
1269 */
1270 if (different_export) {
1271
1272 bzero(&fid, sizeof (fid));
1273 fid.fid_len = MAXFIDSZ;
1274 error = vop_fid_pseudo(vp, &fid);
1275 if (error) {
1276 VN_RELE(vp);
1277 return (puterrno4(error));
1278 }
1279
1280 /* We'll need to reassign "exi". */
1281 if (dotdot)
1282 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1283 else
1284 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1285
1286 if (exi == NULL) {
1287 if (did_traverse == TRUE) {
1288 /*
1289 * If this vnode is a mounted-on vnode,
1290 * but the mounted-on file system is not
1291 * exported, send back the secinfo for
1292 * the exported node that the mounted-on
1293 * vnode lives in.
1294 */
1295 exi = cs->exi;
1296 } else {
1297 VN_RELE(vp);
1298 return (puterrno4(EACCES));
1299 }
1300 }
1301 }
1302 ASSERT(exi != NULL);
1303
1304
1305 /*
1306 * Create the secinfo result based on the security information
1307 * from the exportinfo structure (exi).
1308 *
1309 * Return all flavors for a pseudo node.
1310 * For a real export node, return the flavor that the client
1311 * has access with.
1312 */
1313 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1314 if (PSEUDO(exi)) {
1315 count = exi->exi_export.ex_seccnt; /* total sec count */
1316 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1317 secp = exi->exi_export.ex_secinfo;
1318
1319 for (i = 0; i < count; i++) {
1320 si = &secp[i].s_secinfo;
1321 resok_val[i].flavor = si->sc_rpcnum;
1322 if (resok_val[i].flavor == RPCSEC_GSS) {
1323 rpcsec_gss_info *info;
1324
1325 info = &resok_val[i].flavor_info;
1326 info->qop = si->sc_qop;
1327 info->service = (rpc_gss_svc_t)si->sc_service;
1328
1329 /* get oid opaque data */
1330 info->oid.sec_oid4_len =
1331 si->sc_gss_mech_type->length;
1332 info->oid.sec_oid4_val = kmem_alloc(
1333 si->sc_gss_mech_type->length, KM_SLEEP);
1334 bcopy(
1335 si->sc_gss_mech_type->elements,
1336 info->oid.sec_oid4_val,
1337 info->oid.sec_oid4_len);
1338 }
1339 }
1340 resp->SECINFO4resok_len = count;
1341 resp->SECINFO4resok_val = resok_val;
1342 } else {
1343 int ret_cnt = 0, k = 0;
1344 int *flavor_list;
1345
1346 count = exi->exi_export.ex_seccnt; /* total sec count */
1347 secp = exi->exi_export.ex_secinfo;
1348
1349 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1350 /* find out which flavors to return */
1351 for (i = 0; i < count; i ++) {
1352 int access, flavor, perm;
1353
1354 flavor = secp[i].s_secinfo.sc_nfsnum;
1355 perm = secp[i].s_flags;
1356
1357 access = nfsauth4_secinfo_access(exi, cs->req,
1358 flavor, perm, cs->basecr);
1359
1360 if (! (access & NFSAUTH_DENIED) &&
1361 ! (access & NFSAUTH_WRONGSEC)) {
1362 flavor_list[ret_cnt] = flavor;
1363 ret_cnt++;
1364 }
1365 }
1366
1367 /* Create the returning SECINFO value */
1368 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1369
1370 for (i = 0; i < count; i++) {
1371 /*
1372 * If the flavor is in the flavor list,
1373 * fill in resok_val.
1374 */
1375 si = &secp[i].s_secinfo;
1376 if (in_flavor_list(si->sc_nfsnum,
1377 flavor_list, ret_cnt)) {
1378 resok_val[k].flavor = si->sc_rpcnum;
1379 if (resok_val[k].flavor == RPCSEC_GSS) {
1380 rpcsec_gss_info *info;
1381
1382 info = &resok_val[k].flavor_info;
1383 info->qop = si->sc_qop;
1384 info->service = (rpc_gss_svc_t)
1385 si->sc_service;
1386
1387 /* get oid opaque data */
1388 info->oid.sec_oid4_len =
1389 si->sc_gss_mech_type->length;
1390 info->oid.sec_oid4_val = kmem_alloc(
1391 si->sc_gss_mech_type->length,
1392 KM_SLEEP);
1393 bcopy(si->sc_gss_mech_type->elements,
1394 info->oid.sec_oid4_val,
1395 info->oid.sec_oid4_len);
1396 }
1397 k++;
1398 }
1399 if (k >= ret_cnt)
1400 break;
1401 }
1402 resp->SECINFO4resok_len = ret_cnt;
1403 resp->SECINFO4resok_val = resok_val;
1404 kmem_free(flavor_list, count * sizeof (int));
1405 }
1406
1407 VN_RELE(vp);
1408 return (NFS4_OK);
1409 }
1410
1411 /*
1412 * SECINFO (Operation 33): Obtain required security information on
1413 * the component name in the format of (security-mechanism-oid, qop, service)
1414 * triplets.
1415 */
1416 /* ARGSUSED */
1417 static void
rfs4_op_secinfo(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1418 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1419 struct compound_state *cs)
1420 {
1421 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1422 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1423 utf8string *utfnm = &args->name;
1424 uint_t len;
1425 char *nm;
1426 struct sockaddr *ca;
1427 char *name = NULL;
1428 nfsstat4 status = NFS4_OK;
1429
1430 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1431 SECINFO4args *, args);
1432
1433 /*
1434 * Current file handle (cfh) should have been set before getting
1435 * into this function. If not, return error.
1436 */
1437 if (cs->vp == NULL) {
1438 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1439 goto out;
1440 }
1441
1442 if (cs->vp->v_type != VDIR) {
1443 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1444 goto out;
1445 }
1446
1447 /*
1448 * Verify the component name. If failed, error out, but
1449 * do not error out if the component name is a "..".
1450 * SECINFO will return its parents secinfo data for SECINFO "..".
1451 */
1452 status = utf8_dir_verify(utfnm);
1453 if (status != NFS4_OK) {
1454 if (utfnm->utf8string_len != 2 ||
1455 utfnm->utf8string_val[0] != '.' ||
1456 utfnm->utf8string_val[1] != '.') {
1457 *cs->statusp = resp->status = status;
1458 goto out;
1459 }
1460 }
1461
1462 nm = utf8_to_str(utfnm, &len, NULL);
1463 if (nm == NULL) {
1464 *cs->statusp = resp->status = NFS4ERR_INVAL;
1465 goto out;
1466 }
1467
1468 if (len > MAXNAMELEN) {
1469 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1470 kmem_free(nm, len);
1471 goto out;
1472 }
1473
1474 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1475 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1476 MAXPATHLEN + 1);
1477
1478 if (name == NULL) {
1479 *cs->statusp = resp->status = NFS4ERR_INVAL;
1480 kmem_free(nm, len);
1481 goto out;
1482 }
1483
1484 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1485
1486 if (resp->status == NFS4_OK && rfs4_has_session(cs)) {
1487 /*
1488 * See rfc 5661 section 2.6.3.1.1.8 and 18.29.3
1489 *
1490 * 2.6.3.1.1.8
1491 * SECINFO and SECINFO_NO_NAME consume the current
1492 * filehandle (note that this is a change from NFSv4.0).
1493 *
1494 * 18.29.3
1495 * On success, the current filehandle is consumed (see
1496 * Section 2.6.3.1.1.8), and if the next operation after
1497 * SECINFO tries to use the current filehandle, that
1498 * operation will fail with the status
1499 * NFS4ERR_NOFILEHANDLE.
1500 */
1501 VN_RELE(cs->vp);
1502 cs->vp = NULL;
1503 }
1504
1505 if (name != nm)
1506 kmem_free(name, MAXPATHLEN + 1);
1507 kmem_free(nm, len);
1508
1509 out:
1510 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1511 SECINFO4res *, resp);
1512 }
1513
1514 /*
1515 * Free SECINFO result.
1516 */
1517 /* ARGSUSED */
1518 static void
rfs4_op_secinfo_free(nfs_resop4 * resop)1519 rfs4_op_secinfo_free(nfs_resop4 *resop)
1520 {
1521 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1522 int count, i;
1523 secinfo4 *resok_val;
1524
1525 /* If this is not an Ok result, nothing to free. */
1526 if (resp->status != NFS4_OK) {
1527 return;
1528 }
1529
1530 count = resp->SECINFO4resok_len;
1531 resok_val = resp->SECINFO4resok_val;
1532
1533 for (i = 0; i < count; i++) {
1534 if (resok_val[i].flavor == RPCSEC_GSS) {
1535 rpcsec_gss_info *info;
1536
1537 info = &resok_val[i].flavor_info;
1538 kmem_free(info->oid.sec_oid4_val,
1539 info->oid.sec_oid4_len);
1540 }
1541 }
1542 kmem_free(resok_val, count * sizeof (secinfo4));
1543 resp->SECINFO4resok_len = 0;
1544 resp->SECINFO4resok_val = NULL;
1545 }
1546
1547 /* ARGSUSED */
1548 static void
rfs4_op_access(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1549 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1550 struct compound_state *cs)
1551 {
1552 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1553 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1554 int error;
1555 vnode_t *vp;
1556 struct vattr va;
1557 int checkwriteperm;
1558 cred_t *cr = cs->cr;
1559 bslabel_t *clabel, *slabel;
1560 ts_label_t *tslabel;
1561 boolean_t admin_low_client;
1562
1563 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1564 ACCESS4args *, args);
1565
1566 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1567 if (cs->access == CS_ACCESS_DENIED) {
1568 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1569 goto out;
1570 }
1571 #endif
1572 if (cs->vp == NULL) {
1573 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1574 goto out;
1575 }
1576
1577 ASSERT(cr != NULL);
1578
1579 vp = cs->vp;
1580
1581 /*
1582 * If the file system is exported read only, it is not appropriate
1583 * to check write permissions for regular files and directories.
1584 * Special files are interpreted by the client, so the underlying
1585 * permissions are sent back to the client for interpretation.
1586 */
1587 if (rdonly4(req, cs) &&
1588 (vp->v_type == VREG || vp->v_type == VDIR))
1589 checkwriteperm = 0;
1590 else
1591 checkwriteperm = 1;
1592
1593 /*
1594 * XXX
1595 * We need the mode so that we can correctly determine access
1596 * permissions relative to a mandatory lock file. Access to
1597 * mandatory lock files is denied on the server, so it might
1598 * as well be reflected to the server during the open.
1599 */
1600 va.va_mask = AT_MODE;
1601 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1602 if (error) {
1603 *cs->statusp = resp->status = puterrno4(error);
1604 goto out;
1605 }
1606 resp->access = 0;
1607 resp->supported = 0;
1608
1609 if (is_system_labeled()) {
1610 ASSERT(req->rq_label != NULL);
1611 clabel = req->rq_label;
1612 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1613 "got client label from request(1)",
1614 struct svc_req *, req);
1615 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1616 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1617 *cs->statusp = resp->status = puterrno4(EACCES);
1618 goto out;
1619 }
1620 slabel = label2bslabel(tslabel);
1621 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1622 char *, "got server label(1) for vp(2)",
1623 bslabel_t *, slabel, vnode_t *, vp);
1624
1625 admin_low_client = B_FALSE;
1626 } else
1627 admin_low_client = B_TRUE;
1628 }
1629
1630 if (args->access & ACCESS4_READ) {
1631 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1632 if (!error && !MANDLOCK(vp, va.va_mode) &&
1633 (!is_system_labeled() || admin_low_client ||
1634 bldominates(clabel, slabel)))
1635 resp->access |= ACCESS4_READ;
1636 resp->supported |= ACCESS4_READ;
1637 }
1638 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1639 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1640 if (!error && (!is_system_labeled() || admin_low_client ||
1641 bldominates(clabel, slabel)))
1642 resp->access |= ACCESS4_LOOKUP;
1643 resp->supported |= ACCESS4_LOOKUP;
1644 }
1645 if (checkwriteperm &&
1646 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1647 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1648 if (!error && !MANDLOCK(vp, va.va_mode) &&
1649 (!is_system_labeled() || admin_low_client ||
1650 blequal(clabel, slabel)))
1651 resp->access |=
1652 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1653 resp->supported |=
1654 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1655 }
1656
1657 if (checkwriteperm &&
1658 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1659 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1660 if (!error && (!is_system_labeled() || admin_low_client ||
1661 blequal(clabel, slabel)))
1662 resp->access |= ACCESS4_DELETE;
1663 resp->supported |= ACCESS4_DELETE;
1664 }
1665 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1666 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1667 if (!error && !MANDLOCK(vp, va.va_mode) &&
1668 (!is_system_labeled() || admin_low_client ||
1669 bldominates(clabel, slabel)))
1670 resp->access |= ACCESS4_EXECUTE;
1671 resp->supported |= ACCESS4_EXECUTE;
1672 }
1673
1674 if (is_system_labeled() && !admin_low_client)
1675 label_rele(tslabel);
1676
1677 *cs->statusp = resp->status = NFS4_OK;
1678 out:
1679 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1680 ACCESS4res *, resp);
1681 }
1682
1683 /* ARGSUSED */
1684 static void
rfs4_op_commit(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1685 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1686 struct compound_state *cs)
1687 {
1688 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1689 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1690 int error;
1691 vnode_t *vp = cs->vp;
1692 cred_t *cr = cs->cr;
1693 vattr_t va;
1694 nfs4_srv_t *nsrv4;
1695
1696 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1697 COMMIT4args *, args);
1698
1699 if (vp == NULL) {
1700 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1701 goto out;
1702 }
1703 if (cs->access == CS_ACCESS_DENIED) {
1704 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1705 goto out;
1706 }
1707
1708 if (args->offset + args->count < args->offset) {
1709 *cs->statusp = resp->status = NFS4ERR_INVAL;
1710 goto out;
1711 }
1712
1713 va.va_mask = AT_UID;
1714 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1715
1716 /*
1717 * If we can't get the attributes, then we can't do the
1718 * right access checking. So, we'll fail the request.
1719 */
1720 if (error) {
1721 *cs->statusp = resp->status = puterrno4(error);
1722 goto out;
1723 }
1724 if (rdonly4(req, cs)) {
1725 *cs->statusp = resp->status = NFS4ERR_ROFS;
1726 goto out;
1727 }
1728
1729 if (vp->v_type != VREG) {
1730 if (vp->v_type == VDIR)
1731 resp->status = NFS4ERR_ISDIR;
1732 else
1733 resp->status = NFS4ERR_INVAL;
1734 *cs->statusp = resp->status;
1735 goto out;
1736 }
1737
1738 if (crgetuid(cr) != va.va_uid &&
1739 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1740 *cs->statusp = resp->status = puterrno4(error);
1741 goto out;
1742 }
1743
1744 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1745
1746 if (error) {
1747 *cs->statusp = resp->status = puterrno4(error);
1748 goto out;
1749 }
1750
1751 nsrv4 = nfs4_get_srv();
1752 *cs->statusp = resp->status = NFS4_OK;
1753 resp->writeverf = nsrv4->write4verf;
1754 out:
1755 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1756 COMMIT4res *, resp);
1757 }
1758
1759 /*
1760 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1761 * was completed. It does the nfsv4 create for special files.
1762 */
1763 /* ARGSUSED */
1764 static vnode_t *
do_rfs4_op_mknod(CREATE4args * args,CREATE4res * resp,struct svc_req * req,struct compound_state * cs,vattr_t * vap,char * nm)1765 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1766 struct compound_state *cs, vattr_t *vap, char *nm)
1767 {
1768 int error;
1769 cred_t *cr = cs->cr;
1770 vnode_t *dvp = cs->vp;
1771 vnode_t *vp = NULL;
1772 int mode;
1773 enum vcexcl excl;
1774
1775 switch (args->type) {
1776 case NF4CHR:
1777 case NF4BLK:
1778 if (secpolicy_sys_devices(cr) != 0) {
1779 *cs->statusp = resp->status = NFS4ERR_PERM;
1780 return (NULL);
1781 }
1782 if (args->type == NF4CHR)
1783 vap->va_type = VCHR;
1784 else
1785 vap->va_type = VBLK;
1786 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1787 args->ftype4_u.devdata.specdata2);
1788 vap->va_mask |= AT_RDEV;
1789 break;
1790 case NF4SOCK:
1791 vap->va_type = VSOCK;
1792 break;
1793 case NF4FIFO:
1794 vap->va_type = VFIFO;
1795 break;
1796 default:
1797 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1798 return (NULL);
1799 }
1800
1801 /*
1802 * Must specify the mode.
1803 */
1804 if (!(vap->va_mask & AT_MODE)) {
1805 *cs->statusp = resp->status = NFS4ERR_INVAL;
1806 return (NULL);
1807 }
1808
1809 excl = EXCL;
1810
1811 mode = 0;
1812
1813 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1814 if (error) {
1815 *cs->statusp = resp->status = puterrno4(error);
1816 return (NULL);
1817 }
1818 return (vp);
1819 }
1820
1821 /*
1822 * nfsv4 create is used to create non-regular files. For regular files,
1823 * use nfsv4 open.
1824 */
1825 /* ARGSUSED */
1826 static void
rfs4_op_create(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1827 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1828 struct compound_state *cs)
1829 {
1830 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1831 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1832 int error;
1833 struct vattr bva, iva, iva2, ava, *vap;
1834 cred_t *cr = cs->cr;
1835 vnode_t *dvp = cs->vp;
1836 vnode_t *vp = NULL;
1837 vnode_t *realvp;
1838 char *nm, *lnm;
1839 uint_t len, llen;
1840 int syncval = 0;
1841 struct nfs4_svgetit_arg sarg;
1842 struct nfs4_ntov_table ntov;
1843 struct statvfs64 sb;
1844 nfsstat4 status;
1845 struct sockaddr *ca;
1846 char *name = NULL;
1847 char *lname = NULL;
1848
1849 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1850 CREATE4args *, args);
1851
1852 resp->attrset = 0;
1853
1854 if (dvp == NULL) {
1855 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1856 goto out;
1857 }
1858
1859 /*
1860 * If there is an unshared filesystem mounted on this vnode,
1861 * do not allow to create an object in this directory.
1862 */
1863 if (vn_ismntpt(dvp)) {
1864 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1865 goto out;
1866 }
1867
1868 /* Verify that type is correct */
1869 switch (args->type) {
1870 case NF4LNK:
1871 case NF4BLK:
1872 case NF4CHR:
1873 case NF4SOCK:
1874 case NF4FIFO:
1875 case NF4DIR:
1876 break;
1877 default:
1878 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1879 goto out;
1880 };
1881
1882 if (cs->access == CS_ACCESS_DENIED) {
1883 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1884 goto out;
1885 }
1886 if (dvp->v_type != VDIR) {
1887 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1888 goto out;
1889 }
1890 status = utf8_dir_verify(&args->objname);
1891 if (status != NFS4_OK) {
1892 *cs->statusp = resp->status = status;
1893 goto out;
1894 }
1895
1896 if (rdonly4(req, cs)) {
1897 *cs->statusp = resp->status = NFS4ERR_ROFS;
1898 goto out;
1899 }
1900
1901 /*
1902 * Name of newly created object
1903 */
1904 nm = utf8_to_fn(&args->objname, &len, NULL);
1905 if (nm == NULL) {
1906 *cs->statusp = resp->status = NFS4ERR_INVAL;
1907 goto out;
1908 }
1909
1910 if (len > MAXNAMELEN) {
1911 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1912 kmem_free(nm, len);
1913 goto out;
1914 }
1915
1916 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1917 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1918 MAXPATHLEN + 1);
1919
1920 if (name == NULL) {
1921 *cs->statusp = resp->status = NFS4ERR_INVAL;
1922 kmem_free(nm, len);
1923 goto out;
1924 }
1925
1926 resp->attrset = 0;
1927
1928 sarg.sbp = &sb;
1929 sarg.is_referral = B_FALSE;
1930 nfs4_ntov_table_init(&ntov);
1931
1932 status = do_rfs4_set_attrs(&resp->attrset,
1933 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1934
1935 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1936 status = NFS4ERR_INVAL;
1937
1938 if (status != NFS4_OK) {
1939 *cs->statusp = resp->status = status;
1940 if (name != nm)
1941 kmem_free(name, MAXPATHLEN + 1);
1942 kmem_free(nm, len);
1943 nfs4_ntov_table_free(&ntov, &sarg);
1944 resp->attrset = 0;
1945 goto out;
1946 }
1947
1948 /* Get "before" change value */
1949 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1950 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1951 if (error) {
1952 *cs->statusp = resp->status = puterrno4(error);
1953 if (name != nm)
1954 kmem_free(name, MAXPATHLEN + 1);
1955 kmem_free(nm, len);
1956 nfs4_ntov_table_free(&ntov, &sarg);
1957 resp->attrset = 0;
1958 goto out;
1959 }
1960 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1961
1962 vap = sarg.vap;
1963
1964 /*
1965 * Set the default initial values for attributes when the parent
1966 * directory does not have the VSUID/VSGID bit set and they have
1967 * not been specified in createattrs.
1968 */
1969 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1970 vap->va_uid = crgetuid(cr);
1971 vap->va_mask |= AT_UID;
1972 }
1973 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1974 vap->va_gid = crgetgid(cr);
1975 vap->va_mask |= AT_GID;
1976 }
1977
1978 vap->va_mask |= AT_TYPE;
1979 switch (args->type) {
1980 case NF4DIR:
1981 vap->va_type = VDIR;
1982 if ((vap->va_mask & AT_MODE) == 0) {
1983 vap->va_mode = 0700; /* default: owner rwx only */
1984 vap->va_mask |= AT_MODE;
1985 }
1986 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1987 if (error)
1988 break;
1989
1990 /*
1991 * Get the initial "after" sequence number, if it fails,
1992 * set to zero
1993 */
1994 iva.va_mask = AT_SEQ;
1995 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1996 iva.va_seq = 0;
1997 break;
1998 case NF4LNK:
1999 vap->va_type = VLNK;
2000 if ((vap->va_mask & AT_MODE) == 0) {
2001 vap->va_mode = 0700; /* default: owner rwx only */
2002 vap->va_mask |= AT_MODE;
2003 }
2004
2005 /*
2006 * symlink names must be treated as data
2007 */
2008 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
2009 &llen, NULL);
2010
2011 if (lnm == NULL) {
2012 *cs->statusp = resp->status = NFS4ERR_INVAL;
2013 if (name != nm)
2014 kmem_free(name, MAXPATHLEN + 1);
2015 kmem_free(nm, len);
2016 nfs4_ntov_table_free(&ntov, &sarg);
2017 resp->attrset = 0;
2018 goto out;
2019 }
2020
2021 if (llen > MAXPATHLEN) {
2022 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2023 if (name != nm)
2024 kmem_free(name, MAXPATHLEN + 1);
2025 kmem_free(nm, len);
2026 kmem_free(lnm, llen);
2027 nfs4_ntov_table_free(&ntov, &sarg);
2028 resp->attrset = 0;
2029 goto out;
2030 }
2031
2032 lname = nfscmd_convname(ca, cs->exi, lnm,
2033 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2034
2035 if (lname == NULL) {
2036 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
2037 if (name != nm)
2038 kmem_free(name, MAXPATHLEN + 1);
2039 kmem_free(nm, len);
2040 kmem_free(lnm, llen);
2041 nfs4_ntov_table_free(&ntov, &sarg);
2042 resp->attrset = 0;
2043 goto out;
2044 }
2045
2046 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
2047 if (lname != lnm)
2048 kmem_free(lname, MAXPATHLEN + 1);
2049 kmem_free(lnm, llen);
2050 if (error)
2051 break;
2052
2053 /*
2054 * Get the initial "after" sequence number, if it fails,
2055 * set to zero
2056 */
2057 iva.va_mask = AT_SEQ;
2058 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
2059 iva.va_seq = 0;
2060
2061 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2062 NULL, NULL, NULL);
2063 if (error)
2064 break;
2065
2066 /*
2067 * va_seq is not safe over VOP calls, check it again
2068 * if it has changed zero out iva to force atomic = FALSE.
2069 */
2070 iva2.va_mask = AT_SEQ;
2071 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
2072 iva2.va_seq != iva.va_seq)
2073 iva.va_seq = 0;
2074 break;
2075 default:
2076 /*
2077 * probably a special file.
2078 */
2079 if ((vap->va_mask & AT_MODE) == 0) {
2080 vap->va_mode = 0600; /* default: owner rw only */
2081 vap->va_mask |= AT_MODE;
2082 }
2083 syncval = FNODSYNC;
2084 /*
2085 * We know this will only generate one VOP call
2086 */
2087 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
2088
2089 if (vp == NULL) {
2090 if (name != nm)
2091 kmem_free(name, MAXPATHLEN + 1);
2092 kmem_free(nm, len);
2093 nfs4_ntov_table_free(&ntov, &sarg);
2094 resp->attrset = 0;
2095 goto out;
2096 }
2097
2098 /*
2099 * Get the initial "after" sequence number, if it fails,
2100 * set to zero
2101 */
2102 iva.va_mask = AT_SEQ;
2103 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
2104 iva.va_seq = 0;
2105
2106 break;
2107 }
2108 if (name != nm)
2109 kmem_free(name, MAXPATHLEN + 1);
2110 kmem_free(nm, len);
2111
2112 if (error) {
2113 *cs->statusp = resp->status = puterrno4(error);
2114 }
2115
2116 /*
2117 * Force modified data and metadata out to stable storage.
2118 */
2119 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2120
2121 if (resp->status != NFS4_OK) {
2122 if (vp != NULL)
2123 VN_RELE(vp);
2124 nfs4_ntov_table_free(&ntov, &sarg);
2125 resp->attrset = 0;
2126 goto out;
2127 }
2128
2129 /*
2130 * Finish setup of cinfo response, "before" value already set.
2131 * Get "after" change value, if it fails, simply return the
2132 * before value.
2133 */
2134 ava.va_mask = AT_CTIME|AT_SEQ;
2135 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
2136 ava.va_ctime = bva.va_ctime;
2137 ava.va_seq = 0;
2138 }
2139 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
2140
2141 /*
2142 * True verification that object was created with correct
2143 * attrs is impossible. The attrs could have been changed
2144 * immediately after object creation. If attributes did
2145 * not verify, the only recourse for the server is to
2146 * destroy the object. Maybe if some attrs (like gid)
2147 * are set incorrectly, the object should be destroyed;
2148 * however, seems bad as a default policy. Do we really
2149 * want to destroy an object over one of the times not
2150 * verifying correctly? For these reasons, the server
2151 * currently sets bits in attrset for createattrs
2152 * that were set; however, no verification is done.
2153 *
2154 * vmask_to_nmask accounts for vattr bits set on create
2155 * [do_rfs4_set_attrs() only sets resp bits for
2156 * non-vattr/vfs bits.]
2157 * Mask off any bits set by default so as not to return
2158 * more attrset bits than were requested in createattrs
2159 */
2160 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
2161 resp->attrset &= args->createattrs.attrmask;
2162 nfs4_ntov_table_free(&ntov, &sarg);
2163
2164 error = makefh4(&cs->fh, vp, cs->exi);
2165 if (error) {
2166 *cs->statusp = resp->status = puterrno4(error);
2167 }
2168
2169 /*
2170 * The cinfo.atomic = TRUE only if we got no errors, we have
2171 * non-zero va_seq's, and it has incremented by exactly one
2172 * during the creation and it didn't change during the VOP_LOOKUP
2173 * or VOP_FSYNC.
2174 */
2175 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
2176 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
2177 resp->cinfo.atomic = TRUE;
2178 else
2179 resp->cinfo.atomic = FALSE;
2180
2181 /*
2182 * Force modified metadata out to stable storage.
2183 *
2184 * if a underlying vp exists, pass it to VOP_FSYNC
2185 */
2186 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2187 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
2188 else
2189 (void) VOP_FSYNC(vp, syncval, cr, NULL);
2190
2191 if (resp->status != NFS4_OK) {
2192 VN_RELE(vp);
2193 goto out;
2194 }
2195 if (cs->vp)
2196 VN_RELE(cs->vp);
2197
2198 cs->vp = vp;
2199 *cs->statusp = resp->status = NFS4_OK;
2200 out:
2201 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
2202 CREATE4res *, resp);
2203 }
2204
2205 /*ARGSUSED*/
2206 static void
rfs4_op_delegpurge(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2207 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2208 struct compound_state *cs)
2209 {
2210 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
2211 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
2212
2213 rfs4_op_inval(argop, resop, req, cs);
2214
2215 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
2216 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
2217 }
2218
2219 /*ARGSUSED*/
2220 static void
rfs4_op_delegreturn(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2221 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2222 struct compound_state *cs)
2223 {
2224 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
2225 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
2226 rfs4_deleg_state_t *dsp;
2227 nfsstat4 status;
2228
2229 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2230 DELEGRETURN4args *, args);
2231
2232 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2233 resp->status = *cs->statusp = status;
2234 if (status != NFS4_OK)
2235 goto out;
2236
2237 /* Ensure specified filehandle matches */
2238 if (cs->vp != dsp->rds_finfo->rf_vp) {
2239 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2240 } else
2241 rfs4_return_deleg(dsp, FALSE);
2242
2243 rfs4_update_lease(dsp->rds_client);
2244
2245 rfs4_deleg_state_rele(dsp);
2246 out:
2247 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2248 DELEGRETURN4res *, resp);
2249 }
2250
2251 /*
2252 * Check to see if a given "flavor" is an explicitly shared flavor.
2253 * The assumption of this routine is the "flavor" is already a valid
2254 * flavor in the secinfo list of "exi".
2255 *
2256 * e.g.
2257 * # share -o sec=flavor1 /export
2258 * # share -o sec=flavor2 /export/home
2259 *
2260 * flavor2 is not an explicitly shared flavor for /export,
2261 * however it is in the secinfo list for /export thru the
2262 * server namespace setup.
2263 */
2264 int
is_exported_sec(int flavor,struct exportinfo * exi)2265 is_exported_sec(int flavor, struct exportinfo *exi)
2266 {
2267 int i;
2268 struct secinfo *sp;
2269
2270 sp = exi->exi_export.ex_secinfo;
2271 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2272 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2273 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2274 return (SEC_REF_EXPORTED(&sp[i]));
2275 }
2276 }
2277
2278 /* Should not reach this point based on the assumption */
2279 return (0);
2280 }
2281
2282 /*
2283 * Check if the security flavor used in the request matches what is
2284 * required at the export point or at the root pseudo node (exi_root).
2285 *
2286 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2287 *
2288 */
2289 static int
secinfo_match_or_authnone(struct compound_state * cs)2290 secinfo_match_or_authnone(struct compound_state *cs)
2291 {
2292 int i;
2293 struct secinfo *sp;
2294
2295 /*
2296 * Check cs->nfsflavor (from the request) against
2297 * the current export data in cs->exi.
2298 */
2299 sp = cs->exi->exi_export.ex_secinfo;
2300 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2301 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2302 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2303 return (1);
2304 }
2305
2306 return (0);
2307 }
2308
2309 /*
2310 * Check the access authority for the client and return the correct error.
2311 */
2312 nfsstat4
call_checkauth4(struct compound_state * cs,struct svc_req * req)2313 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2314 {
2315 int authres;
2316
2317 /*
2318 * First, check if the security flavor used in the request
2319 * are among the flavors set in the server namespace.
2320 */
2321 if (!secinfo_match_or_authnone(cs)) {
2322 *cs->statusp = NFS4ERR_WRONGSEC;
2323 return (*cs->statusp);
2324 }
2325
2326 authres = checkauth4(cs, req);
2327
2328 if (authres > 0) {
2329 *cs->statusp = NFS4_OK;
2330 if (! (cs->access & CS_ACCESS_LIMITED))
2331 cs->access = CS_ACCESS_OK;
2332 } else if (authres == 0) {
2333 *cs->statusp = NFS4ERR_ACCESS;
2334 } else if (authres == -2) {
2335 *cs->statusp = NFS4ERR_WRONGSEC;
2336 } else {
2337 *cs->statusp = NFS4ERR_DELAY;
2338 }
2339 return (*cs->statusp);
2340 }
2341
2342 /*
2343 * bitmap4_to_attrmask is called by getattr and readdir.
2344 * It sets up the vattr mask and determines whether vfsstat call is needed
2345 * based on the input bitmap.
2346 * Returns nfsv4 status.
2347 */
2348 static nfsstat4
bitmap4_to_attrmask(bitmap4 breq,struct nfs4_svgetit_arg * sargp)2349 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2350 {
2351 int i;
2352 uint_t va_mask;
2353 struct statvfs64 *sbp = sargp->sbp;
2354
2355 sargp->sbp = NULL;
2356 sargp->flag = 0;
2357 sargp->rdattr_error = NFS4_OK;
2358 sargp->mntdfid_set = FALSE;
2359 if (sargp->cs->vp)
2360 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2361 FH4_ATTRDIR | FH4_NAMEDATTR);
2362 else
2363 sargp->xattr = 0;
2364
2365 /*
2366 * Set rdattr_error_req to true if return error per
2367 * failed entry rather than fail the readdir.
2368 */
2369 if (breq & FATTR4_RDATTR_ERROR_MASK)
2370 sargp->rdattr_error_req = 1;
2371 else
2372 sargp->rdattr_error_req = 0;
2373
2374 /*
2375 * generate the va_mask
2376 * Handle the easy cases first
2377 */
2378 switch (breq) {
2379 case NFS4_NTOV_ATTR_MASK:
2380 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2381 return (NFS4_OK);
2382
2383 case NFS4_FS_ATTR_MASK:
2384 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2385 sargp->sbp = sbp;
2386 return (NFS4_OK);
2387
2388 case NFS4_NTOV_ATTR_CACHE_MASK:
2389 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2390 return (NFS4_OK);
2391
2392 case FATTR4_LEASE_TIME_MASK:
2393 sargp->vap->va_mask = 0;
2394 return (NFS4_OK);
2395
2396 default:
2397 va_mask = 0;
2398 for (i = 0; i < nfs4_ntov_map_size; i++) {
2399 if ((breq & nfs4_ntov_map[i].fbit) &&
2400 nfs4_ntov_map[i].vbit)
2401 va_mask |= nfs4_ntov_map[i].vbit;
2402 }
2403
2404 /*
2405 * Check is vfsstat is needed
2406 */
2407 if (breq & NFS4_FS_ATTR_MASK)
2408 sargp->sbp = sbp;
2409
2410 sargp->vap->va_mask = va_mask;
2411 return (NFS4_OK);
2412 }
2413 /* NOTREACHED */
2414 }
2415
2416 /*
2417 * bitmap4_get_sysattrs is called by getattr and readdir.
2418 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2419 * Returns nfsv4 status.
2420 */
2421 static nfsstat4
bitmap4_get_sysattrs(struct nfs4_svgetit_arg * sargp)2422 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2423 {
2424 int error;
2425 struct compound_state *cs = sargp->cs;
2426 vnode_t *vp = cs->vp;
2427
2428 if (sargp->sbp != NULL) {
2429 error = VFS_STATVFS(vp->v_vfsp, sargp->sbp);
2430 if (error != 0) {
2431 sargp->sbp = NULL; /* to identify error */
2432 return (puterrno4(error));
2433 }
2434 }
2435
2436 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2437 }
2438
2439 static void
nfs4_ntov_table_init(struct nfs4_ntov_table * ntovp)2440 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2441 {
2442 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2443 KM_SLEEP);
2444 ntovp->attrcnt = 0;
2445 ntovp->vfsstat = FALSE;
2446 }
2447
2448 static void
nfs4_ntov_table_free(struct nfs4_ntov_table * ntovp,struct nfs4_svgetit_arg * sargp)2449 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2450 struct nfs4_svgetit_arg *sargp)
2451 {
2452 int i;
2453 union nfs4_attr_u *na;
2454 uint8_t *amap;
2455
2456 /*
2457 * XXX Should do the same checks for whether the bit is set
2458 */
2459 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2460 i < ntovp->attrcnt; i++, na++, amap++) {
2461 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2462 NFS4ATTR_FREEIT, sargp, na);
2463 }
2464 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2465 /*
2466 * xdr_free for getattr will be done later
2467 */
2468 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2469 i < ntovp->attrcnt; i++, na++, amap++) {
2470 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2471 }
2472 }
2473 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2474 }
2475
/*
 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
 *
 * breq   - bitmap of the attributes the client requested.
 * fattrp - result: attrmask receives the bits of the attributes that were
 *          actually fetched, attrlist4/attrlist4_len the XDR-encoded
 *          values (buffer allocated here, freed by rfs4_op_getattr_free()).
 * sargp  - per-request context handed to every sv_getit callback; its op
 *          and flag fields are reset here and rdattr_error may be updated.
 *
 * Returns NFS4_OK on success, NFS4ERR_INVAL when a write-only attribute
 * is requested, NFS4ERR_SERVERFAULT when XDR encoding of a value fails,
 * or the puterrno4() translation of a sv_getit error when the client did
 * not ask for rdattr_error.
 *
 * Note: on the early "goto done" error path attrlist4 and attrlist4_len
 * are not assigned by this function, while attrmask may already hold the
 * bits of the attributes fetched before the failure.
 */
static nfsstat4
do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
    struct nfs4_svgetit_arg *sargp)
{
	int error = 0;
	int i, k;
	struct nfs4_ntov_table ntov;
	XDR xdr;
	ulong_t xdr_size;
	char *xdr_attrs;
	nfsstat4 status = NFS4_OK;
	/* remembered so a change made by the callbacks can be detected */
	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
	union nfs4_attr_u *na;
	uint8_t *amap;

	sargp->op = NFS4ATTR_GETIT;
	sargp->flag = 0;

	fattrp->attrmask = 0;
	/* if no bits requested, then return empty fattr4 */
	if (breq == 0) {
		fattrp->attrlist4_len = 0;
		fattrp->attrlist4 = NULL;
		return (NFS4_OK);
	}

	/*
	 * return NFS4ERR_INVAL when client requests write-only attrs
	 */
	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
		return (NFS4ERR_INVAL);

	/*
	 * na and amap are write cursors into the table: na points at the
	 * next free value slot, amap at the slot recording which
	 * nfs4_ntov_map entry produced that value.
	 */
	nfs4_ntov_table_init(&ntov);
	na = ntov.na;
	amap = ntov.amap;

	/*
	 * Walk the conversion map and fetch every attribute that is both
	 * requested and supported.
	 */
	for (i = 0; i < nfs4_ntov_map_size; i++) {
		if (breq & nfs4_ntov_map[i].fbit) {
			if ((*nfs4_ntov_map[i].sv_getit)(
			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {

				error = (*nfs4_ntov_map[i].sv_getit)(
				    NFS4ATTR_GETIT, sargp, na);

				/*
				 * Possible error values:
				 * >0 if sv_getit failed to
				 * get the attr; 0 if succeeded;
				 * <0 if rdattr_error and the
				 * attribute cannot be returned.
				 */
				if (error && !(sargp->rdattr_error_req))
					goto done;
				/*
				 * With rdattr_error requested a failure
				 * only affects this attribute: a fetched
				 * value is recorded and the cursors
				 * advance; the first positive error is
				 * latched in rdattr_error.
				 */
				if (error == 0) {
					fattrp->attrmask |=
					    nfs4_ntov_map[i].fbit;
					*amap++ =
					    (uint8_t)nfs4_ntov_map[i].nval;
					na++;
					(ntov.attrcnt)++;
				} else if ((error > 0) &&
				    (sargp->rdattr_error == NFS4_OK)) {
					sargp->rdattr_error = puterrno4(error);
				}
				/* per-attribute errors are not propagated */
				error = 0;
			}
		}
	}

	/*
	 * If rdattr_error was set after the return value for it was assigned,
	 * update it.
	 *
	 * The scan skips recorded entries numbered below
	 * FATTR4_RDATTR_ERROR and stops at the first entry that is not
	 * below it; the value is re-fetched only if that entry is
	 * FATTR4_RDATTR_ERROR itself.
	 */
	if (prev_rdattr_error != sargp->rdattr_error) {
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			k = *amap;
			if (k < FATTR4_RDATTR_ERROR) {
				continue;
			}
			if ((k == FATTR4_RDATTR_ERROR) &&
			    ((*nfs4_ntov_map[k].sv_getit)(
			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {

				(void) (*nfs4_ntov_map[k].sv_getit)(
				    NFS4ATTR_GETIT, sargp, na);
			}
			break;
		}
	}

	/* First pass over the values: total XDR-encoded size. */
	xdr_size = 0;
	na = ntov.na;
	amap = ntov.amap;
	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
	}

	fattrp->attrlist4_len = xdr_size;
	if (xdr_size) {
		/* freed by rfs4_op_getattr_free() */
		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);

		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);

		/* Second pass: encode each value into the buffer. */
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
				DTRACE_PROBE1(nfss__e__getattr4_encfail,
				    int, *amap);
				status = NFS4ERR_SERVERFAULT;
				break;
			}
		}
		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
	} else {
		fattrp->attrlist4 = NULL;
	}
done:

	nfs4_ntov_table_free(&ntov, sargp);

	/* only non-zero when we arrived via the early "goto done" */
	if (error != 0)
		status = puterrno4(error);

	return (status);
}
2614
2615 /* ARGSUSED */
2616 static void
rfs4_op_getattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2617 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2618 struct compound_state *cs)
2619 {
2620 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2621 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2622 struct nfs4_svgetit_arg sarg;
2623 struct statvfs64 sb;
2624 nfsstat4 status;
2625
2626 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2627 GETATTR4args *, args);
2628
2629 if (cs->vp == NULL) {
2630 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2631 goto out;
2632 }
2633
2634 if (cs->access == CS_ACCESS_DENIED) {
2635 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2636 goto out;
2637 }
2638
2639 sarg.sbp = &sb;
2640 sarg.cs = cs;
2641 sarg.is_referral = B_FALSE;
2642
2643 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2644 if (status == NFS4_OK) {
2645
2646 status = bitmap4_get_sysattrs(&sarg);
2647 if (status == NFS4_OK) {
2648
2649 /* Is this a referral? */
2650 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2651 /* Older V4 Solaris client sees a link */
2652 if (client_is_downrev(req))
2653 sarg.vap->va_type = VLNK;
2654 else
2655 sarg.is_referral = B_TRUE;
2656 }
2657
2658 status = do_rfs4_op_getattr(args->attr_request,
2659 &resp->obj_attributes, &sarg);
2660 }
2661 }
2662 *cs->statusp = resp->status = status;
2663 out:
2664 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2665 GETATTR4res *, resp);
2666 }
2667
2668 static void
rfs4_op_getattr_free(nfs_resop4 * resop)2669 rfs4_op_getattr_free(nfs_resop4 *resop)
2670 {
2671 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2672
2673 nfs4_fattr4_free(&resp->obj_attributes);
2674 }
2675
2676 /* ARGSUSED */
2677 static void
rfs4_op_getfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2678 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2679 struct compound_state *cs)
2680 {
2681 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2682
2683 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2684
2685 if (cs->vp == NULL) {
2686 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2687 goto out;
2688 }
2689 if (cs->access == CS_ACCESS_DENIED) {
2690 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2691 goto out;
2692 }
2693
2694 /* check for reparse point at the share point */
2695 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2696 /* it's all bad */
2697 cs->exi->exi_moved = 1;
2698 *cs->statusp = resp->status = NFS4ERR_MOVED;
2699 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2700 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2701 return;
2702 }
2703
2704 /* check for reparse point at vp */
2705 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2706 /* it's not all bad */
2707 *cs->statusp = resp->status = NFS4ERR_MOVED;
2708 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2709 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2710 return;
2711 }
2712
2713 resp->object.nfs_fh4_val =
2714 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2715 nfs_fh4_copy(&cs->fh, &resp->object);
2716 *cs->statusp = resp->status = NFS4_OK;
2717 out:
2718 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2719 GETFH4res *, resp);
2720 }
2721
2722 static void
rfs4_op_getfh_free(nfs_resop4 * resop)2723 rfs4_op_getfh_free(nfs_resop4 *resop)
2724 {
2725 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2726
2727 if (resp->status == NFS4_OK &&
2728 resp->object.nfs_fh4_val != NULL) {
2729 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2730 resp->object.nfs_fh4_val = NULL;
2731 resp->object.nfs_fh4_len = 0;
2732 }
2733 }
2734
2735 /*
2736 * illegal: args: void
2737 * res : status (NFS4ERR_OP_ILLEGAL)
2738 */
2739 /* ARGSUSED */
2740 static void
rfs4_op_illegal(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2741 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2742 struct svc_req *req, struct compound_state *cs)
2743 {
2744 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2745
2746 resop->resop = OP_ILLEGAL;
2747 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2748 }
2749
2750 /* ARGSUSED */
2751 static void
rfs4_op_notsup(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2752 rfs4_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2753 struct compound_state *cs)
2754 {
2755 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP;
2756 }
2757
/*
 * link:	args: SAVED_FH: file, CURRENT_FH: target directory
 *		res: status. If success - CURRENT_FH unchanged, return change_info
 *
 * Creates a hard link named args->newname in the directory of the current
 * filehandle (cs->vp) that refers to the object of the saved filehandle
 * (cs->saved_vp).
 *
 * Status values produced directly by this function:
 *	NFS4ERR_NOFILEHANDLE	saved or current filehandle missing
 *	NFS4ERR_ACCESS		target directory is a mount point, or
 *				access was denied
 *	NFS4ERR_ISDIR		source object is a directory
 *	NFS4ERR_NOTDIR		target is not a directory
 *	NFS4ERR_XDEV		source and target are in different exports
 *	NFS4ERR_INVAL		name could not be converted
 *	NFS4ERR_NAMETOOLONG	converted name longer than MAXNAMELEN
 *	NFS4ERR_ROFS		rdonly4() reports a read-only export
 * plus whatever utf8_dir_verify() returns and the puterrno4() translation
 * of VOP_GETATTR/VOP_LINK errors.
 *
 * The status is stored in both resp->status and *cs->statusp.  On
 * success resp->cinfo holds the directory's change information.
 */
/* ARGSUSED */
static void
rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	LINK4args *args = &argop->nfs_argop4_u.oplink;
	LINK4res *resp = &resop->nfs_resop4_u.oplink;
	int error;
	vnode_t *vp;
	vnode_t *dvp;
	/* directory attributes: before, initial-after and after the link */
	struct vattr bdva, idva, adva;
	char *nm;
	uint_t len;
	struct sockaddr *ca;
	char *name = NULL;
	nfsstat4 status;

	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
	    LINK4args *, args);

	/* SAVED_FH: source object */
	vp = cs->saved_vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/* CURRENT_FH: target directory */
	dvp = cs->vp;
	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is a non-shared filesystem mounted on this vnode,
	 * do not allow to link any file in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Check source object's type validity */
	if (vp->v_type == VDIR) {
		*cs->statusp = resp->status = NFS4ERR_ISDIR;
		goto out;
	}

	/* Check target directory's type */
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}

	/* Source and target must belong to the same export */
	if (cs->saved_exi != cs->exi) {
		*cs->statusp = resp->status = NFS4ERR_XDEV;
		goto out;
	}

	status = utf8_dir_verify(&args->newname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	/*
	 * Convert the new name; nm is a buffer of len bytes that every
	 * exit from here on has to free.
	 */
	nm = utf8_to_fn(&args->newname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		kmem_free(nm, len);
		goto out;
	}

	/* Get "before" change value */
	bdva.va_mask = AT_CTIME|AT_SEQ;
	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		kmem_free(nm, len);
		goto out;
	}

	/*
	 * Convert the name for this client/export.  The result is either
	 * nm itself or a separate buffer of MAXPATHLEN + 1 bytes, which is
	 * why it is only freed below when it differs from nm.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)

	error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);

	/* The names are not needed any more, whatever the outcome */
	if (nm != name)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	/*
	 * Get the initial "after" sequence number, if it fails, set to zero
	 */
	idva.va_mask = AT_SEQ;
	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
		idva.va_seq = 0;

	/*
	 * Force modified data and metadata out to stable storage.
	 * This is done before the VOP_LINK result is examined, i.e. also
	 * when the link failed.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	/*
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	adva.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
		adva.va_ctime = bdva.va_ctime;
		adva.va_seq = 0;
	}

	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)

	/*
	 * The cinfo.atomic = TRUE only if we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
	 */
	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
	    LINK4res *, resp);
}
2924
2925 /*
2926 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2927 */
2928
2929 /* ARGSUSED */
2930 static nfsstat4
do_rfs4_op_lookup(char * nm,struct svc_req * req,struct compound_state * cs)2931 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2932 {
2933 int error;
2934 int different_export = 0;
2935 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2936 struct exportinfo *exi = NULL, *pre_exi = NULL;
2937 nfsstat4 stat;
2938 fid_t fid;
2939 int attrdir, dotdot, walk;
2940 bool_t is_newvp = FALSE;
2941
2942 if (cs->vp->v_flag & V_XATTRDIR) {
2943 attrdir = 1;
2944 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2945 } else {
2946 attrdir = 0;
2947 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2948 }
2949
2950 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2951
2952 /*
2953 * If dotdotting, then need to check whether it's
2954 * above the root of a filesystem, or above an
2955 * export point.
2956 */
2957 if (dotdot) {
2958 vnode_t *zone_rootvp;
2959
2960 ASSERT(cs->exi != NULL);
2961 zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2962 /*
2963 * If dotdotting at the root of a filesystem, then
2964 * need to traverse back to the mounted-on filesystem
2965 * and do the dotdot lookup there.
2966 */
2967 if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2968
2969 /*
2970 * If at the system root, then can
2971 * go up no further.
2972 */
2973 if (VN_CMP(cs->vp, zone_rootvp))
2974 return (puterrno4(ENOENT));
2975
2976 /*
2977 * Traverse back to the mounted-on filesystem
2978 */
2979 cs->vp = untraverse(cs->vp, zone_rootvp);
2980
2981 /*
2982 * Set the different_export flag so we remember
2983 * to pick up a new exportinfo entry for
2984 * this new filesystem.
2985 */
2986 different_export = 1;
2987 } else {
2988
2989 /*
2990 * If dotdotting above an export point then set
2991 * the different_export to get new export info.
2992 */
2993 different_export = nfs_exported(cs->exi, cs->vp);
2994 }
2995 }
2996
2997 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2998 NULL, NULL, NULL);
2999 if (error)
3000 return (puterrno4(error));
3001
3002 /*
3003 * If the vnode is in a pseudo filesystem, check whether it is visible.
3004 *
3005 * XXX if the vnode is a symlink and it is not visible in
3006 * a pseudo filesystem, return ENOENT (not following symlink).
3007 * V4 client can not mount such symlink. This is a regression
3008 * from V2/V3.
3009 *
3010 * In the same exported filesystem, if the security flavor used
3011 * is not an explicitly shared flavor, limit the view to the visible
3012 * list entries only. This is not a WRONGSEC case because it's already
3013 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
3014 */
3015 if (!different_export &&
3016 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
3017 cs->access & CS_ACCESS_LIMITED)) {
3018 if (! nfs_visible(cs->exi, vp, &different_export)) {
3019 VN_RELE(vp);
3020 return (puterrno4(ENOENT));
3021 }
3022 }
3023
3024 /*
3025 * If it's a mountpoint, then traverse it.
3026 */
3027 if (vn_ismntpt(vp)) {
3028 pre_exi = cs->exi; /* save pre-traversed exportinfo */
3029 pre_tvp = vp; /* save pre-traversed vnode */
3030
3031 /*
3032 * hold pre_tvp to counteract rele by traverse. We will
3033 * need pre_tvp below if checkexport4 fails
3034 */
3035 VN_HOLD(pre_tvp);
3036 if ((error = traverse(&vp)) != 0) {
3037 VN_RELE(vp);
3038 VN_RELE(pre_tvp);
3039 return (puterrno4(error));
3040 }
3041 different_export = 1;
3042 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
3043 /*
3044 * The vfsp comparison is to handle the case where
3045 * a LOFS mount is shared. lo_lookup traverses mount points,
3046 * and NFS is unaware of local fs transistions because
3047 * v_vfsmountedhere isn't set. For this special LOFS case,
3048 * the dir and the obj returned by lookup will have different
3049 * vfs ptrs.
3050 */
3051 different_export = 1;
3052 }
3053
3054 if (different_export) {
3055
3056 bzero(&fid, sizeof (fid));
3057 fid.fid_len = MAXFIDSZ;
3058 error = vop_fid_pseudo(vp, &fid);
3059 if (error) {
3060 VN_RELE(vp);
3061 if (pre_tvp)
3062 VN_RELE(pre_tvp);
3063 return (puterrno4(error));
3064 }
3065
3066 if (dotdot)
3067 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
3068 else
3069 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
3070
3071 if (exi == NULL) {
3072 if (pre_tvp) {
3073 /*
3074 * If this vnode is a mounted-on vnode,
3075 * but the mounted-on file system is not
3076 * exported, send back the filehandle for
3077 * the mounted-on vnode, not the root of
3078 * the mounted-on file system.
3079 */
3080 VN_RELE(vp);
3081 vp = pre_tvp;
3082 exi = pre_exi;
3083 } else {
3084 VN_RELE(vp);
3085 return (puterrno4(EACCES));
3086 }
3087 } else if (pre_tvp) {
3088 /* we're done with pre_tvp now. release extra hold */
3089 VN_RELE(pre_tvp);
3090 }
3091
3092 cs->exi = exi;
3093
3094 /*
3095 * Now we do a checkauth4. The reason is that
3096 * this client/user may not have access to the new
3097 * exported file system, and if they do,
3098 * the client/user may be mapped to a different uid.
3099 *
3100 * We start with a new cr, because the checkauth4 done
3101 * in the PUT*FH operation over wrote the cred's uid,
3102 * gid, etc, and we want the real thing before calling
3103 * checkauth4()
3104 */
3105 crfree(cs->cr);
3106 cs->cr = crdup(cs->basecr);
3107
3108 oldvp = cs->vp;
3109 cs->vp = vp;
3110 is_newvp = TRUE;
3111
3112 stat = call_checkauth4(cs, req);
3113 if (stat != NFS4_OK) {
3114 VN_RELE(cs->vp);
3115 cs->vp = oldvp;
3116 return (stat);
3117 }
3118 }
3119
3120 /*
3121 * After various NFS checks, do a label check on the path
3122 * component. The label on this path should either be the
3123 * global zone's label or a zone's label. We are only
3124 * interested in the zone's label because exported files
3125 * in global zone is accessible (though read-only) to
3126 * clients. The exportability/visibility check is already
3127 * done before reaching this code.
3128 */
3129 if (is_system_labeled()) {
3130 bslabel_t *clabel;
3131
3132 ASSERT(req->rq_label != NULL);
3133 clabel = req->rq_label;
3134 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
3135 "got client label from request(1)", struct svc_req *, req);
3136
3137 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3138 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3139 cs->exi)) {
3140 error = EACCES;
3141 goto err_out;
3142 }
3143 } else {
3144 /*
3145 * We grant access to admin_low label clients
3146 * only if the client is trusted, i.e. also
3147 * running Solaris Trusted Extension.
3148 */
3149 struct sockaddr *ca;
3150 int addr_type;
3151 void *ipaddr;
3152 tsol_tpc_t *tp;
3153
3154 ca = (struct sockaddr *)svc_getrpccaller(
3155 req->rq_xprt)->buf;
3156 if (ca->sa_family == AF_INET) {
3157 addr_type = IPV4_VERSION;
3158 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
3159 } else if (ca->sa_family == AF_INET6) {
3160 addr_type = IPV6_VERSION;
3161 ipaddr = &((struct sockaddr_in6 *)
3162 ca)->sin6_addr;
3163 }
3164 tp = find_tpc(ipaddr, addr_type, B_FALSE);
3165 if (tp == NULL || tp->tpc_tp.tp_doi !=
3166 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
3167 SUN_CIPSO) {
3168 if (tp != NULL)
3169 TPC_RELE(tp);
3170 error = EACCES;
3171 goto err_out;
3172 }
3173 TPC_RELE(tp);
3174 }
3175 }
3176
3177 error = makefh4(&cs->fh, vp, cs->exi);
3178
3179 err_out:
3180 if (error) {
3181 if (is_newvp) {
3182 VN_RELE(cs->vp);
3183 cs->vp = oldvp;
3184 } else
3185 VN_RELE(vp);
3186 return (puterrno4(error));
3187 }
3188
3189 if (!is_newvp) {
3190 if (cs->vp)
3191 VN_RELE(cs->vp);
3192 cs->vp = vp;
3193 } else if (oldvp)
3194 VN_RELE(oldvp);
3195
3196 /*
3197 * if did lookup on attrdir and didn't lookup .., set named
3198 * attr fh flag
3199 */
3200 if (attrdir && ! dotdot)
3201 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
3202
3203 /* Assume false for now, open proc will set this */
3204 cs->mandlock = FALSE;
3205
3206 return (NFS4_OK);
3207 }
3208
3209 /* ARGSUSED */
3210 static void
rfs4_op_lookup(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3211 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3212 struct compound_state *cs)
3213 {
3214 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
3215 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
3216 char *nm;
3217 uint_t len;
3218 struct sockaddr *ca;
3219 char *name = NULL;
3220 nfsstat4 status;
3221
3222 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
3223 LOOKUP4args *, args);
3224
3225 if (cs->vp == NULL) {
3226 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3227 goto out;
3228 }
3229
3230 if (cs->vp->v_type == VLNK) {
3231 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
3232 goto out;
3233 }
3234
3235 if (cs->vp->v_type != VDIR) {
3236 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3237 goto out;
3238 }
3239
3240 status = utf8_dir_verify(&args->objname);
3241 if (status != NFS4_OK) {
3242 *cs->statusp = resp->status = status;
3243 goto out;
3244 }
3245
3246 nm = utf8_to_str(&args->objname, &len, NULL);
3247 if (nm == NULL) {
3248 *cs->statusp = resp->status = NFS4ERR_INVAL;
3249 goto out;
3250 }
3251
3252 if (len > MAXNAMELEN) {
3253 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3254 kmem_free(nm, len);
3255 goto out;
3256 }
3257
3258 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3259 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3260 MAXPATHLEN + 1);
3261
3262 if (name == NULL) {
3263 *cs->statusp = resp->status = NFS4ERR_INVAL;
3264 kmem_free(nm, len);
3265 goto out;
3266 }
3267
3268 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3269
3270 if (name != nm)
3271 kmem_free(name, MAXPATHLEN + 1);
3272 kmem_free(nm, len);
3273
3274 out:
3275 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3276 LOOKUP4res *, resp);
3277 }
3278
3279 /* ARGSUSED */
3280 static void
rfs4_op_lookupp(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3281 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3282 struct compound_state *cs)
3283 {
3284 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3285
3286 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3287
3288 if (cs->vp == NULL) {
3289 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3290 goto out;
3291 }
3292
3293 if (cs->vp->v_type == VLNK) {
3294 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
3295 goto out;
3296 }
3297
3298 if (cs->vp->v_type != VDIR) {
3299 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3300 goto out;
3301 }
3302
3303 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3304
3305 /*
3306 * From NFSV4 Specification, LOOKUPP should not check for
3307 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3308 */
3309 if (resp->status == NFS4ERR_WRONGSEC) {
3310 *cs->statusp = resp->status = NFS4_OK;
3311 }
3312
3313 out:
3314 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3315 LOOKUPP4res *, resp);
3316 }
3317
3318
3319 /*ARGSUSED2*/
3320 static void
rfs4_op_openattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3321 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3322 struct compound_state *cs)
3323 {
3324 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3325 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3326 vnode_t *avp = NULL;
3327 int lookup_flags = LOOKUP_XATTR, error;
3328 int exp_ro = 0;
3329
3330 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3331 OPENATTR4args *, args);
3332
3333 if (cs->vp == NULL) {
3334 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3335 goto out;
3336 }
3337
3338 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3339 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3340 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3341 goto out;
3342 }
3343
3344 /*
3345 * If file system supports passing ACE mask to VOP_ACCESS then
3346 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3347 */
3348
3349 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3350 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3351 V_ACE_MASK, cs->cr, NULL);
3352 else
3353 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3354 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3355 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3356
3357 if (error) {
3358 *cs->statusp = resp->status = puterrno4(EACCES);
3359 goto out;
3360 }
3361
3362 /*
3363 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3364 * the file system is exported read-only -- regardless of
3365 * createdir flag. Otherwise the attrdir would be created
3366 * (assuming server fs isn't mounted readonly locally). If
3367 * VOP_LOOKUP returns ENOENT in this case, the error will
3368 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3369 * because specfs has no VOP_LOOKUP op, so the macro would
3370 * return ENOSYS. EINVAL is returned by all (current)
3371 * Solaris file system implementations when any of their
3372 * restrictions are violated (xattr(dir) can't have xattrdir).
3373 * Returning NOTSUPP is more appropriate in this case
3374 * because the object will never be able to have an attrdir.
3375 */
3376 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3377 lookup_flags |= CREATE_XATTR_DIR;
3378
3379 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3380 NULL, NULL, NULL);
3381
3382 if (error) {
3383 if (error == ENOENT && args->createdir && exp_ro)
3384 *cs->statusp = resp->status = puterrno4(EROFS);
3385 else if (error == EINVAL || error == ENOSYS)
3386 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3387 else
3388 *cs->statusp = resp->status = puterrno4(error);
3389 goto out;
3390 }
3391
3392 ASSERT(avp->v_flag & V_XATTRDIR);
3393
3394 error = makefh4(&cs->fh, avp, cs->exi);
3395
3396 if (error) {
3397 VN_RELE(avp);
3398 *cs->statusp = resp->status = puterrno4(error);
3399 goto out;
3400 }
3401
3402 VN_RELE(cs->vp);
3403 cs->vp = avp;
3404
3405 /*
3406 * There is no requirement for an attrdir fh flag
3407 * because the attrdir has a vnode flag to distinguish
3408 * it from regular (non-xattr) directories. The
3409 * FH4_ATTRDIR flag is set for future sanity checks.
3410 */
3411 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3412 *cs->statusp = resp->status = NFS4_OK;
3413
3414 out:
3415 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3416 OPENATTR4res *, resp);
3417 }
3418
3419 static int
do_io(int direction,vnode_t * vp,struct uio * uio,int ioflag,cred_t * cred,caller_context_t * ct)3420 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3421 caller_context_t *ct)
3422 {
3423 int error;
3424 int i;
3425 clock_t delaytime;
3426
3427 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3428
3429 /*
3430 * Don't block on mandatory locks. If this routine returns
3431 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3432 */
3433 uio->uio_fmode = FNONBLOCK;
3434
3435 for (i = 0; i < rfs4_maxlock_tries; i++) {
3436
3437
3438 if (direction == FREAD) {
3439 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3440 error = VOP_READ(vp, uio, ioflag, cred, ct);
3441 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3442 } else {
3443 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3444 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3445 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3446 }
3447
3448 if (error != EAGAIN)
3449 break;
3450
3451 if (i < rfs4_maxlock_tries - 1) {
3452 delay(delaytime);
3453 delaytime *= 2;
3454 }
3455 }
3456
3457 return (error);
3458 }
3459
/*
 * READ: read up to args->count bytes starting at args->offset from the
 * regular file in the current filehandle.
 *
 * The data is returned one of three ways:
 *  - via the client's RDMA write list when args->wlist is set;
 *  - in buffers loaned through VOP_REQZCBUF when nfs_loaned_buffers is
 *    set and RDMA is not in use;
 *  - otherwise in an mblk chain from rfs_read_alloc().
 *
 * The status is stored in both resp->status and *cs->statusp.
 */
/* ARGSUSED */
static void
rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	READ4args *args = &argop->nfs_argop4_u.opread;
	READ4res *resp = &resop->nfs_resop4_u.opread;
	int error;
	int verror;
	vnode_t *vp;
	struct vattr va;
	struct iovec iov, *iovp = NULL;
	int iovcnt;
	struct uio uio;
	u_offset_t offset;
	bool_t *deleg = &cs->deleg;
	nfsstat4 stat;
	int in_crit = 0;
	mblk_t *mp = NULL;
	int alloc_err = 0;
	int rdma_used = 0;
	int loaned_buffers;
	caller_context_t ct;
	struct uio *uiop;

	DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
	    READ4args, args);

	vp = cs->vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}
	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	get_stateid4(cs, &args->stateid);

	/* The stateid check also fills in ct, used by the VOP calls below. */
	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
	    deleg, TRUE, &ct, cs)) != NFS4_OK) {
		*cs->statusp = resp->status = stat;
		goto out;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
		    &ct)) {
			*cs->statusp = resp->status = NFS4ERR_LOCKED;
			goto out;
		}
	}

	/* A write list means RDMA; it must be able to hold the request. */
	if (args->wlist) {
		if (args->count > clist_len(args->wlist)) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			goto out;
		}
		rdma_used = 1;
	}

	/* use loaned buffers for TCP */
	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;

	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (verror) {
		*cs->statusp = resp->status = puterrno4(verror);
		goto out;
	}

	if (vp->v_type != VREG) {
		*cs->statusp = resp->status =
		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
		goto out;
	}

	/*
	 * The file's owner is not subjected to an access check.  Anyone
	 * else needs read permission or, failing that, execute permission;
	 * the error reported is the one from the VEXEC check.
	 */
	if (crgetuid(cs->cr) != va.va_uid &&
	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* A read at or beyond the file size succeeds with no data and eof. */
	offset = args->offset;
	if (offset >= va.va_size) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->eof = TRUE;
		resp->data_len = 0;
		resp->data_val = NULL;
		resp->mblk = NULL;
		/* RDMA */
		resp->wlist = args->wlist;
		resp->wlist_len = resp->data_len;
		*cs->statusp = resp->status = NFS4_OK;
		if (resp->wlist)
			clist_zero_len(resp->wlist);
		goto out;
	}

	/* A zero-length read succeeds with no data and eof clear. */
	if (args->count == 0) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->eof = FALSE;
		resp->data_len = 0;
		resp->data_val = NULL;
		resp->mblk = NULL;
		/* RDMA */
		resp->wlist = args->wlist;
		resp->wlist_len = resp->data_len;
		if (resp->wlist)
			clist_zero_len(resp->wlist);
		goto out;
	}

	/*
	 * Do not allocate memory more than maximum allowed
	 * transfer size
	 */
	if (args->count > rfs4_tsize(req))
		args->count = rfs4_tsize(req);

	if (loaned_buffers) {
		uiop = (uio_t *)rfs_setup_xuio(vp);
		ASSERT(uiop != NULL);
		uiop->uio_segflg = UIO_SYSSPACE;
		uiop->uio_loffset = args->offset;
		uiop->uio_resid = args->count;

		/* Jump to do the read if successful */
		if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
			/*
			 * Need to hold the vnode until after VOP_RETZCBUF()
			 * is called.
			 */
			VN_HOLD(vp);
			goto doio_read;
		}

		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
		    uiop->uio_loffset, int, uiop->uio_resid);

		uiop->uio_extflg = 0;

		/* failure to setup for zero copy */
		rfs_free_xuio((void *)uiop);
		loaned_buffers = 0;
	}

	/*
	 * If returning data via RDMA Write, then grab the chunk list. If we
	 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
	 */
	if (rdma_used) {
		mp = NULL;
		(void) rdma_get_wchunk(req, &iov, args->wlist);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * It will be freed after the reply has been sent.
		 */
		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
	}

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->count;
	uiop = &uio;

	/* Both the loaned-buffer path and the copy path arrive here. */
doio_read:
	error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);

	/* Re-read the size so eof can be computed after the read. */
	va.va_mask = AT_SIZE;
	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);

	/*
	 * NOTE(review): on the loaned-buffer path mp is still NULL here,
	 * and nothing visible in this function releases the xuio or the
	 * VN_HOLD taken above when the read fails -- confirm whether that
	 * is handled elsewhere.
	 */
	if (error) {
		if (mp)
			freemsg(mp);
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	/* make mblk using zc buffers */
	if (loaned_buffers) {
		mp = uio_to_mblk(uiop);
		ASSERT(mp != NULL);
	}

	*cs->statusp = resp->status = NFS4_OK;

	/* Bytes actually read: requested count minus what is left over. */
	ASSERT(uiop->uio_resid >= 0);
	resp->data_len = args->count - uiop->uio_resid;
	if (mp) {
		resp->data_val = (char *)mp->b_datap->db_base;
		rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
	} else {
		/* RDMA: the data landed in the write chunk. */
		resp->data_val = (caddr_t)iov.iov_base;
	}

	resp->mblk = mp;

	/* eof is only claimed when the post-read size was obtained. */
	if (!verror && offset + resp->data_len == va.va_size)
		resp->eof = TRUE;
	else
		resp->eof = FALSE;

	if (rdma_used) {
		if (!rdma_setup_read_data4(args, resp)) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
		}
	} else {
		resp->wlist = NULL;
	}

out:
	if (in_crit)
		nbl_end_crit(vp);

	/* The iovec array from rfs_read_alloc() is only needed for the I/O. */
	if (iovp != NULL)
		kmem_free(iovp, iovcnt * sizeof (struct iovec));

	DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
	    READ4res *, resp);
}
3707
3708 static void
rfs4_op_read_free(nfs_resop4 * resop)3709 rfs4_op_read_free(nfs_resop4 *resop)
3710 {
3711 READ4res *resp = &resop->nfs_resop4_u.opread;
3712
3713 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3714 freemsg(resp->mblk);
3715 resp->mblk = NULL;
3716 resp->data_val = NULL;
3717 resp->data_len = 0;
3718 }
3719 }
3720
3721 static void
rfs4_op_readdir_free(nfs_resop4 * resop)3722 rfs4_op_readdir_free(nfs_resop4 * resop)
3723 {
3724 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3725
3726 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3727 freeb(resp->mblk);
3728 resp->mblk = NULL;
3729 resp->data_len = 0;
3730 }
3731 }
3732
3733
3734 /* ARGSUSED */
3735 static void
rfs4_op_putpubfh(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3736 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3737 struct compound_state *cs)
3738 {
3739 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3740 int error;
3741 vnode_t *vp;
3742 struct exportinfo *exi, *sav_exi;
3743 nfs_fh4_fmt_t *fh_fmtp;
3744 nfs_export_t *ne = nfs_get_export();
3745
3746 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3747
3748 if (cs->vp) {
3749 VN_RELE(cs->vp);
3750 cs->vp = NULL;
3751 }
3752
3753 if (cs->cr)
3754 crfree(cs->cr);
3755
3756 cs->cr = crdup(cs->basecr);
3757
3758 vp = ne->exi_public->exi_vp;
3759 if (vp == NULL) {
3760 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3761 goto out;
3762 }
3763
3764 error = makefh4(&cs->fh, vp, ne->exi_public);
3765 if (error != 0) {
3766 *cs->statusp = resp->status = puterrno4(error);
3767 goto out;
3768 }
3769 sav_exi = cs->exi;
3770 if (ne->exi_public == ne->exi_root) {
3771 /*
3772 * No filesystem is actually shared public, so we default
3773 * to exi_root. In this case, we must check whether root
3774 * is exported.
3775 */
3776 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3777
3778 /*
3779 * if root filesystem is exported, the exportinfo struct that we
3780 * should use is what checkexport4 returns, because root_exi is
3781 * actually a mostly empty struct.
3782 */
3783 exi = checkexport4(&fh_fmtp->fh4_fsid,
3784 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3785 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3786 } else {
3787 /*
3788 * it's a properly shared filesystem
3789 */
3790 cs->exi = ne->exi_public;
3791 }
3792
3793 if (is_system_labeled()) {
3794 bslabel_t *clabel;
3795
3796 ASSERT(req->rq_label != NULL);
3797 clabel = req->rq_label;
3798 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3799 "got client label from request(1)",
3800 struct svc_req *, req);
3801 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3802 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3803 cs->exi)) {
3804 *cs->statusp = resp->status =
3805 NFS4ERR_SERVERFAULT;
3806 goto out;
3807 }
3808 }
3809 }
3810
3811 VN_HOLD(vp);
3812 cs->vp = vp;
3813
3814 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3815 VN_RELE(cs->vp);
3816 cs->vp = NULL;
3817 cs->exi = sav_exi;
3818 goto out;
3819 }
3820
3821 *cs->statusp = resp->status = NFS4_OK;
3822 out:
3823 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3824 PUTPUBFH4res *, resp);
3825 }
3826
3827 /*
3828 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3829 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3830 * or joe have restrictive search permissions, then we shouldn't let
3831 * the client get a file handle. This is easy to enforce. However, we
3832 * don't know what security flavor should be used until we resolve the
3833 * path name. Another complication is uid mapping. If root is
3834 * the user, then it will be mapped to the anonymous user by default,
3835 * but we won't know that till we've resolved the path name. And we won't
3836 * know what the anonymous user is.
3837 * Luckily, SECINFO is specified to take a full filename.
3838 * So what we will have to in rfs4_op_lookup is check that flavor of
3839 * the target object matches that of the request, and if root was the
3840 * caller, check for the root= and anon= options, and if necessary,
3841 * repeat the lookup using the right cred_t. But that's not done yet.
3842 */
3843 /* ARGSUSED */
3844 static void
rfs4_op_putfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3845 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3846 struct compound_state *cs)
3847 {
3848 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3849 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3850 nfs_fh4_fmt_t *fh_fmtp;
3851
3852 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3853 PUTFH4args *, args);
3854
3855 if (cs->vp) {
3856 VN_RELE(cs->vp);
3857 cs->vp = NULL;
3858 }
3859
3860 if (cs->cr) {
3861 crfree(cs->cr);
3862 cs->cr = NULL;
3863 }
3864
3865
3866 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3867 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3868 goto out;
3869 }
3870
3871 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3872 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3873 NULL);
3874
3875 if (cs->exi == NULL) {
3876 *cs->statusp = resp->status = NFS4ERR_STALE;
3877 goto out;
3878 }
3879
3880 cs->cr = crdup(cs->basecr);
3881
3882 ASSERT(cs->cr != NULL);
3883
3884 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3885 *cs->statusp = resp->status;
3886 goto out;
3887 }
3888
3889 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3890 VN_RELE(cs->vp);
3891 cs->vp = NULL;
3892 goto out;
3893 }
3894
3895 nfs_fh4_copy(&args->object, &cs->fh);
3896 *cs->statusp = resp->status = NFS4_OK;
3897 cs->deleg = FALSE;
3898
3899 out:
3900 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3901 PUTFH4res *, resp);
3902 }
3903
3904 /* ARGSUSED */
3905 static void
rfs4_op_putrootfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3906 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3907 struct compound_state *cs)
3908 {
3909 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3910 int error;
3911 fid_t fid;
3912 struct exportinfo *exi, *sav_exi;
3913
3914 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3915
3916 if (cs->vp) {
3917 VN_RELE(cs->vp);
3918 cs->vp = NULL;
3919 }
3920
3921 if (cs->cr)
3922 crfree(cs->cr);
3923
3924 cs->cr = crdup(cs->basecr);
3925
3926 /*
3927 * Using rootdir, the system root vnode,
3928 * get its fid.
3929 */
3930 bzero(&fid, sizeof (fid));
3931 fid.fid_len = MAXFIDSZ;
3932 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3933 if (error != 0) {
3934 *cs->statusp = resp->status = puterrno4(error);
3935 goto out;
3936 }
3937
3938 /*
3939 * Then use the root fsid & fid it to find out if it's exported
3940 *
3941 * If the server root isn't exported directly, then
3942 * it should at least be a pseudo export based on
3943 * one or more exports further down in the server's
3944 * file tree.
3945 */
3946 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3947 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3948 NFS4_DEBUG(rfs4_debug,
3949 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3950 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3951 goto out;
3952 }
3953
3954 /*
3955 * Now make a filehandle based on the root
3956 * export and root vnode.
3957 */
3958 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3959 if (error != 0) {
3960 *cs->statusp = resp->status = puterrno4(error);
3961 goto out;
3962 }
3963
3964 sav_exi = cs->exi;
3965 cs->exi = exi;
3966
3967 VN_HOLD(ZONE_ROOTVP());
3968 cs->vp = ZONE_ROOTVP();
3969
3970 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3971 VN_RELE(cs->vp);
3972 cs->vp = NULL;
3973 cs->exi = sav_exi;
3974 goto out;
3975 }
3976
3977 *cs->statusp = resp->status = NFS4_OK;
3978 cs->deleg = FALSE;
3979 out:
3980 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3981 PUTROOTFH4res *, resp);
3982 }
3983
3984 /*
3985 * readlink: args: CURRENT_FH.
3986 * res: status. If success - CURRENT_FH unchanged, return linktext.
3987 */
3988
3989 /* ARGSUSED */
3990 static void
rfs4_op_readlink(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3991 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3992 struct compound_state *cs)
3993 {
3994 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3995 int error;
3996 vnode_t *vp;
3997 struct iovec iov;
3998 struct vattr va;
3999 struct uio uio;
4000 char *data;
4001 struct sockaddr *ca;
4002 char *name = NULL;
4003 int is_referral;
4004
4005 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
4006
4007 /* CURRENT_FH: directory */
4008 vp = cs->vp;
4009 if (vp == NULL) {
4010 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4011 goto out;
4012 }
4013
4014 if (cs->access == CS_ACCESS_DENIED) {
4015 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4016 goto out;
4017 }
4018
4019 /* Is it a referral? */
4020 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
4021
4022 is_referral = 1;
4023
4024 } else {
4025
4026 is_referral = 0;
4027
4028 if (vp->v_type == VDIR) {
4029 *cs->statusp = resp->status = NFS4ERR_ISDIR;
4030 goto out;
4031 }
4032
4033 if (vp->v_type != VLNK) {
4034 *cs->statusp = resp->status = NFS4ERR_INVAL;
4035 goto out;
4036 }
4037
4038 }
4039
4040 va.va_mask = AT_MODE;
4041 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
4042 if (error) {
4043 *cs->statusp = resp->status = puterrno4(error);
4044 goto out;
4045 }
4046
4047 if (MANDLOCK(vp, va.va_mode)) {
4048 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4049 goto out;
4050 }
4051
4052 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
4053
4054 if (is_referral) {
4055 char *s;
4056 size_t strsz;
4057 kstat_named_t *stat =
4058 cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
4059
4060 /* Get an artificial symlink based on a referral */
4061 s = build_symlink(vp, cs->cr, &strsz);
4062 stat[NFS_REFERLINKS].value.ui64++;
4063 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
4064 vnode_t *, vp, char *, s);
4065 if (s == NULL)
4066 error = EINVAL;
4067 else {
4068 error = 0;
4069 (void) strlcpy(data, s, MAXPATHLEN + 1);
4070 kmem_free(s, strsz);
4071 }
4072
4073 } else {
4074
4075 iov.iov_base = data;
4076 iov.iov_len = MAXPATHLEN;
4077 uio.uio_iov = &iov;
4078 uio.uio_iovcnt = 1;
4079 uio.uio_segflg = UIO_SYSSPACE;
4080 uio.uio_extflg = UIO_COPY_CACHED;
4081 uio.uio_loffset = 0;
4082 uio.uio_resid = MAXPATHLEN;
4083
4084 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
4085
4086 if (!error)
4087 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
4088 }
4089
4090 if (error) {
4091 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4092 *cs->statusp = resp->status = puterrno4(error);
4093 goto out;
4094 }
4095
4096 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4097 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
4098 MAXPATHLEN + 1);
4099
4100 if (name == NULL) {
4101 /*
4102 * Even though the conversion failed, we return
4103 * something. We just don't translate it.
4104 */
4105 name = data;
4106 }
4107
4108 /*
4109 * treat link name as data
4110 */
4111 (void) str_to_utf8(name, (utf8string *)&resp->link);
4112
4113 if (name != data)
4114 kmem_free(name, MAXPATHLEN + 1);
4115 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4116 *cs->statusp = resp->status = NFS4_OK;
4117
4118 out:
4119 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
4120 READLINK4res *, resp);
4121 }
4122
4123 static void
rfs4_op_readlink_free(nfs_resop4 * resop)4124 rfs4_op_readlink_free(nfs_resop4 *resop)
4125 {
4126 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
4127 utf8string *symlink = (utf8string *)&resp->link;
4128
4129 if (symlink->utf8string_val) {
4130 UTF8STRING_FREE(*symlink)
4131 }
4132 }
4133
4134 /*
4135 * release_lockowner:
4136 * Release any state associated with the supplied
4137 * lockowner. Note if any lo_state is holding locks we will not
4138 * rele that lo_state and thus the lockowner will not be destroyed.
4139 * A client using lock after the lock owner stateid has been released
4140 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
4141 * to reissue the lock with new_lock_owner set to TRUE.
4142 * args: lock_owner
4143 * res: status
4144 */
4145 /* ARGSUSED */
4146 static void
rfs4_op_release_lockowner(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4147 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
4148 struct svc_req *req, struct compound_state *cs)
4149 {
4150 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
4151 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
4152 rfs4_lockowner_t *lo;
4153 rfs4_openowner_t *oo;
4154 rfs4_state_t *sp;
4155 rfs4_lo_state_t *lsp;
4156 rfs4_client_t *cp;
4157 bool_t create = FALSE;
4158 locklist_t *llist;
4159 sysid_t sysid;
4160
4161 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
4162 cs, RELEASE_LOCKOWNER4args *, ap);
4163
4164 /* Make sure there is a clientid around for this request */
4165 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
4166
4167 if (cp == NULL) {
4168 *cs->statusp = resp->status =
4169 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
4170 goto out;
4171 }
4172 rfs4_client_rele(cp);
4173
4174 lo = rfs4_findlockowner(&ap->lock_owner, &create);
4175 if (lo == NULL) {
4176 *cs->statusp = resp->status = NFS4_OK;
4177 goto out;
4178 }
4179 ASSERT(lo->rl_client != NULL);
4180
4181 /*
4182 * Check for EXPIRED client. If so will reap state with in a lease
4183 * period or on next set_clientid_confirm step
4184 */
4185 if (rfs4_lease_expired(lo->rl_client)) {
4186 rfs4_lockowner_rele(lo);
4187 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4188 goto out;
4189 }
4190
4191 /*
4192 * If no sysid has been assigned, then no locks exist; just return.
4193 */
4194 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4195 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
4196 rfs4_lockowner_rele(lo);
4197 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4198 goto out;
4199 }
4200
4201 sysid = lo->rl_client->rc_sysidt;
4202 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4203
4204 /*
4205 * Mark the lockowner invalid.
4206 */
4207 rfs4_dbe_hide(lo->rl_dbe);
4208
4209 /*
4210 * sysid-pid pair should now not be used since the lockowner is
4211 * invalid. If the client were to instantiate the lockowner again
4212 * it would be assigned a new pid. Thus we can get the list of
4213 * current locks.
4214 */
4215
4216 llist = flk_get_active_locks(sysid, lo->rl_pid);
4217 /* If we are still holding locks fail */
4218 if (llist != NULL) {
4219
4220 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
4221
4222 flk_free_locklist(llist);
4223 /*
4224 * We need to unhide the lockowner so the client can
4225 * try it again. The bad thing here is if the client
4226 * has a logic error that took it here in the first place
4227 * they probably have lost accounting of the locks that it
4228 * is holding. So we may have dangling state until the
4229 * open owner state is reaped via close. One scenario
4230 * that could possibly occur is that the client has
4231 * sent the unlock request(s) in separate threads
4232 * and has not waited for the replies before sending the
4233 * RELEASE_LOCKOWNER request. Presumably, it would expect
4234 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4235 * reissuing the request.
4236 */
4237 rfs4_dbe_unhide(lo->rl_dbe);
4238 rfs4_lockowner_rele(lo);
4239 goto out;
4240 }
4241
4242 /*
4243 * For the corresponding client we need to check each open
4244 * owner for any opens that have lockowner state associated
4245 * with this lockowner.
4246 */
4247
4248 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4249 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4250 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4251
4252 rfs4_dbe_lock(oo->ro_dbe);
4253 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4254 sp = list_next(&oo->ro_statelist, sp)) {
4255
4256 rfs4_dbe_lock(sp->rs_dbe);
4257 for (lsp = list_head(&sp->rs_lostatelist);
4258 lsp != NULL;
4259 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4260 if (lsp->rls_locker == lo) {
4261 rfs4_dbe_lock(lsp->rls_dbe);
4262 rfs4_dbe_invalidate(lsp->rls_dbe);
4263 rfs4_dbe_unlock(lsp->rls_dbe);
4264 }
4265 }
4266 rfs4_dbe_unlock(sp->rs_dbe);
4267 }
4268 rfs4_dbe_unlock(oo->ro_dbe);
4269 }
4270 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4271
4272 rfs4_lockowner_rele(lo);
4273
4274 *cs->statusp = resp->status = NFS4_OK;
4275
4276 out:
4277 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4278 cs, RELEASE_LOCKOWNER4res *, resp);
4279 }
4280
4281 /*
4282 * short utility function to lookup a file and recall the delegation
4283 */
4284 static rfs4_file_t *
rfs4_lookup_and_findfile(vnode_t * dvp,char * nm,vnode_t ** vpp,int * lkup_error,cred_t * cr)4285 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4286 int *lkup_error, cred_t *cr)
4287 {
4288 vnode_t *vp;
4289 rfs4_file_t *fp = NULL;
4290 bool_t fcreate = FALSE;
4291 int error;
4292
4293 if (vpp)
4294 *vpp = NULL;
4295
4296 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4297 NULL)) == 0) {
4298 if (vp->v_type == VREG)
4299 fp = rfs4_findfile(vp, NULL, &fcreate);
4300 if (vpp)
4301 *vpp = vp;
4302 else
4303 VN_RELE(vp);
4304 }
4305
4306 if (lkup_error)
4307 *lkup_error = error;
4308
4309 return (fp);
4310 }
4311
4312 /*
4313 * remove: args: CURRENT_FH: directory; name.
4314 * res: status. If success - CURRENT_FH unchanged, return change_info
4315 * for directory.
4316 */
4317 /* ARGSUSED */
4318 static void
rfs4_op_remove(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4319 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4320 struct compound_state *cs)
4321 {
4322 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4323 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4324 int error;
4325 vnode_t *dvp, *vp;
4326 struct vattr bdva, idva, adva;
4327 char *nm;
4328 uint_t len;
4329 rfs4_file_t *fp;
4330 int in_crit = 0;
4331 bslabel_t *clabel;
4332 struct sockaddr *ca;
4333 char *name = NULL;
4334 nfsstat4 status;
4335
4336 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4337 REMOVE4args *, args);
4338
4339 /* CURRENT_FH: directory */
4340 dvp = cs->vp;
4341 if (dvp == NULL) {
4342 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4343 goto out;
4344 }
4345
4346 if (cs->access == CS_ACCESS_DENIED) {
4347 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4348 goto out;
4349 }
4350
4351 /*
4352 * If there is an unshared filesystem mounted on this vnode,
4353 * Do not allow to remove anything in this directory.
4354 */
4355 if (vn_ismntpt(dvp)) {
4356 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4357 goto out;
4358 }
4359
4360 if (dvp->v_type != VDIR) {
4361 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4362 goto out;
4363 }
4364
4365 status = utf8_dir_verify(&args->target);
4366 if (status != NFS4_OK) {
4367 *cs->statusp = resp->status = status;
4368 goto out;
4369 }
4370
4371 /*
4372 * Lookup the file so that we can check if it's a directory
4373 */
4374 nm = utf8_to_fn(&args->target, &len, NULL);
4375 if (nm == NULL) {
4376 *cs->statusp = resp->status = NFS4ERR_INVAL;
4377 goto out;
4378 }
4379
4380 if (len > MAXNAMELEN) {
4381 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4382 kmem_free(nm, len);
4383 goto out;
4384 }
4385
4386 if (rdonly4(req, cs)) {
4387 *cs->statusp = resp->status = NFS4ERR_ROFS;
4388 kmem_free(nm, len);
4389 goto out;
4390 }
4391
4392 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4393 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4394 MAXPATHLEN + 1);
4395
4396 if (name == NULL) {
4397 *cs->statusp = resp->status = NFS4ERR_INVAL;
4398 kmem_free(nm, len);
4399 goto out;
4400 }
4401
4402 /*
4403 * Lookup the file to determine type and while we are see if
4404 * there is a file struct around and check for delegation.
4405 * We don't need to acquire va_seq before this lookup, if
4406 * it causes an update, cinfo.before will not match, which will
4407 * trigger a cache flush even if atomic is TRUE.
4408 */
4409 fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr);
4410 if (fp != NULL) {
4411 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4412 NULL)) {
4413 VN_RELE(vp);
4414 rfs4_file_rele(fp);
4415 *cs->statusp = resp->status = NFS4ERR_DELAY;
4416 if (nm != name)
4417 kmem_free(name, MAXPATHLEN + 1);
4418 kmem_free(nm, len);
4419 goto out;
4420 }
4421 }
4422
4423 /* Didn't find anything to remove */
4424 if (vp == NULL) {
4425 *cs->statusp = resp->status = error;
4426 if (nm != name)
4427 kmem_free(name, MAXPATHLEN + 1);
4428 kmem_free(nm, len);
4429 goto out;
4430 }
4431
4432 if (nbl_need_check(vp)) {
4433 nbl_start_crit(vp, RW_READER);
4434 in_crit = 1;
4435 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4436 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4437 if (nm != name)
4438 kmem_free(name, MAXPATHLEN + 1);
4439 kmem_free(nm, len);
4440 nbl_end_crit(vp);
4441 VN_RELE(vp);
4442 if (fp) {
4443 rfs4_clear_dont_grant(fp);
4444 rfs4_file_rele(fp);
4445 }
4446 goto out;
4447 }
4448 }
4449
4450 /* check label before allowing removal */
4451 if (is_system_labeled()) {
4452 ASSERT(req->rq_label != NULL);
4453 clabel = req->rq_label;
4454 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4455 "got client label from request(1)",
4456 struct svc_req *, req);
4457 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4458 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4459 cs->exi)) {
4460 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4461 if (name != nm)
4462 kmem_free(name, MAXPATHLEN + 1);
4463 kmem_free(nm, len);
4464 if (in_crit)
4465 nbl_end_crit(vp);
4466 VN_RELE(vp);
4467 if (fp) {
4468 rfs4_clear_dont_grant(fp);
4469 rfs4_file_rele(fp);
4470 }
4471 goto out;
4472 }
4473 }
4474 }
4475
4476 /* Get dir "before" change value */
4477 bdva.va_mask = AT_CTIME|AT_SEQ;
4478 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4479 if (error) {
4480 *cs->statusp = resp->status = puterrno4(error);
4481 if (nm != name)
4482 kmem_free(name, MAXPATHLEN + 1);
4483 kmem_free(nm, len);
4484 if (in_crit)
4485 nbl_end_crit(vp);
4486 VN_RELE(vp);
4487 if (fp) {
4488 rfs4_clear_dont_grant(fp);
4489 rfs4_file_rele(fp);
4490 }
4491 goto out;
4492 }
4493 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4494
4495 /* Actually do the REMOVE operation */
4496 if (vp->v_type == VDIR) {
4497 /*
4498 * Can't remove a directory that has a mounted-on filesystem.
4499 */
4500 if (vn_ismntpt(vp)) {
4501 error = EACCES;
4502 } else {
4503 /*
4504 * System V defines rmdir to return EEXIST,
4505 * not ENOTEMPTY, if the directory is not
4506 * empty. A System V NFS server needs to map
4507 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4508 * transmit over the wire.
4509 */
4510 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4511 NULL, 0)) == EEXIST)
4512 error = ENOTEMPTY;
4513 }
4514 } else {
4515 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4516 fp != NULL) {
4517 struct vattr va;
4518 vnode_t *tvp;
4519
4520 rfs4_dbe_lock(fp->rf_dbe);
4521 tvp = fp->rf_vp;
4522 if (tvp)
4523 VN_HOLD(tvp);
4524 rfs4_dbe_unlock(fp->rf_dbe);
4525
4526 if (tvp) {
4527 /*
4528 * This is va_seq safe because we are not
4529 * manipulating dvp.
4530 */
4531 va.va_mask = AT_NLINK;
4532 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4533 va.va_nlink == 0) {
4534 /* Remove state on file remove */
4535 if (in_crit) {
4536 nbl_end_crit(vp);
4537 in_crit = 0;
4538 }
4539 rfs4_close_all_state(fp);
4540 }
4541 VN_RELE(tvp);
4542 }
4543 }
4544 }
4545
4546 if (in_crit)
4547 nbl_end_crit(vp);
4548 VN_RELE(vp);
4549
4550 if (fp) {
4551 rfs4_clear_dont_grant(fp);
4552 rfs4_file_rele(fp);
4553 }
4554 if (nm != name)
4555 kmem_free(name, MAXPATHLEN + 1);
4556 kmem_free(nm, len);
4557
4558 if (error) {
4559 *cs->statusp = resp->status = puterrno4(error);
4560 goto out;
4561 }
4562
4563 /*
4564 * Get the initial "after" sequence number, if it fails, set to zero
4565 */
4566 idva.va_mask = AT_SEQ;
4567 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4568 idva.va_seq = 0;
4569
4570 /*
4571 * Force modified data and metadata out to stable storage.
4572 */
4573 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4574
4575 /*
4576 * Get "after" change value, if it fails, simply return the
4577 * before value.
4578 */
4579 adva.va_mask = AT_CTIME|AT_SEQ;
4580 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4581 adva.va_ctime = bdva.va_ctime;
4582 adva.va_seq = 0;
4583 }
4584
4585 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4586
4587 /*
4588 * The cinfo.atomic = TRUE only if we have
4589 * non-zero va_seq's, and it has incremented by exactly one
4590 * during the VOP_REMOVE/RMDIR and it didn't change during
4591 * the VOP_FSYNC.
4592 */
4593 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4594 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4595 resp->cinfo.atomic = TRUE;
4596 else
4597 resp->cinfo.atomic = FALSE;
4598
4599 *cs->statusp = resp->status = NFS4_OK;
4600
4601 out:
4602 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4603 REMOVE4res *, resp);
4604 }
4605
4606 /*
4607 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4608 * oldname and newname.
4609 * res: status. If success - CURRENT_FH unchanged, return change_info
4610 * for both from and target directories.
4611 */
4612 /* ARGSUSED */
4613 static void
rfs4_op_rename(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4614 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4615 struct compound_state *cs)
4616 {
4617 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4618 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4619 int error;
4620 vnode_t *odvp;
4621 vnode_t *ndvp;
4622 vnode_t *srcvp, *targvp, *tvp;
4623 struct vattr obdva, oidva, oadva;
4624 struct vattr nbdva, nidva, nadva;
4625 char *onm, *nnm;
4626 uint_t olen, nlen;
4627 rfs4_file_t *fp, *sfp;
4628 int in_crit_src, in_crit_targ;
4629 int fp_rele_grant_hold, sfp_rele_grant_hold;
4630 int unlinked;
4631 bslabel_t *clabel;
4632 struct sockaddr *ca;
4633 char *converted_onm = NULL;
4634 char *converted_nnm = NULL;
4635 nfsstat4 status;
4636
4637 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4638 RENAME4args *, args);
4639
4640 fp = sfp = NULL;
4641 srcvp = targvp = tvp = NULL;
4642 in_crit_src = in_crit_targ = 0;
4643 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4644 unlinked = 0;
4645
4646 /* CURRENT_FH: target directory */
4647 ndvp = cs->vp;
4648 if (ndvp == NULL) {
4649 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4650 goto out;
4651 }
4652
4653 /* SAVED_FH: from directory */
4654 odvp = cs->saved_vp;
4655 if (odvp == NULL) {
4656 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4657 goto out;
4658 }
4659
4660 if (cs->access == CS_ACCESS_DENIED) {
4661 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4662 goto out;
4663 }
4664
4665 /*
4666 * If there is an unshared filesystem mounted on this vnode,
4667 * do not allow to rename objects in this directory.
4668 */
4669 if (vn_ismntpt(odvp)) {
4670 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4671 goto out;
4672 }
4673
4674 /*
4675 * If there is an unshared filesystem mounted on this vnode,
4676 * do not allow to rename to this directory.
4677 */
4678 if (vn_ismntpt(ndvp)) {
4679 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4680 goto out;
4681 }
4682
4683 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4684 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4685 goto out;
4686 }
4687
4688 if (cs->saved_exi != cs->exi) {
4689 *cs->statusp = resp->status = NFS4ERR_XDEV;
4690 goto out;
4691 }
4692
4693 status = utf8_dir_verify(&args->oldname);
4694 if (status != NFS4_OK) {
4695 *cs->statusp = resp->status = status;
4696 goto out;
4697 }
4698
4699 status = utf8_dir_verify(&args->newname);
4700 if (status != NFS4_OK) {
4701 *cs->statusp = resp->status = status;
4702 goto out;
4703 }
4704
4705 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4706 if (onm == NULL) {
4707 *cs->statusp = resp->status = NFS4ERR_INVAL;
4708 goto out;
4709 }
4710 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4711 nlen = MAXPATHLEN + 1;
4712 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4713 nlen);
4714
4715 if (converted_onm == NULL) {
4716 *cs->statusp = resp->status = NFS4ERR_INVAL;
4717 kmem_free(onm, olen);
4718 goto out;
4719 }
4720
4721 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4722 if (nnm == NULL) {
4723 *cs->statusp = resp->status = NFS4ERR_INVAL;
4724 if (onm != converted_onm)
4725 kmem_free(converted_onm, MAXPATHLEN + 1);
4726 kmem_free(onm, olen);
4727 goto out;
4728 }
4729 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4730 MAXPATHLEN + 1);
4731
4732 if (converted_nnm == NULL) {
4733 *cs->statusp = resp->status = NFS4ERR_INVAL;
4734 kmem_free(nnm, nlen);
4735 nnm = NULL;
4736 if (onm != converted_onm)
4737 kmem_free(converted_onm, MAXPATHLEN + 1);
4738 kmem_free(onm, olen);
4739 goto out;
4740 }
4741
4742
4743 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4744 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4745 kmem_free(onm, olen);
4746 kmem_free(nnm, nlen);
4747 goto out;
4748 }
4749
4750
4751 if (rdonly4(req, cs)) {
4752 *cs->statusp = resp->status = NFS4ERR_ROFS;
4753 if (onm != converted_onm)
4754 kmem_free(converted_onm, MAXPATHLEN + 1);
4755 kmem_free(onm, olen);
4756 if (nnm != converted_nnm)
4757 kmem_free(converted_nnm, MAXPATHLEN + 1);
4758 kmem_free(nnm, nlen);
4759 goto out;
4760 }
4761
4762 /* check label of the target dir */
4763 if (is_system_labeled()) {
4764 ASSERT(req->rq_label != NULL);
4765 clabel = req->rq_label;
4766 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4767 "got client label from request(1)",
4768 struct svc_req *, req);
4769 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4770 if (!do_rfs_label_check(clabel, ndvp,
4771 EQUALITY_CHECK, cs->exi)) {
4772 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4773 goto err_out;
4774 }
4775 }
4776 }
4777
4778 /*
4779 * Is the source a file and have a delegation?
4780 * We don't need to acquire va_seq before these lookups, if
4781 * it causes an update, cinfo.before will not match, which will
4782 * trigger a cache flush even if atomic is TRUE.
4783 */
4784 sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4785 &error, cs->cr);
4786 if (sfp != NULL) {
4787 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4788 NULL)) {
4789 *cs->statusp = resp->status = NFS4ERR_DELAY;
4790 goto err_out;
4791 }
4792 }
4793
4794 if (srcvp == NULL) {
4795 *cs->statusp = resp->status = puterrno4(error);
4796 if (onm != converted_onm)
4797 kmem_free(converted_onm, MAXPATHLEN + 1);
4798 kmem_free(onm, olen);
4799 if (nnm != converted_nnm)
4800 kmem_free(converted_nnm, MAXPATHLEN + 1);
4801 kmem_free(nnm, nlen);
4802 goto out;
4803 }
4804
4805 sfp_rele_grant_hold = 1;
4806
4807 /* Does the destination exist and a file and have a delegation? */
4808 fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp, NULL,
4809 cs->cr);
4810 if (fp != NULL) {
4811 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4812 NULL)) {
4813 *cs->statusp = resp->status = NFS4ERR_DELAY;
4814 goto err_out;
4815 }
4816 }
4817 fp_rele_grant_hold = 1;
4818
4819 /* Check for NBMAND lock on both source and target */
4820 if (nbl_need_check(srcvp)) {
4821 nbl_start_crit(srcvp, RW_READER);
4822 in_crit_src = 1;
4823 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4824 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4825 goto err_out;
4826 }
4827 }
4828
4829 if (targvp && nbl_need_check(targvp)) {
4830 nbl_start_crit(targvp, RW_READER);
4831 in_crit_targ = 1;
4832 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4833 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4834 goto err_out;
4835 }
4836 }
4837
4838 /* Get source "before" change value */
4839 obdva.va_mask = AT_CTIME|AT_SEQ;
4840 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4841 if (!error) {
4842 nbdva.va_mask = AT_CTIME|AT_SEQ;
4843 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4844 }
4845 if (error) {
4846 *cs->statusp = resp->status = puterrno4(error);
4847 goto err_out;
4848 }
4849
4850 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4851 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4852
4853 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4854 NULL, 0);
4855
4856 /*
4857 * If target existed and was unlinked by VOP_RENAME, state will need
4858 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4859 * any necessary nbl_end_crit on srcvp and tgtvp.
4860 */
4861 if (error == 0 && fp != NULL) {
4862 rfs4_dbe_lock(fp->rf_dbe);
4863 tvp = fp->rf_vp;
4864 if (tvp)
4865 VN_HOLD(tvp);
4866 rfs4_dbe_unlock(fp->rf_dbe);
4867
4868 if (tvp) {
4869 struct vattr va;
4870 va.va_mask = AT_NLINK;
4871
4872 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4873 va.va_nlink == 0) {
4874 unlinked = 1;
4875
4876 /* DEBUG data */
4877 if ((srcvp == targvp) || (tvp != targvp)) {
4878 cmn_err(CE_WARN, "rfs4_op_rename: "
4879 "srcvp %p, targvp: %p, tvp: %p",
4880 (void *)srcvp, (void *)targvp,
4881 (void *)tvp);
4882 }
4883 } else {
4884 VN_RELE(tvp);
4885 }
4886 }
4887 }
4888 if (error == 0)
4889 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4890
4891 if (in_crit_src)
4892 nbl_end_crit(srcvp);
4893 if (srcvp)
4894 VN_RELE(srcvp);
4895 if (in_crit_targ)
4896 nbl_end_crit(targvp);
4897 if (targvp)
4898 VN_RELE(targvp);
4899
4900 if (unlinked) {
4901 ASSERT(fp != NULL);
4902 ASSERT(tvp != NULL);
4903
4904 /* DEBUG data */
4905 if (RW_READ_HELD(&tvp->v_nbllock)) {
4906 cmn_err(CE_WARN, "rfs4_op_rename: "
4907 "RW_READ_HELD(%p)", (void *)tvp);
4908 }
4909
4910 /* The file is gone and so should the state */
4911 rfs4_close_all_state(fp);
4912 VN_RELE(tvp);
4913 }
4914
4915 if (sfp) {
4916 rfs4_clear_dont_grant(sfp);
4917 rfs4_file_rele(sfp);
4918 }
4919 if (fp) {
4920 rfs4_clear_dont_grant(fp);
4921 rfs4_file_rele(fp);
4922 }
4923
4924 if (converted_onm != onm)
4925 kmem_free(converted_onm, MAXPATHLEN + 1);
4926 kmem_free(onm, olen);
4927 if (converted_nnm != nnm)
4928 kmem_free(converted_nnm, MAXPATHLEN + 1);
4929 kmem_free(nnm, nlen);
4930
4931 /*
4932 * Get the initial "after" sequence number, if it fails, set to zero
4933 */
4934 oidva.va_mask = AT_SEQ;
4935 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4936 oidva.va_seq = 0;
4937
4938 nidva.va_mask = AT_SEQ;
4939 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4940 nidva.va_seq = 0;
4941
4942 /*
4943 * Force modified data and metadata out to stable storage.
4944 */
4945 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4946 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4947
4948 if (error) {
4949 *cs->statusp = resp->status = puterrno4(error);
4950 goto out;
4951 }
4952
4953 /*
4954 * Get "after" change values, if it fails, simply return the
4955 * before value.
4956 */
4957 oadva.va_mask = AT_CTIME|AT_SEQ;
4958 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4959 oadva.va_ctime = obdva.va_ctime;
4960 oadva.va_seq = 0;
4961 }
4962
4963 nadva.va_mask = AT_CTIME|AT_SEQ;
4964 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4965 nadva.va_ctime = nbdva.va_ctime;
4966 nadva.va_seq = 0;
4967 }
4968
4969 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4970 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4971
4972 /*
4973 * The cinfo.atomic = TRUE only if we have
4974 * non-zero va_seq's, and it has incremented by exactly one
4975 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4976 */
4977 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4978 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4979 resp->source_cinfo.atomic = TRUE;
4980 else
4981 resp->source_cinfo.atomic = FALSE;
4982
4983 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4984 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4985 resp->target_cinfo.atomic = TRUE;
4986 else
4987 resp->target_cinfo.atomic = FALSE;
4988
4989 #ifdef VOLATILE_FH_TEST
4990 {
4991 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4992
4993 /*
4994 * Add the renamed file handle to the volatile rename list
4995 */
4996 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4997 /* file handles may expire on rename */
4998 vnode_t *vp;
4999
5000 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
5001 /*
5002 * Already know that nnm will be a valid string
5003 */
5004 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
5005 NULL, NULL, NULL);
5006 kmem_free(nnm, nlen);
5007 if (!error) {
5008 add_volrnm_fh(cs->exi, vp);
5009 VN_RELE(vp);
5010 }
5011 }
5012 }
5013 #endif /* VOLATILE_FH_TEST */
5014
5015 *cs->statusp = resp->status = NFS4_OK;
5016 out:
5017 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
5018 RENAME4res *, resp);
5019 return;
5020
5021 err_out:
5022 if (onm != converted_onm)
5023 kmem_free(converted_onm, MAXPATHLEN + 1);
5024 if (onm != NULL)
5025 kmem_free(onm, olen);
5026 if (nnm != converted_nnm)
5027 kmem_free(converted_nnm, MAXPATHLEN + 1);
5028 if (nnm != NULL)
5029 kmem_free(nnm, nlen);
5030
5031 if (in_crit_src) nbl_end_crit(srcvp);
5032 if (in_crit_targ) nbl_end_crit(targvp);
5033 if (targvp) VN_RELE(targvp);
5034 if (srcvp) VN_RELE(srcvp);
5035 if (sfp) {
5036 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
5037 rfs4_file_rele(sfp);
5038 }
5039 if (fp) {
5040 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
5041 rfs4_file_rele(fp);
5042 }
5043
5044 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
5045 RENAME4res *, resp);
5046 }
5047
5048 /* ARGSUSED */
5049 static void
rfs4_op_renew(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5050 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5051 struct compound_state *cs)
5052 {
5053 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
5054 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
5055 rfs4_client_t *cp;
5056
5057 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
5058 RENEW4args *, args);
5059
5060 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
5061 *cs->statusp = resp->status =
5062 rfs4_check_clientid(&args->clientid, 0);
5063 goto out;
5064 }
5065
5066 if (rfs4_lease_expired(cp)) {
5067 rfs4_client_rele(cp);
5068 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
5069 goto out;
5070 }
5071
5072 rfs4_update_lease(cp);
5073
5074 mutex_enter(cp->rc_cbinfo.cb_lock);
5075 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
5076 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
5077 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
5078 } else {
5079 *cs->statusp = resp->status = NFS4_OK;
5080 }
5081 mutex_exit(cp->rc_cbinfo.cb_lock);
5082
5083 rfs4_client_rele(cp);
5084
5085 out:
5086 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
5087 RENEW4res *, resp);
5088 }
5089
5090 /* ARGSUSED */
5091 static void
rfs4_op_restorefh(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5092 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
5093 struct compound_state *cs)
5094 {
5095 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
5096
5097 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
5098
5099 /* No need to check cs->access - we are not accessing any object */
5100 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
5101 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
5102 goto out;
5103 }
5104 if (cs->vp != NULL) {
5105 VN_RELE(cs->vp);
5106 }
5107 cs->vp = cs->saved_vp;
5108 cs->saved_vp = NULL;
5109 cs->exi = cs->saved_exi;
5110 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
5111 *cs->statusp = resp->status = NFS4_OK;
5112 cs->deleg = FALSE;
5113
5114 if (cs->cs_flags & RFS4_SAVED_STATEID) {
5115 cs->current_stateid = cs->save_stateid;
5116 cs->cs_flags |= RFS4_CURRENT_STATEID;
5117 }
5118 out:
5119 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
5120 RESTOREFH4res *, resp);
5121 }
5122
5123 /* ARGSUSED */
5124 static void
rfs4_op_savefh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5125 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5126 struct compound_state *cs)
5127 {
5128 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
5129
5130 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
5131
5132 /* No need to check cs->access - we are not accessing any object */
5133 if (cs->vp == NULL) {
5134 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5135 goto out;
5136 }
5137 if (cs->saved_vp != NULL) {
5138 VN_RELE(cs->saved_vp);
5139 }
5140 cs->saved_vp = cs->vp;
5141 VN_HOLD(cs->saved_vp);
5142 cs->saved_exi = cs->exi;
5143 /*
5144 * since SAVEFH is fairly rare, don't alloc space for its fh
5145 * unless necessary.
5146 */
5147 if (cs->saved_fh.nfs_fh4_val == NULL) {
5148 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
5149 }
5150 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
5151 *cs->statusp = resp->status = NFS4_OK;
5152
5153 if (cs->cs_flags & RFS4_CURRENT_STATEID) {
5154 cs->save_stateid = cs->current_stateid;
5155 cs->cs_flags |= RFS4_SAVED_STATEID;
5156 }
5157 out:
5158 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
5159 SAVEFH4res *, resp);
5160 }
5161
5162 /*
5163 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
5164 * return the bitmap of attrs that were set successfully. It is also
5165 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
5166 * always be called only after rfs4_do_set_attrs().
5167 *
5168 * Verify that the attributes are same as the expected ones. sargp->vap
5169 * and sargp->sbp contain the input attributes as translated from fattr4.
5170 *
5171 * This function verifies only the attrs that correspond to a vattr or
5172 * vfsstat struct. That is because of the extra step needed to get the
5173 * corresponding system structs. Other attributes have already been set or
5174 * verified by do_rfs4_set_attrs.
5175 *
5176 * Return 0 if all attrs match, -1 if some don't, error if error processing.
5177 */
5178 static int
rfs4_verify_attr(struct nfs4_svgetit_arg * sargp,bitmap4 * resp,struct nfs4_ntov_table * ntovp)5179 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
5180 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
5181 {
5182 int error, ret_error = 0;
5183 int i, k;
5184 uint_t sva_mask = sargp->vap->va_mask;
5185 uint_t vbit;
5186 union nfs4_attr_u *na;
5187 uint8_t *amap;
5188 bool_t getsb = ntovp->vfsstat;
5189
5190 if (sva_mask != 0) {
5191 /*
5192 * Okay to overwrite sargp->vap because we verify based
5193 * on the incoming values.
5194 */
5195 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
5196 sargp->cs->cr, NULL);
5197 if (ret_error) {
5198 if (resp == NULL)
5199 return (ret_error);
5200 /*
5201 * Must return bitmap of successful attrs
5202 */
5203 sva_mask = 0; /* to prevent checking vap later */
5204 } else {
5205 /*
5206 * Some file systems clobber va_mask. it is probably
5207 * wrong of them to do so, nonethless we practice
5208 * defensive coding.
5209 * See bug id 4276830.
5210 */
5211 sargp->vap->va_mask = sva_mask;
5212 }
5213 }
5214
5215 if (getsb) {
5216 /*
5217 * Now get the superblock and loop on the bitmap, as there is
5218 * no simple way of translating from superblock to bitmap4.
5219 */
5220 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
5221 if (ret_error) {
5222 if (resp == NULL)
5223 goto errout;
5224 getsb = FALSE;
5225 }
5226 }
5227
5228 /*
5229 * Now loop and verify each attribute which getattr returned
5230 * whether it's the same as the input.
5231 */
5232 if (resp == NULL && !getsb && (sva_mask == 0))
5233 goto errout;
5234
5235 na = ntovp->na;
5236 amap = ntovp->amap;
5237 k = 0;
5238 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
5239 k = *amap;
5240 ASSERT(nfs4_ntov_map[k].nval == k);
5241 vbit = nfs4_ntov_map[k].vbit;
5242
5243 /*
5244 * If vattr attribute but VOP_GETATTR failed, or it's
5245 * superblock attribute but VFS_STATVFS failed, skip
5246 */
5247 if (vbit) {
5248 if ((vbit & sva_mask) == 0)
5249 continue;
5250 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
5251 continue;
5252 }
5253 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
5254 if (resp != NULL) {
5255 if (error)
5256 ret_error = -1; /* not all match */
5257 else /* update response bitmap */
5258 *resp |= nfs4_ntov_map[k].fbit;
5259 continue;
5260 }
5261 if (error) {
5262 ret_error = -1; /* not all match */
5263 break;
5264 }
5265 }
5266 errout:
5267 return (ret_error);
5268 }
5269
5270 /*
5271 * Decode the attribute to be set/verified. If the attr requires a sys op
5272 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5273 * call the sv_getit function for it, because the sys op hasn't yet been done.
5274 * Return 0 for success, error code if failed.
5275 *
5276 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5277 */
5278 static int
decode_fattr4_attr(nfs4_attr_cmd_t cmd,struct nfs4_svgetit_arg * sargp,int k,XDR * xdrp,bitmap4 * resp_bval,union nfs4_attr_u * nap)5279 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5280 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5281 {
5282 int error = 0;
5283 bool_t set_later;
5284
5285 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5286
5287 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5288 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5289 /*
5290 * don't verify yet if a vattr or sb dependent attr,
5291 * because we don't have their sys values yet.
5292 * Will be done later.
5293 */
5294 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5295 /*
5296 * ACLs are a special case, since setting the MODE
5297 * conflicts with setting the ACL. We delay setting
5298 * the ACL until all other attributes have been set.
5299 * The ACL gets set in do_rfs4_op_setattr().
5300 */
5301 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5302 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5303 sargp, nap);
5304 if (error) {
5305 xdr_free(nfs4_ntov_map[k].xfunc,
5306 (caddr_t)nap);
5307 }
5308 }
5309 }
5310 } else {
5311 #ifdef DEBUG
5312 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5313 "decoding attribute %d\n", k);
5314 #endif
5315 error = EINVAL;
5316 }
5317 if (!error && resp_bval && !set_later) {
5318 *resp_bval |= nfs4_ntov_map[k].fbit;
5319 }
5320
5321 return (error);
5322 }
5323
5324 /*
5325 * Set vattr based on incoming fattr4 attrs - used by setattr.
5326 * Set response mask. Ignore any values that are not writable vattr attrs.
5327 */
5328 static nfsstat4
do_rfs4_set_attrs(bitmap4 * resp,fattr4 * fattrp,struct compound_state * cs,struct nfs4_svgetit_arg * sargp,struct nfs4_ntov_table * ntovp,nfs4_attr_cmd_t cmd)5329 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5330 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5331 nfs4_attr_cmd_t cmd)
5332 {
5333 int error = 0;
5334 int i;
5335 char *attrs = fattrp->attrlist4;
5336 uint32_t attrslen = fattrp->attrlist4_len;
5337 XDR xdr;
5338 nfsstat4 status = NFS4_OK;
5339 vnode_t *vp = cs->vp;
5340 union nfs4_attr_u *na;
5341 uint8_t *amap;
5342
5343 #ifndef lint
5344 /*
5345 * Make sure that maximum attribute number can be expressed as an
5346 * 8 bit quantity.
5347 */
5348 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5349 #endif
5350
5351 if (vp == NULL) {
5352 if (resp)
5353 *resp = 0;
5354 return (NFS4ERR_NOFILEHANDLE);
5355 }
5356 if (cs->access == CS_ACCESS_DENIED) {
5357 if (resp)
5358 *resp = 0;
5359 return (NFS4ERR_ACCESS);
5360 }
5361
5362 sargp->op = cmd;
5363 sargp->cs = cs;
5364 sargp->flag = 0; /* may be set later */
5365 sargp->vap->va_mask = 0;
5366 sargp->rdattr_error = NFS4_OK;
5367 sargp->rdattr_error_req = FALSE;
5368 /* sargp->sbp is set by the caller */
5369
5370 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5371
5372 na = ntovp->na;
5373 amap = ntovp->amap;
5374
5375 /*
5376 * The following loop iterates on the nfs4_ntov_map checking
5377 * if the fbit is set in the requested bitmap.
5378 * If set then we process the arguments using the
5379 * rfs4_fattr4 conversion functions to populate the setattr
5380 * vattr and va_mask. Any settable attrs that are not using vattr
5381 * will be set in this loop.
5382 */
5383 for (i = 0; i < nfs4_ntov_map_size; i++) {
5384 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5385 continue;
5386 }
5387 /*
5388 * If setattr, must be a writable attr.
5389 * If verify/nverify, must be a readable attr.
5390 */
5391 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5392 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5393 /*
5394 * Client tries to set/verify an
5395 * unsupported attribute, tries to set
5396 * a read only attr or verify a write
5397 * only one - error!
5398 */
5399 break;
5400 }
5401 /*
5402 * Decode the attribute to set/verify
5403 */
5404 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5405 &xdr, resp ? resp : NULL, na);
5406 if (error)
5407 break;
5408 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5409 na++;
5410 (ntovp->attrcnt)++;
5411 if (nfs4_ntov_map[i].vfsstat)
5412 ntovp->vfsstat = TRUE;
5413 }
5414
5415 if (error != 0)
5416 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5417 puterrno4(error));
5418 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5419 return (status);
5420 }
5421
5422 static nfsstat4
do_rfs4_op_setattr(bitmap4 * resp,fattr4 * fattrp,struct compound_state * cs,stateid4 * stateid)5423 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5424 stateid4 *stateid)
5425 {
5426 int error = 0;
5427 struct nfs4_svgetit_arg sarg;
5428 bool_t trunc;
5429
5430 nfsstat4 status = NFS4_OK;
5431 cred_t *cr = cs->cr;
5432 vnode_t *vp = cs->vp;
5433 struct nfs4_ntov_table ntov;
5434 struct statvfs64 sb;
5435 struct vattr bva;
5436 struct flock64 bf;
5437 int in_crit = 0;
5438 uint_t saved_mask = 0;
5439 caller_context_t ct;
5440
5441 *resp = 0;
5442 sarg.sbp = &sb;
5443 sarg.is_referral = B_FALSE;
5444 nfs4_ntov_table_init(&ntov);
5445 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5446 NFS4ATTR_SETIT);
5447 if (status != NFS4_OK) {
5448 /*
5449 * failed set attrs
5450 */
5451 goto done;
5452 }
5453
5454 if ((sarg.vap->va_mask == 0) &&
5455 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5456 /*
5457 * no further work to be done
5458 */
5459 goto done;
5460 }
5461
5462 /*
5463 * If we got a request to set the ACL and the MODE, only
5464 * allow changing VSUID, VSGID, and VSVTX. Attempting
5465 * to change any other bits, along with setting an ACL,
5466 * gives NFS4ERR_INVAL.
5467 */
5468 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5469 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5470 vattr_t va;
5471
5472 va.va_mask = AT_MODE;
5473 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5474 if (error) {
5475 status = puterrno4(error);
5476 goto done;
5477 }
5478 if ((sarg.vap->va_mode ^ va.va_mode) &
5479 ~(VSUID | VSGID | VSVTX)) {
5480 status = NFS4ERR_INVAL;
5481 goto done;
5482 }
5483 }
5484
5485 /* Check stateid only if size has been set */
5486 if (sarg.vap->va_mask & AT_SIZE) {
5487 trunc = (sarg.vap->va_size == 0);
5488 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5489 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct, cs);
5490 if (status != NFS4_OK)
5491 goto done;
5492 } else {
5493 ct.cc_sysid = 0;
5494 ct.cc_pid = 0;
5495 ct.cc_caller_id = nfs4_srv_caller_id;
5496 ct.cc_flags = CC_DONTBLOCK;
5497 }
5498
5499 /* XXX start of possible race with delegations */
5500
5501 /*
5502 * We need to specially handle size changes because it is
5503 * possible for the client to create a file with read-only
5504 * modes, but with the file opened for writing. If the client
5505 * then tries to set the file size, e.g. ftruncate(3C),
5506 * fcntl(F_FREESP), the normal access checking done in
5507 * VOP_SETATTR would prevent the client from doing it even though
5508 * it should be allowed to do so. To get around this, we do the
5509 * access checking for ourselves and use VOP_SPACE which doesn't
5510 * do the access checking.
5511 * Also the client should not be allowed to change the file
5512 * size if there is a conflicting non-blocking mandatory lock in
5513 * the region of the change.
5514 */
5515 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5516 u_offset_t offset;
5517 ssize_t length;
5518
5519 /*
5520 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5521 * before returning, sarg.vap->va_mask is used to
5522 * generate the setattr reply bitmap. We also clear
5523 * AT_SIZE below before calling VOP_SPACE. For both
5524 * of these cases, the va_mask needs to be saved here
5525 * and restored after calling VOP_SETATTR.
5526 */
5527 saved_mask = sarg.vap->va_mask;
5528
5529 /*
5530 * Check any possible conflict due to NBMAND locks.
5531 * Get into critical region before VOP_GETATTR, so the
5532 * size attribute is valid when checking conflicts.
5533 */
5534 if (nbl_need_check(vp)) {
5535 nbl_start_crit(vp, RW_READER);
5536 in_crit = 1;
5537 }
5538
5539 bva.va_mask = AT_UID|AT_SIZE;
5540 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5541 if (error != 0) {
5542 status = puterrno4(error);
5543 goto done;
5544 }
5545
5546 if (in_crit) {
5547 if (sarg.vap->va_size < bva.va_size) {
5548 offset = sarg.vap->va_size;
5549 length = bva.va_size - sarg.vap->va_size;
5550 } else {
5551 offset = bva.va_size;
5552 length = sarg.vap->va_size - bva.va_size;
5553 }
5554 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5555 &ct)) {
5556 status = NFS4ERR_LOCKED;
5557 goto done;
5558 }
5559 }
5560
5561 if (crgetuid(cr) == bva.va_uid) {
5562 sarg.vap->va_mask &= ~AT_SIZE;
5563 bf.l_type = F_WRLCK;
5564 bf.l_whence = 0;
5565 bf.l_start = (off64_t)sarg.vap->va_size;
5566 bf.l_len = 0;
5567 bf.l_sysid = 0;
5568 bf.l_pid = 0;
5569 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5570 (offset_t)sarg.vap->va_size, cr, &ct);
5571 }
5572 }
5573
5574 if (!error && sarg.vap->va_mask != 0)
5575 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5576
5577 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5578 if (saved_mask & AT_SIZE)
5579 sarg.vap->va_mask |= AT_SIZE;
5580
5581 /*
5582 * If an ACL was being set, it has been delayed until now,
5583 * in order to set the mode (via the VOP_SETATTR() above) first.
5584 */
5585 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5586 int i;
5587
5588 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5589 if (ntov.amap[i] == FATTR4_ACL)
5590 break;
5591 if (i < NFS4_MAXNUM_ATTRS) {
5592 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5593 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5594 if (error == 0) {
5595 *resp |= FATTR4_ACL_MASK;
5596 } else if (error == ENOTSUP) {
5597 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5598 status = NFS4ERR_ATTRNOTSUPP;
5599 goto done;
5600 }
5601 } else {
5602 NFS4_DEBUG(rfs4_debug,
5603 (CE_NOTE, "do_rfs4_op_setattr: "
5604 "unable to find ACL in fattr4"));
5605 error = EINVAL;
5606 }
5607 }
5608
5609 if (error) {
5610 /* check if a monitor detected a delegation conflict */
5611 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5612 status = NFS4ERR_DELAY;
5613 else
5614 status = puterrno4(error);
5615
5616 /*
5617 * Set the response bitmap when setattr failed.
5618 * If VOP_SETATTR partially succeeded, test by doing a
5619 * VOP_GETATTR on the object and comparing the data
5620 * to the setattr arguments.
5621 */
5622 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5623 } else {
5624 /*
5625 * Force modified metadata out to stable storage.
5626 */
5627 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5628 /*
5629 * Set response bitmap
5630 */
5631 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5632 }
5633
5634 /* Return early and already have a NFSv4 error */
5635 done:
5636 /*
5637 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5638 * conversion sets both readable and writeable NFS4 attrs
5639 * for AT_MTIME and AT_ATIME. The line below masks out
5640 * unrequested attrs from the setattr result bitmap. This
5641 * is placed after the done: label to catch the ATTRNOTSUP
5642 * case.
5643 */
5644 *resp &= fattrp->attrmask;
5645
5646 if (in_crit)
5647 nbl_end_crit(vp);
5648
5649 nfs4_ntov_table_free(&ntov, &sarg);
5650
5651 return (status);
5652 }
5653
5654 /* ARGSUSED */
5655 static void
rfs4_op_setattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5656 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5657 struct compound_state *cs)
5658 {
5659 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5660 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5661 bslabel_t *clabel;
5662
5663 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5664 SETATTR4args *, args);
5665
5666 if (cs->vp == NULL) {
5667 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5668 goto out;
5669 }
5670
5671 /*
5672 * If there is an unshared filesystem mounted on this vnode,
5673 * do not allow to setattr on this vnode.
5674 */
5675 if (vn_ismntpt(cs->vp)) {
5676 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5677 goto out;
5678 }
5679
5680 resp->attrsset = 0;
5681
5682 if (rdonly4(req, cs)) {
5683 *cs->statusp = resp->status = NFS4ERR_ROFS;
5684 goto out;
5685 }
5686
5687 /* check label before setting attributes */
5688 if (is_system_labeled()) {
5689 ASSERT(req->rq_label != NULL);
5690 clabel = req->rq_label;
5691 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5692 "got client label from request(1)",
5693 struct svc_req *, req);
5694 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5695 if (!do_rfs_label_check(clabel, cs->vp,
5696 EQUALITY_CHECK, cs->exi)) {
5697 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5698 goto out;
5699 }
5700 }
5701 }
5702
5703 get_stateid4(cs, &args->stateid);
5704 *cs->statusp = resp->status =
5705 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5706 &args->stateid);
5707
5708 out:
5709 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5710 SETATTR4res *, resp);
5711 }
5712
5713 /* ARGSUSED */
5714 static void
rfs4_op_verify(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5715 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5716 struct compound_state *cs)
5717 {
5718 /*
5719 * verify and nverify are exactly the same, except that nverify
5720 * succeeds when some argument changed, and verify succeeds when
5721 * when none changed.
5722 */
5723
5724 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5725 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5726
5727 int error;
5728 struct nfs4_svgetit_arg sarg;
5729 struct statvfs64 sb;
5730 struct nfs4_ntov_table ntov;
5731
5732 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5733 VERIFY4args *, args);
5734
5735 if (cs->vp == NULL) {
5736 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5737 goto out;
5738 }
5739
5740 sarg.sbp = &sb;
5741 sarg.is_referral = B_FALSE;
5742 nfs4_ntov_table_init(&ntov);
5743 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5744 &sarg, &ntov, NFS4ATTR_VERIT);
5745 if (resp->status != NFS4_OK) {
5746 /*
5747 * do_rfs4_set_attrs will try to verify systemwide attrs,
5748 * so could return -1 for "no match".
5749 */
5750 if (resp->status == -1)
5751 resp->status = NFS4ERR_NOT_SAME;
5752 goto done;
5753 }
5754 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5755 switch (error) {
5756 case 0:
5757 resp->status = NFS4_OK;
5758 break;
5759 case -1:
5760 resp->status = NFS4ERR_NOT_SAME;
5761 break;
5762 default:
5763 resp->status = puterrno4(error);
5764 break;
5765 }
5766 done:
5767 *cs->statusp = resp->status;
5768 nfs4_ntov_table_free(&ntov, &sarg);
5769 out:
5770 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5771 VERIFY4res *, resp);
5772 }
5773
5774 /* ARGSUSED */
5775 static void
rfs4_op_nverify(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5776 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5777 struct compound_state *cs)
5778 {
5779 /*
5780 * verify and nverify are exactly the same, except that nverify
5781 * succeeds when some argument changed, and verify succeeds when
5782 * when none changed.
5783 */
5784
5785 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5786 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5787
5788 int error;
5789 struct nfs4_svgetit_arg sarg;
5790 struct statvfs64 sb;
5791 struct nfs4_ntov_table ntov;
5792
5793 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5794 NVERIFY4args *, args);
5795
5796 if (cs->vp == NULL) {
5797 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5798 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5799 NVERIFY4res *, resp);
5800 return;
5801 }
5802 sarg.sbp = &sb;
5803 sarg.is_referral = B_FALSE;
5804 nfs4_ntov_table_init(&ntov);
5805 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5806 &sarg, &ntov, NFS4ATTR_VERIT);
5807 if (resp->status != NFS4_OK) {
5808 /*
5809 * do_rfs4_set_attrs will try to verify systemwide attrs,
5810 * so could return -1 for "no match".
5811 */
5812 if (resp->status == -1)
5813 resp->status = NFS4_OK;
5814 goto done;
5815 }
5816 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5817 switch (error) {
5818 case 0:
5819 resp->status = NFS4ERR_SAME;
5820 break;
5821 case -1:
5822 resp->status = NFS4_OK;
5823 break;
5824 default:
5825 resp->status = puterrno4(error);
5826 break;
5827 }
5828 done:
5829 *cs->statusp = resp->status;
5830 nfs4_ntov_table_free(&ntov, &sarg);
5831
5832 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5833 NVERIFY4res *, resp);
5834 }
5835
5836 /*
5837 * XXX - This should live in an NFS header file.
5838 */
5839 #define MAX_IOVECS 12
5840
5841 /* ARGSUSED */
5842 static void
rfs4_op_write(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5843 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5844 struct compound_state *cs)
5845 {
5846 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5847 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5848 int error;
5849 vnode_t *vp;
5850 struct vattr bva;
5851 u_offset_t rlimit;
5852 struct uio uio;
5853 struct iovec iov[MAX_IOVECS];
5854 struct iovec *iovp;
5855 int iovcnt;
5856 int ioflag;
5857 cred_t *savecred, *cr;
5858 bool_t *deleg = &cs->deleg;
5859 nfsstat4 stat;
5860 int in_crit = 0;
5861 caller_context_t ct;
5862 nfs4_srv_t *nsrv4;
5863
5864 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5865 WRITE4args *, args);
5866
5867 vp = cs->vp;
5868 if (vp == NULL) {
5869 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5870 goto out;
5871 }
5872
5873 if (cs->access == CS_ACCESS_DENIED) {
5874 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5875 goto out;
5876 }
5877
5878 get_stateid4(cs, &args->stateid);
5879
5880 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5881 deleg, TRUE, &ct, cs)) != NFS4_OK) {
5882 *cs->statusp = resp->status = stat;
5883 goto out;
5884 }
5885
5886 /*
5887 * We have to enter the critical region before calling VOP_RWLOCK
5888 * to avoid a deadlock with ufs.
5889 */
5890 if (nbl_need_check(vp)) {
5891 nbl_start_crit(vp, RW_READER);
5892 in_crit = 1;
5893 if (nbl_conflict(vp, NBL_WRITE,
5894 args->offset, args->data_len, 0, &ct)) {
5895 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5896 goto out;
5897 }
5898 }
5899
5900 cr = cs->cr;
5901 bva.va_mask = AT_MODE | AT_UID;
5902 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5903
5904 /*
5905 * If we can't get the attributes, then we can't do the
5906 * right access checking. So, we'll fail the request.
5907 */
5908 if (error) {
5909 *cs->statusp = resp->status = puterrno4(error);
5910 goto out;
5911 }
5912
5913 if (rdonly4(req, cs)) {
5914 *cs->statusp = resp->status = NFS4ERR_ROFS;
5915 goto out;
5916 }
5917
5918 if (vp->v_type != VREG) {
5919 *cs->statusp = resp->status =
5920 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5921 goto out;
5922 }
5923
5924 if (crgetuid(cr) != bva.va_uid &&
5925 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5926 *cs->statusp = resp->status = puterrno4(error);
5927 goto out;
5928 }
5929
5930 if (MANDLOCK(vp, bva.va_mode)) {
5931 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5932 goto out;
5933 }
5934
5935 nsrv4 = nfs4_get_srv();
5936 if (args->data_len == 0) {
5937 *cs->statusp = resp->status = NFS4_OK;
5938 resp->count = 0;
5939 resp->committed = args->stable;
5940 resp->writeverf = nsrv4->write4verf;
5941 goto out;
5942 }
5943
5944 if (args->mblk != NULL) {
5945 mblk_t *m;
5946 uint_t bytes, round_len;
5947
5948 iovcnt = 0;
5949 bytes = 0;
5950 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5951 for (m = args->mblk;
5952 m != NULL && bytes < round_len;
5953 m = m->b_cont) {
5954 iovcnt++;
5955 bytes += MBLKL(m);
5956 }
5957 #ifdef DEBUG
5958 /* should have ended on an mblk boundary */
5959 if (bytes != round_len) {
5960 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5961 bytes, round_len, args->data_len);
5962 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5963 (void *)args->mblk, (void *)m);
5964 ASSERT(bytes == round_len);
5965 }
5966 #endif
5967 if (iovcnt <= MAX_IOVECS) {
5968 iovp = iov;
5969 } else {
5970 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5971 }
5972 mblk_to_iov(args->mblk, iovcnt, iovp);
5973 } else if (args->rlist != NULL) {
5974 iovcnt = 1;
5975 iovp = iov;
5976 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5977 iovp->iov_len = args->data_len;
5978 } else {
5979 iovcnt = 1;
5980 iovp = iov;
5981 iovp->iov_base = args->data_val;
5982 iovp->iov_len = args->data_len;
5983 }
5984
5985 uio.uio_iov = iovp;
5986 uio.uio_iovcnt = iovcnt;
5987
5988 uio.uio_segflg = UIO_SYSSPACE;
5989 uio.uio_extflg = UIO_COPY_DEFAULT;
5990 uio.uio_loffset = args->offset;
5991 uio.uio_resid = args->data_len;
5992 uio.uio_llimit = curproc->p_fsz_ctl;
5993 rlimit = uio.uio_llimit - args->offset;
5994 if (rlimit < (u_offset_t)uio.uio_resid)
5995 uio.uio_resid = (int)rlimit;
5996
5997 if (args->stable == UNSTABLE4)
5998 ioflag = 0;
5999 else if (args->stable == FILE_SYNC4)
6000 ioflag = FSYNC;
6001 else if (args->stable == DATA_SYNC4)
6002 ioflag = FDSYNC;
6003 else {
6004 if (iovp != iov)
6005 kmem_free(iovp, sizeof (*iovp) * iovcnt);
6006 *cs->statusp = resp->status = NFS4ERR_INVAL;
6007 goto out;
6008 }
6009
6010 /*
6011 * We're changing creds because VM may fault and we need
6012 * the cred of the current thread to be used if quota
6013 * checking is enabled.
6014 */
6015 savecred = curthread->t_cred;
6016 curthread->t_cred = cr;
6017 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
6018 curthread->t_cred = savecred;
6019
6020 if (iovp != iov)
6021 kmem_free(iovp, sizeof (*iovp) * iovcnt);
6022
6023 if (error) {
6024 *cs->statusp = resp->status = puterrno4(error);
6025 goto out;
6026 }
6027
6028 *cs->statusp = resp->status = NFS4_OK;
6029 resp->count = args->data_len - uio.uio_resid;
6030
6031 if (ioflag == 0)
6032 resp->committed = UNSTABLE4;
6033 else
6034 resp->committed = FILE_SYNC4;
6035
6036 resp->writeverf = nsrv4->write4verf;
6037
6038 out:
6039 if (in_crit)
6040 nbl_end_crit(vp);
6041
6042 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
6043 WRITE4res *, resp);
6044 }
6045
6046 static inline int
rfs4_opnum_in_range(const compound_state_t * cs,int opnum)6047 rfs4_opnum_in_range(const compound_state_t *cs, int opnum)
6048 {
6049 if (opnum < FIRST_NFS4_OP || opnum > LAST_NFS4_OP)
6050 return (0);
6051 else if (cs->minorversion == 0 && opnum > LAST_NFS40_OP)
6052 return (0);
6053 else if (cs->minorversion == 1 && opnum > LAST_NFS41_OP)
6054 return (0);
6055 else if (cs->minorversion == 2 && opnum > LAST_NFS42_OP)
6056 return (0);
6057 return (1);
6058 }
6059
6060 void
rfs4_compound(COMPOUND4args * args,COMPOUND4res * resp,compound_state_t * cs,struct svc_req * req,int * rv)6061 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, compound_state_t *cs,
6062 struct svc_req *req, int *rv)
6063 {
6064 uint_t i;
6065 cred_t *cr;
6066 nfs4_srv_t *nsrv4;
6067 nfs_export_t *ne = nfs_get_export();
6068
6069 if (rv != NULL)
6070 *rv = 0;
6071 /*
6072 * Form a reply tag by copying over the request tag.
6073 */
6074 resp->tag.utf8string_len = args->tag.utf8string_len;
6075 if (args->tag.utf8string_len != 0) {
6076 resp->tag.utf8string_val =
6077 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
6078 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
6079 resp->tag.utf8string_len);
6080 } else {
6081 resp->tag.utf8string_val = NULL;
6082 }
6083
6084 cs->statusp = &resp->status;
6085 cs->req = req;
6086 cs->minorversion = args->minorversion;
6087 resp->array = NULL;
6088 resp->array_len = 0;
6089
6090 if (args->array_len == 0) {
6091 resp->status = NFS4_OK;
6092 return;
6093 }
6094
6095 cr = svc_xprt_cred(req->rq_xprt);
6096 ASSERT(cr != NULL);
6097
6098 if (sec_svc_getcred(req, cr, &cs->principal, &cs->nfsflavor) == 0) {
6099 DTRACE_NFSV4_2(compound__start, struct compound_state *,
6100 cs, COMPOUND4args *, args);
6101 DTRACE_NFSV4_2(compound__done, struct compound_state *,
6102 cs, COMPOUND4res *, resp);
6103 svcerr_badcred(req->rq_xprt);
6104 if (rv != NULL)
6105 *rv = 1;
6106 return;
6107 }
6108
6109 resp->array_len = args->array_len;
6110 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
6111 KM_SLEEP);
6112
6113 cs->op_len = args->array_len;
6114 cs->basecr = cr;
6115 nsrv4 = nfs4_get_srv();
6116
6117 DTRACE_NFSV4_2(compound__start, struct compound_state *, cs,
6118 COMPOUND4args *, args);
6119
6120 /*
6121 * For now, NFS4 compound processing must be protected by
6122 * exported_lock because it can access more than one exportinfo
6123 * per compound and share/unshare can now change multiple
6124 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
6125 * per proc (excluding public exinfo), and exi_count design
6126 * is sufficient to protect concurrent execution of NFS2/3
6127 * ops along with unexport. This lock will be removed as
6128 * part of the NFSv4 phase 2 namespace redesign work.
6129 */
6130 rw_enter(&ne->exported_lock, RW_READER);
6131
6132 /*
6133 * If this is the first compound we've seen, we need to start all
6134 * new instances' grace periods.
6135 */
6136 if (nsrv4->seen_first_compound == 0) {
6137 rfs4_grace_start_new(nsrv4);
6138 /*
6139 * This must be set after rfs4_grace_start_new(), otherwise
6140 * another thread could proceed past here before the former
6141 * is finished.
6142 */
6143 nsrv4->seen_first_compound = 1;
6144 }
6145
6146 for (i = 0; i < args->array_len && cs->cont; i++) {
6147 nfs_argop4 *argop;
6148 nfs_resop4 *resop;
6149 uint_t op;
6150 kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];
6151
6152 argop = &args->array[i];
6153 resop = &resp->array[i];
6154 resop->resop = argop->argop;
6155 op = (uint_t)resop->resop;
6156
6157 cs->op_pos = i;
6158 if (op < rfsv4disp_cnt && rfs4_opnum_in_range(cs, op)) {
6159 /*
6160 * Count the individual ops here; NULL and COMPOUND
6161 * are counted in common_dispatch()
6162 */
6163 stat[op].value.ui64++;
6164
6165 NFS4_DEBUG(rfs4_debug > 1,
6166 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
6167 (*rfsv4disptab[op].dis_proc)(argop, resop, req, cs);
6168 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
6169 rfs4_op_string[op], *cs->statusp));
6170 if (*cs->statusp != NFS4_OK)
6171 cs->cont = FALSE;
6172 if (rfsv4disptab[op].dis_flags & OP_CLEAR_STATEID)
6173 cs->cs_flags &= ~RFS4_CURRENT_STATEID;
6174 } else {
6175 /*
6176 * This is effectively dead code since XDR code
6177 * will have already returned BADXDR if op doesn't
6178 * decode to legal value. This only done for a
6179 * day when XDR code doesn't verify v4 opcodes.
6180 */
6181 op = OP_ILLEGAL;
6182 stat[OP_ILLEGAL_IDX].value.ui64++;
6183
6184 rfs4_op_illegal(argop, resop, req, cs);
6185 cs->cont = FALSE;
6186 }
6187
6188 /*
6189 * If not at last op, and if we are to stop, then
6190 * compact the results array.
6191 */
6192 if ((i + 1) < args->array_len && !cs->cont) {
6193 nfs_resop4 *new_res = kmem_alloc(
6194 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
6195 bcopy(resp->array,
6196 new_res, (i+1) * sizeof (nfs_resop4));
6197 kmem_free(resp->array,
6198 args->array_len * sizeof (nfs_resop4));
6199
6200 resp->array_len = i + 1;
6201 resp->array = new_res;
6202 }
6203 }
6204
6205 rw_exit(&ne->exported_lock);
6206
6207 DTRACE_NFSV4_2(compound__done, struct compound_state *, cs,
6208 COMPOUND4res *, resp);
6209
6210 /*
6211 * done with this compound request, free the label
6212 */
6213
6214 if (req->rq_label != NULL) {
6215 kmem_free(req->rq_label, sizeof (bslabel_t));
6216 req->rq_label = NULL;
6217 }
6218 }
6219
6220 /*
6221 * XXX because of what appears to be duplicate calls to rfs4_compound_free
6222 * XXX zero out the tag and array values. Need to investigate why the
6223 * XXX calls occur, but at least prevent the panic for now.
6224 */
6225 void
rfs4_compound_free(COMPOUND4res * resp)6226 rfs4_compound_free(COMPOUND4res *resp)
6227 {
6228 uint_t i;
6229
6230 if (resp->tag.utf8string_val) {
6231 UTF8STRING_FREE(resp->tag)
6232 }
6233
6234 for (i = 0; i < resp->array_len; i++) {
6235 nfs_resop4 *resop;
6236 uint_t op;
6237
6238 resop = &resp->array[i];
6239 op = (uint_t)resop->resop;
6240 if (op < rfsv4disp_cnt) {
6241 (*rfsv4disptab[op].dis_resfree)(resop);
6242 }
6243 }
6244 if (resp->array != NULL) {
6245 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6246 }
6247 }
6248
6249 /*
6250 * Check if entire requst is idempotent
6251 */
6252 bool_t
rfs4_idempotent_req(const COMPOUND4args * args)6253 rfs4_idempotent_req(const COMPOUND4args *args)
6254 {
6255 int i;
6256
6257 for (i = 0; i < args->array_len; i++) {
6258 uint_t op;
6259
6260 op = (uint_t)args->array[i].argop;
6261
6262 if (op >= rfsv4disp_cnt ||
6263 !(rfsv4disptab[op].dis_flags & OP_IDEMPOTENT)) {
6264 return (FALSE);
6265 }
6266 }
6267 return (TRUE);
6268 }
6269
6270 nfsstat4
rfs4_client_sysid(rfs4_client_t * cp,sysid_t * sp)6271 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6272 {
6273 nfsstat4 e;
6274
6275 rfs4_dbe_lock(cp->rc_dbe);
6276
6277 if (cp->rc_sysidt != LM_NOSYSID) {
6278 *sp = cp->rc_sysidt;
6279 e = NFS4_OK;
6280
6281 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6282 *sp = cp->rc_sysidt;
6283 e = NFS4_OK;
6284
6285 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6286 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6287 } else
6288 e = NFS4ERR_DELAY;
6289
6290 rfs4_dbe_unlock(cp->rc_dbe);
6291 return (e);
6292 }
6293
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug aid: log one line describing a lock request -- the caller's
 * tag, the fcntl operation, the lock type, offset, length and pid.
 * A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op, *type;

	if (operation == F_GETLK)
		op = "F_GETLK";
	else if (operation == F_SETLK)
		op = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		op = "F_SETLK_NBMAND";
	else
		op = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		type = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		type = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		type = "F_WRLCK";
	else
		type = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op, type, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/* Print the lock only when the debug condition d is true. */
#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
/* Non-DEBUG (or lint) builds compile the tracing away. */
#define	LOCK_PRINT(d, s, t, f)
#endif
6330
6331 /*ARGSUSED*/
6332 static bool_t
creds_ok(cred_set_t * cr_set,struct svc_req * req,struct compound_state * cs)6333 creds_ok(cred_set_t *cr_set, struct svc_req *req, struct compound_state *cs)
6334 {
6335 return (TRUE);
6336 }
6337
6338 /*
6339 * Look up the pathname using the vp in cs as the directory vnode.
6340 * cs->vp will be the vnode for the file on success
6341 */
6342
6343 static nfsstat4
rfs4_lookup(component4 * component,struct svc_req * req,struct compound_state * cs)6344 rfs4_lookup(component4 *component, struct svc_req *req,
6345 struct compound_state *cs)
6346 {
6347 char *nm;
6348 uint32_t len;
6349 nfsstat4 status;
6350 struct sockaddr *ca;
6351 char *name;
6352
6353 if (cs->vp == NULL) {
6354 return (NFS4ERR_NOFILEHANDLE);
6355 }
6356 if (cs->vp->v_type != VDIR) {
6357 return (NFS4ERR_NOTDIR);
6358 }
6359
6360 status = utf8_dir_verify(component);
6361 if (status != NFS4_OK)
6362 return (status);
6363
6364 nm = utf8_to_fn(component, &len, NULL);
6365 if (nm == NULL) {
6366 return (NFS4ERR_INVAL);
6367 }
6368
6369 if (len > MAXNAMELEN) {
6370 kmem_free(nm, len);
6371 return (NFS4ERR_NAMETOOLONG);
6372 }
6373
6374 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6375 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6376 MAXPATHLEN + 1);
6377
6378 if (name == NULL) {
6379 kmem_free(nm, len);
6380 return (NFS4ERR_INVAL);
6381 }
6382
6383 status = do_rfs4_op_lookup(name, req, cs);
6384
6385 if (name != nm)
6386 kmem_free(name, MAXPATHLEN + 1);
6387
6388 kmem_free(nm, len);
6389
6390 return (status);
6391 }
6392
6393 static nfsstat4
rfs4_lookupfile(component4 * component,struct svc_req * req,struct compound_state * cs,uint32_t access,change_info4 * cinfo)6394 rfs4_lookupfile(component4 *component, struct svc_req *req,
6395 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6396 {
6397 nfsstat4 status;
6398 vnode_t *dvp = cs->vp;
6399 vattr_t bva, ava, fva;
6400 int error;
6401
6402 /* Get "before" change value */
6403 bva.va_mask = AT_CTIME|AT_SEQ;
6404 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6405 if (error)
6406 return (puterrno4(error));
6407
6408 /* rfs4_lookup may VN_RELE directory */
6409 VN_HOLD(dvp);
6410
6411 status = rfs4_lookup(component, req, cs);
6412 if (status != NFS4_OK) {
6413 VN_RELE(dvp);
6414 return (status);
6415 }
6416
6417 /*
6418 * Get "after" change value, if it fails, simply return the
6419 * before value.
6420 */
6421 ava.va_mask = AT_CTIME|AT_SEQ;
6422 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6423 ava.va_ctime = bva.va_ctime;
6424 ava.va_seq = 0;
6425 }
6426 VN_RELE(dvp);
6427
6428 /*
6429 * Validate the file is a file
6430 */
6431 fva.va_mask = AT_TYPE|AT_MODE;
6432 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6433 if (error)
6434 return (puterrno4(error));
6435
6436 if (fva.va_type != VREG) {
6437 if (fva.va_type == VDIR)
6438 return (NFS4ERR_ISDIR);
6439 if (fva.va_type == VLNK)
6440 return (NFS4ERR_SYMLINK);
6441 return (NFS4ERR_INVAL);
6442 }
6443
6444 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6445 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6446
6447 /*
6448 * It is undefined if VOP_LOOKUP will change va_seq, so
6449 * cinfo.atomic = TRUE only if we have
6450 * non-zero va_seq's, and they have not changed.
6451 */
6452 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6453 cinfo->atomic = TRUE;
6454 else
6455 cinfo->atomic = FALSE;
6456
6457 /* Check for mandatory locking */
6458 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6459 return (check_open_access(access, cs, req));
6460 }
6461
6462 static nfsstat4
create_vnode(vnode_t * dvp,char * nm,vattr_t * vap,createmode4 mode,cred_t * cr,vnode_t ** vpp,bool_t * created)6463 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6464 cred_t *cr, vnode_t **vpp, bool_t *created)
6465 {
6466 int error;
6467 nfsstat4 status = NFS4_OK;
6468 vattr_t va;
6469
6470 tryagain:
6471
6472 /*
6473 * The file open mode used is VWRITE. If the client needs
6474 * some other semantic, then it should do the access checking
6475 * itself. It would have been nice to have the file open mode
6476 * passed as part of the arguments.
6477 */
6478
6479 *created = TRUE;
6480 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6481
6482 if (error) {
6483 *created = FALSE;
6484
6485 /*
6486 * If we got something other than file already exists
6487 * then just return this error. Otherwise, we got
6488 * EEXIST. If we were doing a GUARDED create, then
6489 * just return this error. Otherwise, we need to
6490 * make sure that this wasn't a duplicate of an
6491 * exclusive create request.
6492 *
6493 * The assumption is made that a non-exclusive create
6494 * request will never return EEXIST.
6495 */
6496
6497 if (error != EEXIST || mode == GUARDED4) {
6498 status = puterrno4(error);
6499 return (status);
6500 }
6501 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6502 NULL, NULL, NULL);
6503
6504 if (error) {
6505 /*
6506 * We couldn't find the file that we thought that
6507 * we just created. So, we'll just try creating
6508 * it again.
6509 */
6510 if (error == ENOENT)
6511 goto tryagain;
6512
6513 status = puterrno4(error);
6514 return (status);
6515 }
6516
6517 if (mode == UNCHECKED4) {
6518 /* existing object must be regular file */
6519 if ((*vpp)->v_type != VREG) {
6520 if ((*vpp)->v_type == VDIR)
6521 status = NFS4ERR_ISDIR;
6522 else if ((*vpp)->v_type == VLNK)
6523 status = NFS4ERR_SYMLINK;
6524 else
6525 status = NFS4ERR_INVAL;
6526 VN_RELE(*vpp);
6527 return (status);
6528 }
6529
6530 return (NFS4_OK);
6531 }
6532
6533 /* Check for duplicate request */
6534 va.va_mask = AT_MTIME;
6535 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6536 if (!error) {
6537 /* We found the file */
6538 const timestruc_t *mtime = &vap->va_mtime;
6539
6540 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6541 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6542 /* but its not our creation */
6543 VN_RELE(*vpp);
6544 return (NFS4ERR_EXIST);
6545 }
6546 *created = TRUE; /* retrans of create == created */
6547 return (NFS4_OK);
6548 }
6549 VN_RELE(*vpp);
6550 return (NFS4ERR_EXIST);
6551 }
6552
6553 return (NFS4_OK);
6554 }
6555
6556 static nfsstat4
check_open_access(uint32_t access,struct compound_state * cs,struct svc_req * req)6557 check_open_access(uint32_t access, struct compound_state *cs,
6558 struct svc_req *req)
6559 {
6560 int error;
6561 vnode_t *vp;
6562 bool_t readonly;
6563 cred_t *cr = cs->cr;
6564
6565 /* For now we don't allow mandatory locking as per V2/V3 */
6566 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6567 return (NFS4ERR_ACCESS);
6568 }
6569
6570 vp = cs->vp;
6571 ASSERT(cr != NULL && vp->v_type == VREG);
6572
6573 /*
6574 * If the file system is exported read only and we are trying
6575 * to open for write, then return NFS4ERR_ROFS
6576 */
6577
6578 readonly = rdonly4(req, cs);
6579
6580 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6581 return (NFS4ERR_ROFS);
6582
6583 if (access & OPEN4_SHARE_ACCESS_READ) {
6584 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6585 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6586 return (NFS4ERR_ACCESS);
6587 }
6588 }
6589
6590 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6591 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6592 if (error)
6593 return (NFS4ERR_ACCESS);
6594 }
6595
6596 return (NFS4_OK);
6597 }
6598
6599 static void
rfs4_verifier_to_mtime(verifier4 v,timestruc_t * mtime)6600 rfs4_verifier_to_mtime(verifier4 v, timestruc_t *mtime)
6601 {
6602 timespec32_t *time = (timespec32_t *)&v;
6603
6604 /*
6605 * Ensure no time overflows. Assumes underlying
6606 * filesystem supports at least 32 bits.
6607 * Truncate nsec to usec resolution to allow valid
6608 * compares even if the underlying filesystem truncates.
6609 */
6610 mtime->tv_sec = time->tv_sec % TIME32_MAX;
6611 mtime->tv_nsec = (time->tv_nsec / 1000) * 1000;
6612 }
6613
6614 static nfsstat4
rfs4_createfile(OPEN4args * args,struct svc_req * req,struct compound_state * cs,change_info4 * cinfo,bitmap4 * attrset,clientid4 clientid)6615 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6616 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6617 {
6618 struct nfs4_svgetit_arg sarg;
6619 struct nfs4_ntov_table ntov;
6620
6621 bool_t ntov_table_init = FALSE;
6622 struct statvfs64 sb;
6623 nfsstat4 status;
6624 vnode_t *vp;
6625 vattr_t bva, ava, iva, cva, *vap;
6626 vnode_t *dvp;
6627 char *nm = NULL;
6628 uint_t buflen;
6629 bool_t created;
6630 bool_t setsize = FALSE;
6631 len_t reqsize;
6632 int error;
6633 bool_t trunc;
6634 caller_context_t ct;
6635 component4 *component;
6636 bslabel_t *clabel;
6637 struct sockaddr *ca;
6638 char *name = NULL;
6639 fattr4 *fattr = NULL;
6640
6641 ASSERT(*attrset == 0);
6642
6643 sarg.sbp = &sb;
6644 sarg.is_referral = B_FALSE;
6645
6646 dvp = cs->vp;
6647
6648 /* Check if the file system is read only */
6649 if (rdonly4(req, cs))
6650 return (NFS4ERR_ROFS);
6651
6652 /* check the label of including directory */
6653 if (is_system_labeled()) {
6654 ASSERT(req->rq_label != NULL);
6655 clabel = req->rq_label;
6656 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6657 "got client label from request(1)",
6658 struct svc_req *, req);
6659 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6660 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6661 cs->exi)) {
6662 return (NFS4ERR_ACCESS);
6663 }
6664 }
6665 }
6666
6667 if ((args->mode == EXCLUSIVE4 || args->mode == EXCLUSIVE4_1) &&
6668 dvp->v_flag & V_XATTRDIR) {
6669 /* prohibit EXCL create of named attributes */
6670 return (NFS4ERR_INVAL);
6671 }
6672
6673 /*
6674 * Get the last component of path name in nm. cs will reference
6675 * the including directory on success.
6676 */
6677 component = &args->claim.open_claim4_u.file;
6678 status = utf8_dir_verify(component);
6679 if (status != NFS4_OK)
6680 return (status);
6681
6682 nm = utf8_to_fn(component, &buflen, NULL);
6683
6684 if (nm == NULL)
6685 return (NFS4ERR_RESOURCE);
6686
6687 if (buflen > MAXNAMELEN) {
6688 kmem_free(nm, buflen);
6689 return (NFS4ERR_NAMETOOLONG);
6690 }
6691
6692 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6693 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6694 if (error) {
6695 kmem_free(nm, buflen);
6696 return (puterrno4(error));
6697 }
6698
6699 if (bva.va_type != VDIR) {
6700 kmem_free(nm, buflen);
6701 return (NFS4ERR_NOTDIR);
6702 }
6703
6704 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6705
6706 switch (args->mode) {
6707 case GUARDED4:
6708 /*FALLTHROUGH*/
6709 case UNCHECKED4:
6710 case EXCLUSIVE4_1:
6711 nfs4_ntov_table_init(&ntov);
6712 ntov_table_init = TRUE;
6713
6714 if (args->mode == EXCLUSIVE4_1)
6715 fattr = &args->createhow4_u.ch_createboth.cva_attrs;
6716 else
6717 fattr = &args->createhow4_u.createattrs;
6718
6719 status = do_rfs4_set_attrs(attrset,
6720 fattr,
6721 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6722
6723 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6724 sarg.vap->va_type != VREG) {
6725 if (sarg.vap->va_type == VDIR)
6726 status = NFS4ERR_ISDIR;
6727 else if (sarg.vap->va_type == VLNK)
6728 status = NFS4ERR_SYMLINK;
6729 else
6730 status = NFS4ERR_INVAL;
6731 }
6732
6733 if (status != NFS4_OK) {
6734 kmem_free(nm, buflen);
6735 nfs4_ntov_table_free(&ntov, &sarg);
6736 *attrset = 0;
6737 return (status);
6738 }
6739
6740 vap = sarg.vap;
6741 vap->va_type = VREG;
6742 vap->va_mask |= AT_TYPE;
6743
6744 if ((vap->va_mask & AT_MODE) == 0) {
6745 vap->va_mask |= AT_MODE;
6746 vap->va_mode = (mode_t)0600;
6747 }
6748
6749 if (vap->va_mask & AT_SIZE) {
6750
6751 /* Disallow create with a non-zero size */
6752
6753 if ((reqsize = sarg.vap->va_size) != 0) {
6754 kmem_free(nm, buflen);
6755 nfs4_ntov_table_free(&ntov, &sarg);
6756 *attrset = 0;
6757 return (NFS4ERR_INVAL);
6758 }
6759 setsize = TRUE;
6760 }
6761 if (args->mode == EXCLUSIVE4_1) {
6762 rfs4_verifier_to_mtime(
6763 args->createhow4_u.ch_createboth.cva_verf,
6764 &vap->va_mtime);
6765 /* attrset will be set later */
6766 fattr->attrmask |= FATTR4_TIME_MODIFY_MASK;
6767 vap->va_mask |= AT_MTIME;
6768 }
6769 break;
6770
6771 case EXCLUSIVE4:
6772 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6773 cva.va_type = VREG;
6774 cva.va_mode = (mode_t)0;
6775
6776 rfs4_verifier_to_mtime(args->createhow4_u.createverf,
6777 &cva.va_mtime);
6778
6779 vap = &cva;
6780
6781 /*
6782 * For EXCL create, attrset is set to the server attr
6783 * used to cache the client's verifier.
6784 */
6785 *attrset = FATTR4_TIME_MODIFY_MASK;
6786 break;
6787 }
6788
6789 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6790 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6791 MAXPATHLEN + 1);
6792
6793 if (name == NULL) {
6794 kmem_free(nm, buflen);
6795 return (NFS4ERR_SERVERFAULT);
6796 }
6797
6798 status = create_vnode(dvp, name, vap, args->mode,
6799 cs->cr, &vp, &created);
6800 if (nm != name)
6801 kmem_free(name, MAXPATHLEN + 1);
6802 kmem_free(nm, buflen);
6803
6804 if (status != NFS4_OK) {
6805 if (ntov_table_init)
6806 nfs4_ntov_table_free(&ntov, &sarg);
6807 *attrset = 0;
6808 return (status);
6809 }
6810
6811 trunc = (setsize && !created);
6812
6813 if (args->mode != EXCLUSIVE4) {
6814 bitmap4 createmask = fattr->attrmask;
6815
6816 /*
6817 * True verification that object was created with correct
6818 * attrs is impossible. The attrs could have been changed
6819 * immediately after object creation. If attributes did
6820 * not verify, the only recourse for the server is to
6821 * destroy the object. Maybe if some attrs (like gid)
6822 * are set incorrectly, the object should be destroyed;
6823 * however, seems bad as a default policy. Do we really
6824 * want to destroy an object over one of the times not
6825 * verifying correctly? For these reasons, the server
6826 * currently sets bits in attrset for createattrs
6827 * that were set; however, no verification is done.
6828 *
6829 * vmask_to_nmask accounts for vattr bits set on create
6830 * [do_rfs4_set_attrs() only sets resp bits for
6831 * non-vattr/vfs bits.]
6832 * Mask off any bits we set by default so as not to return
6833 * more attrset bits than were requested in createattrs
6834 */
6835 if (created) {
6836 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6837 *attrset &= createmask;
6838 } else {
6839 /*
6840 * We did not create the vnode (we tried but it
6841 * already existed). In this case, the only createattr
6842 * that the spec allows the server to set is size,
6843 * and even then, it can only be set if it is 0.
6844 */
6845 *attrset = 0;
6846 if (trunc)
6847 *attrset = FATTR4_SIZE_MASK;
6848 }
6849 }
6850 if (ntov_table_init)
6851 nfs4_ntov_table_free(&ntov, &sarg);
6852
6853 /*
6854 * Get the initial "after" sequence number, if it fails,
6855 * set to zero, time to before.
6856 */
6857 iva.va_mask = AT_CTIME|AT_SEQ;
6858 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6859 iva.va_seq = 0;
6860 iva.va_ctime = bva.va_ctime;
6861 }
6862
6863 /*
6864 * create_vnode attempts to create the file exclusive,
6865 * if it already exists the VOP_CREATE will fail and
6866 * may not increase va_seq. It is atomic if
6867 * we haven't changed the directory, but if it has changed
6868 * we don't know what changed it.
6869 */
6870 if (!created) {
6871 if (bva.va_seq && iva.va_seq &&
6872 bva.va_seq == iva.va_seq)
6873 cinfo->atomic = TRUE;
6874 else
6875 cinfo->atomic = FALSE;
6876 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6877 } else {
6878 /*
6879 * The entry was created, we need to sync the
6880 * directory metadata.
6881 */
6882 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6883
6884 /*
6885 * Get "after" change value, if it fails, simply return the
6886 * before value.
6887 */
6888 ava.va_mask = AT_CTIME|AT_SEQ;
6889 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6890 ava.va_ctime = bva.va_ctime;
6891 ava.va_seq = 0;
6892 }
6893
6894 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6895
6896 /*
6897 * The cinfo->atomic = TRUE only if we have
6898 * non-zero va_seq's, and it has incremented by exactly one
6899 * during the create_vnode and it didn't
6900 * change during the VOP_FSYNC.
6901 */
6902 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6903 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6904 cinfo->atomic = TRUE;
6905 else
6906 cinfo->atomic = FALSE;
6907 }
6908
6909 /* Check for mandatory locking and that the size gets set. */
6910 cva.va_mask = AT_MODE;
6911 if (setsize)
6912 cva.va_mask |= AT_SIZE;
6913
6914 /* Assume the worst */
6915 cs->mandlock = TRUE;
6916
6917 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6918 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6919
6920 /*
6921 * Truncate the file if necessary; this would be
6922 * the case for create over an existing file.
6923 */
6924
6925 if (trunc) {
6926 int in_crit = 0;
6927 rfs4_file_t *fp;
6928 nfs4_srv_t *nsrv4;
6929 bool_t create = FALSE;
6930
6931 /*
6932 * We are writing over an existing file.
6933 * Check to see if we need to recall a delegation.
6934 */
6935 nsrv4 = nfs4_get_srv();
6936 rfs4_hold_deleg_policy(nsrv4);
6937 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6938 if (rfs4_check_delegated_byfp(FWRITE, fp,
6939 (reqsize == 0), FALSE, FALSE, &clientid)) {
6940 rfs4_file_rele(fp);
6941 rfs4_rele_deleg_policy(nsrv4);
6942 VN_RELE(vp);
6943 *attrset = 0;
6944 return (NFS4ERR_DELAY);
6945 }
6946 rfs4_file_rele(fp);
6947 }
6948 rfs4_rele_deleg_policy(nsrv4);
6949
6950 if (nbl_need_check(vp)) {
6951 in_crit = 1;
6952
6953 ASSERT(reqsize == 0);
6954
6955 nbl_start_crit(vp, RW_READER);
6956 if (nbl_conflict(vp, NBL_WRITE, 0,
6957 cva.va_size, 0, NULL)) {
6958 in_crit = 0;
6959 nbl_end_crit(vp);
6960 VN_RELE(vp);
6961 *attrset = 0;
6962 return (NFS4ERR_ACCESS);
6963 }
6964 }
6965 ct.cc_sysid = 0;
6966 ct.cc_pid = 0;
6967 ct.cc_caller_id = nfs4_srv_caller_id;
6968 ct.cc_flags = CC_DONTBLOCK;
6969
6970 cva.va_mask = AT_SIZE;
6971 cva.va_size = reqsize;
6972 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6973 if (in_crit)
6974 nbl_end_crit(vp);
6975 }
6976 }
6977
6978 error = makefh4(&cs->fh, vp, cs->exi);
6979
6980 /*
6981 * Force modified data and metadata out to stable storage.
6982 */
6983 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6984
6985 if (error) {
6986 VN_RELE(vp);
6987 *attrset = 0;
6988 return (puterrno4(error));
6989 }
6990
6991 /* if parent dir is attrdir, set namedattr fh flag */
6992 if (dvp->v_flag & V_XATTRDIR)
6993 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6994
6995 if (cs->vp)
6996 VN_RELE(cs->vp);
6997
6998 cs->vp = vp;
6999
7000 /*
7001 * if we did not create the file, we will need to check
7002 * the access bits on the file
7003 */
7004
7005 if (!created) {
7006 if (setsize)
7007 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
7008 status = check_open_access(args->share_access, cs, req);
7009 if (status != NFS4_OK)
7010 *attrset = 0;
7011 }
7012 return (status);
7013 }
7014
7015 static void
close_expired_state(rfs4_entry_t u_entry)7016 close_expired_state(rfs4_entry_t u_entry)
7017 {
7018 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
7019
7020 if (sp->rs_closed)
7021 return;
7022
7023 /* not expired ? */
7024 if (gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
7025 <= rfs4_lease_time)
7026 return;
7027
7028 rfs4_state_close(sp, TRUE, TRUE, CRED());
7029 rfs4_dbe_invalidate(sp->rs_dbe);
7030 }
7031
/*
 * rfs4_do_open: common back end shared by the OPEN claim handlers.
 *
 * Operates on the file already installed in cs->vp / cs->fh.  It looks up
 * the file and open-state entries for (oo, cs->vp) with the create flags
 * set to TRUE, applies the share reservation through rfs4_share(), recalls
 * a delegation when rfs4_check_recall() reports a conflict, and then either
 * calls VOP_OPEN() (first open for this state, and not deleg_cur) or
 * vn_open_upgrade().  On success the share counters on the file are updated,
 * the state's stateid is advanced and copied to resp->stateid, any
 * delegation returned by rfs4_grant_delegation() is encoded into
 * resp->delegation, and resp->status is NFS4_OK.
 *
 * cs        - compound state; cs->vp/cs->fh identify the file being opened
 * req       - RPC request (currently unused)
 * oo        - open owner performing the open
 * deleg     - delegation request type handed to rfs4_grant_delegation()
 * access    - OPEN4_SHARE_ACCESS_* bits requested
 * deny      - OPEN4_SHARE_DENY_* bits requested
 * resp      - OPEN result; status, stateid and delegation are filled in
 * deleg_cur - non-zero when called for a delegate-current claim; VOP_OPEN()
 *             is skipped and only the upgrade is performed
 *
 * Every failure path stores an error in resp->status and drops the file and
 * state references; a state entry created by this call (screate still TRUE)
 * is closed again before being released.
 *
 * NOTE(review): rf_file_rwlock is taken by rfs4_findfile_withlock() and is
 * released explicitly here only on the sp == NULL path.  Presumably
 * rfs4_findstate_by_owner_file() releases it on success -- confirm against
 * that function.
 */
/*ARGSUSED*/
static void
rfs4_do_open(struct compound_state *cs, struct svc_req *req,
    rfs4_openowner_t *oo, delegreq_t deleg,
    uint32_t access, uint32_t deny,
    OPEN4res *resp, int deleg_cur)
{
	/* XXX Currently not using req */
	rfs4_state_t *sp;
	rfs4_file_t *fp;
	bool_t screate = TRUE;
	bool_t fcreate = TRUE;
	uint32_t open_a, share_a;
	uint32_t open_d, share_d;
	rfs4_deleg_state_t *dsp;
	sysid_t sysid;
	nfsstat4 status;
	caller_context_t ct;
	int fflags = 0;
	int recall = 0;
	int err;
	int first_open;
	int tries = 0;

	/* get the file struct and hold a lock on it during initial open */
	fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
	if (fp == NULL) {
		resp->status = NFS4ERR_RESOURCE;
		DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
		return;
	}

	sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
	if (sp == NULL) {
		resp->status = NFS4ERR_RESOURCE;
		DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
		/* No need to keep any reference */
		rw_exit(&fp->rf_file_rwlock);
		rfs4_file_rele(fp);
		return;
	}

	/* try to get the sysid before continuing */
	if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
		resp->status = status;
		rfs4_file_rele(fp);
		/* Not a fully formed open; "close" it */
		if (screate == TRUE)
			rfs4_state_close(sp, FALSE, FALSE, cs->cr);
		rfs4_state_rele(sp);
		return;
	}

	/* Calculate the fflags for this OPEN. */
	if (access & OPEN4_SHARE_ACCESS_READ)
		fflags |= FREAD;
	if (access & OPEN4_SHARE_ACCESS_WRITE)
		fflags |= FWRITE;

	/*
	 * Retry point: re-entered at most once after a share conflict, once
	 * expired states on this file have been closed (see below).
	 */
again:
	rfs4_dbe_lock(sp->rs_dbe);

	/*
	 * Calculate the new deny and access mode that this open is adding to
	 * the file for this open owner;
	 */
	open_d = (deny & ~sp->rs_open_deny);
	open_a = (access & ~sp->rs_open_access);

	/*
	 * Calculate the new share access and share deny modes that this open
	 * is adding to the file for this open owner;
	 */
	share_a = (access & ~sp->rs_share_access);
	share_d = (deny & ~sp->rs_share_deny);

	/* No access bits recorded yet means this state has not been opened. */
	first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;

	/*
	 * Check to see the client has already sent an open for this
	 * open owner on this file with the same share/deny modes.
	 * If so, we don't need to check for a conflict and we don't
	 * need to add another shrlock.  If not, then we need to
	 * check for conflicts in deny and access before checking for
	 * conflicts in delegation.  We don't want to recall a
	 * delegation based on an open that will eventually fail based
	 * on shares modes.
	 */

	if (share_a || share_d) {
		if ((err = rfs4_share(sp, access, deny)) != 0) {
			rfs4_dbe_unlock(sp->rs_dbe);
			if (err == NFS4ERR_SHARE_DENIED && ++tries < 2) {
				/*
				 * Cleanup recently expired (not yet cleaned by
				 * reaper thread) and re-try.
				 */
				nfs4_srv_t *nsrv4 = nfs4_get_srv();

				rfs4_dbsearch_cb(nsrv4->rfs4_state_file_idx,
				    sp->rs_finfo, rfs4_lookup_exp_state_max,
				    close_expired_state);
				goto again;
			}

			resp->status = err;

			rfs4_file_rele(fp);
			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
			rfs4_state_rele(sp);
			return;
		}
	}

	/* Lock order from here on: state entry first, then file entry. */
	rfs4_dbe_lock(fp->rf_dbe);

	/*
	 * Check to see if this file is delegated and if so, if a
	 * recall needs to be done.
	 */
	if (rfs4_check_recall(sp, access)) {
		/* Both locks are dropped across the recall and the delay. */
		rfs4_dbe_unlock(fp->rf_dbe);
		rfs4_dbe_unlock(sp->rs_dbe);
		rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
		delay(NFS4_DELEGATION_CONFLICT_DELAY);
		rfs4_dbe_lock(sp->rs_dbe);

		/* if state closed while lock was dropped */
		if (sp->rs_closed) {
			if (share_a || share_d)
				(void) rfs4_unshare(sp);
			rfs4_dbe_unlock(sp->rs_dbe);
			rfs4_file_rele(fp);
			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
			rfs4_state_rele(sp);
			resp->status = NFS4ERR_OLD_STATEID;
			return;
		}

		rfs4_dbe_lock(fp->rf_dbe);
		/* Let's see if the delegation was returned */
		if (rfs4_check_recall(sp, access)) {
			/* Still delegated: undo the share and ask for a retry. */
			rfs4_dbe_unlock(fp->rf_dbe);
			if (share_a || share_d)
				(void) rfs4_unshare(sp);
			rfs4_dbe_unlock(sp->rs_dbe);
			rfs4_file_rele(fp);
			rfs4_update_lease(sp->rs_owner->ro_client);

			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
			rfs4_state_rele(sp);
			resp->status = NFS4ERR_DELAY;
			return;
		}
	}
	/*
	 * the share check passed and any delegation conflict has been
	 * taken care of, now call vop_open.
	 * if this is the first open then call vop_open with fflags.
	 * if not, call vn_open_upgrade with just the upgrade flags.
	 *
	 * if the file has been opened already, it will have the current
	 * access mode in the state struct.  if it has no share access, then
	 * this is a new open.
	 *
	 * However, if this is open with CLAIM_DELEGATE_CUR, then don't
	 * call VOP_OPEN(), just do the open upgrade.
	 */
	if (first_open && !deleg_cur) {
		ct.cc_sysid = sysid;
		ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
		ct.cc_caller_id = nfs4_srv_caller_id;
		ct.cc_flags = CC_DONTBLOCK;
		err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
		if (err) {
			rfs4_dbe_unlock(fp->rf_dbe);
			if (share_a || share_d)
				(void) rfs4_unshare(sp);
			rfs4_dbe_unlock(sp->rs_dbe);
			rfs4_file_rele(fp);

			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
			rfs4_state_rele(sp);
			/* check if a monitor detected a delegation conflict */
			if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
				resp->status = NFS4ERR_DELAY;
			else
				resp->status = NFS4ERR_SERVERFAULT;
			return;
		}
	} else { /* open upgrade */
		/*
		 * calculate the fflags for the new mode that is being added
		 * by this upgrade.
		 */
		fflags = 0;
		if (open_a & OPEN4_SHARE_ACCESS_READ)
			fflags |= FREAD;
		if (open_a & OPEN4_SHARE_ACCESS_WRITE)
			fflags |= FWRITE;
		vn_open_upgrade(cs->vp, fflags);
	}
	/* Record the newly granted modes on the state ... */
	sp->rs_open_access |= access;
	sp->rs_open_deny |= deny;

	/* ... and count only the bits this open newly added on the file. */
	if (open_d & OPEN4_SHARE_DENY_READ)
		fp->rf_deny_read++;
	if (open_d & OPEN4_SHARE_DENY_WRITE)
		fp->rf_deny_write++;
	fp->rf_share_deny |= deny;

	if (open_a & OPEN4_SHARE_ACCESS_READ)
		fp->rf_access_read++;
	if (open_a & OPEN4_SHARE_ACCESS_WRITE)
		fp->rf_access_write++;
	fp->rf_share_access |= access;

	/*
	 * Check for delegation here. if the deleg argument is not
	 * DELEG_ANY, then this is a reclaim from a client and
	 * we must honor the delegation requested. If necessary we can
	 * set the recall flag.
	 */

	dsp = rfs4_grant_delegation(deleg, sp, &recall);

	/* Remember in the compound state whether a write delegation is out. */
	cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);

	next_stateid(&sp->rs_stateid);

	resp->stateid = sp->rs_stateid.stateid;

	rfs4_dbe_unlock(fp->rf_dbe);
	rfs4_dbe_unlock(sp->rs_dbe);

	if (dsp) {
		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
		rfs4_deleg_state_rele(dsp);
	}

	rfs4_file_rele(fp);
	rfs4_state_rele(sp);

	resp->status = NFS4_OK;
}
7285
7286 /*ARGSUSED*/
7287 static void
rfs4_do_openfh(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7288 rfs4_do_openfh(struct compound_state *cs, struct svc_req *req, OPEN4args *args,
7289 rfs4_openowner_t *oo, OPEN4res *resp)
7290 {
7291 /* cs->vp and cs->fh have been updated by putfh. */
7292 rfs4_do_open(cs, req, oo, DELEG_ANY,
7293 (args->share_access & 0xff), args->share_deny, resp, 0);
7294 }
7295
7296 /*ARGSUSED*/
7297 static void
rfs4_do_opennull(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7298 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
7299 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7300 {
7301 change_info4 *cinfo = &resp->cinfo;
7302 bitmap4 *attrset = &resp->attrset;
7303
7304 if (args->opentype == OPEN4_NOCREATE)
7305 resp->status = rfs4_lookupfile(&args->claim.open_claim4_u.file,
7306 req, cs, args->share_access, cinfo);
7307 else {
7308 /* inhibit delegation grants during exclusive create */
7309
7310 if (args->mode == EXCLUSIVE4)
7311 rfs4_disable_delegation();
7312
7313 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7314 oo->ro_client->rc_clientid);
7315 }
7316
7317 if (resp->status == NFS4_OK) {
7318
7319 /* cs->vp cs->fh now reference the desired file */
7320
7321 rfs4_do_open(cs, req, oo,
7322 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7323 args->share_access, args->share_deny, resp, 0);
7324
7325 /*
7326 * If rfs4_createfile set attrset, we must
7327 * clear this attrset before the response is copied.
7328 */
7329 if (resp->status != NFS4_OK && resp->attrset) {
7330 resp->attrset = 0;
7331 }
7332 }
7333 else
7334 *cs->statusp = resp->status;
7335
7336 if (args->mode == EXCLUSIVE4)
7337 rfs4_enable_delegation();
7338 }
7339
7340 /*ARGSUSED*/
7341 static void
rfs4_do_openprev(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7342 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7343 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7344 {
7345 change_info4 *cinfo = &resp->cinfo;
7346 vattr_t va;
7347 vtype_t v_type = cs->vp->v_type;
7348 int error = 0;
7349
7350 /* Verify that we have a regular file */
7351 if (v_type != VREG) {
7352 if (v_type == VDIR)
7353 resp->status = NFS4ERR_ISDIR;
7354 else if (v_type == VLNK)
7355 resp->status = NFS4ERR_SYMLINK;
7356 else
7357 resp->status = NFS4ERR_INVAL;
7358 return;
7359 }
7360
7361 va.va_mask = AT_MODE|AT_UID;
7362 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7363 if (error) {
7364 resp->status = puterrno4(error);
7365 return;
7366 }
7367
7368 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7369
7370 /*
7371 * Check if we have access to the file, Note the the file
7372 * could have originally been open UNCHECKED or GUARDED
7373 * with mode bits that will now fail, but there is nothing
7374 * we can really do about that except in the case that the
7375 * owner of the file is the one requesting the open.
7376 */
7377 if (crgetuid(cs->cr) != va.va_uid) {
7378 resp->status = check_open_access(args->share_access, cs, req);
7379 if (resp->status != NFS4_OK) {
7380 return;
7381 }
7382 }
7383
7384 /*
7385 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7386 */
7387 cinfo->before = 0;
7388 cinfo->after = 0;
7389 cinfo->atomic = FALSE;
7390
7391 rfs4_do_open(cs, req, oo,
7392 NFS4_DELEG4TYPE2REQTYPE(args->claim.open_claim4_u.delegate_type),
7393 args->share_access, args->share_deny, resp, 0);
7394 }
7395
7396 static void
rfs4_do_opendelcur(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7397 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7398 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7399 {
7400 int error;
7401 nfsstat4 status;
7402 stateid4 stateid =
7403 args->claim.open_claim4_u.delegate_cur_info.delegate_stateid;
7404 rfs4_deleg_state_t *dsp;
7405
7406 /*
7407 * Find the state info from the stateid and confirm that the
7408 * file is delegated. If the state openowner is the same as
7409 * the supplied openowner we're done. If not, get the file
7410 * info from the found state info. Use that file info to
7411 * create the state for this lock owner. Note solaris doen't
7412 * really need the pathname to find the file. We may want to
7413 * lookup the pathname and make sure that the vp exist and
7414 * matches the vp in the file structure. However it is
7415 * possible that the pathname nolonger exists (local process
7416 * unlinks the file), so this may not be that useful.
7417 */
7418
7419 status = rfs4_get_deleg_state(&stateid, &dsp);
7420 if (status != NFS4_OK) {
7421 resp->status = status;
7422 return;
7423 }
7424
7425 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7426
7427 /*
7428 * New lock owner, create state. Since this was probably called
7429 * in response to a CB_RECALL we set deleg to DELEG_NONE
7430 */
7431
7432 ASSERT(cs->vp != NULL);
7433 VN_RELE(cs->vp);
7434 VN_HOLD(dsp->rds_finfo->rf_vp);
7435 cs->vp = dsp->rds_finfo->rf_vp;
7436
7437 error = makefh4(&cs->fh, cs->vp, cs->exi);
7438 if (error != 0) {
7439 rfs4_deleg_state_rele(dsp);
7440 *cs->statusp = resp->status = puterrno4(error);
7441 return;
7442 }
7443
7444 /* Mark progress for delegation returns */
7445 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7446 rfs4_deleg_state_rele(dsp);
7447 rfs4_do_open(cs, req, oo, DELEG_NONE,
7448 args->share_access, args->share_deny, resp, 1);
7449 }
7450
7451 /*ARGSUSED*/
7452 static void
rfs4_do_opendelprev(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7453 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7454 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7455 {
7456 /*
7457 * Lookup the pathname, it must already exist since this file
7458 * was delegated.
7459 *
7460 * Find the file and state info for this vp and open owner pair.
7461 * check that they are in fact delegated.
7462 * check that the state access and deny modes are the same.
7463 *
7464 * Return the delgation possibly seting the recall flag.
7465 */
7466 rfs4_file_t *fp;
7467 rfs4_state_t *sp;
7468 bool_t create = FALSE;
7469 bool_t dcreate = FALSE;
7470 rfs4_deleg_state_t *dsp;
7471 nfsace4 *ace;
7472
7473 /* Note we ignore oflags */
7474 resp->status = rfs4_lookupfile(
7475 &args->claim.open_claim4_u.file_delegate_prev,
7476 req, cs, args->share_access, &resp->cinfo);
7477
7478 if (resp->status != NFS4_OK) {
7479 return;
7480 }
7481
7482 /* get the file struct and hold a lock on it during initial open */
7483 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7484 if (fp == NULL) {
7485 resp->status = NFS4ERR_RESOURCE;
7486 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7487 return;
7488 }
7489
7490 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7491 if (sp == NULL) {
7492 resp->status = NFS4ERR_SERVERFAULT;
7493 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7494 rw_exit(&fp->rf_file_rwlock);
7495 rfs4_file_rele(fp);
7496 return;
7497 }
7498
7499 rfs4_dbe_lock(sp->rs_dbe);
7500 rfs4_dbe_lock(fp->rf_dbe);
7501 if (args->share_access != sp->rs_share_access ||
7502 args->share_deny != sp->rs_share_deny ||
7503 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7504 NFS4_DEBUG(rfs4_debug,
7505 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7506 rfs4_dbe_unlock(fp->rf_dbe);
7507 rfs4_dbe_unlock(sp->rs_dbe);
7508 rfs4_file_rele(fp);
7509 rfs4_state_rele(sp);
7510 resp->status = NFS4ERR_SERVERFAULT;
7511 return;
7512 }
7513 rfs4_dbe_unlock(fp->rf_dbe);
7514 rfs4_dbe_unlock(sp->rs_dbe);
7515
7516 dsp = rfs4_finddeleg(sp, &dcreate);
7517 if (dsp == NULL) {
7518 rfs4_state_rele(sp);
7519 rfs4_file_rele(fp);
7520 resp->status = NFS4ERR_SERVERFAULT;
7521 return;
7522 }
7523
7524 next_stateid(&sp->rs_stateid);
7525
7526 resp->stateid = sp->rs_stateid.stateid;
7527
7528 resp->delegation.delegation_type = dsp->rds_dtype;
7529
7530 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7531 open_read_delegation4 *rv =
7532 &resp->delegation.open_delegation4_u.read;
7533
7534 rv->stateid = dsp->rds_delegid.stateid;
7535 rv->recall = FALSE; /* no policy in place to set to TRUE */
7536 ace = &rv->permissions;
7537 } else {
7538 open_write_delegation4 *rv =
7539 &resp->delegation.open_delegation4_u.write;
7540
7541 rv->stateid = dsp->rds_delegid.stateid;
7542 rv->recall = FALSE; /* no policy in place to set to TRUE */
7543 ace = &rv->permissions;
7544 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7545 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7546 }
7547
7548 /* XXX For now */
7549 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7550 ace->flag = 0;
7551 ace->access_mask = 0;
7552 ace->who.utf8string_len = 0;
7553 ace->who.utf8string_val = 0;
7554
7555 rfs4_deleg_state_rele(dsp);
7556 rfs4_state_rele(sp);
7557 rfs4_file_rele(fp);
7558 }
7559
/*
 * Result of comparing a request's sequence id against the last one
 * recorded for an owner.  The numeric values are 0, 1 and 2 respectively.
 */
typedef enum {
	NFS4_CHKSEQ_OKAY,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* same seqid and same op as the last reply */
	NFS4_CHKSEQ_BAD		/* anything else */
} rfs4_chkseq_t;
7565
7566 /*
7567 * Generic function for sequence number checks.
7568 */
7569 static rfs4_chkseq_t
rfs4_check_seqid(seqid4 seqid,nfs_resop4 * lastop,seqid4 rqst_seq,nfs_resop4 * resop,bool_t copyres)7570 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7571 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7572 {
7573 /* Same sequence ids and matching operations? */
7574 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7575 if (copyres == TRUE) {
7576 rfs4_free_reply(resop);
7577 rfs4_copy_reply(resop, lastop);
7578 }
7579 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7580 "Replayed SEQID %d\n", seqid));
7581 return (NFS4_CHKSEQ_REPLAY);
7582 }
7583
7584 /* If the incoming sequence is not the next expected then it is bad */
7585 if (rqst_seq != seqid + 1) {
7586 if (rqst_seq == seqid) {
7587 NFS4_DEBUG(rfs4_debug,
7588 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7589 "but last op was %d current op is %d\n",
7590 lastop->resop, resop->resop));
7591 return (NFS4_CHKSEQ_BAD);
7592 }
7593 NFS4_DEBUG(rfs4_debug,
7594 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7595 rqst_seq, seqid));
7596 return (NFS4_CHKSEQ_BAD);
7597 }
7598
7599 /* Everything okay -- next expected */
7600 return (NFS4_CHKSEQ_OKAY);
7601 }
7602
7603
7604 static rfs4_chkseq_t
rfs4_check_open_seqid(seqid4 seqid,rfs4_openowner_t * op,nfs_resop4 * resop,const compound_state_t * cs)7605 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop,
7606 const compound_state_t *cs)
7607 {
7608 rfs4_chkseq_t rc;
7609
7610 if (rfs4_has_session(cs))
7611 return (NFS4_CHKSEQ_OKAY);
7612
7613 rfs4_dbe_lock(op->ro_dbe);
7614 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7615 TRUE);
7616 rfs4_dbe_unlock(op->ro_dbe);
7617
7618 if (rc == NFS4_CHKSEQ_OKAY)
7619 rfs4_update_lease(op->ro_client);
7620
7621 return (rc);
7622 }
7623
7624 static rfs4_chkseq_t
rfs4_check_olo_seqid(seqid4 olo_seqid,rfs4_openowner_t * op,nfs_resop4 * resop)7625 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7626 {
7627 rfs4_chkseq_t rc;
7628
7629 rfs4_dbe_lock(op->ro_dbe);
7630 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7631 olo_seqid, resop, FALSE);
7632 rfs4_dbe_unlock(op->ro_dbe);
7633
7634 return (rc);
7635 }
7636
7637 static rfs4_chkseq_t
rfs4_check_lock_seqid(seqid4 seqid,rfs4_lo_state_t * lsp,nfs_resop4 * resop)7638 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7639 {
7640 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7641
7642 rfs4_dbe_lock(lsp->rls_dbe);
7643 if (!lsp->rls_skip_seqid_check)
7644 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7645 resop, TRUE);
7646 rfs4_dbe_unlock(lsp->rls_dbe);
7647
7648 return (rc);
7649 }
7650
7651 static void
rfs4_op_open(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7652 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7653 struct svc_req *req, struct compound_state *cs)
7654 {
7655 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7656 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7657 open_owner4 *owner = &args->owner;
7658 open_claim_type4 claim = args->claim.claim;
7659 rfs4_client_t *cp;
7660 rfs4_openowner_t *oo;
7661 bool_t create;
7662 bool_t replay = FALSE;
7663 int can_reclaim;
7664
7665 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7666 OPEN4args *, args);
7667
7668 if (cs->vp == NULL) {
7669 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7670 goto end;
7671 }
7672
7673 /* rfc5661 section 18.16.3 */
7674 if (rfs4_has_session(cs))
7675 owner->clientid = cs->client->rc_clientid;
7676
7677 /*
7678 * Need to check clientid and lease expiration first based on
7679 * error ordering and incrementing sequence id.
7680 */
7681 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7682 if (cp == NULL) {
7683 *cs->statusp = resp->status =
7684 rfs4_check_clientid(&owner->clientid, 0);
7685 goto end;
7686 }
7687
7688 if (rfs4_lease_expired(cp)) {
7689 rfs4_client_close(cp);
7690 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7691 goto end;
7692 }
7693 can_reclaim = cp->rc_can_reclaim;
7694
7695 /*
7696 * RFC8881 18.51.3
7697 * If non-reclaim locking operations are done before the
7698 * RECLAIM_COMPLETE, error NFS4ERR_GRACE will be returned
7699 */
7700 if (rfs4_has_session(cs) && !cp->rc_reclaim_completed &&
7701 claim != CLAIM_PREVIOUS) {
7702 rfs4_client_rele(cp);
7703 *cs->statusp = resp->status = NFS4ERR_GRACE;
7704 goto end;
7705 }
7706
7707 /*
7708 * Find the open_owner for use from this point forward. Take
7709 * care in updating the sequence id based on the type of error
7710 * being returned.
7711 */
7712 retry:
7713 create = TRUE;
7714 oo = rfs4_findopenowner(owner, &create, args->seqid);
7715 if (oo == NULL) {
7716 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7717 rfs4_client_rele(cp);
7718 goto end;
7719 }
7720
7721 /*
7722 * OPEN_CONFIRM must not be implemented in v4.1
7723 */
7724 if (rfs4_has_session(cs)) {
7725 oo->ro_need_confirm = FALSE;
7726 }
7727
7728 /* Hold off access to the sequence space while the open is done */
7729 /* Workaround to avoid deadlock */
7730 if (!rfs4_has_session(cs))
7731 rfs4_sw_enter(&oo->ro_sw);
7732
7733 /*
7734 * If the open_owner existed before at the server, then check
7735 * the sequence id.
7736 */
7737 if (!create && !oo->ro_postpone_confirm) {
7738 switch (rfs4_check_open_seqid(args->seqid, oo, resop, cs)) {
7739 case NFS4_CHKSEQ_BAD:
7740 ASSERT(!rfs4_has_session(cs));
7741 if ((args->seqid > oo->ro_open_seqid) &&
7742 oo->ro_need_confirm) {
7743 rfs4_free_opens(oo, TRUE, FALSE);
7744 rfs4_sw_exit(&oo->ro_sw);
7745 rfs4_openowner_rele(oo);
7746 goto retry;
7747 }
7748 resp->status = NFS4ERR_BAD_SEQID;
7749 goto out;
7750 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7751 replay = TRUE;
7752 goto out;
7753 default:
7754 break;
7755 }
7756
7757 /*
7758 * Sequence was ok and open owner exists
7759 * check to see if we have yet to see an
7760 * open_confirm.
7761 */
7762 if (oo->ro_need_confirm) {
7763 rfs4_free_opens(oo, TRUE, FALSE);
7764 ASSERT(!rfs4_has_session(cs));
7765 rfs4_sw_exit(&oo->ro_sw);
7766 rfs4_openowner_rele(oo);
7767 goto retry;
7768 }
7769 }
7770 /* Grace only applies to regular-type OPENs */
7771 if (rfs4_clnt_in_grace(cp) &&
7772 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR ||
7773 claim == CLAIM_FH)) {
7774 *cs->statusp = resp->status = NFS4ERR_GRACE;
7775 goto out;
7776 }
7777
7778 /*
7779 * If previous state at the server existed then can_reclaim
7780 * will be set. If not reply NFS4ERR_NO_GRACE to the
7781 * client.
7782 */
7783 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7784 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7785 goto out;
7786 }
7787
7788
7789 /*
7790 * Reject the open if the client has missed the grace period
7791 */
7792 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7793 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7794 goto out;
7795 }
7796
7797 /* Couple of up-front bookkeeping items */
7798 if (oo->ro_need_confirm) {
7799 /*
7800 * If this is a reclaim OPEN then we should not ask
7801 * for a confirmation of the open_owner per the
7802 * protocol specification.
7803 */
7804 if (claim == CLAIM_PREVIOUS)
7805 oo->ro_need_confirm = FALSE;
7806 else
7807 resp->rflags |= OPEN4_RESULT_CONFIRM;
7808 }
7809 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7810
7811 /*
7812 * If there is an unshared filesystem mounted on this vnode,
7813 * do not allow to open/create in this directory.
7814 */
7815 if (vn_ismntpt(cs->vp)) {
7816 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7817 goto out;
7818 }
7819
7820 /*
7821 * access must READ, WRITE, or BOTH. No access is invalid.
7822 * deny can be READ, WRITE, BOTH, or NONE.
7823 * bits not defined for access/deny are invalid.
7824 */
7825 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7826 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7827 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7828 *cs->statusp = resp->status = NFS4ERR_INVAL;
7829 goto out;
7830 }
7831
7832
7833 /*
7834 * make sure attrset is zero before response is built.
7835 */
7836 resp->attrset = 0;
7837
7838 switch (claim) {
7839 case CLAIM_NULL:
7840 rfs4_do_opennull(cs, req, args, oo, resp);
7841 break;
7842 case CLAIM_PREVIOUS:
7843 rfs4_do_openprev(cs, req, args, oo, resp);
7844 break;
7845 case CLAIM_DELEGATE_CUR:
7846 rfs4_do_opendelcur(cs, req, args, oo, resp);
7847 break;
7848 case CLAIM_DELEGATE_PREV:
7849 rfs4_do_opendelprev(cs, req, args, oo, resp);
7850 break;
7851 case CLAIM_FH:
7852 rfs4_do_openfh(cs, req, args, oo, resp);
7853 break;
7854 default:
7855 resp->status = NFS4ERR_INVAL;
7856 break;
7857 }
7858
7859 out:
7860 rfs4_client_rele(cp);
7861
7862 /* Catch sequence id handling here to make it a little easier */
7863 switch (resp->status) {
7864 case NFS4ERR_BADXDR:
7865 case NFS4ERR_BAD_SEQID:
7866 case NFS4ERR_BAD_STATEID:
7867 case NFS4ERR_NOFILEHANDLE:
7868 case NFS4ERR_RESOURCE:
7869 case NFS4ERR_STALE_CLIENTID:
7870 case NFS4ERR_STALE_STATEID:
7871 /*
7872 * The protocol states that if any of these errors are
7873 * being returned, the sequence id should not be
7874 * incremented. Any other return requires an
7875 * increment.
7876 */
7877 break;
7878 default:
7879 /* Always update the lease in this case */
7880 rfs4_update_lease(oo->ro_client);
7881
7882 /* Regular response - copy the result */
7883 if (!replay)
7884 rfs4_update_open_resp(oo, resop, &cs->fh);
7885
7886 /*
7887 * REPLAY case: Only if the previous response was OK
7888 * do we copy the filehandle. If not OK, no
7889 * filehandle to copy.
7890 */
7891 if (replay == TRUE &&
7892 resp->status == NFS4_OK &&
7893 oo->ro_reply_fh.nfs_fh4_val) {
7894 /*
7895 * If this is a replay, we must restore the
7896 * current filehandle/vp to that of what was
7897 * returned originally. Try our best to do
7898 * it.
7899 */
7900 nfs_fh4_fmt_t *fh_fmtp =
7901 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7902
7903 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7904 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7905
7906 if (cs->exi == NULL) {
7907 resp->status = NFS4ERR_STALE;
7908 goto finish;
7909 }
7910
7911 VN_RELE(cs->vp);
7912
7913 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7914 &resp->status);
7915
7916 if (cs->vp == NULL)
7917 goto finish;
7918
7919 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7920 }
7921
7922 /*
7923 * If this was a replay, no need to update the
7924 * sequence id. If the open_owner was not created on
7925 * this pass, then update. The first use of an
7926 * open_owner will not bump the sequence id.
7927 */
7928 if (replay == FALSE && !create)
7929 rfs4_update_open_sequence(oo);
7930 /*
7931 * If the client is receiving an error and the
7932 * open_owner needs to be confirmed, there is no way
7933 * to notify the client of this fact ignoring the fact
7934 * that the server has no method of returning a
7935 * stateid to confirm. Therefore, the server needs to
7936 * mark this open_owner in a way as to avoid the
7937 * sequence id checking the next time the client uses
7938 * this open_owner.
7939 */
7940 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7941 oo->ro_postpone_confirm = TRUE;
7942 /*
7943 * If OK response then clear the postpone flag and
7944 * reset the sequence id to keep in sync with the
7945 * client.
7946 */
7947 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7948 oo->ro_postpone_confirm = FALSE;
7949 oo->ro_open_seqid = args->seqid;
7950 }
7951 break;
7952 }
7953
7954 finish:
7955 *cs->statusp = resp->status;
7956
7957 if (!rfs4_has_session(cs))
7958 rfs4_sw_exit(&oo->ro_sw);
7959 rfs4_openowner_rele(oo);
7960
7961 put_stateid4(cs, &resp->stateid);
7962 end:
7963 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7964 OPEN4res *, resp);
7965 }
7966
7967 /*ARGSUSED*/
7968 void
rfs4_op_open_confirm(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7969 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7970 struct svc_req *req, struct compound_state *cs)
7971 {
7972 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7973 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7974 rfs4_state_t *sp;
7975 nfsstat4 status;
7976
7977 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7978 OPEN_CONFIRM4args *, args);
7979
7980 ASSERT(!rfs4_has_session(cs));
7981
7982 if (cs->vp == NULL) {
7983 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7984 goto out;
7985 }
7986
7987 if (cs->vp->v_type != VREG) {
7988 *cs->statusp = resp->status =
7989 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7990 return;
7991 }
7992
7993 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7994 if (status != NFS4_OK) {
7995 *cs->statusp = resp->status = status;
7996 goto out;
7997 }
7998
7999 /* Ensure specified filehandle matches */
8000 if (cs->vp != sp->rs_finfo->rf_vp) {
8001 rfs4_state_rele(sp);
8002 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8003 goto out;
8004 }
8005
8006 /* hold off other access to open_owner while we tinker */
8007 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8008
8009 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8010 case NFS4_CHECK_STATEID_OKAY:
8011 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8012 resop, cs) != 0) {
8013 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8014 break;
8015 }
8016 /*
8017 * If it is the appropriate stateid and determined to
8018 * be "OKAY" then this means that the stateid does not
8019 * need to be confirmed and the client is in error for
8020 * sending an OPEN_CONFIRM.
8021 */
8022 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8023 break;
8024 case NFS4_CHECK_STATEID_OLD:
8025 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8026 break;
8027 case NFS4_CHECK_STATEID_BAD:
8028 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8029 break;
8030 case NFS4_CHECK_STATEID_EXPIRED:
8031 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8032 break;
8033 case NFS4_CHECK_STATEID_CLOSED:
8034 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8035 break;
8036 case NFS4_CHECK_STATEID_REPLAY:
8037 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8038 resop, cs)) {
8039 case NFS4_CHKSEQ_OKAY:
8040 /*
8041 * This is replayed stateid; if seqid matches
8042 * next expected, then client is using wrong seqid.
8043 */
8044 /* fall through */
8045 case NFS4_CHKSEQ_BAD:
8046 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8047 break;
8048 case NFS4_CHKSEQ_REPLAY:
8049 /*
8050 * Note this case is the duplicate case so
8051 * resp->status is already set.
8052 */
8053 *cs->statusp = resp->status;
8054 rfs4_update_lease(sp->rs_owner->ro_client);
8055 break;
8056 }
8057 break;
8058 case NFS4_CHECK_STATEID_UNCONFIRMED:
8059 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8060 resop, cs) != NFS4_CHKSEQ_OKAY) {
8061 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8062 break;
8063 }
8064 *cs->statusp = resp->status = NFS4_OK;
8065
8066 next_stateid(&sp->rs_stateid);
8067 resp->open_stateid = sp->rs_stateid.stateid;
8068 sp->rs_owner->ro_need_confirm = FALSE;
8069 rfs4_update_lease(sp->rs_owner->ro_client);
8070 rfs4_update_open_sequence(sp->rs_owner);
8071 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8072 break;
8073 default:
8074 ASSERT(FALSE);
8075 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8076 break;
8077 }
8078 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8079 rfs4_state_rele(sp);
8080
8081 out:
8082 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
8083 OPEN_CONFIRM4res *, resp);
8084 }
8085
8086 /*ARGSUSED*/
8087 void
rfs4_op_open_downgrade(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8088 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
8089 struct svc_req *req, struct compound_state *cs)
8090 {
8091 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
8092 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
8093 uint32_t access = args->share_access;
8094 uint32_t deny = args->share_deny;
8095 nfsstat4 status;
8096 rfs4_state_t *sp;
8097 rfs4_file_t *fp;
8098 int fflags = 0;
8099
8100 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
8101 OPEN_DOWNGRADE4args *, args);
8102
8103 if (cs->vp == NULL) {
8104 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8105 goto out;
8106 }
8107
8108 if (cs->vp->v_type != VREG) {
8109 *cs->statusp = resp->status = NFS4ERR_INVAL;
8110 return;
8111 }
8112
8113 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
8114 if (status != NFS4_OK) {
8115 *cs->statusp = resp->status = status;
8116 goto out;
8117 }
8118
8119 /* Ensure specified filehandle matches */
8120 if (cs->vp != sp->rs_finfo->rf_vp) {
8121 rfs4_state_rele(sp);
8122 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8123 goto out;
8124 }
8125
8126 /* hold off other access to open_owner while we tinker */
8127 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8128
8129 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8130 case NFS4_CHECK_STATEID_OKAY:
8131 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8132 resop, cs) != NFS4_CHKSEQ_OKAY) {
8133 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8134 goto end;
8135 }
8136 break;
8137 case NFS4_CHECK_STATEID_OLD:
8138 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8139 goto end;
8140 case NFS4_CHECK_STATEID_BAD:
8141 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8142 goto end;
8143 case NFS4_CHECK_STATEID_EXPIRED:
8144 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8145 goto end;
8146 case NFS4_CHECK_STATEID_CLOSED:
8147 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8148 goto end;
8149 case NFS4_CHECK_STATEID_UNCONFIRMED:
8150 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8151 goto end;
8152 case NFS4_CHECK_STATEID_REPLAY:
8153 ASSERT(!rfs4_has_session(cs));
8154
8155 /* Check the sequence id for the open owner */
8156 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8157 resop, cs)) {
8158 case NFS4_CHKSEQ_OKAY:
8159 /*
8160 * This is replayed stateid; if seqid matches
8161 * next expected, then client is using wrong seqid.
8162 */
8163 /* fall through */
8164 case NFS4_CHKSEQ_BAD:
8165 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8166 goto end;
8167 case NFS4_CHKSEQ_REPLAY:
8168 /*
8169 * Note this case is the duplicate case so
8170 * resp->status is already set.
8171 */
8172 *cs->statusp = resp->status;
8173 rfs4_update_lease(sp->rs_owner->ro_client);
8174 goto end;
8175 }
8176 break;
8177 default:
8178 ASSERT(FALSE);
8179 break;
8180 }
8181
8182 rfs4_dbe_lock(sp->rs_dbe);
8183 /*
8184 * Check that the new access modes and deny modes are valid.
8185 * Check that no invalid bits are set.
8186 */
8187 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
8188 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
8189 *cs->statusp = resp->status = NFS4ERR_INVAL;
8190 rfs4_update_open_sequence(sp->rs_owner);
8191 rfs4_dbe_unlock(sp->rs_dbe);
8192 goto end;
8193 }
8194
8195 /*
8196 * The new modes must be a subset of the current modes and
8197 * the access must specify at least one mode. To test that
8198 * the new mode is a subset of the current modes we bitwise
8199 * AND them together and check that the result equals the new
8200 * mode. For example:
8201 * New mode, access == R and current mode, sp->rs_open_access == RW
8202 * access & sp->rs_open_access == R == access, so the new access mode
8203 * is valid. Consider access == RW, sp->rs_open_access = R
8204 * access & sp->rs_open_access == R != access, so the new access mode
8205 * is invalid.
8206 */
8207 if ((access & sp->rs_open_access) != access ||
8208 (deny & sp->rs_open_deny) != deny ||
8209 (access &
8210 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
8211 *cs->statusp = resp->status = NFS4ERR_INVAL;
8212 rfs4_update_open_sequence(sp->rs_owner);
8213 rfs4_dbe_unlock(sp->rs_dbe);
8214 goto end;
8215 }
8216
8217 /*
8218 * Release any share locks associated with this stateID.
8219 * Strictly speaking, this violates the spec because the
8220 * spec effectively requires that open downgrade be atomic.
8221 * At present, fs_shrlock does not have this capability.
8222 */
8223 (void) rfs4_unshare(sp);
8224
8225 status = rfs4_share(sp, access, deny);
8226 if (status != NFS4_OK) {
8227 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8228 rfs4_update_open_sequence(sp->rs_owner);
8229 rfs4_dbe_unlock(sp->rs_dbe);
8230 goto end;
8231 }
8232
8233 fp = sp->rs_finfo;
8234 rfs4_dbe_lock(fp->rf_dbe);
8235
8236 /*
8237 * If the current mode has deny read and the new mode
8238 * does not, decrement the number of deny read mode bits
8239 * and if it goes to zero turn off the deny read bit
8240 * on the file.
8241 */
8242 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
8243 (deny & OPEN4_SHARE_DENY_READ) == 0) {
8244 fp->rf_deny_read--;
8245 if (fp->rf_deny_read == 0)
8246 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8247 }
8248
8249 /*
8250 * If the current mode has deny write and the new mode
8251 * does not, decrement the number of deny write mode bits
8252 * and if it goes to zero turn off the deny write bit
8253 * on the file.
8254 */
8255 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
8256 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
8257 fp->rf_deny_write--;
8258 if (fp->rf_deny_write == 0)
8259 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8260 }
8261
8262 /*
8263 * If the current mode has access read and the new mode
8264 * does not, decrement the number of access read mode bits
8265 * and if it goes to zero turn off the access read bit
8266 * on the file. set fflags to FREAD for the call to
8267 * vn_open_downgrade().
8268 */
8269 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
8270 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
8271 fp->rf_access_read--;
8272 if (fp->rf_access_read == 0)
8273 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8274 fflags |= FREAD;
8275 }
8276
8277 /*
8278 * If the current mode has access write and the new mode
8279 * does not, decrement the number of access write mode bits
8280 * and if it goes to zero turn off the access write bit
8281 * on the file. set fflags to FWRITE for the call to
8282 * vn_open_downgrade().
8283 */
8284 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
8285 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8286 fp->rf_access_write--;
8287 if (fp->rf_access_write == 0)
8288 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
8289 fflags |= FWRITE;
8290 }
8291
8292 /* Check that the file is still accessible */
8293 ASSERT(fp->rf_share_access);
8294
8295 rfs4_dbe_unlock(fp->rf_dbe);
8296
8297 /* now set the new open access and deny modes */
8298 sp->rs_open_access = access;
8299 sp->rs_open_deny = deny;
8300
8301 /*
8302 * we successfully downgraded the share lock, now we need to downgrade
8303 * the open. it is possible that the downgrade was only for a deny
8304 * mode and we have nothing else to do.
8305 */
8306 if ((fflags & (FREAD|FWRITE)) != 0)
8307 vn_open_downgrade(cs->vp, fflags);
8308
8309 /* Update the stateid */
8310 next_stateid(&sp->rs_stateid);
8311 resp->open_stateid = sp->rs_stateid.stateid;
8312
8313 rfs4_dbe_unlock(sp->rs_dbe);
8314
8315 *cs->statusp = resp->status = NFS4_OK;
8316 /* Update the lease */
8317 rfs4_update_lease(sp->rs_owner->ro_client);
8318 /* And the sequence */
8319 rfs4_update_open_sequence(sp->rs_owner);
8320 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8321
8322 end:
8323 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8324 rfs4_state_rele(sp);
8325 out:
8326 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
8327 OPEN_DOWNGRADE4res *, resp);
8328 }
8329
/*
 * Locate the NUL-terminated string s2 inside the first n bytes of the
 * (not necessarily NUL-terminated) buffer s1.
 *
 * Returns a pointer to the first match within s1, or NULL when s2 does
 * not occur in those n bytes.  An empty s2 matches at s1 itself.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t patlen = strlen(s2);
	char *cur;

	/* Stop once fewer than patlen bytes remain to compare. */
	for (cur = (char *)s1; n >= patlen; cur++, n--) {
		if (bcmp(cur, s2, patlen) == 0)
			return (cur);
	}

	return (NULL);
}
8345
8346 /*
8347 * The logic behind this function is detailed in the NFSv4 RFC in the
8348 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
8349 * that section for explicit guidance to server behavior for
8350 * SETCLIENTID.
8351 */
8352 void
rfs4_op_setclientid(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8353 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8354 struct svc_req *req, struct compound_state *cs)
8355 {
8356 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8357 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8358 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8359 rfs4_clntip_t *ci;
8360 bool_t create;
8361 char *addr, *netid;
8362 int len;
8363
8364 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8365 SETCLIENTID4args *, args);
8366 retry:
8367 newcp = cp_confirmed = cp_unconfirmed = NULL;
8368
8369 /*
8370 * Save the caller's IP address
8371 */
8372 args->client.cl_addr =
8373 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8374
8375 /*
8376 * Record if it is a Solaris client that cannot handle referrals.
8377 */
8378 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8379 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8380 /* Add a "yes, it's downrev" record */
8381 create = TRUE;
8382 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8383 ASSERT(ci != NULL);
8384 rfs4_dbe_rele(ci->ri_dbe);
8385 } else {
8386 /* Remove any previous record */
8387 rfs4_invalidate_clntip(args->client.cl_addr);
8388 }
8389
8390 /*
8391 * In search of an EXISTING client matching the incoming
8392 * request to establish a new client identifier at the server
8393 */
8394 create = TRUE;
8395 cp = rfs4_findclient(&args->client, &create, NULL);
8396
8397 /* Should never happen */
8398 ASSERT(cp != NULL);
8399
8400 if (cp == NULL) {
8401 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8402 goto out;
8403 }
8404
8405 /*
8406 * Easiest case. Client identifier is newly created and is
8407 * unconfirmed. Also note that for this case, no other
8408 * entries exist for the client identifier. Nothing else to
8409 * check. Just setup the response and respond.
8410 */
8411 if (create) {
8412 *cs->statusp = res->status = NFS4_OK;
8413 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8414 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8415 cp->rc_confirm_verf;
8416 /* Setup callback information; CB_NULL confirmation later */
8417 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8418
8419 rfs4_client_rele(cp);
8420 goto out;
8421 }
8422
8423 /*
8424 * An existing, confirmed client may exist but it may not have
8425 * been active for at least one lease period. If so, then
8426 * "close" the client and create a new client identifier
8427 */
8428 if (rfs4_lease_expired(cp)) {
8429 rfs4_client_close(cp);
8430 goto retry;
8431 }
8432
8433 if (cp->rc_need_confirm == TRUE)
8434 cp_unconfirmed = cp;
8435 else
8436 cp_confirmed = cp;
8437
8438 cp = NULL;
8439
8440 /*
8441 * We have a confirmed client, now check for an
8442 * unconfimred entry
8443 */
8444 if (cp_confirmed) {
8445 /* If creds don't match then client identifier is inuse */
8446 if (!creds_ok(&cp_confirmed->rc_cr_set, req, cs)) {
8447 rfs4_cbinfo_t *cbp;
8448 /*
8449 * Some one else has established this client
8450 * id. Try and say * who they are. We will use
8451 * the call back address supplied by * the
8452 * first client.
8453 */
8454 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8455
8456 addr = netid = NULL;
8457
8458 cbp = &cp_confirmed->rc_cbinfo;
8459 if (cbp->cb_callback.cb_location.r_addr &&
8460 cbp->cb_callback.cb_location.r_netid) {
8461 cb_client4 *cbcp = &cbp->cb_callback;
8462
8463 len = strlen(cbcp->cb_location.r_addr)+1;
8464 addr = kmem_alloc(len, KM_SLEEP);
8465 bcopy(cbcp->cb_location.r_addr, addr, len);
8466 len = strlen(cbcp->cb_location.r_netid)+1;
8467 netid = kmem_alloc(len, KM_SLEEP);
8468 bcopy(cbcp->cb_location.r_netid, netid, len);
8469 }
8470
8471 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8472 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8473
8474 rfs4_client_rele(cp_confirmed);
8475 }
8476
8477 /*
8478 * Confirmed, creds match, and verifier matches; must
8479 * be an update of the callback info
8480 */
8481 if (cp_confirmed->rc_nfs_client.verifier ==
8482 args->client.verifier) {
8483 /* Setup callback information */
8484 rfs4_client_setcb(cp_confirmed, &args->callback,
8485 args->callback_ident);
8486
8487 /* everything okay -- move ahead */
8488 *cs->statusp = res->status = NFS4_OK;
8489 res->SETCLIENTID4res_u.resok4.clientid =
8490 cp_confirmed->rc_clientid;
8491
8492 /* update the confirm_verifier and return it */
8493 rfs4_client_scv_next(cp_confirmed);
8494 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8495 cp_confirmed->rc_confirm_verf;
8496
8497 rfs4_client_rele(cp_confirmed);
8498 goto out;
8499 }
8500
8501 /*
8502 * Creds match but the verifier doesn't. Must search
8503 * for an unconfirmed client that would be replaced by
8504 * this request.
8505 */
8506 create = FALSE;
8507 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8508 cp_confirmed);
8509 }
8510
8511 /*
8512 * At this point, we have taken care of the brand new client
8513 * struct, INUSE case, update of an existing, and confirmed
8514 * client struct.
8515 */
8516
8517 /*
8518 * check to see if things have changed while we originally
8519 * picked up the client struct. If they have, then return and
8520 * retry the processing of this SETCLIENTID request.
8521 */
8522 if (cp_unconfirmed) {
8523 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8524 if (!cp_unconfirmed->rc_need_confirm) {
8525 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8526 rfs4_client_rele(cp_unconfirmed);
8527 if (cp_confirmed)
8528 rfs4_client_rele(cp_confirmed);
8529 goto retry;
8530 }
8531 /* do away with the old unconfirmed one */
8532 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8533 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8534 rfs4_client_rele(cp_unconfirmed);
8535 cp_unconfirmed = NULL;
8536 }
8537
8538 /*
8539 * This search will temporarily hide the confirmed client
8540 * struct while a new client struct is created as the
8541 * unconfirmed one.
8542 */
8543 create = TRUE;
8544 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8545
8546 ASSERT(newcp != NULL);
8547
8548 if (newcp == NULL) {
8549 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8550 rfs4_client_rele(cp_confirmed);
8551 goto out;
8552 }
8553
8554 /*
8555 * If one was not created, then a similar request must be in
8556 * process so release and start over with this one
8557 */
8558 if (create != TRUE) {
8559 rfs4_client_rele(newcp);
8560 if (cp_confirmed)
8561 rfs4_client_rele(cp_confirmed);
8562 goto retry;
8563 }
8564
8565 *cs->statusp = res->status = NFS4_OK;
8566 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8567 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8568 newcp->rc_confirm_verf;
8569 /* Setup callback information; CB_NULL confirmation later */
8570 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8571
8572 newcp->rc_cp_confirmed = cp_confirmed;
8573
8574 rfs4_client_rele(newcp);
8575
8576 out:
8577 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8578 SETCLIENTID4res *, res);
8579 }
8580
8581 /*ARGSUSED*/
8582 void
rfs4_op_setclientid_confirm(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8583 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8584 struct svc_req *req, struct compound_state *cs)
8585 {
8586 SETCLIENTID_CONFIRM4args *args =
8587 &argop->nfs_argop4_u.opsetclientid_confirm;
8588 SETCLIENTID_CONFIRM4res *res =
8589 &resop->nfs_resop4_u.opsetclientid_confirm;
8590 rfs4_client_t *cp, *cptoclose = NULL;
8591 nfs4_srv_t *nsrv4;
8592
8593 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8594 struct compound_state *, cs,
8595 SETCLIENTID_CONFIRM4args *, args);
8596
8597 nsrv4 = nfs4_get_srv();
8598 *cs->statusp = res->status = NFS4_OK;
8599
8600 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8601
8602 if (cp == NULL) {
8603 *cs->statusp = res->status =
8604 rfs4_check_clientid(&args->clientid, 1);
8605 goto out;
8606 }
8607
8608 if (!creds_ok(&cp->rc_cr_set, req, cs)) {
8609 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8610 rfs4_client_rele(cp);
8611 goto out;
8612 }
8613
8614 /* If the verifier doesn't match, the record doesn't match */
8615 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8616 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8617 rfs4_client_rele(cp);
8618 goto out;
8619 }
8620
8621 rfs4_dbe_lock(cp->rc_dbe);
8622 cp->rc_need_confirm = FALSE;
8623 if (cp->rc_cp_confirmed) {
8624 cptoclose = cp->rc_cp_confirmed;
8625 cptoclose->rc_ss_remove = 1;
8626 cp->rc_cp_confirmed = NULL;
8627 }
8628
8629 /*
8630 * Update the client's associated server instance, if it's changed
8631 * since the client was created.
8632 */
8633 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8634 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8635
8636 /*
8637 * Record clientid in stable storage.
8638 * Must be done after server instance has been assigned.
8639 */
8640 rfs4_ss_clid(nsrv4, cp);
8641
8642 rfs4_dbe_unlock(cp->rc_dbe);
8643
8644 if (cptoclose)
8645 /* don't need to rele, client_close does it */
8646 rfs4_client_close(cptoclose);
8647
8648 /* If needed, initiate CB_NULL call for callback path */
8649 rfs4_deleg_cb_check(cp);
8650 rfs4_update_lease(cp);
8651
8652 /*
8653 * Check to see if client can perform reclaims
8654 */
8655 rfs4_ss_chkclid(nsrv4, cp);
8656
8657 rfs4_client_rele(cp);
8658
8659 out:
8660 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8661 struct compound_state *, cs,
8662 SETCLIENTID_CONFIRM4 *, res);
8663 }
8664
8665 extern stateid4 invalid_stateid;
8666
8667 /*ARGSUSED*/
8668 void
rfs4_op_close(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8669 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8670 struct svc_req *req, struct compound_state *cs)
8671 {
8672 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8673 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8674 rfs4_state_t *sp;
8675 nfsstat4 status;
8676
8677 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8678 CLOSE4args *, args);
8679
8680 if (cs->vp == NULL) {
8681 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8682 goto out;
8683 }
8684
8685 get_stateid4(cs, &args->open_stateid);
8686
8687 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8688 if (status != NFS4_OK) {
8689 *cs->statusp = resp->status = status;
8690 goto out;
8691 }
8692
8693 /* Ensure specified filehandle matches */
8694 if (cs->vp != sp->rs_finfo->rf_vp) {
8695 rfs4_state_rele(sp);
8696 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8697 goto out;
8698 }
8699
8700 /* hold off other access to open_owner while we tinker */
8701 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8702
8703 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8704 case NFS4_CHECK_STATEID_OKAY:
8705 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8706 resop, cs) != NFS4_CHKSEQ_OKAY) {
8707 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8708 goto end;
8709 }
8710 break;
8711 case NFS4_CHECK_STATEID_OLD:
8712 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8713 goto end;
8714 case NFS4_CHECK_STATEID_BAD:
8715 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8716 goto end;
8717 case NFS4_CHECK_STATEID_EXPIRED:
8718 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8719 goto end;
8720 case NFS4_CHECK_STATEID_CLOSED:
8721 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8722 goto end;
8723 case NFS4_CHECK_STATEID_UNCONFIRMED:
8724 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8725 goto end;
8726 case NFS4_CHECK_STATEID_REPLAY:
8727 ASSERT(!rfs4_has_session(cs));
8728
8729 /* Check the sequence id for the open owner */
8730 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8731 resop, cs)) {
8732 case NFS4_CHKSEQ_OKAY:
8733 /*
8734 * This is replayed stateid; if seqid matches
8735 * next expected, then client is using wrong seqid.
8736 */
8737 /* FALL THROUGH */
8738 case NFS4_CHKSEQ_BAD:
8739 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8740 goto end;
8741 case NFS4_CHKSEQ_REPLAY:
8742 /*
8743 * Note this case is the duplicate case so
8744 * resp->status is already set.
8745 */
8746 *cs->statusp = resp->status;
8747 rfs4_update_lease(sp->rs_owner->ro_client);
8748 goto end;
8749 }
8750 break;
8751 default:
8752 ASSERT(FALSE);
8753 break;
8754 }
8755
8756 rfs4_dbe_lock(sp->rs_dbe);
8757
8758 /* Update the stateid. */
8759 next_stateid(&sp->rs_stateid);
8760 rfs4_dbe_unlock(sp->rs_dbe);
8761
8762 rfs4_update_lease(sp->rs_owner->ro_client);
8763 rfs4_update_open_sequence(sp->rs_owner);
8764 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8765
8766 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8767
8768 /* See RFC8881 section 18.2.4, and RFC7530 section 16.2.5 */
8769 resp->open_stateid = invalid_stateid;
8770 *cs->statusp = resp->status = status;
8771
8772 end:
8773 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8774 rfs4_state_rele(sp);
8775 out:
8776 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8777 CLOSE4res *, resp);
8778 }
8779
8780 /*
8781 * Manage the counts on the file struct and close all file locks
8782 */
8783 /*ARGSUSED*/
8784 void
rfs4_release_share_lock_state(rfs4_state_t * sp,cred_t * cr,bool_t close_of_client)8785 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8786 bool_t close_of_client)
8787 {
8788 rfs4_file_t *fp = sp->rs_finfo;
8789 rfs4_lo_state_t *lsp;
8790 int fflags = 0;
8791
8792 /*
8793 * If this call is part of the larger closing down of client
8794 * state then it is just easier to release all locks
8795 * associated with this client instead of going through each
8796 * individual file and cleaning locks there.
8797 */
8798 if (close_of_client) {
8799 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8800 !list_is_empty(&sp->rs_lostatelist) &&
8801 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8802 /* Is the PxFS kernel module loaded? */
8803 if (lm_remove_file_locks != NULL) {
8804 int new_sysid;
8805
8806 /* Encode the cluster nodeid in new sysid */
8807 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8808 lm_set_nlmid_flk(&new_sysid);
8809
8810 /*
8811 * This PxFS routine removes file locks for a
8812 * client over all nodes of a cluster.
8813 */
8814 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8815 "lm_remove_file_locks(sysid=0x%x)\n",
8816 new_sysid));
8817 (*lm_remove_file_locks)(new_sysid);
8818 } else {
8819 struct flock64 flk;
8820
8821 /* Release all locks for this client */
8822 flk.l_type = F_UNLKSYS;
8823 flk.l_whence = 0;
8824 flk.l_start = 0;
8825 flk.l_len = 0;
8826 flk.l_sysid =
8827 sp->rs_owner->ro_client->rc_sysidt;
8828 flk.l_pid = 0;
8829 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8830 &flk, F_REMOTELOCK | FREAD | FWRITE,
8831 (u_offset_t)0, NULL, CRED(), NULL);
8832 }
8833
8834 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8835 }
8836 }
8837
8838 /*
8839 * Release all locks on this file by this lock owner or at
8840 * least mark the locks as having been released
8841 */
8842 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8843 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8844 lsp->rls_locks_cleaned = TRUE;
8845
8846 /* Was this already taken care of above? */
8847 if (!close_of_client &&
8848 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8849 (void) cleanlocks(sp->rs_finfo->rf_vp,
8850 lsp->rls_locker->rl_pid,
8851 lsp->rls_locker->rl_client->rc_sysidt);
8852 }
8853
8854 /*
8855 * Release any shrlocks associated with this open state ID.
8856 * This must be done before the rfs4_state gets marked closed.
8857 */
8858 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8859 (void) rfs4_unshare(sp);
8860
8861 if (sp->rs_open_access) {
8862 rfs4_dbe_lock(fp->rf_dbe);
8863
8864 /*
8865 * Decrement the count for each access and deny bit that this
8866 * state has contributed to the file.
8867 * If the file counts go to zero
8868 * clear the appropriate bit in the appropriate mask.
8869 */
8870 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8871 fp->rf_access_read--;
8872 fflags |= FREAD;
8873 if (fp->rf_access_read == 0)
8874 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8875 }
8876 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8877 fp->rf_access_write--;
8878 fflags |= FWRITE;
8879 if (fp->rf_access_write == 0)
8880 fp->rf_share_access &=
8881 ~OPEN4_SHARE_ACCESS_WRITE;
8882 }
8883 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8884 fp->rf_deny_read--;
8885 if (fp->rf_deny_read == 0)
8886 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8887 }
8888 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8889 fp->rf_deny_write--;
8890 if (fp->rf_deny_write == 0)
8891 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8892 }
8893
8894 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8895
8896 rfs4_dbe_unlock(fp->rf_dbe);
8897
8898 sp->rs_open_access = 0;
8899 sp->rs_open_deny = 0;
8900 }
8901 }
8902
8903 /*
8904 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8905 */
8906 static nfsstat4
lock_denied(LOCK4denied * dp,struct flock64 * flk)8907 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8908 {
8909 rfs4_lockowner_t *lo;
8910 rfs4_client_t *cp;
8911 uint32_t len;
8912
8913 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8914 if (lo != NULL) {
8915 cp = lo->rl_client;
8916 if (rfs4_lease_expired(cp)) {
8917 rfs4_lockowner_rele(lo);
8918 rfs4_dbe_hold(cp->rc_dbe);
8919 rfs4_client_close(cp);
8920 return (NFS4ERR_EXPIRED);
8921 }
8922 dp->owner.clientid = lo->rl_owner.clientid;
8923 len = lo->rl_owner.owner_len;
8924 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8925 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8926 dp->owner.owner_len = len;
8927 rfs4_lockowner_rele(lo);
8928 goto finish;
8929 }
8930
8931 /*
8932 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8933 * of the client id contain the boot time for a NFS4 lock. So we
8934 * fabricate and identity by setting clientid to the sysid, and
8935 * the lock owner to the pid.
8936 */
8937 dp->owner.clientid = flk->l_sysid;
8938 len = sizeof (pid_t);
8939 dp->owner.owner_len = len;
8940 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8941 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8942 finish:
8943 dp->offset = flk->l_start;
8944 dp->length = flk->l_len;
8945
8946 if (flk->l_type == F_RDLCK)
8947 dp->locktype = READ_LT;
8948 else if (flk->l_type == F_WRLCK)
8949 dp->locktype = WRITE_LT;
8950 else
8951 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8952
8953 return (NFS4_OK);
8954 }
8955
8956 /*
8957 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8958 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8959 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8960 * for that (obviously); they are sending the LOCK requests with some delays
8961 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8962 * locking and delay implementation at the client side.
8963 *
8964 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8965 * fast retries on its own (the for loop below) in a hope the lock will be
8966 * available soon. And if not, the client won't need to resend the LOCK
8967 * requests so fast to check the lock availability. This basically saves some
8968 * network traffic and tries to make sure the client gets the lock ASAP.
8969 */
8970 static int
setlock(vnode_t * vp,struct flock64 * flock,int flag,cred_t * cred)8971 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8972 {
8973 int error;
8974 struct flock64 flk;
8975 int i;
8976 clock_t delaytime;
8977 int cmd;
8978 int spin_cnt = 0;
8979
8980 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8981 retry:
8982 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8983
8984 for (i = 0; i < rfs4_maxlock_tries; i++) {
8985 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8986 error = VOP_FRLOCK(vp, cmd,
8987 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8988
8989 if (error != EAGAIN && error != EACCES)
8990 break;
8991
8992 if (i < rfs4_maxlock_tries - 1) {
8993 delay(delaytime);
8994 delaytime *= 2;
8995 }
8996 }
8997
8998 if (error == EAGAIN || error == EACCES) {
8999 /* Get the owner of the lock */
9000 flk = *flock;
9001 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
9002 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
9003 NULL) == 0) {
9004 /*
9005 * There's a race inherent in the current VOP_FRLOCK
9006 * design where:
9007 * a: "other guy" takes a lock that conflicts with a
9008 * lock we want
9009 * b: we attempt to take our lock (non-blocking) and
9010 * the attempt fails.
9011 * c: "other guy" releases the conflicting lock
9012 * d: we ask what lock conflicts with the lock we want,
9013 * getting F_UNLCK (no lock blocks us)
9014 *
9015 * If we retry the non-blocking lock attempt in this
9016 * case (restart at step 'b') there's some possibility
9017 * that many such attempts might fail. However a test
9018 * designed to actually provoke this race shows that
9019 * the vast majority of cases require no retry, and
9020 * only a few took as many as three retries. Here's
9021 * the test outcome:
9022 *
9023 * number of retries how many times we needed
9024 * that many retries
9025 * 0 79461
9026 * 1 862
9027 * 2 49
9028 * 3 5
9029 *
9030 * Given those empirical results, we arbitrarily limit
9031 * the retry count to ten.
9032 *
9033 * If we actually make to ten retries and give up,
9034 * nothing catastrophic happens, but we're unable to
9035 * return the information about the conflicting lock to
9036 * the NFS client. That's an acceptable trade off vs.
9037 * letting this retry loop run forever.
9038 */
9039 if (flk.l_type == F_UNLCK) {
9040 if (spin_cnt++ < 10) {
9041 /* No longer locked, retry */
9042 goto retry;
9043 }
9044 } else {
9045 *flock = flk;
9046 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
9047 F_GETLK, &flk);
9048 }
9049 }
9050 }
9051
9052 return (error);
9053 }
9054
9055 /*ARGSUSED*/
9056 static nfsstat4
rfs4_do_lock(rfs4_lo_state_t * lsp,nfs_lock_type4 locktype,offset4 offset,length4 length,cred_t * cred,nfs_resop4 * resop)9057 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
9058 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
9059 {
9060 nfsstat4 status;
9061 rfs4_lockowner_t *lo = lsp->rls_locker;
9062 rfs4_state_t *sp = lsp->rls_state;
9063 struct flock64 flock;
9064 int16_t ltype;
9065 int flag;
9066 int error;
9067 sysid_t sysid;
9068 LOCK4res *lres;
9069 vnode_t *vp;
9070
9071 if (rfs4_lease_expired(lo->rl_client)) {
9072 return (NFS4ERR_EXPIRED);
9073 }
9074
9075 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9076 return (status);
9077
9078 /* Check for zero length. To lock to end of file use all ones for V4 */
9079 if (length == 0)
9080 return (NFS4ERR_INVAL);
9081 else if (length == (length4)(~0))
9082 length = 0; /* Posix to end of file */
9083
9084 retry:
9085 rfs4_dbe_lock(sp->rs_dbe);
9086 if (sp->rs_closed == TRUE) {
9087 rfs4_dbe_unlock(sp->rs_dbe);
9088 return (NFS4ERR_OLD_STATEID);
9089 }
9090
9091 if (resop->resop != OP_LOCKU) {
9092 switch (locktype) {
9093 case READ_LT:
9094 case READW_LT:
9095 if ((sp->rs_share_access
9096 & OPEN4_SHARE_ACCESS_READ) == 0) {
9097 rfs4_dbe_unlock(sp->rs_dbe);
9098
9099 return (NFS4ERR_OPENMODE);
9100 }
9101 ltype = F_RDLCK;
9102 break;
9103 case WRITE_LT:
9104 case WRITEW_LT:
9105 if ((sp->rs_share_access
9106 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
9107 rfs4_dbe_unlock(sp->rs_dbe);
9108
9109 return (NFS4ERR_OPENMODE);
9110 }
9111 ltype = F_WRLCK;
9112 break;
9113 }
9114 } else
9115 ltype = F_UNLCK;
9116
9117 flock.l_type = ltype;
9118 flock.l_whence = 0; /* SEEK_SET */
9119 flock.l_start = offset;
9120 flock.l_len = length;
9121 flock.l_sysid = sysid;
9122 flock.l_pid = lsp->rls_locker->rl_pid;
9123
9124 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9125 if (flock.l_len < 0 || flock.l_start < 0) {
9126 rfs4_dbe_unlock(sp->rs_dbe);
9127 return (NFS4ERR_INVAL);
9128 }
9129
9130 /*
9131 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
9132 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
9133 */
9134 flag = (int)sp->rs_share_access | F_REMOTELOCK;
9135
9136 vp = sp->rs_finfo->rf_vp;
9137 VN_HOLD(vp);
9138
9139 /*
9140 * We need to unlock sp before we call the underlying filesystem to
9141 * acquire the file lock.
9142 */
9143 rfs4_dbe_unlock(sp->rs_dbe);
9144
9145 error = setlock(vp, &flock, flag, cred);
9146
9147 /*
9148 * Make sure the file is still open. In a case the file was closed in
9149 * the meantime, clean the lock we acquired using the setlock() call
9150 * above, and return the appropriate error.
9151 */
9152 rfs4_dbe_lock(sp->rs_dbe);
9153 if (sp->rs_closed == TRUE) {
9154 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
9155 rfs4_dbe_unlock(sp->rs_dbe);
9156
9157 VN_RELE(vp);
9158
9159 return (NFS4ERR_OLD_STATEID);
9160 }
9161 rfs4_dbe_unlock(sp->rs_dbe);
9162
9163 VN_RELE(vp);
9164
9165 if (error == 0) {
9166 rfs4_dbe_lock(lsp->rls_dbe);
9167 next_stateid(&lsp->rls_lockid);
9168 rfs4_dbe_unlock(lsp->rls_dbe);
9169 }
9170
9171 /*
9172 * N.B. We map error values to nfsv4 errors. This is differrent
9173 * than puterrno4 routine.
9174 */
9175 switch (error) {
9176 case 0:
9177 status = NFS4_OK;
9178 break;
9179 case EAGAIN:
9180 case EACCES: /* Old value */
9181 /* Can only get here if op is OP_LOCK */
9182 ASSERT(resop->resop == OP_LOCK);
9183 lres = &resop->nfs_resop4_u.oplock;
9184 status = NFS4ERR_DENIED;
9185 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
9186 == NFS4ERR_EXPIRED)
9187 goto retry;
9188 break;
9189 case ENOLCK:
9190 status = NFS4ERR_DELAY;
9191 break;
9192 case EOVERFLOW:
9193 status = NFS4ERR_INVAL;
9194 break;
9195 case EINVAL:
9196 status = NFS4ERR_NOTSUPP;
9197 break;
9198 default:
9199 status = NFS4ERR_SERVERFAULT;
9200 break;
9201 }
9202
9203 return (status);
9204 }
9205
/*
 * LOCK operation.
 *
 * Two request shapes are handled:
 *  - new_lock_owner: the request carries an open stateid plus the
 *    lock_owner to associate with it.  The open state (sp) is looked up,
 *    the open-owner is serialized via ro_sw, and a lock owner and
 *    lock-owner state (lsp) are found or created.
 *  - existing lock owner: the request carries a lock stateid, from which
 *    lsp is looked up directly (sp stays NULL).
 *
 * In both cases, after the stateid/seqid checks pass, the object type and
 * grace/reclaim rules are checked and the lock is attempted through
 * rfs4_do_lock().
 *
 * Exit labels:
 *   out - a final status has been computed in `status'; store it in the
 *         reply and cache the reply in the owner(s).
 *   end - the reply status is already set; only release what was taken.
 */
/*ARGSUSED*/
void
rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	LOCK4args *args = &argop->nfs_argop4_u.oplock;
	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
	nfsstat4 status;
	stateid4 *stateid;
	rfs4_lockowner_t *lo;
	rfs4_client_t *cp;
	rfs4_state_t *sp = NULL;	/* non-NULL only for new_lock_owner */
	rfs4_lo_state_t *lsp = NULL;
	bool_t ls_sw_held = FALSE;	/* TRUE once lsp->rls_sw is entered */
	bool_t create = TRUE;
	bool_t lcreate = TRUE;
	bool_t dup_lock = FALSE;	/* request is a replay of a prior LOCK */
	int rc;

	DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
	    LOCK4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
		    cs, LOCK4res *, resp);
		return;
	}

	if (args->locker.new_lock_owner) {
		/* Create a new lockowner for this instance */
		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;

		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));

		stateid = &olo->open_stateid;
		get_stateid4(cs, stateid);
		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
		if (status != NFS4_OK) {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "Get state failed in lock %d", status));
			*cs->statusp = resp->status = status;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* Ensure specified filehandle matches */
		if (cs->vp != sp->rs_finfo->rf_vp) {
			rfs4_state_rele(sp);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* hold off other access to open_owner while we tinker */
		rfs4_sw_enter(&sp->rs_owner->ro_sw);

		/*
		 * rc is kept so that the seqid check below can tell an OKAY
		 * stateid (which falls through) from a REPLAY stateid.
		 */
		switch (rc = rfs4_check_stateid_seqid(sp, stateid, cs)) {
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_UNCONFIRMED:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_OKAY:
			/* With a session the open seqid is not checked */
			if (rfs4_has_session(cs))
				break;
			/* FALLTHROUGH */
		case NFS4_CHECK_STATEID_REPLAY:
			ASSERT(!rfs4_has_session(cs));

			switch (rfs4_check_olo_seqid(olo->open_seqid,
			    sp->rs_owner, resop)) {
			case NFS4_CHKSEQ_OKAY:
				if (rc == NFS4_CHECK_STATEID_OKAY)
					break;
				/*
				 * This is replayed stateid; if seqid
				 * matches next expected, then client
				 * is using wrong seqid.
				 */
				/* FALLTHROUGH */
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_REPLAY:
				/* This is a duplicate LOCK request */
				dup_lock = TRUE;

				/*
				 * For a duplicate we do not want to
				 * create a new lockowner as it should
				 * already exist.
				 * Turn off the lockowner create flag.
				 */
				lcreate = FALSE;
			}
			break;
		}

		/*
		 * See RFC 8881 18.10.3. MUST be ignored by the server:
		 * The clientid field of the lock_owner field of the
		 * open_owner field (locker.open_owner.lock_owner.clientid).
		 */
		if (rfs4_has_session(cs))
			olo->lock_owner.clientid = cs->client->rc_clientid;

		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
		if (lo == NULL) {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "rfs4_op_lock: no lock owner"));
			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
			goto end;
		}

		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
		if (lsp == NULL) {
			rfs4_update_lease(sp->rs_owner->ro_client);
			/*
			 * Only update the open_seqid if this is not
			 * a duplicate request
			 */
			if (dup_lock == FALSE) {
				rfs4_update_open_sequence(sp->rs_owner);
			}

			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "rfs4_op_lock: no state"));
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			rfs4_update_open_resp(sp->rs_owner, resop, NULL);
			rfs4_lockowner_rele(lo);
			goto end;
		}

		/*
		 * This is the new_lock_owner branch and the client is
		 * supposed to be associating a new lock_owner with
		 * the open file at this point.  If we find that a
		 * lock_owner/state association already exists and a
		 * successful LOCK request was returned to the client,
		 * an error is returned to the client since this is
		 * not appropriate.  The client should be using the
		 * existing lock_owner branch.
		 */
		if (!rfs4_has_session(cs) && !dup_lock && !create) {
			if (lsp->rls_lock_completed == TRUE) {
				*cs->statusp =
				    resp->status = NFS4ERR_BAD_SEQID;
				rfs4_lockowner_rele(lo);
				goto end;
			}
		}

		rfs4_update_lease(sp->rs_owner->ro_client);

		/*
		 * Only update the open_seqid if this is not
		 * a duplicate request
		 */
		if (dup_lock == FALSE) {
			rfs4_update_open_sequence(sp->rs_owner);
		}

		/*
		 * If this is a duplicate lock request, just copy the
		 * previously saved reply and return.
		 */
		if (dup_lock == TRUE) {
			/* verify that lock_seqid's match */
			if (lsp->rls_seqid != olo->lock_seqid) {
				NFS4_DEBUG(rfs4_debug,
				    (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
				    "lsp->seqid=%d old->seqid=%d",
				    lsp->rls_seqid, olo->lock_seqid));
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			} else {
				rfs4_copy_reply(resop, &lsp->rls_reply);
				/*
				 * Make sure to copy the just
				 * retrieved reply status into the
				 * overall compound status
				 */
				*cs->statusp = resp->status;
			}
			rfs4_lockowner_rele(lo);
			goto end;
		}

		rfs4_dbe_lock(lsp->rls_dbe);

		/* Make sure to update the lock sequence id */
		lsp->rls_seqid = olo->lock_seqid;

		NFS4_DEBUG(rfs4_debug,
		    (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));

		/*
		 * This is used to signify the newly created lockowner
		 * stateid and its sequence number.  The checks for
		 * sequence number and increment don't occur on the
		 * very first lock request for a lockowner.
		 */
		lsp->rls_skip_seqid_check = TRUE;

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		rfs4_dbe_unlock(lsp->rls_dbe);

		rfs4_lockowner_rele(lo);
	} else {
		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
		/* get lsp and hold the lock on the underlying file struct */
		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
		    != NFS4_OK) {
			*cs->statusp = resp->status = status;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}
		create = FALSE;	/* We didn't create lsp */

		/* Ensure specified filehandle matches */
		if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
			rfs4_lo_state_rele(lsp, TRUE);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
		/*
		 * The stateid looks like it was okay (expected to be
		 * the next one)
		 */
		case NFS4_CHECK_STATEID_OKAY:
			/* With a session the lock seqid is not checked */
			if (rfs4_has_session(cs))
				break;

			/*
			 * The sequence id is now checked.  Determine
			 * if this is a replay or if it is in the
			 * expected (next) sequence.  In the case of a
			 * replay, there are two replay conditions
			 * that may occur.  The first is the normal
			 * condition where a LOCK is done with a
			 * NFS4_OK response and the stateid is
			 * updated.  That case is handled below when
			 * the stateid is identified as a REPLAY.  The
			 * second is the case where an error is
			 * returned, like NFS4ERR_DENIED, and the
			 * sequence number is updated but the stateid
			 * is not updated.  This second case is dealt
			 * with here.  So it may seem odd that the
			 * stateid is okay but the sequence id is a
			 * replay but it is okay.
			 */
			switch (rfs4_check_lock_seqid(
			    args->locker.locker4_u.lock_owner.lock_seqid,
			    lsp, resop)) {
			case NFS4_CHKSEQ_REPLAY:
				if (resp->status != NFS4_OK) {
					/*
					 * Here is our replay and need
					 * to verify that the last
					 * response was an error.
					 */
					*cs->statusp = resp->status;
					goto end;
				}
				/*
				 * This is done since the sequence id
				 * looked like a replay but it didn't
				 * pass our check so a BAD_SEQID is
				 * returned as a result.
				 */
				/*FALLTHROUGH*/
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_OKAY:
				/* Everything looks okay move ahead */
				break;
			}
			break;
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_REPLAY:
			ASSERT(!rfs4_has_session(cs));

			switch (rfs4_check_lock_seqid(
			    args->locker.locker4_u.lock_owner.lock_seqid,
			    lsp, resop)) {
			case NFS4_CHKSEQ_OKAY:
				/*
				 * This is a replayed stateid; if
				 * seqid matches the next expected,
				 * then client is using wrong seqid.
				 */
				/* FALLTHROUGH */
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_REPLAY:
				/* Duplicate request: resp already filled in */
				rfs4_update_lease(lsp->rls_locker->rl_client);
				*cs->statusp = status = resp->status;
				goto end;
			}
			break;
		default:
			ASSERT(FALSE);
			break;
		}

		rfs4_update_lock_sequence(lsp);
		rfs4_update_lease(lsp->rls_locker->rl_client);
	}

	/*
	 * NFS4 only allows locking on regular files, so
	 * verify type of object.
	 */
	if (cs->vp->v_type != VREG) {
		if (cs->vp->v_type == VDIR)
			status = NFS4ERR_ISDIR;
		else
			status = NFS4ERR_INVAL;
		goto out;
	}

	cp = lsp->rls_state->rs_owner->ro_client;

	/* Non-reclaim requests are refused while the client is in grace */
	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
		status = NFS4ERR_GRACE;
		goto out;
	}

	/* Reclaim during grace requires that the client may reclaim */
	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Reclaim outside of grace is refused */
	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
		cs->deleg = TRUE;

	status = rfs4_do_lock(lsp, args->locktype,
	    args->offset, args->length, cs->cr, resop);

out:
	lsp->rls_skip_seqid_check = FALSE;

	*cs->statusp = resp->status = status;

	if (status == NFS4_OK) {
		resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
		lsp->rls_lock_completed = TRUE;

		put_stateid4(cs, &resp->LOCK4res_u.lock_stateid);
	}
	/*
	 * Only update the "OPEN" response here if this was a new
	 * lock_owner
	 */
	if (sp)
		rfs4_update_open_resp(sp->rs_owner, resop, NULL);

	rfs4_update_lock_resp(lsp, resop);

end:
	if (lsp) {
		if (ls_sw_held)
			rfs4_sw_exit(&lsp->rls_sw);
		/*
		 * If an sp obtained, then the lsp does not represent
		 * a lock on the file struct.
		 */
		if (sp != NULL)
			rfs4_lo_state_rele(lsp, FALSE);
		else
			rfs4_lo_state_rele(lsp, TRUE);
	}
	if (sp) {
		rfs4_sw_exit(&sp->rs_owner->ro_sw);
		rfs4_state_rele(sp);
	}

	DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
	    LOCK4res *, resp);
}
9627
9628 /* free function for LOCK/LOCKT */
9629 static void
lock_denied_free(nfs_resop4 * resop)9630 lock_denied_free(nfs_resop4 *resop)
9631 {
9632 LOCK4denied *dp = NULL;
9633
9634 switch (resop->resop) {
9635 case OP_LOCK:
9636 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9637 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9638 break;
9639 case OP_LOCKT:
9640 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9641 dp = &resop->nfs_resop4_u.oplockt.denied;
9642 break;
9643 default:
9644 break;
9645 }
9646
9647 if (dp)
9648 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9649 }
9650
9651 /*ARGSUSED*/
9652 void
rfs4_op_locku(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)9653 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9654 struct svc_req *req, struct compound_state *cs)
9655 {
9656 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9657 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9658 nfsstat4 status;
9659 stateid4 *stateid = &args->lock_stateid;
9660 rfs4_lo_state_t *lsp;
9661
9662 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9663 LOCKU4args *, args);
9664
9665 if (cs->vp == NULL) {
9666 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9667 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9668 LOCKU4res *, resp);
9669 return;
9670 }
9671
9672 get_stateid4(cs, stateid);
9673
9674 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9675 *cs->statusp = resp->status = status;
9676 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9677 LOCKU4res *, resp);
9678 return;
9679 }
9680
9681 /* Ensure specified filehandle matches */
9682 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9683 rfs4_lo_state_rele(lsp, TRUE);
9684 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9685 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9686 LOCKU4res *, resp);
9687 return;
9688 }
9689
9690 /* hold off other access to lsp while we tinker */
9691 rfs4_sw_enter(&lsp->rls_sw);
9692
9693 switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
9694 case NFS4_CHECK_STATEID_OKAY:
9695 if (rfs4_has_session(cs))
9696 break;
9697
9698 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9699 != NFS4_CHKSEQ_OKAY) {
9700 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9701 goto end;
9702 }
9703 break;
9704 case NFS4_CHECK_STATEID_OLD:
9705 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9706 goto end;
9707 case NFS4_CHECK_STATEID_BAD:
9708 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9709 goto end;
9710 case NFS4_CHECK_STATEID_EXPIRED:
9711 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9712 goto end;
9713 case NFS4_CHECK_STATEID_CLOSED:
9714 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9715 goto end;
9716 case NFS4_CHECK_STATEID_REPLAY:
9717 ASSERT(!rfs4_has_session(cs));
9718
9719 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9720 case NFS4_CHKSEQ_OKAY:
9721 /*
9722 * This is a replayed stateid; if
9723 * seqid matches the next expected,
9724 * then client is using wrong seqid.
9725 */
9726 case NFS4_CHKSEQ_BAD:
9727 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9728 goto end;
9729 case NFS4_CHKSEQ_REPLAY:
9730 rfs4_update_lease(lsp->rls_locker->rl_client);
9731 *cs->statusp = status = resp->status;
9732 goto end;
9733 }
9734 break;
9735 default:
9736 ASSERT(FALSE);
9737 break;
9738 }
9739
9740 rfs4_update_lock_sequence(lsp);
9741 rfs4_update_lease(lsp->rls_locker->rl_client);
9742
9743 /*
9744 * NFS4 only allows locking on regular files, so
9745 * verify type of object.
9746 */
9747 if (cs->vp->v_type != VREG) {
9748 if (cs->vp->v_type == VDIR)
9749 status = NFS4ERR_ISDIR;
9750 else
9751 status = NFS4ERR_INVAL;
9752 goto out;
9753 }
9754
9755 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9756 status = NFS4ERR_GRACE;
9757 goto out;
9758 }
9759
9760 status = rfs4_do_lock(lsp, args->locktype,
9761 args->offset, args->length, cs->cr, resop);
9762
9763 out:
9764 *cs->statusp = resp->status = status;
9765
9766 if (status == NFS4_OK)
9767 resp->lock_stateid = lsp->rls_lockid.stateid;
9768
9769 rfs4_update_lock_resp(lsp, resop);
9770
9771 end:
9772 rfs4_sw_exit(&lsp->rls_sw);
9773 rfs4_lo_state_rele(lsp, TRUE);
9774
9775 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9776 LOCKU4res *, resp);
9777 }
9778
9779 /*
9780 * LOCKT is a best effort routine, the client can not be guaranteed that
9781 * the status return is still in effect by the time the reply is received.
9782 * They are numerous race conditions in this routine, but we are not required
9783 * and can not be accurate.
9784 */
9785 /*ARGSUSED*/
9786 void
rfs4_op_lockt(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)9787 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9788 struct svc_req *req, struct compound_state *cs)
9789 {
9790 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9791 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9792 rfs4_lockowner_t *lo;
9793 rfs4_client_t *cp;
9794 bool_t create = FALSE;
9795 struct flock64 flk;
9796 int error;
9797 int flag = FREAD | FWRITE;
9798 int ltype;
9799 length4 posix_length;
9800 sysid_t sysid;
9801 pid_t pid;
9802
9803 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9804 LOCKT4args *, args);
9805
9806 if (cs->vp == NULL) {
9807 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9808 goto out;
9809 }
9810
9811 /*
9812 * NFS4 only allows locking on regular files, so
9813 * verify type of object.
9814 */
9815 if (cs->vp->v_type != VREG) {
9816 if (cs->vp->v_type == VDIR)
9817 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9818 else
9819 *cs->statusp = resp->status = NFS4ERR_INVAL;
9820 goto out;
9821 }
9822
9823 /*
9824 * Check out the clientid to ensure the server knows about it
9825 * so that we correctly inform the client of a server reboot.
9826 */
9827 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9828 == NULL) {
9829 *cs->statusp = resp->status =
9830 rfs4_check_clientid(&args->owner.clientid, 0);
9831 goto out;
9832 }
9833 if (rfs4_lease_expired(cp)) {
9834 rfs4_client_close(cp);
9835 /*
9836 * Protocol doesn't allow returning NFS4ERR_STALE as
9837 * other operations do on this check so STALE_CLIENTID
9838 * is returned instead
9839 */
9840 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9841 goto out;
9842 }
9843
9844 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9845 *cs->statusp = resp->status = NFS4ERR_GRACE;
9846 rfs4_client_rele(cp);
9847 goto out;
9848 }
9849 rfs4_client_rele(cp);
9850
9851 resp->status = NFS4_OK;
9852
9853 switch (args->locktype) {
9854 case READ_LT:
9855 case READW_LT:
9856 ltype = F_RDLCK;
9857 break;
9858 case WRITE_LT:
9859 case WRITEW_LT:
9860 ltype = F_WRLCK;
9861 break;
9862 }
9863
9864 posix_length = args->length;
9865 /* Check for zero length. To lock to end of file use all ones for V4 */
9866 if (posix_length == 0) {
9867 *cs->statusp = resp->status = NFS4ERR_INVAL;
9868 goto out;
9869 } else if (posix_length == (length4)(~0)) {
9870 posix_length = 0; /* Posix to end of file */
9871 }
9872
9873 /*
9874 * See RFC 8881 18.11.3:
9875 * The clientid field of the owner MAY be set to any value
9876 * by the client and MUST be ignored by the server.
9877 */
9878 if (rfs4_has_session(cs))
9879 args->owner.clientid = cs->client->rc_clientid;
9880
9881 /* Find or create a lockowner */
9882 lo = rfs4_findlockowner(&args->owner, &create);
9883
9884 if (lo) {
9885 pid = lo->rl_pid;
9886 if ((resp->status =
9887 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9888 goto err;
9889 } else {
9890 pid = 0;
9891 sysid = lockt_sysid;
9892 }
9893 retry:
9894 flk.l_type = ltype;
9895 flk.l_whence = 0; /* SEEK_SET */
9896 flk.l_start = args->offset;
9897 flk.l_len = posix_length;
9898 flk.l_sysid = sysid;
9899 flk.l_pid = pid;
9900 flag |= F_REMOTELOCK;
9901
9902 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9903
9904 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9905 if (flk.l_len < 0 || flk.l_start < 0) {
9906 resp->status = NFS4ERR_INVAL;
9907 goto err;
9908 }
9909 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9910 NULL, cs->cr, NULL);
9911
9912 /*
9913 * N.B. We map error values to nfsv4 errors. This is differrent
9914 * than puterrno4 routine.
9915 */
9916 switch (error) {
9917 case 0:
9918 if (flk.l_type == F_UNLCK)
9919 resp->status = NFS4_OK;
9920 else {
9921 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9922 goto retry;
9923 resp->status = NFS4ERR_DENIED;
9924 }
9925 break;
9926 case EOVERFLOW:
9927 resp->status = NFS4ERR_INVAL;
9928 break;
9929 case EINVAL:
9930 resp->status = NFS4ERR_NOTSUPP;
9931 break;
9932 default:
9933 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9934 error);
9935 resp->status = NFS4ERR_SERVERFAULT;
9936 break;
9937 }
9938
9939 err:
9940 if (lo)
9941 rfs4_lockowner_rele(lo);
9942 *cs->statusp = resp->status;
9943 out:
9944 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9945 LOCKT4res *, resp);
9946 }
9947
9948 int
rfs4_share(rfs4_state_t * sp,uint32_t access,uint32_t deny)9949 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9950 {
9951 int err;
9952 int cmd;
9953 vnode_t *vp;
9954 struct shrlock shr;
9955 struct shr_locowner shr_loco;
9956 int fflags = 0;
9957
9958 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9959 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9960
9961 if (sp->rs_closed)
9962 return (NFS4ERR_OLD_STATEID);
9963
9964 vp = sp->rs_finfo->rf_vp;
9965 ASSERT(vp);
9966
9967 shr.s_access = shr.s_deny = 0;
9968
9969 if (access & OPEN4_SHARE_ACCESS_READ) {
9970 fflags |= FREAD;
9971 shr.s_access |= F_RDACC;
9972 }
9973 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9974 fflags |= FWRITE;
9975 shr.s_access |= F_WRACC;
9976 }
9977 ASSERT(shr.s_access);
9978
9979 if (deny & OPEN4_SHARE_DENY_READ)
9980 shr.s_deny |= F_RDDNY;
9981 if (deny & OPEN4_SHARE_DENY_WRITE)
9982 shr.s_deny |= F_WRDNY;
9983
9984 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9985 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9986 shr_loco.sl_pid = shr.s_pid;
9987 shr_loco.sl_id = shr.s_sysid;
9988 shr.s_owner = (caddr_t)&shr_loco;
9989 shr.s_own_len = sizeof (shr_loco);
9990
9991 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9992
9993 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9994 if (err != 0) {
9995 if (err == EAGAIN)
9996 err = NFS4ERR_SHARE_DENIED;
9997 else
9998 err = puterrno4(err);
9999 return (err);
10000 }
10001
10002 sp->rs_share_access |= access;
10003 sp->rs_share_deny |= deny;
10004
10005 return (0);
10006 }
10007
10008 int
rfs4_unshare(rfs4_state_t * sp)10009 rfs4_unshare(rfs4_state_t *sp)
10010 {
10011 int err;
10012 struct shrlock shr;
10013 struct shr_locowner shr_loco;
10014
10015 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
10016
10017 if (sp->rs_closed || sp->rs_share_access == 0)
10018 return (0);
10019
10020 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
10021 ASSERT(sp->rs_finfo->rf_vp);
10022
10023 shr.s_access = shr.s_deny = 0;
10024 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
10025 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
10026 shr_loco.sl_pid = shr.s_pid;
10027 shr_loco.sl_id = shr.s_sysid;
10028 shr.s_owner = (caddr_t)&shr_loco;
10029 shr.s_own_len = sizeof (shr_loco);
10030
10031 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
10032 NULL);
10033 if (err != 0) {
10034 err = puterrno4(err);
10035 return (err);
10036 }
10037
10038 sp->rs_share_access = 0;
10039 sp->rs_share_deny = 0;
10040
10041 return (0);
10042
10043 }
10044
10045 static int
rdma_setup_read_data4(READ4args * args,READ4res * rok)10046 rdma_setup_read_data4(READ4args *args, READ4res *rok)
10047 {
10048 struct clist *wcl;
10049 count4 count = rok->data_len;
10050 int wlist_len;
10051
10052 wcl = args->wlist;
10053 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
10054 return (FALSE);
10055 }
10056 wcl = args->wlist;
10057 rok->wlist_len = wlist_len;
10058 rok->wlist = wcl;
10059 return (TRUE);
10060 }
10061
/*
 * Tunable to disable server referrals: when set to a non-zero value,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
10064
10065 /*
10066 * Find an NFS record in reparse point data.
10067 * Returns 0 for success and <0 or an errno value on failure.
10068 */
10069 int
vn_find_nfs_record(vnode_t * vp,nvlist_t ** nvlp,char ** svcp,char ** datap)10070 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
10071 {
10072 int err;
10073 char *stype, *val;
10074 nvlist_t *nvl;
10075 nvpair_t *curr;
10076
10077 if ((nvl = reparse_init()) == NULL)
10078 return (-1);
10079
10080 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
10081 reparse_free(nvl);
10082 return (err);
10083 }
10084
10085 curr = NULL;
10086 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
10087 if ((stype = nvpair_name(curr)) == NULL) {
10088 reparse_free(nvl);
10089 return (-2);
10090 }
10091 if (strncasecmp(stype, "NFS", 3) == 0)
10092 break;
10093 }
10094
10095 if ((curr == NULL) ||
10096 (nvpair_value_string(curr, &val))) {
10097 reparse_free(nvl);
10098 return (-3);
10099 }
10100 *nvlp = nvl;
10101 *svcp = stype;
10102 *datap = val;
10103 return (0);
10104 }
10105
10106 int
vn_is_nfs_reparse(vnode_t * vp,cred_t * cr)10107 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
10108 {
10109 nvlist_t *nvl;
10110 char *s, *d;
10111
10112 if (rfs4_no_referrals != 0)
10113 return (B_FALSE);
10114
10115 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
10116 return (B_FALSE);
10117
10118 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
10119 return (B_FALSE);
10120
10121 reparse_free(nvl);
10122
10123 return (B_TRUE);
10124 }
10125
10126 /*
10127 * There is a user-level copy of this routine in ref_subr.c.
10128 * Changes should be kept in sync.
10129 */
10130 static int
nfs4_create_components(char * path,component4 * comp4)10131 nfs4_create_components(char *path, component4 *comp4)
10132 {
10133 int slen, plen, ncomp;
10134 char *ori_path, *nxtc, buf[MAXNAMELEN];
10135
10136 if (path == NULL)
10137 return (0);
10138
10139 plen = strlen(path) + 1; /* include the terminator */
10140 ori_path = path;
10141 ncomp = 0;
10142
10143 /* count number of components in the path */
10144 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
10145 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
10146 if ((slen = nxtc - path) == 0) {
10147 path = nxtc + 1;
10148 continue;
10149 }
10150
10151 if (comp4 != NULL) {
10152 bcopy(path, buf, slen);
10153 buf[slen] = '\0';
10154 (void) str_to_utf8(buf, &comp4[ncomp]);
10155 }
10156
10157 ncomp++; /* 1 valid component */
10158 path = nxtc + 1;
10159 }
10160 if (*nxtc == '\0' || *nxtc == '\n')
10161 break;
10162 }
10163
10164 return (ncomp);
10165 }
10166
10167 /*
10168 * There is a user-level copy of this routine in ref_subr.c.
10169 * Changes should be kept in sync.
10170 */
10171 static int
make_pathname4(char * path,pathname4 * pathname)10172 make_pathname4(char *path, pathname4 *pathname)
10173 {
10174 int ncomp;
10175 component4 *comp4;
10176
10177 if (pathname == NULL)
10178 return (0);
10179
10180 if (path == NULL) {
10181 pathname->pathname4_val = NULL;
10182 pathname->pathname4_len = 0;
10183 return (0);
10184 }
10185
10186 /* count number of components to alloc buffer */
10187 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
10188 pathname->pathname4_val = NULL;
10189 pathname->pathname4_len = 0;
10190 return (0);
10191 }
10192 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
10193
10194 /* copy components into allocated buffer */
10195 ncomp = nfs4_create_components(path, comp4);
10196
10197 pathname->pathname4_val = comp4;
10198 pathname->pathname4_len = ncomp;
10199
10200 return (ncomp);
10201 }
10202
10203 #define xdr_fs_locations4 xdr_fattr4_fs_locations
10204
10205 fs_locations4 *
fetch_referral(vnode_t * vp,cred_t * cr)10206 fetch_referral(vnode_t *vp, cred_t *cr)
10207 {
10208 nvlist_t *nvl;
10209 char *stype, *sdata;
10210 fs_locations4 *result;
10211 char buf[1024];
10212 size_t bufsize;
10213 XDR xdr;
10214 int err;
10215
10216 /*
10217 * Check attrs to ensure it's a reparse point
10218 */
10219 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
10220 return (NULL);
10221
10222 /*
10223 * Look for an NFS record and get the type and data
10224 */
10225 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
10226 return (NULL);
10227
10228 /*
10229 * With the type and data, upcall to get the referral
10230 */
10231 bufsize = sizeof (buf);
10232 bzero(buf, sizeof (buf));
10233 err = reparse_kderef((const char *)stype, (const char *)sdata,
10234 buf, &bufsize);
10235 reparse_free(nvl);
10236
10237 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
10238 char *, stype, char *, sdata, char *, buf, int, err);
10239 if (err) {
10240 cmn_err(CE_NOTE,
10241 "reparsed daemon not running: unable to get referral (%d)",
10242 err);
10243 return (NULL);
10244 }
10245
10246 /*
10247 * We get an XDR'ed record back from the kderef call
10248 */
10249 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
10250 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
10251 err = xdr_fs_locations4(&xdr, result);
10252 XDR_DESTROY(&xdr);
10253 if (err != TRUE) {
10254 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
10255 int, err);
10256 return (NULL);
10257 }
10258
10259 /*
10260 * Look at path to recover fs_root, ignoring the leading '/'
10261 */
10262 (void) make_pathname4(vp->v_path, &result->fs_root);
10263
10264 return (result);
10265 }
10266
10267 char *
build_symlink(vnode_t * vp,cred_t * cr,size_t * strsz)10268 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
10269 {
10270 fs_locations4 *fsl;
10271 fs_location4 *fs;
10272 char *server, *path, *symbuf;
10273 static char *prefix = "/net/";
10274 int i, size, npaths;
10275 uint_t len;
10276
10277 /* Get the referral */
10278 if ((fsl = fetch_referral(vp, cr)) == NULL)
10279 return (NULL);
10280
10281 /* Deal with only the first location and first server */
10282 fs = &fsl->locations_val[0];
10283 server = utf8_to_str(&fs->server_val[0], &len, NULL);
10284 if (server == NULL) {
10285 rfs4_free_fs_locations4(fsl);
10286 kmem_free(fsl, sizeof (fs_locations4));
10287 return (NULL);
10288 }
10289
10290 /* Figure out size for "/net/" + host + /path/path/path + NULL */
10291 size = strlen(prefix) + len;
10292 for (i = 0; i < fs->rootpath.pathname4_len; i++)
10293 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
10294
10295 /* Allocate the symlink buffer and fill it */
10296 symbuf = kmem_zalloc(size, KM_SLEEP);
10297 (void) strcat(symbuf, prefix);
10298 (void) strcat(symbuf, server);
10299 kmem_free(server, len);
10300
10301 npaths = 0;
10302 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
10303 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
10304 if (path == NULL)
10305 continue;
10306 (void) strcat(symbuf, "/");
10307 (void) strcat(symbuf, path);
10308 npaths++;
10309 kmem_free(path, len);
10310 }
10311
10312 rfs4_free_fs_locations4(fsl);
10313 kmem_free(fsl, sizeof (fs_locations4));
10314
10315 if (strsz != NULL)
10316 *strsz = size;
10317 return (symbuf);
10318 }
10319
10320 /*
10321 * Check to see if we have a downrev Solaris client, so that we
10322 * can send it a symlink instead of a referral.
10323 */
10324 int
client_is_downrev(struct svc_req * req)10325 client_is_downrev(struct svc_req *req)
10326 {
10327 struct sockaddr *ca;
10328 rfs4_clntip_t *ci;
10329 bool_t create = FALSE;
10330 int is_downrev;
10331
10332 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10333 ASSERT(ca);
10334 ci = rfs4_find_clntip(ca, &create);
10335 if (ci == NULL)
10336 return (0);
10337 is_downrev = ci->ri_no_referrals;
10338 rfs4_dbe_rele(ci->ri_dbe);
10339 return (is_downrev);
10340 }
10341
10342 /*
10343 * Do the main work of handling HA-NFSv4 Resource Group failover on
10344 * Sun Cluster.
10345 * We need to detect whether any RG admin paths have been added or removed,
10346 * and adjust resources accordingly.
10347 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
10348 * order to scale, the list and array of paths need to be held in more
10349 * suitable data structures.
10350 */
10351 static void
hanfsv4_failover(nfs4_srv_t * nsrv4)10352 hanfsv4_failover(nfs4_srv_t *nsrv4)
10353 {
10354 int i, start_grace, numadded_paths = 0;
10355 char **added_paths = NULL;
10356 rfs4_dss_path_t *dss_path;
10357
10358 /*
10359 * Note: currently, dss_pathlist cannot be NULL, since
10360 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
10361 * make the latter dynamically specified too, the following will
10362 * need to be adjusted.
10363 */
10364
10365 /*
10366 * First, look for removed paths: RGs that have been failed-over
10367 * away from this node.
10368 * Walk the "currently-serving" dss_pathlist and, for each
10369 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
10370 * from nfsd. If not, that RG path has been removed.
10371 *
10372 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
10373 * any duplicates.
10374 */
10375 dss_path = nsrv4->dss_pathlist;
10376 do {
10377 int found = 0;
10378 char *path = dss_path->path;
10379
10380 /* used only for non-HA so may not be removed */
10381 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10382 dss_path = dss_path->next;
10383 continue;
10384 }
10385
10386 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10387 int cmpret;
10388 char *newpath = rfs4_dss_newpaths[i];
10389
10390 /*
10391 * Since nfsd has sorted rfs4_dss_newpaths for us,
10392 * once the return from strcmp is negative we know
10393 * we've passed the point where "path" should be,
10394 * and can stop searching: "path" has been removed.
10395 */
10396 cmpret = strcmp(path, newpath);
10397 if (cmpret < 0)
10398 break;
10399 if (cmpret == 0) {
10400 found = 1;
10401 break;
10402 }
10403 }
10404
10405 if (found == 0) {
10406 unsigned index = dss_path->index;
10407 rfs4_servinst_t *sip = dss_path->sip;
10408 rfs4_dss_path_t *path_next = dss_path->next;
10409
10410 /*
10411 * This path has been removed.
10412 * We must clear out the servinst reference to
10413 * it, since it's now owned by another
10414 * node: we should not attempt to touch it.
10415 */
10416 ASSERT(dss_path == sip->dss_paths[index]);
10417 sip->dss_paths[index] = NULL;
10418
10419 /* remove from "currently-serving" list, and destroy */
10420 remque(dss_path);
10421 /* allow for NUL */
10422 kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10423 kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10424
10425 dss_path = path_next;
10426 } else {
10427 /* path was found; not removed */
10428 dss_path = dss_path->next;
10429 }
10430 } while (dss_path != nsrv4->dss_pathlist);
10431
10432 /*
10433 * Now, look for added paths: RGs that have been failed-over
10434 * to this node.
10435 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10436 * for each path, check if it is on the "currently-serving"
10437 * dss_pathlist. If not, that RG path has been added.
10438 *
10439 * Note: we don't do duplicate detection here; nfsd does that for us.
10440 *
10441 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10442 * an upper bound for the size needed for added_paths[numadded_paths].
10443 */
10444
10445 /* probably more space than we need, but guaranteed to be enough */
10446 if (rfs4_dss_numnewpaths > 0) {
10447 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10448 added_paths = kmem_zalloc(sz, KM_SLEEP);
10449 }
10450
10451 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10452 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10453 int found = 0;
10454 char *newpath = rfs4_dss_newpaths[i];
10455
10456 dss_path = nsrv4->dss_pathlist;
10457 do {
10458 char *path = dss_path->path;
10459
10460 /* used only for non-HA */
10461 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10462 dss_path = dss_path->next;
10463 continue;
10464 }
10465
10466 if (strncmp(path, newpath, strlen(path)) == 0) {
10467 found = 1;
10468 break;
10469 }
10470
10471 dss_path = dss_path->next;
10472 } while (dss_path != nsrv4->dss_pathlist);
10473
10474 if (found == 0) {
10475 added_paths[numadded_paths] = newpath;
10476 numadded_paths++;
10477 }
10478 }
10479
10480 /* did we find any added paths? */
10481 if (numadded_paths > 0) {
10482
10483 /* create a new server instance, and start its grace period */
10484 start_grace = 1;
10485 /* CSTYLED */
10486 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10487
10488 /* read in the stable storage state from these paths */
10489 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10490
10491 /*
10492 * Multiple failovers during a grace period will cause
10493 * clients of the same resource group to be partitioned
10494 * into different server instances, with different
10495 * grace periods. Since clients of the same resource
10496 * group must be subject to the same grace period,
10497 * we need to reset all currently active grace periods.
10498 */
10499 rfs4_grace_reset_all(nsrv4);
10500 }
10501
10502 if (rfs4_dss_numnewpaths > 0)
10503 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10504 }
10505