1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All Rights Reserved 29 */ 30 31 /* 32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved. 33 * Copyright 2019 Nexenta Systems, Inc. 34 * Copyright 2019 Nexenta by DDN, Inc. 35 * Copyright 2021-2025 Racktop Systems, Inc. 
36 */ 37 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/systm.h> 41 #include <sys/cred.h> 42 #include <sys/buf.h> 43 #include <sys/vfs.h> 44 #include <sys/vfs_opreg.h> 45 #include <sys/vnode.h> 46 #include <sys/uio.h> 47 #include <sys/errno.h> 48 #include <sys/sysmacros.h> 49 #include <sys/statvfs.h> 50 #include <sys/kmem.h> 51 #include <sys/dirent.h> 52 #include <sys/cmn_err.h> 53 #include <sys/debug.h> 54 #include <sys/systeminfo.h> 55 #include <sys/flock.h> 56 #include <sys/pathname.h> 57 #include <sys/nbmlock.h> 58 #include <sys/share.h> 59 #include <sys/atomic.h> 60 #include <sys/policy.h> 61 #include <sys/fem.h> 62 #include <sys/sdt.h> 63 #include <sys/ddi.h> 64 #include <sys/zone.h> 65 66 #include <fs/fs_reparse.h> 67 68 #include <rpc/types.h> 69 #include <rpc/auth.h> 70 #include <rpc/rpcsec_gss.h> 71 #include <rpc/svc.h> 72 73 #include <nfs/nfs.h> 74 #include <nfs/nfssys.h> 75 #include <nfs/export.h> 76 #include <nfs/nfs_cmd.h> 77 #include <nfs/lm.h> 78 #include <nfs/nfs4.h> 79 #include <nfs/nfs4_drc.h> 80 81 #include <sys/strsubr.h> 82 #include <sys/strsun.h> 83 84 #include <inet/common.h> 85 #include <inet/ip.h> 86 #include <inet/ip6.h> 87 88 #include <sys/tsol/label.h> 89 #include <sys/tsol/tndb.h> 90 91 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */ 92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES; 93 #define RFS4_LOCK_DELAY 10 /* Milliseconds */ 94 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY; 95 extern struct svc_ops rdma_svc_ops; 96 extern int nfs_loaned_buffers; 97 #define RFS4_LOOKUP_EXP_STATE_MAX 8 /* Limit of loop to clean expired states */ 98 static int rfs4_lookup_exp_state_max = RFS4_LOOKUP_EXP_STATE_MAX; 99 /* End of Tunables */ 100 101 static int rdma_setup_read_data4(READ4args *, READ4res *); 102 103 /* 104 * Used to bump the stateid4.seqid value and show changes in the stateid 105 */ 106 #define next_stateid(sp) (++(sp)->bits.chgseq) 107 108 /* 109 * RFS4_MINLEN_ENTRY4: XDR-encoded 
size of smallest possible dirent. 110 * This is used to return NFS4ERR_TOOSMALL when clients specify 111 * maxcount that isn't large enough to hold the smallest possible 112 * XDR encoded dirent. 113 * 114 * sizeof cookie (8 bytes) + 115 * sizeof name_len (4 bytes) + 116 * sizeof smallest (padded) name (4 bytes) + 117 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4 118 * sizeof attrlist4_len (4 bytes) + 119 * sizeof next boolean (4 bytes) 120 * 121 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing 122 * the smallest possible entry4 (assumes no attrs requested). 123 * sizeof nfsstat4 (4 bytes) + 124 * sizeof verifier4 (8 bytes) + 125 * sizeof entry4list bool (4 bytes) + 126 * sizeof entry4 (36 bytes) + 127 * sizeof eof bool (4 bytes) 128 * 129 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to 130 * VOP_READDIR. Its value is the size of the maximum possible dirent 131 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent 132 * required for a given name length. MAXNAMELEN is the maximum 133 * filename length allowed in Solaris. The first two DIRENT64_RECLEN() 134 * macros are to allow for . and .. entries -- just a minor tweak to try 135 * and guarantee that buffer we give to VOP_READDIR will be large enough 136 * to hold ., .., and the largest possible solaris dirent64. 137 */ 138 #define RFS4_MINLEN_ENTRY4 36 139 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4) 140 #define RFS4_MINLEN_RDDIR_BUF \ 141 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN)) 142 143 /* 144 * It would be better to pad to 4 bytes since that's what XDR would do, 145 * but the dirents UFS gives us are already padded to 8, so just take 146 * what we're given. Dircount is only a hint anyway. Currently the 147 * solaris kernel is ASCII only, so there's no point in calling the 148 * UTF8 functions. 
149 * 150 * dirent64: named padded to provide 8 byte struct alignment 151 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad) 152 * 153 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes 154 * 155 */ 156 #define DIRENT64_TO_DIRCOUNT(dp) \ 157 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen)) 158 159 160 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */ 161 162 u_longlong_t nfs4_srv_caller_id; 163 uint_t nfs4_srv_vkey = 0; 164 165 void rfs4_init_compound_state(struct compound_state *); 166 167 static void nullfree(caddr_t); 168 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 169 struct compound_state *); 170 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 171 struct compound_state *); 172 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 173 struct compound_state *); 174 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 175 struct compound_state *); 176 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 177 struct compound_state *); 178 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *, 179 struct svc_req *, struct compound_state *); 180 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *, 181 struct svc_req *, struct compound_state *); 182 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 183 struct compound_state *); 184 static void rfs4_op_getattr_free(nfs_resop4 *); 185 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 186 struct compound_state *); 187 static void rfs4_op_getfh_free(nfs_resop4 *); 188 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 189 struct compound_state *); 190 static void rfs4_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 191 struct compound_state *); 192 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 193 struct compound_state *); 194 static void 
rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 195 struct compound_state *); 196 static void lock_denied_free(nfs_resop4 *); 197 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 198 struct compound_state *); 199 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 200 struct compound_state *); 201 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 202 struct compound_state *); 203 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 204 struct compound_state *); 205 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, 206 struct svc_req *req, struct compound_state *cs); 207 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 208 struct compound_state *); 209 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 210 struct compound_state *); 211 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *, 212 struct svc_req *, struct compound_state *); 213 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *, 214 struct svc_req *, struct compound_state *); 215 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 216 struct compound_state *); 217 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 218 struct compound_state *); 219 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 220 struct compound_state *); 221 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 222 struct compound_state *); 223 static void rfs4_op_read_free(nfs_resop4 *); 224 static void rfs4_op_readdir_free(nfs_resop4 *resop); 225 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 226 struct compound_state *); 227 static void rfs4_op_readlink_free(nfs_resop4 *); 228 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *, 229 struct svc_req *, struct compound_state *); 230 static void rfs4_op_remove(nfs_argop4 *, 
nfs_resop4 *, struct svc_req *, 231 struct compound_state *); 232 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 233 struct compound_state *); 234 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 235 struct compound_state *); 236 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 237 struct compound_state *); 238 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 239 struct compound_state *); 240 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 241 struct compound_state *); 242 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 243 struct compound_state *); 244 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 245 struct compound_state *); 246 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *, 247 struct svc_req *, struct compound_state *); 248 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *, 249 struct svc_req *req, struct compound_state *); 250 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 251 struct compound_state *); 252 static void rfs4_op_secinfo_free(nfs_resop4 *); 253 254 void rfs4x_op_exchange_id(nfs_argop4 *argop, nfs_resop4 *resop, 255 struct svc_req *req, struct compound_state *cs); 256 void rfs4x_exchange_id_free(nfs_resop4 *); 257 258 void rfs4x_op_create_session(nfs_argop4 *argop, nfs_resop4 *resop, 259 struct svc_req *req, struct compound_state *cs); 260 261 void rfs4x_op_destroy_session(nfs_argop4 *argop, nfs_resop4 *resop, 262 struct svc_req *req, compound_state_t *cs); 263 264 void rfs4x_op_sequence(nfs_argop4 *argop, nfs_resop4 *resop, 265 struct svc_req *req, struct compound_state *cs); 266 267 void rfs4x_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop, 268 struct svc_req *req, compound_state_t *cs); 269 270 void rfs4x_op_destroy_clientid(nfs_argop4 *argop, nfs_resop4 *resop, 271 struct svc_req *req, compound_state_t 
*cs);

void rfs4x_op_bind_conn_to_session(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, compound_state_t *cs);

void rfs4x_op_secinfo_noname(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, compound_state_t *cs);
void rfs4x_op_free_stateid(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, compound_state_t *cs);

static nfsstat4 check_open_access(uint32_t, struct compound_state *,
    struct svc_req *);
nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);

/*
 * translation table for attrs
 *
 * NOTE(review): the meaning of the individual fields is not established
 * by the code visible in this part of the file; see the users of
 * nfs4_ntov_table_init()/nfs4_ntov_table_free() before relying on them.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;
	uint8_t amap[NFS4_MAXNUM_ATTRS];
	int attrcnt;
	bool_t vfsstat;
};

static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
    struct nfs4_svgetit_arg *sargp);

static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);

static void hanfsv4_failover(nfs4_srv_t *);

/*
 * FEM operation vectors built by fem_create() in rfs4_srvrinit() from
 * nfs4_rd_deleg_tmpl and nfs4_wr_deleg_tmpl respectively.
 */
fem_t *deleg_rdops;
fem_t *deleg_wrops;

/*
 * NFS4 op dispatch table
 */

/*
 * One dispatch entry: the handler for an operation, the routine that
 * releases whatever the handler allocated for its result, and a set of
 * OP_* flag bits.
 */
struct rfsv4disp {
	void (*dis_proc)(); /* proc to call */
	void (*dis_resfree)(); /* frees space allocated by proc */
	int dis_flags; /* OP_IDEMPOTENT, etc... */
};

/* Flag bits stored in rfsv4disp.dis_flags */
#define OP_IDEMPOTENT (1 << 0)
#define OP_CLEAR_STATEID (1 << 1)

/*
 * The table is positional: the comment above each entry gives the
 * operation name and the array index that entry occupies.  Slots with no
 * handler use rfs4_op_illegal (indices 0-2) or rfs4_op_notsup, paired
 * with nullfree.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, OP_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, OP_CLEAR_STATEID},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, OP_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, OP_CLEAR_STATEID},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, OP_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, OP_IDEMPOTENT},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, OP_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, OP_IDEMPOTENT},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, OP_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, OP_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, OP_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, OP_IDEMPOTENT},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, OP_IDEMPOTENT},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, OP_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},

	/*
	 * NFSv4.1 operations
	 */

	/* OP_BACKCHANNEL_CTL = 40 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_BIND_CONN_TO_SESSION = 41 */
	{rfs4x_op_bind_conn_to_session, nullfree, 0},

	/* OP_EXCHANGE_ID = 42 */
	{rfs4x_op_exchange_id, rfs4x_exchange_id_free, 0},

	/* OP_CREATE_SESSION = 43 */
	{rfs4x_op_create_session, nullfree, 0},

	/* OP_DESTROY_SESSION = 44 */
	{rfs4x_op_destroy_session, nullfree, 0},

	/* OP_FREE_STATEID = 45 */
	{rfs4x_op_free_stateid, nullfree, 0},

	/* OP_GET_DIR_DELEGATION = 46 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_GETDEVICEINFO = 47 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_GETDEVICELIST = 48 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_LAYOUTCOMMIT = 49 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_LAYOUTGET = 50 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_LAYOUTRETURN = 51 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_SECINFO_NO_NAME = 52 */
	{rfs4x_op_secinfo_noname, rfs4_op_secinfo_free, 0},

	/* OP_SEQUENCE = 53 */
	{rfs4x_op_sequence, nullfree, 0},

	/* OP_SET_SSV = 54 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_TEST_STATEID = 55 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_WANT_DELEGATION = 56 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_DESTROY_CLIENTID = 57 */
	{rfs4x_op_destroy_clientid, nullfree, 0},

	/* OP_RECLAIM_COMPLETE = 58 */
	{rfs4x_op_reclaim_complete, nullfree, 0},

	/*
	 * NFSv4.2 operations
	 */
	/* OP_ALLOCATE = 59 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_COPY = 60 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_COPY_NOTIFY = 61 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_DEALLOCATE = 62 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_IO_ADVISE = 63 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_LAYOUTERROR = 64 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_LAYOUTSTATS = 65 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_OFFLOAD_CANCEL = 66 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_OFFLOAD_STATUS = 67 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_READ_PLUS = 68 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_SEEK = 69 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_WRITE_SAME = 70 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_CLONE = 71 */
	{rfs4_op_notsup, nullfree, 0},

};

/* Number of entries in rfsv4disptab[] */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * Index one past the last dispatch entry.  rfs4_op_string[] (DEBUG only)
 * carries a trailing "rfs4_op_illegal" name at this same position.
 */
#define OP_ILLEGAL_IDX (rfsv4disp_cnt)

#ifdef DEBUG

int rfs4_fillone_debug = 0;
int rfs4_no_stub_access = 1;
int rfs4_rddir_debug = 0;

/*
 * Operation names in the same order as rfsv4disptab[], followed by one
 * extra name for OP_ILLEGAL_IDX.
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	"rfs4_op_setclient_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	/* NFSv4.1 */
	"backchannel_ctl",
	"bind_conn_to_session",
	"exchange_id",
	"create_session",
	"destroy_session",
	"free_stateid",
	"get_dir_delegation",
	"getdeviceinfo",
	"getdevicelist",
	"layoutcommit",
	"layoutget",
	"layoutreturn",
	"secinfo_no_name",
	"sequence",
	"set_ssv",
	"test_stateid",
	"want_delegation",
	"destroy_clientid",
	"reclaim_complete",
	/* NFSv4.2 */
	"allocate",
	"copy",
	"copy_notify",
	"deallocate",
	"io_advise",
	"layouterror",
	"layoutstats",
	"offload_cancel",
	"offload_status",
	"read_plus",
	"seek",
	"write_same",
	"clone",

	"rfs4_op_illegal"
};

#endif

void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);

extern size_t strlcpy(char *dst, const char *src, size_t dstsize);

extern void rfs4_free_fs_locations4(fs_locations4 *);

/*
 * Step from one dirent64 to the next by advancing d_reclen bytes.
 * Any earlier definition of nextdp is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))

/* Template handed to fem_create() for deleg_rdops in rfs4_srvrinit(). */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[]
= { 655 VOPNAME_OPEN, { .femop_open = deleg_rd_open }, 656 VOPNAME_WRITE, { .femop_write = deleg_rd_write }, 657 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr }, 658 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock }, 659 VOPNAME_SPACE, { .femop_space = deleg_rd_space }, 660 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr }, 661 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent }, 662 NULL, NULL 663 }; 664 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = { 665 VOPNAME_OPEN, { .femop_open = deleg_wr_open }, 666 VOPNAME_READ, { .femop_read = deleg_wr_read }, 667 VOPNAME_WRITE, { .femop_write = deleg_wr_write }, 668 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr }, 669 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock }, 670 VOPNAME_SPACE, { .femop_space = deleg_wr_space }, 671 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr }, 672 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent }, 673 NULL, NULL 674 }; 675 676 677 nfs4_srv_t * 678 nfs4_get_srv(void) 679 { 680 nfs_globals_t *ng = nfs_srv_getzg(); 681 nfs4_srv_t *srv = ng->nfs4_srv; 682 ASSERT(srv != NULL); 683 return (srv); 684 } 685 686 void 687 rfs4_srv_zone_init(nfs_globals_t *ng) 688 { 689 nfs4_srv_t *nsrv4; 690 timespec32_t verf; 691 692 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP); 693 694 /* 695 * The following algorithm attempts to find a unique verifier 696 * to be used as the write verifier returned from the server 697 * to the client. It is important that this verifier change 698 * whenever the server reboots. Of secondary importance, it 699 * is important for the verifier to be unique between two 700 * different servers. 701 * 702 * Thus, an attempt is made to use the system hostid and the 703 * current time in seconds when the nfssrv kernel module is 704 * loaded. It is assumed that an NFS server will not be able 705 * to boot and then to reboot in less than a second. 
If the 706 * hostid has not been set, then the current high resolution 707 * time is used. This will ensure different verifiers each 708 * time the server reboots and minimize the chances that two 709 * different servers will have the same verifier. 710 * XXX - this is broken on LP64 kernels. 711 */ 712 verf.tv_sec = (time_t)zone_get_hostid(NULL); 713 if (verf.tv_sec != 0) { 714 verf.tv_nsec = gethrestime_sec(); 715 } else { 716 timespec_t tverf; 717 718 gethrestime(&tverf); 719 verf.tv_sec = (time_t)tverf.tv_sec; 720 verf.tv_nsec = tverf.tv_nsec; 721 } 722 nsrv4->write4verf = *(uint64_t *)&verf; 723 724 /* Used to manage create/destroy of server state */ 725 nsrv4->nfs4_server_state = NULL; 726 nsrv4->nfs4_cur_servinst = NULL; 727 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE; 728 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL); 729 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL); 730 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL); 731 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL); 732 733 ng->nfs4_srv = nsrv4; 734 } 735 736 void 737 rfs4_srv_zone_fini(nfs_globals_t *ng) 738 { 739 nfs4_srv_t *nsrv4 = ng->nfs4_srv; 740 741 ng->nfs4_srv = NULL; 742 743 mutex_destroy(&nsrv4->deleg_lock); 744 mutex_destroy(&nsrv4->state_lock); 745 mutex_destroy(&nsrv4->servinst_lock); 746 rw_destroy(&nsrv4->deleg_policy_lock); 747 748 kmem_free(nsrv4, sizeof (*nsrv4)); 749 } 750 751 void 752 rfs4_srvrinit(void) 753 { 754 extern void rfs4_attr_init(); 755 756 rfs4_attr_init(); 757 758 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) { 759 rfs4_disable_delegation(); 760 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl, 761 &deleg_wrops) != 0) { 762 rfs4_disable_delegation(); 763 fem_free(deleg_rdops); 764 } 765 766 nfs4_srv_caller_id = fs_new_caller_id(); 767 lockt_sysid = lm_alloc_sysidt(); 768 vsd_create(&nfs4_srv_vkey, NULL); 769 rfs4_state_g_init(); 770 } 771 772 void 773 rfs4_srvrfini(void) 774 { 775 if 
(lockt_sysid != LM_NOSYSID) { 776 lm_free_sysidt(lockt_sysid); 777 lockt_sysid = LM_NOSYSID; 778 } 779 780 rfs4_state_g_fini(); 781 782 fem_free(deleg_rdops); 783 fem_free(deleg_wrops); 784 } 785 786 void 787 rfs4_do_server_start(int server_upordown, int srv_delegation, 788 nfs4_minor_t nfs4_minor_max, int cluster_booted) 789 { 790 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 791 792 /* Is this a warm start? */ 793 if (server_upordown == NFS_SERVER_QUIESCED) { 794 cmn_err(CE_NOTE, "nfs4_srv: " 795 "server was previously quiesced; " 796 "existing NFSv4 state will be re-used"); 797 798 /* 799 * HA-NFSv4: this is also the signal 800 * that a Resource Group failover has 801 * occurred. 802 */ 803 if (cluster_booted) 804 hanfsv4_failover(nsrv4); 805 } else { 806 /* Cold start */ 807 nsrv4->rfs4_start_time = 0; 808 rfs4_state_zone_init(nsrv4); 809 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max, 810 nfs4_drc_hash); 811 812 /* 813 * The nfsd service was started with the -s option 814 * we need to pull in any state from the paths indicated. 
815 */ 816 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) { 817 /* read in the stable storage state from these paths */ 818 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths, 819 rfs4_dss_newpaths); 820 } 821 } 822 823 nsrv4->nfs4_minor_max = nfs4_minor_max; 824 825 /* Check if delegation is to be enabled */ 826 if (srv_delegation != FALSE) 827 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE); 828 } 829 830 void 831 rfs4_init_compound_state(struct compound_state *cs) 832 { 833 bzero(cs, sizeof (*cs)); 834 cs->cont = TRUE; 835 cs->access = CS_ACCESS_DENIED; 836 cs->deleg = FALSE; 837 cs->mandlock = FALSE; 838 cs->fh.nfs_fh4_val = cs->fhbuf; 839 } 840 841 /* Do cleanup of the compound_state */ 842 void 843 rfs4_fini_compound_state(struct compound_state *cs) 844 { 845 if (cs->vp) { 846 VN_RELE(cs->vp); 847 } 848 if (cs->saved_vp) { 849 VN_RELE(cs->saved_vp); 850 } 851 if (cs->cr) { 852 crfree(cs->cr); 853 } 854 if (cs->saved_fh.nfs_fh4_val) { 855 kmem_free(cs->saved_fh.nfs_fh4_val, NFS4_FHSIZE); 856 } 857 if (cs->sp) { 858 rfs4x_session_rele(cs->sp); 859 } 860 } 861 862 void 863 rfs4_grace_start(rfs4_servinst_t *sip) 864 { 865 rw_enter(&sip->rwlock, RW_WRITER); 866 sip->start_time = nfs_sys_uptime(); 867 sip->grace_period = rfs4_grace_period; 868 rw_exit(&sip->rwlock); 869 } 870 871 /* 872 * returns true if the instance's grace period has never been started 873 */ 874 int 875 rfs4_servinst_grace_new(rfs4_servinst_t *sip) 876 { 877 time_t start_time; 878 879 rw_enter(&sip->rwlock, RW_READER); 880 start_time = sip->start_time; 881 rw_exit(&sip->rwlock); 882 883 return (start_time == 0); 884 } 885 886 /* 887 * Indicates if server instance is within the 888 * grace period. 
889 */ 890 int 891 rfs4_servinst_in_grace(rfs4_servinst_t *sip) 892 { 893 time_t grace_expiry; 894 895 /* All clients called reclaim-complete */ 896 if (sip->nreclaim == 0 || sip->grace_period == 0) 897 return (0); 898 899 rw_enter(&sip->rwlock, RW_READER); 900 grace_expiry = sip->start_time + sip->grace_period; 901 rw_exit(&sip->rwlock); 902 903 if (nfs_sys_uptime() < grace_expiry) 904 return (1); 905 906 /* Once grace period ends, optimize next calls */ 907 sip->grace_period = 0; 908 return (0); 909 } 910 911 int 912 rfs4_clnt_in_grace(rfs4_client_t *cp) 913 { 914 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0); 915 916 return (rfs4_servinst_in_grace(cp->rc_server_instance)); 917 } 918 919 /* 920 * reset all currently active grace periods 921 */ 922 void 923 rfs4_grace_reset_all(nfs4_srv_t *nsrv4) 924 { 925 rfs4_servinst_t *sip; 926 927 mutex_enter(&nsrv4->servinst_lock); 928 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) 929 if (rfs4_servinst_in_grace(sip)) 930 rfs4_grace_start(sip); 931 mutex_exit(&nsrv4->servinst_lock); 932 } 933 934 /* 935 * start any new instances' grace periods 936 */ 937 void 938 rfs4_grace_start_new(nfs4_srv_t *nsrv4) 939 { 940 rfs4_servinst_t *sip; 941 942 mutex_enter(&nsrv4->servinst_lock); 943 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) 944 if (rfs4_servinst_grace_new(sip)) 945 rfs4_grace_start(sip); 946 mutex_exit(&nsrv4->servinst_lock); 947 } 948 949 static rfs4_dss_path_t * 950 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip, 951 char *path, unsigned index) 952 { 953 size_t len; 954 rfs4_dss_path_t *dss_path; 955 956 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP); 957 958 /* 959 * Take a copy of the string, since the original may be overwritten. 960 * Sadly, no strdup() in the kernel. 
961 */ 962 /* allow for NUL */ 963 len = strlen(path) + 1; 964 dss_path->path = kmem_alloc(len, KM_SLEEP); 965 (void) strlcpy(dss_path->path, path, len); 966 967 /* associate with servinst */ 968 dss_path->sip = sip; 969 dss_path->index = index; 970 971 /* 972 * Add to list of served paths. 973 * No locking required, as we're only ever called at startup. 974 */ 975 if (nsrv4->dss_pathlist == NULL) { 976 /* this is the first dss_path_t */ 977 978 /* needed for insque/remque */ 979 dss_path->next = dss_path->prev = dss_path; 980 981 nsrv4->dss_pathlist = dss_path; 982 } else { 983 insque(dss_path, nsrv4->dss_pathlist); 984 } 985 986 return (dss_path); 987 } 988 989 /* 990 * Create a new server instance, and make it the currently active instance. 991 * Note that starting the grace period too early will reduce the clients' 992 * recovery window. 993 */ 994 void 995 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace, 996 int dss_npaths, char **dss_paths) 997 { 998 unsigned i; 999 rfs4_servinst_t *sip; 1000 rfs4_oldstate_t *oldstate; 1001 1002 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP); 1003 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL); 1004 1005 sip->nreclaim = 0; 1006 sip->start_time = (time_t)0; 1007 sip->grace_period = (time_t)0; 1008 sip->next = NULL; 1009 sip->prev = NULL; 1010 1011 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL); 1012 /* 1013 * This initial dummy entry is required to setup for insque/remque. 1014 * It must be skipped over whenever the list is traversed. 
1015 */ 1016 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP); 1017 /* insque/remque require initial list entry to be self-terminated */ 1018 oldstate->next = oldstate; 1019 oldstate->prev = oldstate; 1020 sip->oldstate = oldstate; 1021 1022 1023 sip->dss_npaths = dss_npaths; 1024 sip->dss_paths = kmem_alloc(dss_npaths * 1025 sizeof (rfs4_dss_path_t *), KM_SLEEP); 1026 1027 for (i = 0; i < dss_npaths; i++) { 1028 sip->dss_paths[i] = 1029 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i); 1030 } 1031 1032 mutex_enter(&nsrv4->servinst_lock); 1033 if (nsrv4->nfs4_cur_servinst != NULL) { 1034 /* add to linked list */ 1035 sip->prev = nsrv4->nfs4_cur_servinst; 1036 nsrv4->nfs4_cur_servinst->next = sip; 1037 } 1038 if (start_grace) 1039 rfs4_grace_start(sip); 1040 /* make the new instance "current" */ 1041 nsrv4->nfs4_cur_servinst = sip; 1042 1043 mutex_exit(&nsrv4->servinst_lock); 1044 } 1045 1046 /* 1047 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy 1048 * all instances directly. 
1049 */ 1050 void 1051 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4) 1052 { 1053 rfs4_servinst_t *sip, *prev, *current; 1054 #ifdef DEBUG 1055 int n = 0; 1056 #endif 1057 1058 mutex_enter(&nsrv4->servinst_lock); 1059 ASSERT(nsrv4->nfs4_cur_servinst != NULL); 1060 current = nsrv4->nfs4_cur_servinst; 1061 nsrv4->nfs4_cur_servinst = NULL; 1062 for (sip = current; sip != NULL; sip = prev) { 1063 prev = sip->prev; 1064 rw_destroy(&sip->rwlock); 1065 if (sip->oldstate) 1066 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t)); 1067 if (sip->dss_paths) { 1068 int i = sip->dss_npaths; 1069 1070 while (i > 0) { 1071 i--; 1072 if (sip->dss_paths[i] != NULL) { 1073 char *path = sip->dss_paths[i]->path; 1074 1075 if (path != NULL) { 1076 kmem_free(path, 1077 strlen(path) + 1); 1078 } 1079 kmem_free(sip->dss_paths[i], 1080 sizeof (rfs4_dss_path_t)); 1081 } 1082 } 1083 kmem_free(sip->dss_paths, 1084 sip->dss_npaths * sizeof (rfs4_dss_path_t *)); 1085 } 1086 kmem_free(sip, sizeof (rfs4_servinst_t)); 1087 #ifdef DEBUG 1088 n++; 1089 #endif 1090 } 1091 mutex_exit(&nsrv4->servinst_lock); 1092 } 1093 1094 /* 1095 * Assign the current server instance to a client_t. 1096 * Should be called with cp->rc_dbe held. 1097 */ 1098 void 1099 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp, 1100 rfs4_servinst_t *sip) 1101 { 1102 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0); 1103 1104 /* 1105 * The lock ensures that if the current instance is in the process 1106 * of changing, we will see the new one. 
1107 */ 1108 mutex_enter(&nsrv4->servinst_lock); 1109 cp->rc_server_instance = sip; 1110 mutex_exit(&nsrv4->servinst_lock); 1111 } 1112 1113 rfs4_servinst_t * 1114 rfs4_servinst(rfs4_client_t *cp) 1115 { 1116 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0); 1117 1118 return (cp->rc_server_instance); 1119 } 1120 1121 /* ARGSUSED */ 1122 static void 1123 nullfree(caddr_t resop) 1124 { 1125 } 1126 1127 /* 1128 * This is a fall-through for invalid or not implemented (yet) ops 1129 */ 1130 /* ARGSUSED */ 1131 static void 1132 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1133 struct compound_state *cs) 1134 { 1135 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL; 1136 } 1137 1138 /* 1139 * Check if the security flavor, nfsnum, is in the flavor_list. 1140 */ 1141 bool_t 1142 in_flavor_list(int nfsnum, int *flavor_list, int count) 1143 { 1144 int i; 1145 1146 for (i = 0; i < count; i++) { 1147 if (nfsnum == flavor_list[i]) 1148 return (TRUE); 1149 } 1150 return (FALSE); 1151 } 1152 1153 /* 1154 * Used by rfs4_op_secinfo to get the security information from the 1155 * export structure associated with the component. 1156 */ 1157 /* ARGSUSED */ 1158 nfsstat4 1159 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) 1160 { 1161 int error, different_export = 0; 1162 vnode_t *dvp, *vp; 1163 struct exportinfo *exi; 1164 fid_t fid; 1165 uint_t count, i; 1166 secinfo4 *resok_val; 1167 struct secinfo *secp; 1168 seconfig_t *si; 1169 bool_t did_traverse = FALSE; 1170 int dotdot, walk; 1171 nfs_export_t *ne = nfs_get_export(); 1172 1173 dvp = cs->vp; 1174 exi = cs->exi; 1175 ASSERT(exi != NULL); 1176 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0'); 1177 1178 /* 1179 * If dotdotting, then need to check whether it's above the 1180 * root of a filesystem, or above an export point. 
1181 */ 1182 if (dotdot) { 1183 vnode_t *zone_rootvp = ne->exi_root->exi_vp; 1184 1185 ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid); 1186 /* 1187 * If dotdotting at the root of a filesystem, then 1188 * need to traverse back to the mounted-on filesystem 1189 * and do the dotdot lookup there. 1190 */ 1191 if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) { 1192 1193 /* 1194 * If at the system root, then can 1195 * go up no further. 1196 */ 1197 if (VN_CMP(dvp, zone_rootvp)) 1198 return (puterrno4(ENOENT)); 1199 1200 /* 1201 * Traverse back to the mounted-on filesystem 1202 */ 1203 dvp = untraverse(dvp, zone_rootvp); 1204 1205 /* 1206 * Set the different_export flag so we remember 1207 * to pick up a new exportinfo entry for 1208 * this new filesystem. 1209 */ 1210 different_export = 1; 1211 } else { 1212 1213 /* 1214 * If dotdotting above an export point then set 1215 * the different_export to get new export info. 1216 */ 1217 different_export = nfs_exported(exi, dvp); 1218 } 1219 } 1220 1221 /* 1222 * Get the vnode for the component "nm". 1223 */ 1224 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr, 1225 NULL, NULL, NULL); 1226 if (error) 1227 return (puterrno4(error)); 1228 1229 /* 1230 * If the vnode is in a pseudo filesystem, or if the security flavor 1231 * used in the request is valid but not an explicitly shared flavor, 1232 * or the access bit indicates that this is a limited access, 1233 * check whether this vnode is visible. 1234 */ 1235 if (!different_export && 1236 (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) || 1237 cs->access & CS_ACCESS_LIMITED)) { 1238 if (! nfs_visible(exi, vp, &different_export)) { 1239 VN_RELE(vp); 1240 return (puterrno4(ENOENT)); 1241 } 1242 } 1243 1244 /* 1245 * If it's a mountpoint, then traverse it. 
1246 */ 1247 if (vn_ismntpt(vp)) { 1248 if ((error = traverse(&vp)) != 0) { 1249 VN_RELE(vp); 1250 return (puterrno4(error)); 1251 } 1252 /* remember that we had to traverse mountpoint */ 1253 did_traverse = TRUE; 1254 different_export = 1; 1255 } else if (vp->v_vfsp != dvp->v_vfsp) { 1256 /* 1257 * If vp isn't a mountpoint and the vfs ptrs aren't the same, 1258 * then vp is probably an LOFS object. We don't need the 1259 * realvp, we just need to know that we might have crossed 1260 * a server fs boundary and need to call checkexport4. 1261 * (LOFS lookup hides server fs mountpoints, and actually calls 1262 * traverse) 1263 */ 1264 different_export = 1; 1265 } 1266 1267 /* 1268 * Get the export information for it. 1269 */ 1270 if (different_export) { 1271 1272 bzero(&fid, sizeof (fid)); 1273 fid.fid_len = MAXFIDSZ; 1274 error = vop_fid_pseudo(vp, &fid); 1275 if (error) { 1276 VN_RELE(vp); 1277 return (puterrno4(error)); 1278 } 1279 1280 /* We'll need to reassign "exi". */ 1281 if (dotdot) 1282 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE); 1283 else 1284 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp); 1285 1286 if (exi == NULL) { 1287 if (did_traverse == TRUE) { 1288 /* 1289 * If this vnode is a mounted-on vnode, 1290 * but the mounted-on file system is not 1291 * exported, send back the secinfo for 1292 * the exported node that the mounted-on 1293 * vnode lives in. 1294 */ 1295 exi = cs->exi; 1296 } else { 1297 VN_RELE(vp); 1298 return (puterrno4(EACCES)); 1299 } 1300 } 1301 } 1302 ASSERT(exi != NULL); 1303 1304 1305 /* 1306 * Create the secinfo result based on the security information 1307 * from the exportinfo structure (exi). 1308 * 1309 * Return all flavors for a pseudo node. 1310 * For a real export node, return the flavor that the client 1311 * has access with. 
1312 */ 1313 ASSERT(RW_LOCK_HELD(&ne->exported_lock)); 1314 if (PSEUDO(exi)) { 1315 count = exi->exi_export.ex_seccnt; /* total sec count */ 1316 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP); 1317 secp = exi->exi_export.ex_secinfo; 1318 1319 for (i = 0; i < count; i++) { 1320 si = &secp[i].s_secinfo; 1321 resok_val[i].flavor = si->sc_rpcnum; 1322 if (resok_val[i].flavor == RPCSEC_GSS) { 1323 rpcsec_gss_info *info; 1324 1325 info = &resok_val[i].flavor_info; 1326 info->qop = si->sc_qop; 1327 info->service = (rpc_gss_svc_t)si->sc_service; 1328 1329 /* get oid opaque data */ 1330 info->oid.sec_oid4_len = 1331 si->sc_gss_mech_type->length; 1332 info->oid.sec_oid4_val = kmem_alloc( 1333 si->sc_gss_mech_type->length, KM_SLEEP); 1334 bcopy( 1335 si->sc_gss_mech_type->elements, 1336 info->oid.sec_oid4_val, 1337 info->oid.sec_oid4_len); 1338 } 1339 } 1340 resp->SECINFO4resok_len = count; 1341 resp->SECINFO4resok_val = resok_val; 1342 } else { 1343 int ret_cnt = 0, k = 0; 1344 int *flavor_list; 1345 1346 count = exi->exi_export.ex_seccnt; /* total sec count */ 1347 secp = exi->exi_export.ex_secinfo; 1348 1349 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP); 1350 /* find out which flavors to return */ 1351 for (i = 0; i < count; i ++) { 1352 int access, flavor, perm; 1353 1354 flavor = secp[i].s_secinfo.sc_nfsnum; 1355 perm = secp[i].s_flags; 1356 1357 access = nfsauth4_secinfo_access(exi, cs->req, 1358 flavor, perm, cs->basecr); 1359 1360 if (! (access & NFSAUTH_DENIED) && 1361 ! (access & NFSAUTH_WRONGSEC)) { 1362 flavor_list[ret_cnt] = flavor; 1363 ret_cnt++; 1364 } 1365 } 1366 1367 /* Create the returning SECINFO value */ 1368 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP); 1369 1370 for (i = 0; i < count; i++) { 1371 /* 1372 * If the flavor is in the flavor list, 1373 * fill in resok_val. 
1374 */ 1375 si = &secp[i].s_secinfo; 1376 if (in_flavor_list(si->sc_nfsnum, 1377 flavor_list, ret_cnt)) { 1378 resok_val[k].flavor = si->sc_rpcnum; 1379 if (resok_val[k].flavor == RPCSEC_GSS) { 1380 rpcsec_gss_info *info; 1381 1382 info = &resok_val[k].flavor_info; 1383 info->qop = si->sc_qop; 1384 info->service = (rpc_gss_svc_t) 1385 si->sc_service; 1386 1387 /* get oid opaque data */ 1388 info->oid.sec_oid4_len = 1389 si->sc_gss_mech_type->length; 1390 info->oid.sec_oid4_val = kmem_alloc( 1391 si->sc_gss_mech_type->length, 1392 KM_SLEEP); 1393 bcopy(si->sc_gss_mech_type->elements, 1394 info->oid.sec_oid4_val, 1395 info->oid.sec_oid4_len); 1396 } 1397 k++; 1398 } 1399 if (k >= ret_cnt) 1400 break; 1401 } 1402 resp->SECINFO4resok_len = ret_cnt; 1403 resp->SECINFO4resok_val = resok_val; 1404 kmem_free(flavor_list, count * sizeof (int)); 1405 } 1406 1407 VN_RELE(vp); 1408 return (NFS4_OK); 1409 } 1410 1411 /* 1412 * SECINFO (Operation 33): Obtain required security information on 1413 * the component name in the format of (security-mechanism-oid, qop, service) 1414 * triplets. 1415 */ 1416 /* ARGSUSED */ 1417 static void 1418 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1419 struct compound_state *cs) 1420 { 1421 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo; 1422 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo; 1423 utf8string *utfnm = &args->name; 1424 uint_t len; 1425 char *nm; 1426 struct sockaddr *ca; 1427 char *name = NULL; 1428 nfsstat4 status = NFS4_OK; 1429 1430 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs, 1431 SECINFO4args *, args); 1432 1433 /* 1434 * Current file handle (cfh) should have been set before getting 1435 * into this function. If not, return error. 
1436 */ 1437 if (cs->vp == NULL) { 1438 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1439 goto out; 1440 } 1441 1442 if (cs->vp->v_type != VDIR) { 1443 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 1444 goto out; 1445 } 1446 1447 /* 1448 * Verify the component name. If failed, error out, but 1449 * do not error out if the component name is a "..". 1450 * SECINFO will return its parents secinfo data for SECINFO "..". 1451 */ 1452 status = utf8_dir_verify(utfnm); 1453 if (status != NFS4_OK) { 1454 if (utfnm->utf8string_len != 2 || 1455 utfnm->utf8string_val[0] != '.' || 1456 utfnm->utf8string_val[1] != '.') { 1457 *cs->statusp = resp->status = status; 1458 goto out; 1459 } 1460 } 1461 1462 nm = utf8_to_str(utfnm, &len, NULL); 1463 if (nm == NULL) { 1464 *cs->statusp = resp->status = NFS4ERR_INVAL; 1465 goto out; 1466 } 1467 1468 if (len > MAXNAMELEN) { 1469 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1470 kmem_free(nm, len); 1471 goto out; 1472 } 1473 1474 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1475 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 1476 MAXPATHLEN + 1); 1477 1478 if (name == NULL) { 1479 *cs->statusp = resp->status = NFS4ERR_INVAL; 1480 kmem_free(nm, len); 1481 goto out; 1482 } 1483 1484 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp); 1485 1486 if (resp->status == NFS4_OK && rfs4_has_session(cs)) { 1487 /* 1488 * See rfc 5661 section 2.6.3.1.1.8 and 18.29.3 1489 * 1490 * 2.6.3.1.1.8 1491 * SECINFO and SECINFO_NO_NAME consume the current 1492 * filehandle (note that this is a change from NFSv4.0). 1493 * 1494 * 18.29.3 1495 * On success, the current filehandle is consumed (see 1496 * Section 2.6.3.1.1.8), and if the next operation after 1497 * SECINFO tries to use the current filehandle, that 1498 * operation will fail with the status 1499 * NFS4ERR_NOFILEHANDLE. 
1500 */ 1501 VN_RELE(cs->vp); 1502 cs->vp = NULL; 1503 } 1504 1505 if (name != nm) 1506 kmem_free(name, MAXPATHLEN + 1); 1507 kmem_free(nm, len); 1508 1509 out: 1510 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs, 1511 SECINFO4res *, resp); 1512 } 1513 1514 /* 1515 * Free SECINFO result. 1516 */ 1517 /* ARGSUSED */ 1518 static void 1519 rfs4_op_secinfo_free(nfs_resop4 *resop) 1520 { 1521 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo; 1522 int count, i; 1523 secinfo4 *resok_val; 1524 1525 /* If this is not an Ok result, nothing to free. */ 1526 if (resp->status != NFS4_OK) { 1527 return; 1528 } 1529 1530 count = resp->SECINFO4resok_len; 1531 resok_val = resp->SECINFO4resok_val; 1532 1533 for (i = 0; i < count; i++) { 1534 if (resok_val[i].flavor == RPCSEC_GSS) { 1535 rpcsec_gss_info *info; 1536 1537 info = &resok_val[i].flavor_info; 1538 kmem_free(info->oid.sec_oid4_val, 1539 info->oid.sec_oid4_len); 1540 } 1541 } 1542 kmem_free(resok_val, count * sizeof (secinfo4)); 1543 resp->SECINFO4resok_len = 0; 1544 resp->SECINFO4resok_val = NULL; 1545 } 1546 1547 /* ARGSUSED */ 1548 static void 1549 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1550 struct compound_state *cs) 1551 { 1552 ACCESS4args *args = &argop->nfs_argop4_u.opaccess; 1553 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess; 1554 int error; 1555 vnode_t *vp; 1556 struct vattr va; 1557 int checkwriteperm; 1558 cred_t *cr = cs->cr; 1559 bslabel_t *clabel, *slabel; 1560 ts_label_t *tslabel; 1561 boolean_t admin_low_client; 1562 1563 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs, 1564 ACCESS4args *, args); 1565 1566 #if 0 /* XXX allow access even if !cs->access. 
Eventually only pseudo fs */ 1567 if (cs->access == CS_ACCESS_DENIED) { 1568 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1569 goto out; 1570 } 1571 #endif 1572 if (cs->vp == NULL) { 1573 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1574 goto out; 1575 } 1576 1577 ASSERT(cr != NULL); 1578 1579 vp = cs->vp; 1580 1581 /* 1582 * If the file system is exported read only, it is not appropriate 1583 * to check write permissions for regular files and directories. 1584 * Special files are interpreted by the client, so the underlying 1585 * permissions are sent back to the client for interpretation. 1586 */ 1587 if (rdonly4(req, cs) && 1588 (vp->v_type == VREG || vp->v_type == VDIR)) 1589 checkwriteperm = 0; 1590 else 1591 checkwriteperm = 1; 1592 1593 /* 1594 * XXX 1595 * We need the mode so that we can correctly determine access 1596 * permissions relative to a mandatory lock file. Access to 1597 * mandatory lock files is denied on the server, so it might 1598 * as well be reflected to the server during the open. 
1599 */ 1600 va.va_mask = AT_MODE; 1601 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1602 if (error) { 1603 *cs->statusp = resp->status = puterrno4(error); 1604 goto out; 1605 } 1606 resp->access = 0; 1607 resp->supported = 0; 1608 1609 if (is_system_labeled()) { 1610 ASSERT(req->rq_label != NULL); 1611 clabel = req->rq_label; 1612 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *, 1613 "got client label from request(1)", 1614 struct svc_req *, req); 1615 if (!blequal(&l_admin_low->tsl_label, clabel)) { 1616 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) { 1617 *cs->statusp = resp->status = puterrno4(EACCES); 1618 goto out; 1619 } 1620 slabel = label2bslabel(tslabel); 1621 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel, 1622 char *, "got server label(1) for vp(2)", 1623 bslabel_t *, slabel, vnode_t *, vp); 1624 1625 admin_low_client = B_FALSE; 1626 } else 1627 admin_low_client = B_TRUE; 1628 } 1629 1630 if (args->access & ACCESS4_READ) { 1631 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 1632 if (!error && !MANDLOCK(vp, va.va_mode) && 1633 (!is_system_labeled() || admin_low_client || 1634 bldominates(clabel, slabel))) 1635 resp->access |= ACCESS4_READ; 1636 resp->supported |= ACCESS4_READ; 1637 } 1638 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) { 1639 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 1640 if (!error && (!is_system_labeled() || admin_low_client || 1641 bldominates(clabel, slabel))) 1642 resp->access |= ACCESS4_LOOKUP; 1643 resp->supported |= ACCESS4_LOOKUP; 1644 } 1645 if (checkwriteperm && 1646 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) { 1647 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 1648 if (!error && !MANDLOCK(vp, va.va_mode) && 1649 (!is_system_labeled() || admin_low_client || 1650 blequal(clabel, slabel))) 1651 resp->access |= 1652 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND)); 1653 resp->supported |= 1654 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND); 1655 } 1656 1657 if (checkwriteperm && 1658 
(args->access & ACCESS4_DELETE) && vp->v_type == VDIR) { 1659 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 1660 if (!error && (!is_system_labeled() || admin_low_client || 1661 blequal(clabel, slabel))) 1662 resp->access |= ACCESS4_DELETE; 1663 resp->supported |= ACCESS4_DELETE; 1664 } 1665 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) { 1666 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 1667 if (!error && !MANDLOCK(vp, va.va_mode) && 1668 (!is_system_labeled() || admin_low_client || 1669 bldominates(clabel, slabel))) 1670 resp->access |= ACCESS4_EXECUTE; 1671 resp->supported |= ACCESS4_EXECUTE; 1672 } 1673 1674 if (is_system_labeled() && !admin_low_client) 1675 label_rele(tslabel); 1676 1677 *cs->statusp = resp->status = NFS4_OK; 1678 out: 1679 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs, 1680 ACCESS4res *, resp); 1681 } 1682 1683 /* ARGSUSED */ 1684 static void 1685 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1686 struct compound_state *cs) 1687 { 1688 COMMIT4args *args = &argop->nfs_argop4_u.opcommit; 1689 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit; 1690 int error; 1691 vnode_t *vp = cs->vp; 1692 cred_t *cr = cs->cr; 1693 vattr_t va; 1694 nfs4_srv_t *nsrv4; 1695 1696 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs, 1697 COMMIT4args *, args); 1698 1699 if (vp == NULL) { 1700 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1701 goto out; 1702 } 1703 if (cs->access == CS_ACCESS_DENIED) { 1704 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1705 goto out; 1706 } 1707 1708 if (args->offset + args->count < args->offset) { 1709 *cs->statusp = resp->status = NFS4ERR_INVAL; 1710 goto out; 1711 } 1712 1713 va.va_mask = AT_UID; 1714 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1715 1716 /* 1717 * If we can't get the attributes, then we can't do the 1718 * right access checking. So, we'll fail the request. 
1719 */ 1720 if (error) { 1721 *cs->statusp = resp->status = puterrno4(error); 1722 goto out; 1723 } 1724 if (rdonly4(req, cs)) { 1725 *cs->statusp = resp->status = NFS4ERR_ROFS; 1726 goto out; 1727 } 1728 1729 if (vp->v_type != VREG) { 1730 if (vp->v_type == VDIR) 1731 resp->status = NFS4ERR_ISDIR; 1732 else 1733 resp->status = NFS4ERR_INVAL; 1734 *cs->statusp = resp->status; 1735 goto out; 1736 } 1737 1738 if (crgetuid(cr) != va.va_uid && 1739 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) { 1740 *cs->statusp = resp->status = puterrno4(error); 1741 goto out; 1742 } 1743 1744 error = VOP_FSYNC(vp, FSYNC, cr, NULL); 1745 1746 if (error) { 1747 *cs->statusp = resp->status = puterrno4(error); 1748 goto out; 1749 } 1750 1751 nsrv4 = nfs4_get_srv(); 1752 *cs->statusp = resp->status = NFS4_OK; 1753 resp->writeverf = nsrv4->write4verf; 1754 out: 1755 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs, 1756 COMMIT4res *, resp); 1757 } 1758 1759 /* 1760 * rfs4_op_mknod is called from rfs4_op_create after all initial verification 1761 * was completed. It does the nfsv4 create for special files. 
1762 */ 1763 /* ARGSUSED */ 1764 static vnode_t * 1765 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req, 1766 struct compound_state *cs, vattr_t *vap, char *nm) 1767 { 1768 int error; 1769 cred_t *cr = cs->cr; 1770 vnode_t *dvp = cs->vp; 1771 vnode_t *vp = NULL; 1772 int mode; 1773 enum vcexcl excl; 1774 1775 switch (args->type) { 1776 case NF4CHR: 1777 case NF4BLK: 1778 if (secpolicy_sys_devices(cr) != 0) { 1779 *cs->statusp = resp->status = NFS4ERR_PERM; 1780 return (NULL); 1781 } 1782 if (args->type == NF4CHR) 1783 vap->va_type = VCHR; 1784 else 1785 vap->va_type = VBLK; 1786 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1, 1787 args->ftype4_u.devdata.specdata2); 1788 vap->va_mask |= AT_RDEV; 1789 break; 1790 case NF4SOCK: 1791 vap->va_type = VSOCK; 1792 break; 1793 case NF4FIFO: 1794 vap->va_type = VFIFO; 1795 break; 1796 default: 1797 *cs->statusp = resp->status = NFS4ERR_BADTYPE; 1798 return (NULL); 1799 } 1800 1801 /* 1802 * Must specify the mode. 1803 */ 1804 if (!(vap->va_mask & AT_MODE)) { 1805 *cs->statusp = resp->status = NFS4ERR_INVAL; 1806 return (NULL); 1807 } 1808 1809 excl = EXCL; 1810 1811 mode = 0; 1812 1813 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL); 1814 if (error) { 1815 *cs->statusp = resp->status = puterrno4(error); 1816 return (NULL); 1817 } 1818 return (vp); 1819 } 1820 1821 /* 1822 * nfsv4 create is used to create non-regular files. For regular files, 1823 * use nfsv4 open. 
1824 */ 1825 /* ARGSUSED */ 1826 static void 1827 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1828 struct compound_state *cs) 1829 { 1830 CREATE4args *args = &argop->nfs_argop4_u.opcreate; 1831 CREATE4res *resp = &resop->nfs_resop4_u.opcreate; 1832 int error; 1833 struct vattr bva, iva, iva2, ava, *vap; 1834 cred_t *cr = cs->cr; 1835 vnode_t *dvp = cs->vp; 1836 vnode_t *vp = NULL; 1837 vnode_t *realvp; 1838 char *nm, *lnm; 1839 uint_t len, llen; 1840 int syncval = 0; 1841 struct nfs4_svgetit_arg sarg; 1842 struct nfs4_ntov_table ntov; 1843 struct statvfs64 sb; 1844 nfsstat4 status; 1845 struct sockaddr *ca; 1846 char *name = NULL; 1847 char *lname = NULL; 1848 1849 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs, 1850 CREATE4args *, args); 1851 1852 resp->attrset = 0; 1853 1854 if (dvp == NULL) { 1855 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1856 goto out; 1857 } 1858 1859 /* 1860 * If there is an unshared filesystem mounted on this vnode, 1861 * do not allow to create an object in this directory. 
1862 */ 1863 if (vn_ismntpt(dvp)) { 1864 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1865 goto out; 1866 } 1867 1868 /* Verify that type is correct */ 1869 switch (args->type) { 1870 case NF4LNK: 1871 case NF4BLK: 1872 case NF4CHR: 1873 case NF4SOCK: 1874 case NF4FIFO: 1875 case NF4DIR: 1876 break; 1877 default: 1878 *cs->statusp = resp->status = NFS4ERR_BADTYPE; 1879 goto out; 1880 }; 1881 1882 if (cs->access == CS_ACCESS_DENIED) { 1883 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1884 goto out; 1885 } 1886 if (dvp->v_type != VDIR) { 1887 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 1888 goto out; 1889 } 1890 status = utf8_dir_verify(&args->objname); 1891 if (status != NFS4_OK) { 1892 *cs->statusp = resp->status = status; 1893 goto out; 1894 } 1895 1896 if (rdonly4(req, cs)) { 1897 *cs->statusp = resp->status = NFS4ERR_ROFS; 1898 goto out; 1899 } 1900 1901 /* 1902 * Name of newly created object 1903 */ 1904 nm = utf8_to_fn(&args->objname, &len, NULL); 1905 if (nm == NULL) { 1906 *cs->statusp = resp->status = NFS4ERR_INVAL; 1907 goto out; 1908 } 1909 1910 if (len > MAXNAMELEN) { 1911 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1912 kmem_free(nm, len); 1913 goto out; 1914 } 1915 1916 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1917 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 1918 MAXPATHLEN + 1); 1919 1920 if (name == NULL) { 1921 *cs->statusp = resp->status = NFS4ERR_INVAL; 1922 kmem_free(nm, len); 1923 goto out; 1924 } 1925 1926 resp->attrset = 0; 1927 1928 sarg.sbp = &sb; 1929 sarg.is_referral = B_FALSE; 1930 nfs4_ntov_table_init(&ntov); 1931 1932 status = do_rfs4_set_attrs(&resp->attrset, 1933 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT); 1934 1935 if (sarg.vap->va_mask == 0 && status == NFS4_OK) 1936 status = NFS4ERR_INVAL; 1937 1938 if (status != NFS4_OK) { 1939 *cs->statusp = resp->status = status; 1940 if (name != nm) 1941 kmem_free(name, MAXPATHLEN + 1); 1942 kmem_free(nm, len); 1943 
nfs4_ntov_table_free(&ntov, &sarg); 1944 resp->attrset = 0; 1945 goto out; 1946 } 1947 1948 /* Get "before" change value */ 1949 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE; 1950 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL); 1951 if (error) { 1952 *cs->statusp = resp->status = puterrno4(error); 1953 if (name != nm) 1954 kmem_free(name, MAXPATHLEN + 1); 1955 kmem_free(nm, len); 1956 nfs4_ntov_table_free(&ntov, &sarg); 1957 resp->attrset = 0; 1958 goto out; 1959 } 1960 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime) 1961 1962 vap = sarg.vap; 1963 1964 /* 1965 * Set the default initial values for attributes when the parent 1966 * directory does not have the VSUID/VSGID bit set and they have 1967 * not been specified in createattrs. 1968 */ 1969 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) { 1970 vap->va_uid = crgetuid(cr); 1971 vap->va_mask |= AT_UID; 1972 } 1973 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) { 1974 vap->va_gid = crgetgid(cr); 1975 vap->va_mask |= AT_GID; 1976 } 1977 1978 vap->va_mask |= AT_TYPE; 1979 switch (args->type) { 1980 case NF4DIR: 1981 vap->va_type = VDIR; 1982 if ((vap->va_mask & AT_MODE) == 0) { 1983 vap->va_mode = 0700; /* default: owner rwx only */ 1984 vap->va_mask |= AT_MODE; 1985 } 1986 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL); 1987 if (error) 1988 break; 1989 1990 /* 1991 * Get the initial "after" sequence number, if it fails, 1992 * set to zero 1993 */ 1994 iva.va_mask = AT_SEQ; 1995 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) 1996 iva.va_seq = 0; 1997 break; 1998 case NF4LNK: 1999 vap->va_type = VLNK; 2000 if ((vap->va_mask & AT_MODE) == 0) { 2001 vap->va_mode = 0700; /* default: owner rwx only */ 2002 vap->va_mask |= AT_MODE; 2003 } 2004 2005 /* 2006 * symlink names must be treated as data 2007 */ 2008 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata, 2009 &llen, NULL); 2010 2011 if (lnm == NULL) { 2012 *cs->statusp = resp->status = NFS4ERR_INVAL; 2013 if (name != nm) 2014 
kmem_free(name, MAXPATHLEN + 1); 2015 kmem_free(nm, len); 2016 nfs4_ntov_table_free(&ntov, &sarg); 2017 resp->attrset = 0; 2018 goto out; 2019 } 2020 2021 if (llen > MAXPATHLEN) { 2022 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 2023 if (name != nm) 2024 kmem_free(name, MAXPATHLEN + 1); 2025 kmem_free(nm, len); 2026 kmem_free(lnm, llen); 2027 nfs4_ntov_table_free(&ntov, &sarg); 2028 resp->attrset = 0; 2029 goto out; 2030 } 2031 2032 lname = nfscmd_convname(ca, cs->exi, lnm, 2033 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 2034 2035 if (lname == NULL) { 2036 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 2037 if (name != nm) 2038 kmem_free(name, MAXPATHLEN + 1); 2039 kmem_free(nm, len); 2040 kmem_free(lnm, llen); 2041 nfs4_ntov_table_free(&ntov, &sarg); 2042 resp->attrset = 0; 2043 goto out; 2044 } 2045 2046 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0); 2047 if (lname != lnm) 2048 kmem_free(lname, MAXPATHLEN + 1); 2049 kmem_free(lnm, llen); 2050 if (error) 2051 break; 2052 2053 /* 2054 * Get the initial "after" sequence number, if it fails, 2055 * set to zero 2056 */ 2057 iva.va_mask = AT_SEQ; 2058 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) 2059 iva.va_seq = 0; 2060 2061 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr, 2062 NULL, NULL, NULL); 2063 if (error) 2064 break; 2065 2066 /* 2067 * va_seq is not safe over VOP calls, check it again 2068 * if it has changed zero out iva to force atomic = FALSE. 2069 */ 2070 iva2.va_mask = AT_SEQ; 2071 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) || 2072 iva2.va_seq != iva.va_seq) 2073 iva.va_seq = 0; 2074 break; 2075 default: 2076 /* 2077 * probably a special file. 
2078 */ 2079 if ((vap->va_mask & AT_MODE) == 0) { 2080 vap->va_mode = 0600; /* default: owner rw only */ 2081 vap->va_mask |= AT_MODE; 2082 } 2083 syncval = FNODSYNC; 2084 /* 2085 * We know this will only generate one VOP call 2086 */ 2087 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name); 2088 2089 if (vp == NULL) { 2090 if (name != nm) 2091 kmem_free(name, MAXPATHLEN + 1); 2092 kmem_free(nm, len); 2093 nfs4_ntov_table_free(&ntov, &sarg); 2094 resp->attrset = 0; 2095 goto out; 2096 } 2097 2098 /* 2099 * Get the initial "after" sequence number, if it fails, 2100 * set to zero 2101 */ 2102 iva.va_mask = AT_SEQ; 2103 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) 2104 iva.va_seq = 0; 2105 2106 break; 2107 } 2108 if (name != nm) 2109 kmem_free(name, MAXPATHLEN + 1); 2110 kmem_free(nm, len); 2111 2112 if (error) { 2113 *cs->statusp = resp->status = puterrno4(error); 2114 } 2115 2116 /* 2117 * Force modified data and metadata out to stable storage. 2118 */ 2119 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2120 2121 if (resp->status != NFS4_OK) { 2122 if (vp != NULL) 2123 VN_RELE(vp); 2124 nfs4_ntov_table_free(&ntov, &sarg); 2125 resp->attrset = 0; 2126 goto out; 2127 } 2128 2129 /* 2130 * Finish setup of cinfo response, "before" value already set. 2131 * Get "after" change value, if it fails, simply return the 2132 * before value. 2133 */ 2134 ava.va_mask = AT_CTIME|AT_SEQ; 2135 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) { 2136 ava.va_ctime = bva.va_ctime; 2137 ava.va_seq = 0; 2138 } 2139 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime); 2140 2141 /* 2142 * True verification that object was created with correct 2143 * attrs is impossible. The attrs could have been changed 2144 * immediately after object creation. If attributes did 2145 * not verify, the only recourse for the server is to 2146 * destroy the object. Maybe if some attrs (like gid) 2147 * are set incorrectly, the object should be destroyed; 2148 * however, seems bad as a default policy. 
Do we really 2149 * want to destroy an object over one of the times not 2150 * verifying correctly? For these reasons, the server 2151 * currently sets bits in attrset for createattrs 2152 * that were set; however, no verification is done. 2153 * 2154 * vmask_to_nmask accounts for vattr bits set on create 2155 * [do_rfs4_set_attrs() only sets resp bits for 2156 * non-vattr/vfs bits.] 2157 * Mask off any bits set by default so as not to return 2158 * more attrset bits than were requested in createattrs 2159 */ 2160 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset); 2161 resp->attrset &= args->createattrs.attrmask; 2162 nfs4_ntov_table_free(&ntov, &sarg); 2163 2164 error = makefh4(&cs->fh, vp, cs->exi); 2165 if (error) { 2166 *cs->statusp = resp->status = puterrno4(error); 2167 } 2168 2169 /* 2170 * The cinfo.atomic = TRUE only if we got no errors, we have 2171 * non-zero va_seq's, and it has incremented by exactly one 2172 * during the creation and it didn't change during the VOP_LOOKUP 2173 * or VOP_FSYNC. 2174 */ 2175 if (!error && bva.va_seq && iva.va_seq && ava.va_seq && 2176 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq) 2177 resp->cinfo.atomic = TRUE; 2178 else 2179 resp->cinfo.atomic = FALSE; 2180 2181 /* 2182 * Force modified metadata out to stable storage. 
2183 * 2184 * if a underlying vp exists, pass it to VOP_FSYNC 2185 */ 2186 if (VOP_REALVP(vp, &realvp, NULL) == 0) 2187 (void) VOP_FSYNC(realvp, syncval, cr, NULL); 2188 else 2189 (void) VOP_FSYNC(vp, syncval, cr, NULL); 2190 2191 if (resp->status != NFS4_OK) { 2192 VN_RELE(vp); 2193 goto out; 2194 } 2195 if (cs->vp) 2196 VN_RELE(cs->vp); 2197 2198 cs->vp = vp; 2199 *cs->statusp = resp->status = NFS4_OK; 2200 out: 2201 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs, 2202 CREATE4res *, resp); 2203 } 2204 2205 /*ARGSUSED*/ 2206 static void 2207 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2208 struct compound_state *cs) 2209 { 2210 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs, 2211 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge); 2212 2213 rfs4_op_inval(argop, resop, req, cs); 2214 2215 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs, 2216 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge); 2217 } 2218 2219 /*ARGSUSED*/ 2220 static void 2221 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2222 struct compound_state *cs) 2223 { 2224 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn; 2225 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn; 2226 rfs4_deleg_state_t *dsp; 2227 nfsstat4 status; 2228 2229 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs, 2230 DELEGRETURN4args *, args); 2231 2232 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp); 2233 resp->status = *cs->statusp = status; 2234 if (status != NFS4_OK) 2235 goto out; 2236 2237 /* Ensure specified filehandle matches */ 2238 if (cs->vp != dsp->rds_finfo->rf_vp) { 2239 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID; 2240 } else 2241 rfs4_return_deleg(dsp, FALSE); 2242 2243 rfs4_update_lease(dsp->rds_client); 2244 2245 rfs4_deleg_state_rele(dsp); 2246 out: 2247 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs, 2248 DELEGRETURN4res 
*, resp); 2249 } 2250 2251 /* 2252 * Check to see if a given "flavor" is an explicitly shared flavor. 2253 * The assumption of this routine is the "flavor" is already a valid 2254 * flavor in the secinfo list of "exi". 2255 * 2256 * e.g. 2257 * # share -o sec=flavor1 /export 2258 * # share -o sec=flavor2 /export/home 2259 * 2260 * flavor2 is not an explicitly shared flavor for /export, 2261 * however it is in the secinfo list for /export thru the 2262 * server namespace setup. 2263 */ 2264 int 2265 is_exported_sec(int flavor, struct exportinfo *exi) 2266 { 2267 int i; 2268 struct secinfo *sp; 2269 2270 sp = exi->exi_export.ex_secinfo; 2271 for (i = 0; i < exi->exi_export.ex_seccnt; i++) { 2272 if (flavor == sp[i].s_secinfo.sc_nfsnum || 2273 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) { 2274 return (SEC_REF_EXPORTED(&sp[i])); 2275 } 2276 } 2277 2278 /* Should not reach this point based on the assumption */ 2279 return (0); 2280 } 2281 2282 /* 2283 * Check if the security flavor used in the request matches what is 2284 * required at the export point or at the root pseudo node (exi_root). 2285 * 2286 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise. 2287 * 2288 */ 2289 static int 2290 secinfo_match_or_authnone(struct compound_state *cs) 2291 { 2292 int i; 2293 struct secinfo *sp; 2294 2295 /* 2296 * Check cs->nfsflavor (from the request) against 2297 * the current export data in cs->exi. 2298 */ 2299 sp = cs->exi->exi_export.ex_secinfo; 2300 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) { 2301 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum || 2302 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) 2303 return (1); 2304 } 2305 2306 return (0); 2307 } 2308 2309 /* 2310 * Check the access authority for the client and return the correct error. 
2311 */ 2312 nfsstat4 2313 call_checkauth4(struct compound_state *cs, struct svc_req *req) 2314 { 2315 int authres; 2316 2317 /* 2318 * First, check if the security flavor used in the request 2319 * are among the flavors set in the server namespace. 2320 */ 2321 if (!secinfo_match_or_authnone(cs)) { 2322 *cs->statusp = NFS4ERR_WRONGSEC; 2323 return (*cs->statusp); 2324 } 2325 2326 authres = checkauth4(cs, req); 2327 2328 if (authres > 0) { 2329 *cs->statusp = NFS4_OK; 2330 if (! (cs->access & CS_ACCESS_LIMITED)) 2331 cs->access = CS_ACCESS_OK; 2332 } else if (authres == 0) { 2333 *cs->statusp = NFS4ERR_ACCESS; 2334 } else if (authres == -2) { 2335 *cs->statusp = NFS4ERR_WRONGSEC; 2336 } else { 2337 *cs->statusp = NFS4ERR_DELAY; 2338 } 2339 return (*cs->statusp); 2340 } 2341 2342 /* 2343 * bitmap4_to_attrmask is called by getattr and readdir. 2344 * It sets up the vattr mask and determines whether vfsstat call is needed 2345 * based on the input bitmap. 2346 * Returns nfsv4 status. 2347 */ 2348 static nfsstat4 2349 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp) 2350 { 2351 int i; 2352 uint_t va_mask; 2353 struct statvfs64 *sbp = sargp->sbp; 2354 2355 sargp->sbp = NULL; 2356 sargp->flag = 0; 2357 sargp->rdattr_error = NFS4_OK; 2358 sargp->mntdfid_set = FALSE; 2359 if (sargp->cs->vp) 2360 sargp->xattr = get_fh4_flag(&sargp->cs->fh, 2361 FH4_ATTRDIR | FH4_NAMEDATTR); 2362 else 2363 sargp->xattr = 0; 2364 2365 /* 2366 * Set rdattr_error_req to true if return error per 2367 * failed entry rather than fail the readdir. 
2368 */ 2369 if (breq & FATTR4_RDATTR_ERROR_MASK) 2370 sargp->rdattr_error_req = 1; 2371 else 2372 sargp->rdattr_error_req = 0; 2373 2374 /* 2375 * generate the va_mask 2376 * Handle the easy cases first 2377 */ 2378 switch (breq) { 2379 case NFS4_NTOV_ATTR_MASK: 2380 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK; 2381 return (NFS4_OK); 2382 2383 case NFS4_FS_ATTR_MASK: 2384 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK; 2385 sargp->sbp = sbp; 2386 return (NFS4_OK); 2387 2388 case NFS4_NTOV_ATTR_CACHE_MASK: 2389 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK; 2390 return (NFS4_OK); 2391 2392 case FATTR4_LEASE_TIME_MASK: 2393 sargp->vap->va_mask = 0; 2394 return (NFS4_OK); 2395 2396 default: 2397 va_mask = 0; 2398 for (i = 0; i < nfs4_ntov_map_size; i++) { 2399 if ((breq & nfs4_ntov_map[i].fbit) && 2400 nfs4_ntov_map[i].vbit) 2401 va_mask |= nfs4_ntov_map[i].vbit; 2402 } 2403 2404 /* 2405 * Check is vfsstat is needed 2406 */ 2407 if (breq & NFS4_FS_ATTR_MASK) 2408 sargp->sbp = sbp; 2409 2410 sargp->vap->va_mask = va_mask; 2411 return (NFS4_OK); 2412 } 2413 /* NOTREACHED */ 2414 } 2415 2416 /* 2417 * bitmap4_get_sysattrs is called by getattr and readdir. 2418 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs. 2419 * Returns nfsv4 status. 
2420 */ 2421 static nfsstat4 2422 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp) 2423 { 2424 int error; 2425 struct compound_state *cs = sargp->cs; 2426 vnode_t *vp = cs->vp; 2427 2428 if (sargp->sbp != NULL) { 2429 error = VFS_STATVFS(vp->v_vfsp, sargp->sbp); 2430 if (error != 0) { 2431 sargp->sbp = NULL; /* to identify error */ 2432 return (puterrno4(error)); 2433 } 2434 } 2435 2436 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr)); 2437 } 2438 2439 static void 2440 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp) 2441 { 2442 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size, 2443 KM_SLEEP); 2444 ntovp->attrcnt = 0; 2445 ntovp->vfsstat = FALSE; 2446 } 2447 2448 static void 2449 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp, 2450 struct nfs4_svgetit_arg *sargp) 2451 { 2452 int i; 2453 union nfs4_attr_u *na; 2454 uint8_t *amap; 2455 2456 /* 2457 * XXX Should do the same checks for whether the bit is set 2458 */ 2459 for (i = 0, na = ntovp->na, amap = ntovp->amap; 2460 i < ntovp->attrcnt; i++, na++, amap++) { 2461 (void) (*nfs4_ntov_map[*amap].sv_getit)( 2462 NFS4ATTR_FREEIT, sargp, na); 2463 } 2464 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) { 2465 /* 2466 * xdr_free for getattr will be done later 2467 */ 2468 for (i = 0, na = ntovp->na, amap = ntovp->amap; 2469 i < ntovp->attrcnt; i++, na++, amap++) { 2470 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na); 2471 } 2472 } 2473 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size); 2474 } 2475 2476 /* 2477 * do_rfs4_op_getattr gets the system attrs and converts into fattr4. 
2478 */ 2479 static nfsstat4 2480 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp, 2481 struct nfs4_svgetit_arg *sargp) 2482 { 2483 int error = 0; 2484 int i, k; 2485 struct nfs4_ntov_table ntov; 2486 XDR xdr; 2487 ulong_t xdr_size; 2488 char *xdr_attrs; 2489 nfsstat4 status = NFS4_OK; 2490 nfsstat4 prev_rdattr_error = sargp->rdattr_error; 2491 union nfs4_attr_u *na; 2492 uint8_t *amap; 2493 2494 sargp->op = NFS4ATTR_GETIT; 2495 sargp->flag = 0; 2496 2497 fattrp->attrmask = 0; 2498 /* if no bits requested, then return empty fattr4 */ 2499 if (breq == 0) { 2500 fattrp->attrlist4_len = 0; 2501 fattrp->attrlist4 = NULL; 2502 return (NFS4_OK); 2503 } 2504 2505 /* 2506 * return NFS4ERR_INVAL when client requests write-only attrs 2507 */ 2508 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK)) 2509 return (NFS4ERR_INVAL); 2510 2511 nfs4_ntov_table_init(&ntov); 2512 na = ntov.na; 2513 amap = ntov.amap; 2514 2515 /* 2516 * Now loop to get or verify the attrs 2517 */ 2518 for (i = 0; i < nfs4_ntov_map_size; i++) { 2519 if (breq & nfs4_ntov_map[i].fbit) { 2520 if ((*nfs4_ntov_map[i].sv_getit)( 2521 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) { 2522 2523 error = (*nfs4_ntov_map[i].sv_getit)( 2524 NFS4ATTR_GETIT, sargp, na); 2525 2526 /* 2527 * Possible error values: 2528 * >0 if sv_getit failed to 2529 * get the attr; 0 if succeeded; 2530 * <0 if rdattr_error and the 2531 * attribute cannot be returned. 2532 */ 2533 if (error && !(sargp->rdattr_error_req)) 2534 goto done; 2535 /* 2536 * If error then just for entry 2537 */ 2538 if (error == 0) { 2539 fattrp->attrmask |= 2540 nfs4_ntov_map[i].fbit; 2541 *amap++ = 2542 (uint8_t)nfs4_ntov_map[i].nval; 2543 na++; 2544 (ntov.attrcnt)++; 2545 } else if ((error > 0) && 2546 (sargp->rdattr_error == NFS4_OK)) { 2547 sargp->rdattr_error = puterrno4(error); 2548 } 2549 error = 0; 2550 } 2551 } 2552 } 2553 2554 /* 2555 * If rdattr_error was set after the return value for it was assigned, 2556 * update it. 
2557 */ 2558 if (prev_rdattr_error != sargp->rdattr_error) { 2559 na = ntov.na; 2560 amap = ntov.amap; 2561 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) { 2562 k = *amap; 2563 if (k < FATTR4_RDATTR_ERROR) { 2564 continue; 2565 } 2566 if ((k == FATTR4_RDATTR_ERROR) && 2567 ((*nfs4_ntov_map[k].sv_getit)( 2568 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) { 2569 2570 (void) (*nfs4_ntov_map[k].sv_getit)( 2571 NFS4ATTR_GETIT, sargp, na); 2572 } 2573 break; 2574 } 2575 } 2576 2577 xdr_size = 0; 2578 na = ntov.na; 2579 amap = ntov.amap; 2580 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) { 2581 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na); 2582 } 2583 2584 fattrp->attrlist4_len = xdr_size; 2585 if (xdr_size) { 2586 /* freed by rfs4_op_getattr_free() */ 2587 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP); 2588 2589 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE); 2590 2591 na = ntov.na; 2592 amap = ntov.amap; 2593 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) { 2594 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) { 2595 DTRACE_PROBE1(nfss__e__getattr4_encfail, 2596 int, *amap); 2597 status = NFS4ERR_SERVERFAULT; 2598 break; 2599 } 2600 } 2601 /* xdrmem_destroy(&xdrs); */ /* NO-OP */ 2602 } else { 2603 fattrp->attrlist4 = NULL; 2604 } 2605 done: 2606 2607 nfs4_ntov_table_free(&ntov, sargp); 2608 2609 if (error != 0) 2610 status = puterrno4(error); 2611 2612 return (status); 2613 } 2614 2615 /* ARGSUSED */ 2616 static void 2617 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2618 struct compound_state *cs) 2619 { 2620 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr; 2621 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr; 2622 struct nfs4_svgetit_arg sarg; 2623 struct statvfs64 sb; 2624 nfsstat4 status; 2625 2626 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs, 2627 GETATTR4args *, args); 2628 2629 if (cs->vp == NULL) { 2630 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2631 goto out; 2632 } 
2633 2634 if (cs->access == CS_ACCESS_DENIED) { 2635 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2636 goto out; 2637 } 2638 2639 sarg.sbp = &sb; 2640 sarg.cs = cs; 2641 sarg.is_referral = B_FALSE; 2642 2643 status = bitmap4_to_attrmask(args->attr_request, &sarg); 2644 if (status == NFS4_OK) { 2645 2646 status = bitmap4_get_sysattrs(&sarg); 2647 if (status == NFS4_OK) { 2648 2649 /* Is this a referral? */ 2650 if (vn_is_nfs_reparse(cs->vp, cs->cr)) { 2651 /* Older V4 Solaris client sees a link */ 2652 if (client_is_downrev(req)) 2653 sarg.vap->va_type = VLNK; 2654 else 2655 sarg.is_referral = B_TRUE; 2656 } 2657 2658 status = do_rfs4_op_getattr(args->attr_request, 2659 &resp->obj_attributes, &sarg); 2660 } 2661 } 2662 *cs->statusp = resp->status = status; 2663 out: 2664 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs, 2665 GETATTR4res *, resp); 2666 } 2667 2668 static void 2669 rfs4_op_getattr_free(nfs_resop4 *resop) 2670 { 2671 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr; 2672 2673 nfs4_fattr4_free(&resp->obj_attributes); 2674 } 2675 2676 /* ARGSUSED */ 2677 static void 2678 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2679 struct compound_state *cs) 2680 { 2681 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh; 2682 2683 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs); 2684 2685 if (cs->vp == NULL) { 2686 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2687 goto out; 2688 } 2689 if (cs->access == CS_ACCESS_DENIED) { 2690 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2691 goto out; 2692 } 2693 2694 /* check for reparse point at the share point */ 2695 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) { 2696 /* it's all bad */ 2697 cs->exi->exi_moved = 1; 2698 *cs->statusp = resp->status = NFS4ERR_MOVED; 2699 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved, 2700 vnode_t *, cs->vp, char *, "rfs4_op_getfh"); 2701 return; 2702 } 2703 2704 /* check for reparse point at vp */ 2705 
if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) { 2706 /* it's not all bad */ 2707 *cs->statusp = resp->status = NFS4ERR_MOVED; 2708 DTRACE_PROBE2(nfs4serv__func__referral__moved, 2709 vnode_t *, cs->vp, char *, "rfs4_op_getfh"); 2710 return; 2711 } 2712 2713 resp->object.nfs_fh4_val = 2714 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP); 2715 nfs_fh4_copy(&cs->fh, &resp->object); 2716 *cs->statusp = resp->status = NFS4_OK; 2717 out: 2718 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs, 2719 GETFH4res *, resp); 2720 } 2721 2722 static void 2723 rfs4_op_getfh_free(nfs_resop4 *resop) 2724 { 2725 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh; 2726 2727 if (resp->status == NFS4_OK && 2728 resp->object.nfs_fh4_val != NULL) { 2729 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len); 2730 resp->object.nfs_fh4_val = NULL; 2731 resp->object.nfs_fh4_len = 0; 2732 } 2733 } 2734 2735 /* 2736 * illegal: args: void 2737 * res : status (NFS4ERR_OP_ILLEGAL) 2738 */ 2739 /* ARGSUSED */ 2740 static void 2741 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop, 2742 struct svc_req *req, struct compound_state *cs) 2743 { 2744 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal; 2745 2746 resop->resop = OP_ILLEGAL; 2747 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL; 2748 } 2749 2750 /* ARGSUSED */ 2751 static void 2752 rfs4_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2753 struct compound_state *cs) 2754 { 2755 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP; 2756 } 2757 2758 /* 2759 * link: args: SAVED_FH: file, CURRENT_FH: target directory 2760 * res: status. 
If success - CURRENT_FH unchanged, return change_info 2761 */ 2762 /* ARGSUSED */ 2763 static void 2764 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2765 struct compound_state *cs) 2766 { 2767 LINK4args *args = &argop->nfs_argop4_u.oplink; 2768 LINK4res *resp = &resop->nfs_resop4_u.oplink; 2769 int error; 2770 vnode_t *vp; 2771 vnode_t *dvp; 2772 struct vattr bdva, idva, adva; 2773 char *nm; 2774 uint_t len; 2775 struct sockaddr *ca; 2776 char *name = NULL; 2777 nfsstat4 status; 2778 2779 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs, 2780 LINK4args *, args); 2781 2782 /* SAVED_FH: source object */ 2783 vp = cs->saved_vp; 2784 if (vp == NULL) { 2785 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2786 goto out; 2787 } 2788 2789 /* CURRENT_FH: target directory */ 2790 dvp = cs->vp; 2791 if (dvp == NULL) { 2792 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2793 goto out; 2794 } 2795 2796 /* 2797 * If there is a non-shared filesystem mounted on this vnode, 2798 * do not allow to link any file in this directory. 
2799 */ 2800 if (vn_ismntpt(dvp)) { 2801 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2802 goto out; 2803 } 2804 2805 if (cs->access == CS_ACCESS_DENIED) { 2806 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2807 goto out; 2808 } 2809 2810 /* Check source object's type validity */ 2811 if (vp->v_type == VDIR) { 2812 *cs->statusp = resp->status = NFS4ERR_ISDIR; 2813 goto out; 2814 } 2815 2816 /* Check target directory's type */ 2817 if (dvp->v_type != VDIR) { 2818 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 2819 goto out; 2820 } 2821 2822 if (cs->saved_exi != cs->exi) { 2823 *cs->statusp = resp->status = NFS4ERR_XDEV; 2824 goto out; 2825 } 2826 2827 status = utf8_dir_verify(&args->newname); 2828 if (status != NFS4_OK) { 2829 *cs->statusp = resp->status = status; 2830 goto out; 2831 } 2832 2833 nm = utf8_to_fn(&args->newname, &len, NULL); 2834 if (nm == NULL) { 2835 *cs->statusp = resp->status = NFS4ERR_INVAL; 2836 goto out; 2837 } 2838 2839 if (len > MAXNAMELEN) { 2840 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 2841 kmem_free(nm, len); 2842 goto out; 2843 } 2844 2845 if (rdonly4(req, cs)) { 2846 *cs->statusp = resp->status = NFS4ERR_ROFS; 2847 kmem_free(nm, len); 2848 goto out; 2849 } 2850 2851 /* Get "before" change value */ 2852 bdva.va_mask = AT_CTIME|AT_SEQ; 2853 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL); 2854 if (error) { 2855 *cs->statusp = resp->status = puterrno4(error); 2856 kmem_free(nm, len); 2857 goto out; 2858 } 2859 2860 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2861 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 2862 MAXPATHLEN + 1); 2863 2864 if (name == NULL) { 2865 *cs->statusp = resp->status = NFS4ERR_INVAL; 2866 kmem_free(nm, len); 2867 goto out; 2868 } 2869 2870 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime) 2871 2872 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0); 2873 2874 if (nm != name) 2875 kmem_free(name, MAXPATHLEN + 1); 2876 kmem_free(nm, len); 2877 2878 /* 2879 * Get the 
initial "after" sequence number, if it fails, set to zero 2880 */ 2881 idva.va_mask = AT_SEQ; 2882 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL)) 2883 idva.va_seq = 0; 2884 2885 /* 2886 * Force modified data and metadata out to stable storage. 2887 */ 2888 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL); 2889 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 2890 2891 if (error) { 2892 *cs->statusp = resp->status = puterrno4(error); 2893 goto out; 2894 } 2895 2896 /* 2897 * Get "after" change value, if it fails, simply return the 2898 * before value. 2899 */ 2900 adva.va_mask = AT_CTIME|AT_SEQ; 2901 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) { 2902 adva.va_ctime = bdva.va_ctime; 2903 adva.va_seq = 0; 2904 } 2905 2906 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime) 2907 2908 /* 2909 * The cinfo.atomic = TRUE only if we have 2910 * non-zero va_seq's, and it has incremented by exactly one 2911 * during the VOP_LINK and it didn't change during the VOP_FSYNC. 2912 */ 2913 if (bdva.va_seq && idva.va_seq && adva.va_seq && 2914 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq) 2915 resp->cinfo.atomic = TRUE; 2916 else 2917 resp->cinfo.atomic = FALSE; 2918 2919 *cs->statusp = resp->status = NFS4_OK; 2920 out: 2921 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs, 2922 LINK4res *, resp); 2923 } 2924 2925 /* 2926 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work. 2927 */ 2928 2929 /* ARGSUSED */ 2930 static nfsstat4 2931 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs) 2932 { 2933 int error; 2934 int different_export = 0; 2935 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL; 2936 struct exportinfo *exi = NULL, *pre_exi = NULL; 2937 nfsstat4 stat; 2938 fid_t fid; 2939 int attrdir, dotdot, walk; 2940 bool_t is_newvp = FALSE; 2941 2942 if (cs->vp->v_flag & V_XATTRDIR) { 2943 attrdir = 1; 2944 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR)); 2945 } else { 2946 attrdir = 0; 2947 ASSERT(! 
get_fh4_flag(&cs->fh, FH4_ATTRDIR)); 2948 } 2949 2950 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0'); 2951 2952 /* 2953 * If dotdotting, then need to check whether it's 2954 * above the root of a filesystem, or above an 2955 * export point. 2956 */ 2957 if (dotdot) { 2958 vnode_t *zone_rootvp; 2959 2960 ASSERT(cs->exi != NULL); 2961 zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp; 2962 /* 2963 * If dotdotting at the root of a filesystem, then 2964 * need to traverse back to the mounted-on filesystem 2965 * and do the dotdot lookup there. 2966 */ 2967 if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) { 2968 2969 /* 2970 * If at the system root, then can 2971 * go up no further. 2972 */ 2973 if (VN_CMP(cs->vp, zone_rootvp)) 2974 return (puterrno4(ENOENT)); 2975 2976 /* 2977 * Traverse back to the mounted-on filesystem 2978 */ 2979 cs->vp = untraverse(cs->vp, zone_rootvp); 2980 2981 /* 2982 * Set the different_export flag so we remember 2983 * to pick up a new exportinfo entry for 2984 * this new filesystem. 2985 */ 2986 different_export = 1; 2987 } else { 2988 2989 /* 2990 * If dotdotting above an export point then set 2991 * the different_export to get new export info. 2992 */ 2993 different_export = nfs_exported(cs->exi, cs->vp); 2994 } 2995 } 2996 2997 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr, 2998 NULL, NULL, NULL); 2999 if (error) 3000 return (puterrno4(error)); 3001 3002 /* 3003 * If the vnode is in a pseudo filesystem, check whether it is visible. 3004 * 3005 * XXX if the vnode is a symlink and it is not visible in 3006 * a pseudo filesystem, return ENOENT (not following symlink). 3007 * V4 client can not mount such symlink. This is a regression 3008 * from V2/V3. 3009 * 3010 * In the same exported filesystem, if the security flavor used 3011 * is not an explicitly shared flavor, limit the view to the visible 3012 * list entries only. 
This is not a WRONGSEC case because it's already 3013 * checked via PUTROOTFH/PUTPUBFH or PUTFH. 3014 */ 3015 if (!different_export && 3016 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) || 3017 cs->access & CS_ACCESS_LIMITED)) { 3018 if (! nfs_visible(cs->exi, vp, &different_export)) { 3019 VN_RELE(vp); 3020 return (puterrno4(ENOENT)); 3021 } 3022 } 3023 3024 /* 3025 * If it's a mountpoint, then traverse it. 3026 */ 3027 if (vn_ismntpt(vp)) { 3028 pre_exi = cs->exi; /* save pre-traversed exportinfo */ 3029 pre_tvp = vp; /* save pre-traversed vnode */ 3030 3031 /* 3032 * hold pre_tvp to counteract rele by traverse. We will 3033 * need pre_tvp below if checkexport4 fails 3034 */ 3035 VN_HOLD(pre_tvp); 3036 if ((error = traverse(&vp)) != 0) { 3037 VN_RELE(vp); 3038 VN_RELE(pre_tvp); 3039 return (puterrno4(error)); 3040 } 3041 different_export = 1; 3042 } else if (vp->v_vfsp != cs->vp->v_vfsp) { 3043 /* 3044 * The vfsp comparison is to handle the case where 3045 * a LOFS mount is shared. lo_lookup traverses mount points, 3046 * and NFS is unaware of local fs transistions because 3047 * v_vfsmountedhere isn't set. For this special LOFS case, 3048 * the dir and the obj returned by lookup will have different 3049 * vfs ptrs. 3050 */ 3051 different_export = 1; 3052 } 3053 3054 if (different_export) { 3055 3056 bzero(&fid, sizeof (fid)); 3057 fid.fid_len = MAXFIDSZ; 3058 error = vop_fid_pseudo(vp, &fid); 3059 if (error) { 3060 VN_RELE(vp); 3061 if (pre_tvp) 3062 VN_RELE(pre_tvp); 3063 return (puterrno4(error)); 3064 } 3065 3066 if (dotdot) 3067 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE); 3068 else 3069 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp); 3070 3071 if (exi == NULL) { 3072 if (pre_tvp) { 3073 /* 3074 * If this vnode is a mounted-on vnode, 3075 * but the mounted-on file system is not 3076 * exported, send back the filehandle for 3077 * the mounted-on vnode, not the root of 3078 * the mounted-on file system. 
3079 */ 3080 VN_RELE(vp); 3081 vp = pre_tvp; 3082 exi = pre_exi; 3083 } else { 3084 VN_RELE(vp); 3085 return (puterrno4(EACCES)); 3086 } 3087 } else if (pre_tvp) { 3088 /* we're done with pre_tvp now. release extra hold */ 3089 VN_RELE(pre_tvp); 3090 } 3091 3092 cs->exi = exi; 3093 3094 /* 3095 * Now we do a checkauth4. The reason is that 3096 * this client/user may not have access to the new 3097 * exported file system, and if they do, 3098 * the client/user may be mapped to a different uid. 3099 * 3100 * We start with a new cr, because the checkauth4 done 3101 * in the PUT*FH operation over wrote the cred's uid, 3102 * gid, etc, and we want the real thing before calling 3103 * checkauth4() 3104 */ 3105 crfree(cs->cr); 3106 cs->cr = crdup(cs->basecr); 3107 3108 oldvp = cs->vp; 3109 cs->vp = vp; 3110 is_newvp = TRUE; 3111 3112 stat = call_checkauth4(cs, req); 3113 if (stat != NFS4_OK) { 3114 VN_RELE(cs->vp); 3115 cs->vp = oldvp; 3116 return (stat); 3117 } 3118 } 3119 3120 /* 3121 * After various NFS checks, do a label check on the path 3122 * component. The label on this path should either be the 3123 * global zone's label or a zone's label. We are only 3124 * interested in the zone's label because exported files 3125 * in global zone is accessible (though read-only) to 3126 * clients. The exportability/visibility check is already 3127 * done before reaching this code. 3128 */ 3129 if (is_system_labeled()) { 3130 bslabel_t *clabel; 3131 3132 ASSERT(req->rq_label != NULL); 3133 clabel = req->rq_label; 3134 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *, 3135 "got client label from request(1)", struct svc_req *, req); 3136 3137 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3138 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 3139 cs->exi)) { 3140 error = EACCES; 3141 goto err_out; 3142 } 3143 } else { 3144 /* 3145 * We grant access to admin_low label clients 3146 * only if the client is trusted, i.e. also 3147 * running Solaris Trusted Extension. 
3148 */ 3149 struct sockaddr *ca; 3150 int addr_type; 3151 void *ipaddr; 3152 tsol_tpc_t *tp; 3153 3154 ca = (struct sockaddr *)svc_getrpccaller( 3155 req->rq_xprt)->buf; 3156 if (ca->sa_family == AF_INET) { 3157 addr_type = IPV4_VERSION; 3158 ipaddr = &((struct sockaddr_in *)ca)->sin_addr; 3159 } else if (ca->sa_family == AF_INET6) { 3160 addr_type = IPV6_VERSION; 3161 ipaddr = &((struct sockaddr_in6 *) 3162 ca)->sin6_addr; 3163 } 3164 tp = find_tpc(ipaddr, addr_type, B_FALSE); 3165 if (tp == NULL || tp->tpc_tp.tp_doi != 3166 l_admin_low->tsl_doi || tp->tpc_tp.host_type != 3167 SUN_CIPSO) { 3168 if (tp != NULL) 3169 TPC_RELE(tp); 3170 error = EACCES; 3171 goto err_out; 3172 } 3173 TPC_RELE(tp); 3174 } 3175 } 3176 3177 error = makefh4(&cs->fh, vp, cs->exi); 3178 3179 err_out: 3180 if (error) { 3181 if (is_newvp) { 3182 VN_RELE(cs->vp); 3183 cs->vp = oldvp; 3184 } else 3185 VN_RELE(vp); 3186 return (puterrno4(error)); 3187 } 3188 3189 if (!is_newvp) { 3190 if (cs->vp) 3191 VN_RELE(cs->vp); 3192 cs->vp = vp; 3193 } else if (oldvp) 3194 VN_RELE(oldvp); 3195 3196 /* 3197 * if did lookup on attrdir and didn't lookup .., set named 3198 * attr fh flag 3199 */ 3200 if (attrdir && ! 
dotdot) 3201 set_fh4_flag(&cs->fh, FH4_NAMEDATTR); 3202 3203 /* Assume false for now, open proc will set this */ 3204 cs->mandlock = FALSE; 3205 3206 return (NFS4_OK); 3207 } 3208 3209 /* ARGSUSED */ 3210 static void 3211 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3212 struct compound_state *cs) 3213 { 3214 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup; 3215 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup; 3216 char *nm; 3217 uint_t len; 3218 struct sockaddr *ca; 3219 char *name = NULL; 3220 nfsstat4 status; 3221 3222 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs, 3223 LOOKUP4args *, args); 3224 3225 if (cs->vp == NULL) { 3226 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3227 goto out; 3228 } 3229 3230 if (cs->vp->v_type == VLNK) { 3231 *cs->statusp = resp->status = NFS4ERR_SYMLINK; 3232 goto out; 3233 } 3234 3235 if (cs->vp->v_type != VDIR) { 3236 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 3237 goto out; 3238 } 3239 3240 status = utf8_dir_verify(&args->objname); 3241 if (status != NFS4_OK) { 3242 *cs->statusp = resp->status = status; 3243 goto out; 3244 } 3245 3246 nm = utf8_to_str(&args->objname, &len, NULL); 3247 if (nm == NULL) { 3248 *cs->statusp = resp->status = NFS4ERR_INVAL; 3249 goto out; 3250 } 3251 3252 if (len > MAXNAMELEN) { 3253 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 3254 kmem_free(nm, len); 3255 goto out; 3256 } 3257 3258 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 3259 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 3260 MAXPATHLEN + 1); 3261 3262 if (name == NULL) { 3263 *cs->statusp = resp->status = NFS4ERR_INVAL; 3264 kmem_free(nm, len); 3265 goto out; 3266 } 3267 3268 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs); 3269 3270 if (name != nm) 3271 kmem_free(name, MAXPATHLEN + 1); 3272 kmem_free(nm, len); 3273 3274 out: 3275 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs, 3276 LOOKUP4res *, resp); 3277 } 3278 3279 /* 
ARGSUSED */ 3280 static void 3281 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 3282 struct compound_state *cs) 3283 { 3284 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp; 3285 3286 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs); 3287 3288 if (cs->vp == NULL) { 3289 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3290 goto out; 3291 } 3292 3293 if (cs->vp->v_type == VLNK) { 3294 *cs->statusp = resp->status = NFS4ERR_SYMLINK; 3295 goto out; 3296 } 3297 3298 if (cs->vp->v_type != VDIR) { 3299 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 3300 goto out; 3301 } 3302 3303 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs); 3304 3305 /* 3306 * From NFSV4 Specification, LOOKUPP should not check for 3307 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead. 3308 */ 3309 if (resp->status == NFS4ERR_WRONGSEC) { 3310 *cs->statusp = resp->status = NFS4_OK; 3311 } 3312 3313 out: 3314 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs, 3315 LOOKUPP4res *, resp); 3316 } 3317 3318 3319 /*ARGSUSED2*/ 3320 static void 3321 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3322 struct compound_state *cs) 3323 { 3324 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr; 3325 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr; 3326 vnode_t *avp = NULL; 3327 int lookup_flags = LOOKUP_XATTR, error; 3328 int exp_ro = 0; 3329 3330 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs, 3331 OPENATTR4args *, args); 3332 3333 if (cs->vp == NULL) { 3334 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3335 goto out; 3336 } 3337 3338 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 && 3339 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) { 3340 *cs->statusp = resp->status = puterrno4(ENOTSUP); 3341 goto out; 3342 } 3343 3344 /* 3345 * If file system supports passing ACE mask to VOP_ACCESS then 3346 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks 3347 */ 3348 3349 if 
(vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS)) 3350 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS, 3351 V_ACE_MASK, cs->cr, NULL); 3352 else 3353 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) && 3354 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) && 3355 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0)); 3356 3357 if (error) { 3358 *cs->statusp = resp->status = puterrno4(EACCES); 3359 goto out; 3360 } 3361 3362 /* 3363 * The CREATE_XATTR_DIR VOP flag cannot be specified if 3364 * the file system is exported read-only -- regardless of 3365 * createdir flag. Otherwise the attrdir would be created 3366 * (assuming server fs isn't mounted readonly locally). If 3367 * VOP_LOOKUP returns ENOENT in this case, the error will 3368 * be translated into EROFS. ENOSYS is mapped to ENOTSUP 3369 * because specfs has no VOP_LOOKUP op, so the macro would 3370 * return ENOSYS. EINVAL is returned by all (current) 3371 * Solaris file system implementations when any of their 3372 * restrictions are violated (xattr(dir) can't have xattrdir). 3373 * Returning NOTSUPP is more appropriate in this case 3374 * because the object will never be able to have an attrdir. 3375 */ 3376 if (args->createdir && ! 
(exp_ro = rdonly4(req, cs))) 3377 lookup_flags |= CREATE_XATTR_DIR; 3378 3379 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr, 3380 NULL, NULL, NULL); 3381 3382 if (error) { 3383 if (error == ENOENT && args->createdir && exp_ro) 3384 *cs->statusp = resp->status = puterrno4(EROFS); 3385 else if (error == EINVAL || error == ENOSYS) 3386 *cs->statusp = resp->status = puterrno4(ENOTSUP); 3387 else 3388 *cs->statusp = resp->status = puterrno4(error); 3389 goto out; 3390 } 3391 3392 ASSERT(avp->v_flag & V_XATTRDIR); 3393 3394 error = makefh4(&cs->fh, avp, cs->exi); 3395 3396 if (error) { 3397 VN_RELE(avp); 3398 *cs->statusp = resp->status = puterrno4(error); 3399 goto out; 3400 } 3401 3402 VN_RELE(cs->vp); 3403 cs->vp = avp; 3404 3405 /* 3406 * There is no requirement for an attrdir fh flag 3407 * because the attrdir has a vnode flag to distinguish 3408 * it from regular (non-xattr) directories. The 3409 * FH4_ATTRDIR flag is set for future sanity checks. 3410 */ 3411 set_fh4_flag(&cs->fh, FH4_ATTRDIR); 3412 *cs->statusp = resp->status = NFS4_OK; 3413 3414 out: 3415 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs, 3416 OPENATTR4res *, resp); 3417 } 3418 3419 static int 3420 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred, 3421 caller_context_t *ct) 3422 { 3423 int error; 3424 int i; 3425 clock_t delaytime; 3426 3427 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay); 3428 3429 /* 3430 * Don't block on mandatory locks. If this routine returns 3431 * EAGAIN, the caller should return NFS4ERR_LOCKED. 
3432 */ 3433 uio->uio_fmode = FNONBLOCK; 3434 3435 for (i = 0; i < rfs4_maxlock_tries; i++) { 3436 3437 3438 if (direction == FREAD) { 3439 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct); 3440 error = VOP_READ(vp, uio, ioflag, cred, ct); 3441 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct); 3442 } else { 3443 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct); 3444 error = VOP_WRITE(vp, uio, ioflag, cred, ct); 3445 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct); 3446 } 3447 3448 if (error != EAGAIN) 3449 break; 3450 3451 if (i < rfs4_maxlock_tries - 1) { 3452 delay(delaytime); 3453 delaytime *= 2; 3454 } 3455 } 3456 3457 return (error); 3458 } 3459 3460 /* ARGSUSED */ 3461 static void 3462 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3463 struct compound_state *cs) 3464 { 3465 READ4args *args = &argop->nfs_argop4_u.opread; 3466 READ4res *resp = &resop->nfs_resop4_u.opread; 3467 int error; 3468 int verror; 3469 vnode_t *vp; 3470 struct vattr va; 3471 struct iovec iov, *iovp = NULL; 3472 int iovcnt; 3473 struct uio uio; 3474 u_offset_t offset; 3475 bool_t *deleg = &cs->deleg; 3476 nfsstat4 stat; 3477 int in_crit = 0; 3478 mblk_t *mp = NULL; 3479 int alloc_err = 0; 3480 int rdma_used = 0; 3481 int loaned_buffers; 3482 caller_context_t ct; 3483 struct uio *uiop; 3484 3485 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs, 3486 READ4args, args); 3487 3488 vp = cs->vp; 3489 if (vp == NULL) { 3490 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3491 goto out; 3492 } 3493 if (cs->access == CS_ACCESS_DENIED) { 3494 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3495 goto out; 3496 } 3497 3498 get_stateid4(cs, &args->stateid); 3499 3500 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE, 3501 deleg, TRUE, &ct, cs)) != NFS4_OK) { 3502 *cs->statusp = resp->status = stat; 3503 goto out; 3504 } 3505 3506 /* 3507 * Enter the critical region before calling VOP_RWLOCK 3508 * to avoid a deadlock with write requests. 
3509 */ 3510 if (nbl_need_check(vp)) { 3511 nbl_start_crit(vp, RW_READER); 3512 in_crit = 1; 3513 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0, 3514 &ct)) { 3515 *cs->statusp = resp->status = NFS4ERR_LOCKED; 3516 goto out; 3517 } 3518 } 3519 3520 if (args->wlist) { 3521 if (args->count > clist_len(args->wlist)) { 3522 *cs->statusp = resp->status = NFS4ERR_INVAL; 3523 goto out; 3524 } 3525 rdma_used = 1; 3526 } 3527 3528 /* use loaned buffers for TCP */ 3529 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0; 3530 3531 va.va_mask = AT_MODE|AT_SIZE|AT_UID; 3532 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct); 3533 3534 /* 3535 * If we can't get the attributes, then we can't do the 3536 * right access checking. So, we'll fail the request. 3537 */ 3538 if (verror) { 3539 *cs->statusp = resp->status = puterrno4(verror); 3540 goto out; 3541 } 3542 3543 if (vp->v_type != VREG) { 3544 *cs->statusp = resp->status = 3545 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL); 3546 goto out; 3547 } 3548 3549 if (crgetuid(cs->cr) != va.va_uid && 3550 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) && 3551 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) { 3552 *cs->statusp = resp->status = puterrno4(error); 3553 goto out; 3554 } 3555 3556 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */ 3557 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3558 goto out; 3559 } 3560 3561 offset = args->offset; 3562 if (offset >= va.va_size) { 3563 *cs->statusp = resp->status = NFS4_OK; 3564 resp->eof = TRUE; 3565 resp->data_len = 0; 3566 resp->data_val = NULL; 3567 resp->mblk = NULL; 3568 /* RDMA */ 3569 resp->wlist = args->wlist; 3570 resp->wlist_len = resp->data_len; 3571 *cs->statusp = resp->status = NFS4_OK; 3572 if (resp->wlist) 3573 clist_zero_len(resp->wlist); 3574 goto out; 3575 } 3576 3577 if (args->count == 0) { 3578 *cs->statusp = resp->status = NFS4_OK; 3579 resp->eof = FALSE; 3580 resp->data_len = 0; 3581 resp->data_val = NULL; 3582 
resp->mblk = NULL; 3583 /* RDMA */ 3584 resp->wlist = args->wlist; 3585 resp->wlist_len = resp->data_len; 3586 if (resp->wlist) 3587 clist_zero_len(resp->wlist); 3588 goto out; 3589 } 3590 3591 /* 3592 * Do not allocate memory more than maximum allowed 3593 * transfer size 3594 */ 3595 if (args->count > rfs4_tsize(req)) 3596 args->count = rfs4_tsize(req); 3597 3598 if (loaned_buffers) { 3599 uiop = (uio_t *)rfs_setup_xuio(vp); 3600 ASSERT(uiop != NULL); 3601 uiop->uio_segflg = UIO_SYSSPACE; 3602 uiop->uio_loffset = args->offset; 3603 uiop->uio_resid = args->count; 3604 3605 /* Jump to do the read if successful */ 3606 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) { 3607 /* 3608 * Need to hold the vnode until after VOP_RETZCBUF() 3609 * is called. 3610 */ 3611 VN_HOLD(vp); 3612 goto doio_read; 3613 } 3614 3615 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int, 3616 uiop->uio_loffset, int, uiop->uio_resid); 3617 3618 uiop->uio_extflg = 0; 3619 3620 /* failure to setup for zero copy */ 3621 rfs_free_xuio((void *)uiop); 3622 loaned_buffers = 0; 3623 } 3624 3625 /* 3626 * If returning data via RDMA Write, then grab the chunk list. If we 3627 * aren't returning READ data w/RDMA_WRITE, then grab a mblk. 3628 */ 3629 if (rdma_used) { 3630 mp = NULL; 3631 (void) rdma_get_wchunk(req, &iov, args->wlist); 3632 uio.uio_iov = &iov; 3633 uio.uio_iovcnt = 1; 3634 } else { 3635 /* 3636 * mp will contain the data to be sent out in the read reply. 3637 * It will be freed after the reply has been sent. 
3638 */ 3639 mp = rfs_read_alloc(args->count, &iovp, &iovcnt); 3640 ASSERT(mp != NULL); 3641 ASSERT(alloc_err == 0); 3642 uio.uio_iov = iovp; 3643 uio.uio_iovcnt = iovcnt; 3644 } 3645 3646 uio.uio_segflg = UIO_SYSSPACE; 3647 uio.uio_extflg = UIO_COPY_CACHED; 3648 uio.uio_loffset = args->offset; 3649 uio.uio_resid = args->count; 3650 uiop = &uio; 3651 3652 doio_read: 3653 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct); 3654 3655 va.va_mask = AT_SIZE; 3656 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct); 3657 3658 if (error) { 3659 if (mp) 3660 freemsg(mp); 3661 *cs->statusp = resp->status = puterrno4(error); 3662 goto out; 3663 } 3664 3665 /* make mblk using zc buffers */ 3666 if (loaned_buffers) { 3667 mp = uio_to_mblk(uiop); 3668 ASSERT(mp != NULL); 3669 } 3670 3671 *cs->statusp = resp->status = NFS4_OK; 3672 3673 ASSERT(uiop->uio_resid >= 0); 3674 resp->data_len = args->count - uiop->uio_resid; 3675 if (mp) { 3676 resp->data_val = (char *)mp->b_datap->db_base; 3677 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers); 3678 } else { 3679 resp->data_val = (caddr_t)iov.iov_base; 3680 } 3681 3682 resp->mblk = mp; 3683 3684 if (!verror && offset + resp->data_len == va.va_size) 3685 resp->eof = TRUE; 3686 else 3687 resp->eof = FALSE; 3688 3689 if (rdma_used) { 3690 if (!rdma_setup_read_data4(args, resp)) { 3691 *cs->statusp = resp->status = NFS4ERR_INVAL; 3692 } 3693 } else { 3694 resp->wlist = NULL; 3695 } 3696 3697 out: 3698 if (in_crit) 3699 nbl_end_crit(vp); 3700 3701 if (iovp != NULL) 3702 kmem_free(iovp, iovcnt * sizeof (struct iovec)); 3703 3704 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs, 3705 READ4res *, resp); 3706 } 3707 3708 static void 3709 rfs4_op_read_free(nfs_resop4 *resop) 3710 { 3711 READ4res *resp = &resop->nfs_resop4_u.opread; 3712 3713 if (resp->status == NFS4_OK && resp->mblk != NULL) { 3714 freemsg(resp->mblk); 3715 resp->mblk = NULL; 3716 resp->data_val = NULL; 3717 resp->data_len = 0; 3718 } 3719 } 3720 3721 static void 3722 
rfs4_op_readdir_free(nfs_resop4 * resop) 3723 { 3724 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir; 3725 3726 if (resp->status == NFS4_OK && resp->mblk != NULL) { 3727 freeb(resp->mblk); 3728 resp->mblk = NULL; 3729 resp->data_len = 0; 3730 } 3731 } 3732 3733 3734 /* ARGSUSED */ 3735 static void 3736 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 3737 struct compound_state *cs) 3738 { 3739 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh; 3740 int error; 3741 vnode_t *vp; 3742 struct exportinfo *exi, *sav_exi; 3743 nfs_fh4_fmt_t *fh_fmtp; 3744 nfs_export_t *ne = nfs_get_export(); 3745 3746 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs); 3747 3748 if (cs->vp) { 3749 VN_RELE(cs->vp); 3750 cs->vp = NULL; 3751 } 3752 3753 if (cs->cr) 3754 crfree(cs->cr); 3755 3756 cs->cr = crdup(cs->basecr); 3757 3758 vp = ne->exi_public->exi_vp; 3759 if (vp == NULL) { 3760 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 3761 goto out; 3762 } 3763 3764 error = makefh4(&cs->fh, vp, ne->exi_public); 3765 if (error != 0) { 3766 *cs->statusp = resp->status = puterrno4(error); 3767 goto out; 3768 } 3769 sav_exi = cs->exi; 3770 if (ne->exi_public == ne->exi_root) { 3771 /* 3772 * No filesystem is actually shared public, so we default 3773 * to exi_root. In this case, we must check whether root 3774 * is exported. 3775 */ 3776 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val; 3777 3778 /* 3779 * if root filesystem is exported, the exportinfo struct that we 3780 * should use is what checkexport4 returns, because root_exi is 3781 * actually a mostly empty struct. 3782 */ 3783 exi = checkexport4(&fh_fmtp->fh4_fsid, 3784 (fid_t *)&fh_fmtp->fh4_xlen, NULL); 3785 cs->exi = ((exi != NULL) ? 
exi : ne->exi_public); 3786 } else { 3787 /* 3788 * it's a properly shared filesystem 3789 */ 3790 cs->exi = ne->exi_public; 3791 } 3792 3793 if (is_system_labeled()) { 3794 bslabel_t *clabel; 3795 3796 ASSERT(req->rq_label != NULL); 3797 clabel = req->rq_label; 3798 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *, 3799 "got client label from request(1)", 3800 struct svc_req *, req); 3801 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3802 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 3803 cs->exi)) { 3804 *cs->statusp = resp->status = 3805 NFS4ERR_SERVERFAULT; 3806 goto out; 3807 } 3808 } 3809 } 3810 3811 VN_HOLD(vp); 3812 cs->vp = vp; 3813 3814 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 3815 VN_RELE(cs->vp); 3816 cs->vp = NULL; 3817 cs->exi = sav_exi; 3818 goto out; 3819 } 3820 3821 *cs->statusp = resp->status = NFS4_OK; 3822 out: 3823 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs, 3824 PUTPUBFH4res *, resp); 3825 } 3826 3827 /* 3828 * XXX - issue with put*fh operations. Suppose /export/home is exported. 3829 * Suppose an NFS client goes to mount /export/home/joe. If /export, home, 3830 * or joe have restrictive search permissions, then we shouldn't let 3831 * the client get a file handle. This is easy to enforce. However, we 3832 * don't know what security flavor should be used until we resolve the 3833 * path name. Another complication is uid mapping. If root is 3834 * the user, then it will be mapped to the anonymous user by default, 3835 * but we won't know that till we've resolved the path name. And we won't 3836 * know what the anonymous user is. 3837 * Luckily, SECINFO is specified to take a full filename. 3838 * So what we will have to in rfs4_op_lookup is check that flavor of 3839 * the target object matches that of the request, and if root was the 3840 * caller, check for the root= and anon= options, and if necessary, 3841 * repeat the lookup using the right cred_t. But that's not done yet. 
3842 */ 3843 /* ARGSUSED */ 3844 static void 3845 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3846 struct compound_state *cs) 3847 { 3848 PUTFH4args *args = &argop->nfs_argop4_u.opputfh; 3849 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh; 3850 nfs_fh4_fmt_t *fh_fmtp; 3851 3852 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs, 3853 PUTFH4args *, args); 3854 3855 if (cs->vp) { 3856 VN_RELE(cs->vp); 3857 cs->vp = NULL; 3858 } 3859 3860 if (cs->cr) { 3861 crfree(cs->cr); 3862 cs->cr = NULL; 3863 } 3864 3865 3866 if (args->object.nfs_fh4_len < NFS_FH4_LEN) { 3867 *cs->statusp = resp->status = NFS4ERR_BADHANDLE; 3868 goto out; 3869 } 3870 3871 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val; 3872 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen, 3873 NULL); 3874 3875 if (cs->exi == NULL) { 3876 *cs->statusp = resp->status = NFS4ERR_STALE; 3877 goto out; 3878 } 3879 3880 cs->cr = crdup(cs->basecr); 3881 3882 ASSERT(cs->cr != NULL); 3883 3884 if (! 
(cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) { 3885 *cs->statusp = resp->status; 3886 goto out; 3887 } 3888 3889 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 3890 VN_RELE(cs->vp); 3891 cs->vp = NULL; 3892 goto out; 3893 } 3894 3895 nfs_fh4_copy(&args->object, &cs->fh); 3896 *cs->statusp = resp->status = NFS4_OK; 3897 cs->deleg = FALSE; 3898 3899 out: 3900 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs, 3901 PUTFH4res *, resp); 3902 } 3903 3904 /* ARGSUSED */ 3905 static void 3906 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3907 struct compound_state *cs) 3908 { 3909 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh; 3910 int error; 3911 fid_t fid; 3912 struct exportinfo *exi, *sav_exi; 3913 3914 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs); 3915 3916 if (cs->vp) { 3917 VN_RELE(cs->vp); 3918 cs->vp = NULL; 3919 } 3920 3921 if (cs->cr) 3922 crfree(cs->cr); 3923 3924 cs->cr = crdup(cs->basecr); 3925 3926 /* 3927 * Using rootdir, the system root vnode, 3928 * get its fid. 3929 */ 3930 bzero(&fid, sizeof (fid)); 3931 fid.fid_len = MAXFIDSZ; 3932 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid); 3933 if (error != 0) { 3934 *cs->statusp = resp->status = puterrno4(error); 3935 goto out; 3936 } 3937 3938 /* 3939 * Then use the root fsid & fid it to find out if it's exported 3940 * 3941 * If the server root isn't exported directly, then 3942 * it should at least be a pseudo export based on 3943 * one or more exports further down in the server's 3944 * file tree. 3945 */ 3946 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL); 3947 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) { 3948 NFS4_DEBUG(rfs4_debug, 3949 (CE_WARN, "rfs4_op_putrootfh: export check failure")); 3950 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 3951 goto out; 3952 } 3953 3954 /* 3955 * Now make a filehandle based on the root 3956 * export and root vnode. 
3957 */ 3958 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi); 3959 if (error != 0) { 3960 *cs->statusp = resp->status = puterrno4(error); 3961 goto out; 3962 } 3963 3964 sav_exi = cs->exi; 3965 cs->exi = exi; 3966 3967 VN_HOLD(ZONE_ROOTVP()); 3968 cs->vp = ZONE_ROOTVP(); 3969 3970 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 3971 VN_RELE(cs->vp); 3972 cs->vp = NULL; 3973 cs->exi = sav_exi; 3974 goto out; 3975 } 3976 3977 *cs->statusp = resp->status = NFS4_OK; 3978 cs->deleg = FALSE; 3979 out: 3980 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs, 3981 PUTROOTFH4res *, resp); 3982 } 3983 3984 /* 3985 * readlink: args: CURRENT_FH. 3986 * res: status. If success - CURRENT_FH unchanged, return linktext. 3987 */ 3988 3989 /* ARGSUSED */ 3990 static void 3991 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3992 struct compound_state *cs) 3993 { 3994 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink; 3995 int error; 3996 vnode_t *vp; 3997 struct iovec iov; 3998 struct vattr va; 3999 struct uio uio; 4000 char *data; 4001 struct sockaddr *ca; 4002 char *name = NULL; 4003 int is_referral; 4004 4005 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs); 4006 4007 /* CURRENT_FH: directory */ 4008 vp = cs->vp; 4009 if (vp == NULL) { 4010 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4011 goto out; 4012 } 4013 4014 if (cs->access == CS_ACCESS_DENIED) { 4015 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4016 goto out; 4017 } 4018 4019 /* Is it a referral? 
*/ 4020 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) { 4021 4022 is_referral = 1; 4023 4024 } else { 4025 4026 is_referral = 0; 4027 4028 if (vp->v_type == VDIR) { 4029 *cs->statusp = resp->status = NFS4ERR_ISDIR; 4030 goto out; 4031 } 4032 4033 if (vp->v_type != VLNK) { 4034 *cs->statusp = resp->status = NFS4ERR_INVAL; 4035 goto out; 4036 } 4037 4038 } 4039 4040 va.va_mask = AT_MODE; 4041 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL); 4042 if (error) { 4043 *cs->statusp = resp->status = puterrno4(error); 4044 goto out; 4045 } 4046 4047 if (MANDLOCK(vp, va.va_mode)) { 4048 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4049 goto out; 4050 } 4051 4052 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP); 4053 4054 if (is_referral) { 4055 char *s; 4056 size_t strsz; 4057 kstat_named_t *stat = 4058 cs->exi->exi_ne->ne_globals->svstat[NFS_V4]; 4059 4060 /* Get an artificial symlink based on a referral */ 4061 s = build_symlink(vp, cs->cr, &strsz); 4062 stat[NFS_REFERLINKS].value.ui64++; 4063 DTRACE_PROBE2(nfs4serv__func__referral__reflink, 4064 vnode_t *, vp, char *, s); 4065 if (s == NULL) 4066 error = EINVAL; 4067 else { 4068 error = 0; 4069 (void) strlcpy(data, s, MAXPATHLEN + 1); 4070 kmem_free(s, strsz); 4071 } 4072 4073 } else { 4074 4075 iov.iov_base = data; 4076 iov.iov_len = MAXPATHLEN; 4077 uio.uio_iov = &iov; 4078 uio.uio_iovcnt = 1; 4079 uio.uio_segflg = UIO_SYSSPACE; 4080 uio.uio_extflg = UIO_COPY_CACHED; 4081 uio.uio_loffset = 0; 4082 uio.uio_resid = MAXPATHLEN; 4083 4084 error = VOP_READLINK(vp, &uio, cs->cr, NULL); 4085 4086 if (!error) 4087 *(data + MAXPATHLEN - uio.uio_resid) = '\0'; 4088 } 4089 4090 if (error) { 4091 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1); 4092 *cs->statusp = resp->status = puterrno4(error); 4093 goto out; 4094 } 4095 4096 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 4097 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND, 4098 MAXPATHLEN + 1); 4099 4100 if (name == NULL) { 4101 /* 4102 * 
Even though the conversion failed, we return 4103 * something. We just don't translate it. 4104 */ 4105 name = data; 4106 } 4107 4108 /* 4109 * treat link name as data 4110 */ 4111 (void) str_to_utf8(name, (utf8string *)&resp->link); 4112 4113 if (name != data) 4114 kmem_free(name, MAXPATHLEN + 1); 4115 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1); 4116 *cs->statusp = resp->status = NFS4_OK; 4117 4118 out: 4119 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs, 4120 READLINK4res *, resp); 4121 } 4122 4123 static void 4124 rfs4_op_readlink_free(nfs_resop4 *resop) 4125 { 4126 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink; 4127 utf8string *symlink = (utf8string *)&resp->link; 4128 4129 if (symlink->utf8string_val) { 4130 UTF8STRING_FREE(*symlink) 4131 } 4132 } 4133 4134 /* 4135 * release_lockowner: 4136 * Release any state associated with the supplied 4137 * lockowner. Note if any lo_state is holding locks we will not 4138 * rele that lo_state and thus the lockowner will not be destroyed. 4139 * A client using lock after the lock owner stateid has been released 4140 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have 4141 * to reissue the lock with new_lock_owner set to TRUE. 
4142 * args: lock_owner 4143 * res: status 4144 */ 4145 /* ARGSUSED */ 4146 static void 4147 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop, 4148 struct svc_req *req, struct compound_state *cs) 4149 { 4150 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner; 4151 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner; 4152 rfs4_lockowner_t *lo; 4153 rfs4_openowner_t *oo; 4154 rfs4_state_t *sp; 4155 rfs4_lo_state_t *lsp; 4156 rfs4_client_t *cp; 4157 bool_t create = FALSE; 4158 locklist_t *llist; 4159 sysid_t sysid; 4160 4161 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *, 4162 cs, RELEASE_LOCKOWNER4args *, ap); 4163 4164 /* Make sure there is a clientid around for this request */ 4165 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE); 4166 4167 if (cp == NULL) { 4168 *cs->statusp = resp->status = 4169 rfs4_check_clientid(&ap->lock_owner.clientid, 0); 4170 goto out; 4171 } 4172 rfs4_client_rele(cp); 4173 4174 lo = rfs4_findlockowner(&ap->lock_owner, &create); 4175 if (lo == NULL) { 4176 *cs->statusp = resp->status = NFS4_OK; 4177 goto out; 4178 } 4179 ASSERT(lo->rl_client != NULL); 4180 4181 /* 4182 * Check for EXPIRED client. If so will reap state with in a lease 4183 * period or on next set_clientid_confirm step 4184 */ 4185 if (rfs4_lease_expired(lo->rl_client)) { 4186 rfs4_lockowner_rele(lo); 4187 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 4188 goto out; 4189 } 4190 4191 /* 4192 * If no sysid has been assigned, then no locks exist; just return. 4193 */ 4194 rfs4_dbe_lock(lo->rl_client->rc_dbe); 4195 if (lo->rl_client->rc_sysidt == LM_NOSYSID) { 4196 rfs4_lockowner_rele(lo); 4197 rfs4_dbe_unlock(lo->rl_client->rc_dbe); 4198 goto out; 4199 } 4200 4201 sysid = lo->rl_client->rc_sysidt; 4202 rfs4_dbe_unlock(lo->rl_client->rc_dbe); 4203 4204 /* 4205 * Mark the lockowner invalid. 
4206 */ 4207 rfs4_dbe_hide(lo->rl_dbe); 4208 4209 /* 4210 * sysid-pid pair should now not be used since the lockowner is 4211 * invalid. If the client were to instantiate the lockowner again 4212 * it would be assigned a new pid. Thus we can get the list of 4213 * current locks. 4214 */ 4215 4216 llist = flk_get_active_locks(sysid, lo->rl_pid); 4217 /* If we are still holding locks fail */ 4218 if (llist != NULL) { 4219 4220 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD; 4221 4222 flk_free_locklist(llist); 4223 /* 4224 * We need to unhide the lockowner so the client can 4225 * try it again. The bad thing here is if the client 4226 * has a logic error that took it here in the first place 4227 * they probably have lost accounting of the locks that it 4228 * is holding. So we may have dangling state until the 4229 * open owner state is reaped via close. One scenario 4230 * that could possibly occur is that the client has 4231 * sent the unlock request(s) in separate threads 4232 * and has not waited for the replies before sending the 4233 * RELEASE_LOCKOWNER request. Presumably, it would expect 4234 * and deal appropriately with NFS4ERR_LOCKS_HELD, by 4235 * reissuing the request. 4236 */ 4237 rfs4_dbe_unhide(lo->rl_dbe); 4238 rfs4_lockowner_rele(lo); 4239 goto out; 4240 } 4241 4242 /* 4243 * For the corresponding client we need to check each open 4244 * owner for any opens that have lockowner state associated 4245 * with this lockowner. 
4246 */ 4247 4248 rfs4_dbe_lock(lo->rl_client->rc_dbe); 4249 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL; 4250 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) { 4251 4252 rfs4_dbe_lock(oo->ro_dbe); 4253 for (sp = list_head(&oo->ro_statelist); sp != NULL; 4254 sp = list_next(&oo->ro_statelist, sp)) { 4255 4256 rfs4_dbe_lock(sp->rs_dbe); 4257 for (lsp = list_head(&sp->rs_lostatelist); 4258 lsp != NULL; 4259 lsp = list_next(&sp->rs_lostatelist, lsp)) { 4260 if (lsp->rls_locker == lo) { 4261 rfs4_dbe_lock(lsp->rls_dbe); 4262 rfs4_dbe_invalidate(lsp->rls_dbe); 4263 rfs4_dbe_unlock(lsp->rls_dbe); 4264 } 4265 } 4266 rfs4_dbe_unlock(sp->rs_dbe); 4267 } 4268 rfs4_dbe_unlock(oo->ro_dbe); 4269 } 4270 rfs4_dbe_unlock(lo->rl_client->rc_dbe); 4271 4272 rfs4_lockowner_rele(lo); 4273 4274 *cs->statusp = resp->status = NFS4_OK; 4275 4276 out: 4277 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *, 4278 cs, RELEASE_LOCKOWNER4res *, resp); 4279 } 4280 4281 /* 4282 * short utility function to lookup a file and recall the delegation 4283 */ 4284 static rfs4_file_t * 4285 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp, 4286 int *lkup_error, cred_t *cr) 4287 { 4288 vnode_t *vp; 4289 rfs4_file_t *fp = NULL; 4290 bool_t fcreate = FALSE; 4291 int error; 4292 4293 if (vpp) 4294 *vpp = NULL; 4295 4296 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL, 4297 NULL)) == 0) { 4298 if (vp->v_type == VREG) 4299 fp = rfs4_findfile(vp, NULL, &fcreate); 4300 if (vpp) 4301 *vpp = vp; 4302 else 4303 VN_RELE(vp); 4304 } 4305 4306 if (lkup_error) 4307 *lkup_error = error; 4308 4309 return (fp); 4310 } 4311 4312 /* 4313 * remove: args: CURRENT_FH: directory; name. 4314 * res: status. If success - CURRENT_FH unchanged, return change_info 4315 * for directory. 
4316 */ 4317 /* ARGSUSED */ 4318 static void 4319 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 4320 struct compound_state *cs) 4321 { 4322 REMOVE4args *args = &argop->nfs_argop4_u.opremove; 4323 REMOVE4res *resp = &resop->nfs_resop4_u.opremove; 4324 int error; 4325 vnode_t *dvp, *vp; 4326 struct vattr bdva, idva, adva; 4327 char *nm; 4328 uint_t len; 4329 rfs4_file_t *fp; 4330 int in_crit = 0; 4331 bslabel_t *clabel; 4332 struct sockaddr *ca; 4333 char *name = NULL; 4334 nfsstat4 status; 4335 4336 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs, 4337 REMOVE4args *, args); 4338 4339 /* CURRENT_FH: directory */ 4340 dvp = cs->vp; 4341 if (dvp == NULL) { 4342 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4343 goto out; 4344 } 4345 4346 if (cs->access == CS_ACCESS_DENIED) { 4347 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4348 goto out; 4349 } 4350 4351 /* 4352 * If there is an unshared filesystem mounted on this vnode, 4353 * Do not allow to remove anything in this directory. 
4354 */ 4355 if (vn_ismntpt(dvp)) { 4356 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4357 goto out; 4358 } 4359 4360 if (dvp->v_type != VDIR) { 4361 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 4362 goto out; 4363 } 4364 4365 status = utf8_dir_verify(&args->target); 4366 if (status != NFS4_OK) { 4367 *cs->statusp = resp->status = status; 4368 goto out; 4369 } 4370 4371 /* 4372 * Lookup the file so that we can check if it's a directory 4373 */ 4374 nm = utf8_to_fn(&args->target, &len, NULL); 4375 if (nm == NULL) { 4376 *cs->statusp = resp->status = NFS4ERR_INVAL; 4377 goto out; 4378 } 4379 4380 if (len > MAXNAMELEN) { 4381 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 4382 kmem_free(nm, len); 4383 goto out; 4384 } 4385 4386 if (rdonly4(req, cs)) { 4387 *cs->statusp = resp->status = NFS4ERR_ROFS; 4388 kmem_free(nm, len); 4389 goto out; 4390 } 4391 4392 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 4393 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 4394 MAXPATHLEN + 1); 4395 4396 if (name == NULL) { 4397 *cs->statusp = resp->status = NFS4ERR_INVAL; 4398 kmem_free(nm, len); 4399 goto out; 4400 } 4401 4402 /* 4403 * Lookup the file to determine type and while we are see if 4404 * there is a file struct around and check for delegation. 4405 * We don't need to acquire va_seq before this lookup, if 4406 * it causes an update, cinfo.before will not match, which will 4407 * trigger a cache flush even if atomic is TRUE. 
4408 */ 4409 fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr); 4410 if (fp != NULL) { 4411 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE, 4412 NULL)) { 4413 VN_RELE(vp); 4414 rfs4_file_rele(fp); 4415 *cs->statusp = resp->status = NFS4ERR_DELAY; 4416 if (nm != name) 4417 kmem_free(name, MAXPATHLEN + 1); 4418 kmem_free(nm, len); 4419 goto out; 4420 } 4421 } 4422 4423 /* Didn't find anything to remove */ 4424 if (vp == NULL) { 4425 *cs->statusp = resp->status = error; 4426 if (nm != name) 4427 kmem_free(name, MAXPATHLEN + 1); 4428 kmem_free(nm, len); 4429 goto out; 4430 } 4431 4432 if (nbl_need_check(vp)) { 4433 nbl_start_crit(vp, RW_READER); 4434 in_crit = 1; 4435 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) { 4436 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 4437 if (nm != name) 4438 kmem_free(name, MAXPATHLEN + 1); 4439 kmem_free(nm, len); 4440 nbl_end_crit(vp); 4441 VN_RELE(vp); 4442 if (fp) { 4443 rfs4_clear_dont_grant(fp); 4444 rfs4_file_rele(fp); 4445 } 4446 goto out; 4447 } 4448 } 4449 4450 /* check label before allowing removal */ 4451 if (is_system_labeled()) { 4452 ASSERT(req->rq_label != NULL); 4453 clabel = req->rq_label; 4454 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *, 4455 "got client label from request(1)", 4456 struct svc_req *, req); 4457 if (!blequal(&l_admin_low->tsl_label, clabel)) { 4458 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK, 4459 cs->exi)) { 4460 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4461 if (name != nm) 4462 kmem_free(name, MAXPATHLEN + 1); 4463 kmem_free(nm, len); 4464 if (in_crit) 4465 nbl_end_crit(vp); 4466 VN_RELE(vp); 4467 if (fp) { 4468 rfs4_clear_dont_grant(fp); 4469 rfs4_file_rele(fp); 4470 } 4471 goto out; 4472 } 4473 } 4474 } 4475 4476 /* Get dir "before" change value */ 4477 bdva.va_mask = AT_CTIME|AT_SEQ; 4478 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL); 4479 if (error) { 4480 *cs->statusp = resp->status = puterrno4(error); 4481 if (nm != name) 4482 
kmem_free(name, MAXPATHLEN + 1); 4483 kmem_free(nm, len); 4484 if (in_crit) 4485 nbl_end_crit(vp); 4486 VN_RELE(vp); 4487 if (fp) { 4488 rfs4_clear_dont_grant(fp); 4489 rfs4_file_rele(fp); 4490 } 4491 goto out; 4492 } 4493 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime) 4494 4495 /* Actually do the REMOVE operation */ 4496 if (vp->v_type == VDIR) { 4497 /* 4498 * Can't remove a directory that has a mounted-on filesystem. 4499 */ 4500 if (vn_ismntpt(vp)) { 4501 error = EACCES; 4502 } else { 4503 /* 4504 * System V defines rmdir to return EEXIST, 4505 * not ENOTEMPTY, if the directory is not 4506 * empty. A System V NFS server needs to map 4507 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to 4508 * transmit over the wire. 4509 */ 4510 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr, 4511 NULL, 0)) == EEXIST) 4512 error = ENOTEMPTY; 4513 } 4514 } else { 4515 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 && 4516 fp != NULL) { 4517 struct vattr va; 4518 vnode_t *tvp; 4519 4520 rfs4_dbe_lock(fp->rf_dbe); 4521 tvp = fp->rf_vp; 4522 if (tvp) 4523 VN_HOLD(tvp); 4524 rfs4_dbe_unlock(fp->rf_dbe); 4525 4526 if (tvp) { 4527 /* 4528 * This is va_seq safe because we are not 4529 * manipulating dvp. 
4530 */ 4531 va.va_mask = AT_NLINK; 4532 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) && 4533 va.va_nlink == 0) { 4534 /* Remove state on file remove */ 4535 if (in_crit) { 4536 nbl_end_crit(vp); 4537 in_crit = 0; 4538 } 4539 rfs4_close_all_state(fp); 4540 } 4541 VN_RELE(tvp); 4542 } 4543 } 4544 } 4545 4546 if (in_crit) 4547 nbl_end_crit(vp); 4548 VN_RELE(vp); 4549 4550 if (fp) { 4551 rfs4_clear_dont_grant(fp); 4552 rfs4_file_rele(fp); 4553 } 4554 if (nm != name) 4555 kmem_free(name, MAXPATHLEN + 1); 4556 kmem_free(nm, len); 4557 4558 if (error) { 4559 *cs->statusp = resp->status = puterrno4(error); 4560 goto out; 4561 } 4562 4563 /* 4564 * Get the initial "after" sequence number, if it fails, set to zero 4565 */ 4566 idva.va_mask = AT_SEQ; 4567 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL)) 4568 idva.va_seq = 0; 4569 4570 /* 4571 * Force modified data and metadata out to stable storage. 4572 */ 4573 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 4574 4575 /* 4576 * Get "after" change value, if it fails, simply return the 4577 * before value. 4578 */ 4579 adva.va_mask = AT_CTIME|AT_SEQ; 4580 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) { 4581 adva.va_ctime = bdva.va_ctime; 4582 adva.va_seq = 0; 4583 } 4584 4585 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime) 4586 4587 /* 4588 * The cinfo.atomic = TRUE only if we have 4589 * non-zero va_seq's, and it has incremented by exactly one 4590 * during the VOP_REMOVE/RMDIR and it didn't change during 4591 * the VOP_FSYNC. 4592 */ 4593 if (bdva.va_seq && idva.va_seq && adva.va_seq && 4594 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq) 4595 resp->cinfo.atomic = TRUE; 4596 else 4597 resp->cinfo.atomic = FALSE; 4598 4599 *cs->statusp = resp->status = NFS4_OK; 4600 4601 out: 4602 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs, 4603 REMOVE4res *, resp); 4604 } 4605 4606 /* 4607 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory, 4608 * oldname and newname. 4609 * res: status. 
If success - CURRENT_FH unchanged, return change_info 4610 * for both from and target directories. 4611 */ 4612 /* ARGSUSED */ 4613 static void 4614 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 4615 struct compound_state *cs) 4616 { 4617 RENAME4args *args = &argop->nfs_argop4_u.oprename; 4618 RENAME4res *resp = &resop->nfs_resop4_u.oprename; 4619 int error; 4620 vnode_t *odvp; 4621 vnode_t *ndvp; 4622 vnode_t *srcvp, *targvp, *tvp; 4623 struct vattr obdva, oidva, oadva; 4624 struct vattr nbdva, nidva, nadva; 4625 char *onm, *nnm; 4626 uint_t olen, nlen; 4627 rfs4_file_t *fp, *sfp; 4628 int in_crit_src, in_crit_targ; 4629 int fp_rele_grant_hold, sfp_rele_grant_hold; 4630 int unlinked; 4631 bslabel_t *clabel; 4632 struct sockaddr *ca; 4633 char *converted_onm = NULL; 4634 char *converted_nnm = NULL; 4635 nfsstat4 status; 4636 4637 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs, 4638 RENAME4args *, args); 4639 4640 fp = sfp = NULL; 4641 srcvp = targvp = tvp = NULL; 4642 in_crit_src = in_crit_targ = 0; 4643 fp_rele_grant_hold = sfp_rele_grant_hold = 0; 4644 unlinked = 0; 4645 4646 /* CURRENT_FH: target directory */ 4647 ndvp = cs->vp; 4648 if (ndvp == NULL) { 4649 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4650 goto out; 4651 } 4652 4653 /* SAVED_FH: from directory */ 4654 odvp = cs->saved_vp; 4655 if (odvp == NULL) { 4656 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4657 goto out; 4658 } 4659 4660 if (cs->access == CS_ACCESS_DENIED) { 4661 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4662 goto out; 4663 } 4664 4665 /* 4666 * If there is an unshared filesystem mounted on this vnode, 4667 * do not allow to rename objects in this directory. 4668 */ 4669 if (vn_ismntpt(odvp)) { 4670 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4671 goto out; 4672 } 4673 4674 /* 4675 * If there is an unshared filesystem mounted on this vnode, 4676 * do not allow to rename to this directory. 
4677 */ 4678 if (vn_ismntpt(ndvp)) { 4679 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4680 goto out; 4681 } 4682 4683 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) { 4684 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 4685 goto out; 4686 } 4687 4688 if (cs->saved_exi != cs->exi) { 4689 *cs->statusp = resp->status = NFS4ERR_XDEV; 4690 goto out; 4691 } 4692 4693 status = utf8_dir_verify(&args->oldname); 4694 if (status != NFS4_OK) { 4695 *cs->statusp = resp->status = status; 4696 goto out; 4697 } 4698 4699 status = utf8_dir_verify(&args->newname); 4700 if (status != NFS4_OK) { 4701 *cs->statusp = resp->status = status; 4702 goto out; 4703 } 4704 4705 onm = utf8_to_fn(&args->oldname, &olen, NULL); 4706 if (onm == NULL) { 4707 *cs->statusp = resp->status = NFS4ERR_INVAL; 4708 goto out; 4709 } 4710 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 4711 nlen = MAXPATHLEN + 1; 4712 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND, 4713 nlen); 4714 4715 if (converted_onm == NULL) { 4716 *cs->statusp = resp->status = NFS4ERR_INVAL; 4717 kmem_free(onm, olen); 4718 goto out; 4719 } 4720 4721 nnm = utf8_to_fn(&args->newname, &nlen, NULL); 4722 if (nnm == NULL) { 4723 *cs->statusp = resp->status = NFS4ERR_INVAL; 4724 if (onm != converted_onm) 4725 kmem_free(converted_onm, MAXPATHLEN + 1); 4726 kmem_free(onm, olen); 4727 goto out; 4728 } 4729 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND, 4730 MAXPATHLEN + 1); 4731 4732 if (converted_nnm == NULL) { 4733 *cs->statusp = resp->status = NFS4ERR_INVAL; 4734 kmem_free(nnm, nlen); 4735 nnm = NULL; 4736 if (onm != converted_onm) 4737 kmem_free(converted_onm, MAXPATHLEN + 1); 4738 kmem_free(onm, olen); 4739 goto out; 4740 } 4741 4742 4743 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) { 4744 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 4745 kmem_free(onm, olen); 4746 kmem_free(nnm, nlen); 4747 goto out; 4748 } 4749 4750 4751 if (rdonly4(req, cs)) { 4752 *cs->statusp = 
resp->status = NFS4ERR_ROFS; 4753 if (onm != converted_onm) 4754 kmem_free(converted_onm, MAXPATHLEN + 1); 4755 kmem_free(onm, olen); 4756 if (nnm != converted_nnm) 4757 kmem_free(converted_nnm, MAXPATHLEN + 1); 4758 kmem_free(nnm, nlen); 4759 goto out; 4760 } 4761 4762 /* check label of the target dir */ 4763 if (is_system_labeled()) { 4764 ASSERT(req->rq_label != NULL); 4765 clabel = req->rq_label; 4766 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *, 4767 "got client label from request(1)", 4768 struct svc_req *, req); 4769 if (!blequal(&l_admin_low->tsl_label, clabel)) { 4770 if (!do_rfs_label_check(clabel, ndvp, 4771 EQUALITY_CHECK, cs->exi)) { 4772 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4773 goto err_out; 4774 } 4775 } 4776 } 4777 4778 /* 4779 * Is the source a file and have a delegation? 4780 * We don't need to acquire va_seq before these lookups, if 4781 * it causes an update, cinfo.before will not match, which will 4782 * trigger a cache flush even if atomic is TRUE. 4783 */ 4784 sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp, 4785 &error, cs->cr); 4786 if (sfp != NULL) { 4787 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE, 4788 NULL)) { 4789 *cs->statusp = resp->status = NFS4ERR_DELAY; 4790 goto err_out; 4791 } 4792 } 4793 4794 if (srcvp == NULL) { 4795 *cs->statusp = resp->status = puterrno4(error); 4796 if (onm != converted_onm) 4797 kmem_free(converted_onm, MAXPATHLEN + 1); 4798 kmem_free(onm, olen); 4799 if (nnm != converted_nnm) 4800 kmem_free(converted_nnm, MAXPATHLEN + 1); 4801 kmem_free(nnm, nlen); 4802 goto out; 4803 } 4804 4805 sfp_rele_grant_hold = 1; 4806 4807 /* Does the destination exist and a file and have a delegation? 
*/ 4808 fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp, NULL, 4809 cs->cr); 4810 if (fp != NULL) { 4811 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE, 4812 NULL)) { 4813 *cs->statusp = resp->status = NFS4ERR_DELAY; 4814 goto err_out; 4815 } 4816 } 4817 fp_rele_grant_hold = 1; 4818 4819 /* Check for NBMAND lock on both source and target */ 4820 if (nbl_need_check(srcvp)) { 4821 nbl_start_crit(srcvp, RW_READER); 4822 in_crit_src = 1; 4823 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) { 4824 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 4825 goto err_out; 4826 } 4827 } 4828 4829 if (targvp && nbl_need_check(targvp)) { 4830 nbl_start_crit(targvp, RW_READER); 4831 in_crit_targ = 1; 4832 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) { 4833 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 4834 goto err_out; 4835 } 4836 } 4837 4838 /* Get source "before" change value */ 4839 obdva.va_mask = AT_CTIME|AT_SEQ; 4840 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL); 4841 if (!error) { 4842 nbdva.va_mask = AT_CTIME|AT_SEQ; 4843 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL); 4844 } 4845 if (error) { 4846 *cs->statusp = resp->status = puterrno4(error); 4847 goto err_out; 4848 } 4849 4850 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime) 4851 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime) 4852 4853 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr, 4854 NULL, 0); 4855 4856 /* 4857 * If target existed and was unlinked by VOP_RENAME, state will need 4858 * closed. To avoid deadlock, rfs4_close_all_state will be done after 4859 * any necessary nbl_end_crit on srcvp and tgtvp. 
4860 */ 4861 if (error == 0 && fp != NULL) { 4862 rfs4_dbe_lock(fp->rf_dbe); 4863 tvp = fp->rf_vp; 4864 if (tvp) 4865 VN_HOLD(tvp); 4866 rfs4_dbe_unlock(fp->rf_dbe); 4867 4868 if (tvp) { 4869 struct vattr va; 4870 va.va_mask = AT_NLINK; 4871 4872 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) && 4873 va.va_nlink == 0) { 4874 unlinked = 1; 4875 4876 /* DEBUG data */ 4877 if ((srcvp == targvp) || (tvp != targvp)) { 4878 cmn_err(CE_WARN, "rfs4_op_rename: " 4879 "srcvp %p, targvp: %p, tvp: %p", 4880 (void *)srcvp, (void *)targvp, 4881 (void *)tvp); 4882 } 4883 } else { 4884 VN_RELE(tvp); 4885 } 4886 } 4887 } 4888 if (error == 0) 4889 vn_renamepath(ndvp, srcvp, nnm, nlen - 1); 4890 4891 if (in_crit_src) 4892 nbl_end_crit(srcvp); 4893 if (srcvp) 4894 VN_RELE(srcvp); 4895 if (in_crit_targ) 4896 nbl_end_crit(targvp); 4897 if (targvp) 4898 VN_RELE(targvp); 4899 4900 if (unlinked) { 4901 ASSERT(fp != NULL); 4902 ASSERT(tvp != NULL); 4903 4904 /* DEBUG data */ 4905 if (RW_READ_HELD(&tvp->v_nbllock)) { 4906 cmn_err(CE_WARN, "rfs4_op_rename: " 4907 "RW_READ_HELD(%p)", (void *)tvp); 4908 } 4909 4910 /* The file is gone and so should the state */ 4911 rfs4_close_all_state(fp); 4912 VN_RELE(tvp); 4913 } 4914 4915 if (sfp) { 4916 rfs4_clear_dont_grant(sfp); 4917 rfs4_file_rele(sfp); 4918 } 4919 if (fp) { 4920 rfs4_clear_dont_grant(fp); 4921 rfs4_file_rele(fp); 4922 } 4923 4924 if (converted_onm != onm) 4925 kmem_free(converted_onm, MAXPATHLEN + 1); 4926 kmem_free(onm, olen); 4927 if (converted_nnm != nnm) 4928 kmem_free(converted_nnm, MAXPATHLEN + 1); 4929 kmem_free(nnm, nlen); 4930 4931 /* 4932 * Get the initial "after" sequence number, if it fails, set to zero 4933 */ 4934 oidva.va_mask = AT_SEQ; 4935 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL)) 4936 oidva.va_seq = 0; 4937 4938 nidva.va_mask = AT_SEQ; 4939 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL)) 4940 nidva.va_seq = 0; 4941 4942 /* 4943 * Force modified data and metadata out to stable storage. 
4944 */ 4945 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL); 4946 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL); 4947 4948 if (error) { 4949 *cs->statusp = resp->status = puterrno4(error); 4950 goto out; 4951 } 4952 4953 /* 4954 * Get "after" change values, if it fails, simply return the 4955 * before value. 4956 */ 4957 oadva.va_mask = AT_CTIME|AT_SEQ; 4958 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) { 4959 oadva.va_ctime = obdva.va_ctime; 4960 oadva.va_seq = 0; 4961 } 4962 4963 nadva.va_mask = AT_CTIME|AT_SEQ; 4964 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) { 4965 nadva.va_ctime = nbdva.va_ctime; 4966 nadva.va_seq = 0; 4967 } 4968 4969 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime) 4970 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime) 4971 4972 /* 4973 * The cinfo.atomic = TRUE only if we have 4974 * non-zero va_seq's, and it has incremented by exactly one 4975 * during the VOP_RENAME and it didn't change during the VOP_FSYNC. 4976 */ 4977 if (obdva.va_seq && oidva.va_seq && oadva.va_seq && 4978 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq) 4979 resp->source_cinfo.atomic = TRUE; 4980 else 4981 resp->source_cinfo.atomic = FALSE; 4982 4983 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq && 4984 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq) 4985 resp->target_cinfo.atomic = TRUE; 4986 else 4987 resp->target_cinfo.atomic = FALSE; 4988 4989 #ifdef VOLATILE_FH_TEST 4990 { 4991 extern void add_volrnm_fh(struct exportinfo *, vnode_t *); 4992 4993 /* 4994 * Add the renamed file handle to the volatile rename list 4995 */ 4996 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) { 4997 /* file handles may expire on rename */ 4998 vnode_t *vp; 4999 5000 nnm = utf8_to_fn(&args->newname, &nlen, NULL); 5001 /* 5002 * Already know that nnm will be a valid string 5003 */ 5004 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr, 5005 NULL, NULL, NULL); 5006 kmem_free(nnm, nlen); 5007 if (!error) { 5008 
add_volrnm_fh(cs->exi, vp); 5009 VN_RELE(vp); 5010 } 5011 } 5012 } 5013 #endif /* VOLATILE_FH_TEST */ 5014 5015 *cs->statusp = resp->status = NFS4_OK; 5016 out: 5017 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs, 5018 RENAME4res *, resp); 5019 return; 5020 5021 err_out: 5022 if (onm != converted_onm) 5023 kmem_free(converted_onm, MAXPATHLEN + 1); 5024 if (onm != NULL) 5025 kmem_free(onm, olen); 5026 if (nnm != converted_nnm) 5027 kmem_free(converted_nnm, MAXPATHLEN + 1); 5028 if (nnm != NULL) 5029 kmem_free(nnm, nlen); 5030 5031 if (in_crit_src) nbl_end_crit(srcvp); 5032 if (in_crit_targ) nbl_end_crit(targvp); 5033 if (targvp) VN_RELE(targvp); 5034 if (srcvp) VN_RELE(srcvp); 5035 if (sfp) { 5036 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp); 5037 rfs4_file_rele(sfp); 5038 } 5039 if (fp) { 5040 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp); 5041 rfs4_file_rele(fp); 5042 } 5043 5044 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs, 5045 RENAME4res *, resp); 5046 } 5047 5048 /* ARGSUSED */ 5049 static void 5050 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5051 struct compound_state *cs) 5052 { 5053 RENEW4args *args = &argop->nfs_argop4_u.oprenew; 5054 RENEW4res *resp = &resop->nfs_resop4_u.oprenew; 5055 rfs4_client_t *cp; 5056 5057 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs, 5058 RENEW4args *, args); 5059 5060 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) { 5061 *cs->statusp = resp->status = 5062 rfs4_check_clientid(&args->clientid, 0); 5063 goto out; 5064 } 5065 5066 if (rfs4_lease_expired(cp)) { 5067 rfs4_client_rele(cp); 5068 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 5069 goto out; 5070 } 5071 5072 rfs4_update_lease(cp); 5073 5074 mutex_enter(cp->rc_cbinfo.cb_lock); 5075 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) { 5076 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE; 5077 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN; 5078 } else { 5079 
*cs->statusp = resp->status = NFS4_OK; 5080 } 5081 mutex_exit(cp->rc_cbinfo.cb_lock); 5082 5083 rfs4_client_rele(cp); 5084 5085 out: 5086 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs, 5087 RENEW4res *, resp); 5088 } 5089 5090 /* ARGSUSED */ 5091 static void 5092 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 5093 struct compound_state *cs) 5094 { 5095 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh; 5096 5097 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs); 5098 5099 /* No need to check cs->access - we are not accessing any object */ 5100 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) { 5101 *cs->statusp = resp->status = NFS4ERR_RESTOREFH; 5102 goto out; 5103 } 5104 if (cs->vp != NULL) { 5105 VN_RELE(cs->vp); 5106 } 5107 cs->vp = cs->saved_vp; 5108 cs->saved_vp = NULL; 5109 cs->exi = cs->saved_exi; 5110 nfs_fh4_copy(&cs->saved_fh, &cs->fh); 5111 *cs->statusp = resp->status = NFS4_OK; 5112 cs->deleg = FALSE; 5113 5114 if (cs->cs_flags & RFS4_SAVED_STATEID) { 5115 cs->current_stateid = cs->save_stateid; 5116 cs->cs_flags |= RFS4_CURRENT_STATEID; 5117 } 5118 out: 5119 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs, 5120 RESTOREFH4res *, resp); 5121 } 5122 5123 /* ARGSUSED */ 5124 static void 5125 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5126 struct compound_state *cs) 5127 { 5128 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh; 5129 5130 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs); 5131 5132 /* No need to check cs->access - we are not accessing any object */ 5133 if (cs->vp == NULL) { 5134 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5135 goto out; 5136 } 5137 if (cs->saved_vp != NULL) { 5138 VN_RELE(cs->saved_vp); 5139 } 5140 cs->saved_vp = cs->vp; 5141 VN_HOLD(cs->saved_vp); 5142 cs->saved_exi = cs->exi; 5143 /* 5144 * since SAVEFH is fairly rare, don't alloc space for its fh 5145 * unless necessary. 
5146 */ 5147 if (cs->saved_fh.nfs_fh4_val == NULL) { 5148 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP); 5149 } 5150 nfs_fh4_copy(&cs->fh, &cs->saved_fh); 5151 *cs->statusp = resp->status = NFS4_OK; 5152 5153 if (cs->cs_flags & RFS4_CURRENT_STATEID) { 5154 cs->save_stateid = cs->current_stateid; 5155 cs->cs_flags |= RFS4_SAVED_STATEID; 5156 } 5157 out: 5158 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs, 5159 SAVEFH4res *, resp); 5160 } 5161 5162 /* 5163 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to 5164 * return the bitmap of attrs that were set successfully. It is also 5165 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should 5166 * always be called only after rfs4_do_set_attrs(). 5167 * 5168 * Verify that the attributes are same as the expected ones. sargp->vap 5169 * and sargp->sbp contain the input attributes as translated from fattr4. 5170 * 5171 * This function verifies only the attrs that correspond to a vattr or 5172 * vfsstat struct. That is because of the extra step needed to get the 5173 * corresponding system structs. Other attributes have already been set or 5174 * verified by do_rfs4_set_attrs. 5175 * 5176 * Return 0 if all attrs match, -1 if some don't, error if error processing. 5177 */ 5178 static int 5179 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp, 5180 bitmap4 *resp, struct nfs4_ntov_table *ntovp) 5181 { 5182 int error, ret_error = 0; 5183 int i, k; 5184 uint_t sva_mask = sargp->vap->va_mask; 5185 uint_t vbit; 5186 union nfs4_attr_u *na; 5187 uint8_t *amap; 5188 bool_t getsb = ntovp->vfsstat; 5189 5190 if (sva_mask != 0) { 5191 /* 5192 * Okay to overwrite sargp->vap because we verify based 5193 * on the incoming values. 
5194 */ 5195 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0, 5196 sargp->cs->cr, NULL); 5197 if (ret_error) { 5198 if (resp == NULL) 5199 return (ret_error); 5200 /* 5201 * Must return bitmap of successful attrs 5202 */ 5203 sva_mask = 0; /* to prevent checking vap later */ 5204 } else { 5205 /* 5206 * Some file systems clobber va_mask. it is probably 5207 * wrong of them to do so, nonethless we practice 5208 * defensive coding. 5209 * See bug id 4276830. 5210 */ 5211 sargp->vap->va_mask = sva_mask; 5212 } 5213 } 5214 5215 if (getsb) { 5216 /* 5217 * Now get the superblock and loop on the bitmap, as there is 5218 * no simple way of translating from superblock to bitmap4. 5219 */ 5220 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp); 5221 if (ret_error) { 5222 if (resp == NULL) 5223 goto errout; 5224 getsb = FALSE; 5225 } 5226 } 5227 5228 /* 5229 * Now loop and verify each attribute which getattr returned 5230 * whether it's the same as the input. 5231 */ 5232 if (resp == NULL && !getsb && (sva_mask == 0)) 5233 goto errout; 5234 5235 na = ntovp->na; 5236 amap = ntovp->amap; 5237 k = 0; 5238 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) { 5239 k = *amap; 5240 ASSERT(nfs4_ntov_map[k].nval == k); 5241 vbit = nfs4_ntov_map[k].vbit; 5242 5243 /* 5244 * If vattr attribute but VOP_GETATTR failed, or it's 5245 * superblock attribute but VFS_STATVFS failed, skip 5246 */ 5247 if (vbit) { 5248 if ((vbit & sva_mask) == 0) 5249 continue; 5250 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) { 5251 continue; 5252 } 5253 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na); 5254 if (resp != NULL) { 5255 if (error) 5256 ret_error = -1; /* not all match */ 5257 else /* update response bitmap */ 5258 *resp |= nfs4_ntov_map[k].fbit; 5259 continue; 5260 } 5261 if (error) { 5262 ret_error = -1; /* not all match */ 5263 break; 5264 } 5265 } 5266 errout: 5267 return (ret_error); 5268 } 5269 5270 /* 5271 * Decode the attribute to be set/verified. 
If the attr requires a sys op 5272 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't 5273 * call the sv_getit function for it, because the sys op hasn't yet been done. 5274 * Return 0 for success, error code if failed. 5275 * 5276 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free. 5277 */ 5278 static int 5279 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp, 5280 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap) 5281 { 5282 int error = 0; 5283 bool_t set_later; 5284 5285 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit; 5286 5287 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) { 5288 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat; 5289 /* 5290 * don't verify yet if a vattr or sb dependent attr, 5291 * because we don't have their sys values yet. 5292 * Will be done later. 5293 */ 5294 if (! (set_later && (cmd == NFS4ATTR_VERIT))) { 5295 /* 5296 * ACLs are a special case, since setting the MODE 5297 * conflicts with setting the ACL. We delay setting 5298 * the ACL until all other attributes have been set. 5299 * The ACL gets set in do_rfs4_op_setattr(). 5300 */ 5301 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) { 5302 error = (*nfs4_ntov_map[k].sv_getit)(cmd, 5303 sargp, nap); 5304 if (error) { 5305 xdr_free(nfs4_ntov_map[k].xfunc, 5306 (caddr_t)nap); 5307 } 5308 } 5309 } 5310 } else { 5311 #ifdef DEBUG 5312 cmn_err(CE_NOTE, "decode_fattr4_attr: error " 5313 "decoding attribute %d\n", k); 5314 #endif 5315 error = EINVAL; 5316 } 5317 if (!error && resp_bval && !set_later) { 5318 *resp_bval |= nfs4_ntov_map[k].fbit; 5319 } 5320 5321 return (error); 5322 } 5323 5324 /* 5325 * Set vattr based on incoming fattr4 attrs - used by setattr. 5326 * Set response mask. Ignore any values that are not writable vattr attrs. 
5327 */ 5328 static nfsstat4 5329 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs, 5330 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp, 5331 nfs4_attr_cmd_t cmd) 5332 { 5333 int error = 0; 5334 int i; 5335 char *attrs = fattrp->attrlist4; 5336 uint32_t attrslen = fattrp->attrlist4_len; 5337 XDR xdr; 5338 nfsstat4 status = NFS4_OK; 5339 vnode_t *vp = cs->vp; 5340 union nfs4_attr_u *na; 5341 uint8_t *amap; 5342 5343 #ifndef lint 5344 /* 5345 * Make sure that maximum attribute number can be expressed as an 5346 * 8 bit quantity. 5347 */ 5348 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1)); 5349 #endif 5350 5351 if (vp == NULL) { 5352 if (resp) 5353 *resp = 0; 5354 return (NFS4ERR_NOFILEHANDLE); 5355 } 5356 if (cs->access == CS_ACCESS_DENIED) { 5357 if (resp) 5358 *resp = 0; 5359 return (NFS4ERR_ACCESS); 5360 } 5361 5362 sargp->op = cmd; 5363 sargp->cs = cs; 5364 sargp->flag = 0; /* may be set later */ 5365 sargp->vap->va_mask = 0; 5366 sargp->rdattr_error = NFS4_OK; 5367 sargp->rdattr_error_req = FALSE; 5368 /* sargp->sbp is set by the caller */ 5369 5370 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE); 5371 5372 na = ntovp->na; 5373 amap = ntovp->amap; 5374 5375 /* 5376 * The following loop iterates on the nfs4_ntov_map checking 5377 * if the fbit is set in the requested bitmap. 5378 * If set then we process the arguments using the 5379 * rfs4_fattr4 conversion functions to populate the setattr 5380 * vattr and va_mask. Any settable attrs that are not using vattr 5381 * will be set in this loop. 5382 */ 5383 for (i = 0; i < nfs4_ntov_map_size; i++) { 5384 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) { 5385 continue; 5386 } 5387 /* 5388 * If setattr, must be a writable attr. 5389 * If verify/nverify, must be a readable attr. 
5390 */ 5391 if ((error = (*nfs4_ntov_map[i].sv_getit)( 5392 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) { 5393 /* 5394 * Client tries to set/verify an 5395 * unsupported attribute, tries to set 5396 * a read only attr or verify a write 5397 * only one - error! 5398 */ 5399 break; 5400 } 5401 /* 5402 * Decode the attribute to set/verify 5403 */ 5404 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval, 5405 &xdr, resp ? resp : NULL, na); 5406 if (error) 5407 break; 5408 *amap++ = (uint8_t)nfs4_ntov_map[i].nval; 5409 na++; 5410 (ntovp->attrcnt)++; 5411 if (nfs4_ntov_map[i].vfsstat) 5412 ntovp->vfsstat = TRUE; 5413 } 5414 5415 if (error != 0) 5416 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP : 5417 puterrno4(error)); 5418 /* xdrmem_destroy(&xdrs); */ /* NO-OP */ 5419 return (status); 5420 } 5421 5422 static nfsstat4 5423 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs, 5424 stateid4 *stateid) 5425 { 5426 int error = 0; 5427 struct nfs4_svgetit_arg sarg; 5428 bool_t trunc; 5429 5430 nfsstat4 status = NFS4_OK; 5431 cred_t *cr = cs->cr; 5432 vnode_t *vp = cs->vp; 5433 struct nfs4_ntov_table ntov; 5434 struct statvfs64 sb; 5435 struct vattr bva; 5436 struct flock64 bf; 5437 int in_crit = 0; 5438 uint_t saved_mask = 0; 5439 caller_context_t ct; 5440 5441 *resp = 0; 5442 sarg.sbp = &sb; 5443 sarg.is_referral = B_FALSE; 5444 nfs4_ntov_table_init(&ntov); 5445 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov, 5446 NFS4ATTR_SETIT); 5447 if (status != NFS4_OK) { 5448 /* 5449 * failed set attrs 5450 */ 5451 goto done; 5452 } 5453 5454 if ((sarg.vap->va_mask == 0) && 5455 (! (fattrp->attrmask & FATTR4_ACL_MASK))) { 5456 /* 5457 * no further work to be done 5458 */ 5459 goto done; 5460 } 5461 5462 /* 5463 * If we got a request to set the ACL and the MODE, only 5464 * allow changing VSUID, VSGID, and VSVTX. Attempting 5465 * to change any other bits, along with setting an ACL, 5466 * gives NFS4ERR_INVAL. 
5467 */ 5468 if ((fattrp->attrmask & FATTR4_ACL_MASK) && 5469 (fattrp->attrmask & FATTR4_MODE_MASK)) { 5470 vattr_t va; 5471 5472 va.va_mask = AT_MODE; 5473 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL); 5474 if (error) { 5475 status = puterrno4(error); 5476 goto done; 5477 } 5478 if ((sarg.vap->va_mode ^ va.va_mode) & 5479 ~(VSUID | VSGID | VSVTX)) { 5480 status = NFS4ERR_INVAL; 5481 goto done; 5482 } 5483 } 5484 5485 /* Check stateid only if size has been set */ 5486 if (sarg.vap->va_mask & AT_SIZE) { 5487 trunc = (sarg.vap->va_size == 0); 5488 status = rfs4_check_stateid(FWRITE, cs->vp, stateid, 5489 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct, cs); 5490 if (status != NFS4_OK) 5491 goto done; 5492 } else { 5493 ct.cc_sysid = 0; 5494 ct.cc_pid = 0; 5495 ct.cc_caller_id = nfs4_srv_caller_id; 5496 ct.cc_flags = CC_DONTBLOCK; 5497 } 5498 5499 /* XXX start of possible race with delegations */ 5500 5501 /* 5502 * We need to specially handle size changes because it is 5503 * possible for the client to create a file with read-only 5504 * modes, but with the file opened for writing. If the client 5505 * then tries to set the file size, e.g. ftruncate(3C), 5506 * fcntl(F_FREESP), the normal access checking done in 5507 * VOP_SETATTR would prevent the client from doing it even though 5508 * it should be allowed to do so. To get around this, we do the 5509 * access checking for ourselves and use VOP_SPACE which doesn't 5510 * do the access checking. 5511 * Also the client should not be allowed to change the file 5512 * size if there is a conflicting non-blocking mandatory lock in 5513 * the region of the change. 5514 */ 5515 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) { 5516 u_offset_t offset; 5517 ssize_t length; 5518 5519 /* 5520 * ufs_setattr clears AT_SIZE from vap->va_mask, but 5521 * before returning, sarg.vap->va_mask is used to 5522 * generate the setattr reply bitmap. We also clear 5523 * AT_SIZE below before calling VOP_SPACE. 
For both 5524 * of these cases, the va_mask needs to be saved here 5525 * and restored after calling VOP_SETATTR. 5526 */ 5527 saved_mask = sarg.vap->va_mask; 5528 5529 /* 5530 * Check any possible conflict due to NBMAND locks. 5531 * Get into critical region before VOP_GETATTR, so the 5532 * size attribute is valid when checking conflicts. 5533 */ 5534 if (nbl_need_check(vp)) { 5535 nbl_start_crit(vp, RW_READER); 5536 in_crit = 1; 5537 } 5538 5539 bva.va_mask = AT_UID|AT_SIZE; 5540 error = VOP_GETATTR(vp, &bva, 0, cr, &ct); 5541 if (error != 0) { 5542 status = puterrno4(error); 5543 goto done; 5544 } 5545 5546 if (in_crit) { 5547 if (sarg.vap->va_size < bva.va_size) { 5548 offset = sarg.vap->va_size; 5549 length = bva.va_size - sarg.vap->va_size; 5550 } else { 5551 offset = bva.va_size; 5552 length = sarg.vap->va_size - bva.va_size; 5553 } 5554 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 5555 &ct)) { 5556 status = NFS4ERR_LOCKED; 5557 goto done; 5558 } 5559 } 5560 5561 if (crgetuid(cr) == bva.va_uid) { 5562 sarg.vap->va_mask &= ~AT_SIZE; 5563 bf.l_type = F_WRLCK; 5564 bf.l_whence = 0; 5565 bf.l_start = (off64_t)sarg.vap->va_size; 5566 bf.l_len = 0; 5567 bf.l_sysid = 0; 5568 bf.l_pid = 0; 5569 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 5570 (offset_t)sarg.vap->va_size, cr, &ct); 5571 } 5572 } 5573 5574 if (!error && sarg.vap->va_mask != 0) 5575 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct); 5576 5577 /* restore va_mask -- ufs_setattr clears AT_SIZE */ 5578 if (saved_mask & AT_SIZE) 5579 sarg.vap->va_mask |= AT_SIZE; 5580 5581 /* 5582 * If an ACL was being set, it has been delayed until now, 5583 * in order to set the mode (via the VOP_SETATTR() above) first. 5584 */ 5585 if ((! 
error) && (fattrp->attrmask & FATTR4_ACL_MASK)) { 5586 int i; 5587 5588 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++) 5589 if (ntov.amap[i] == FATTR4_ACL) 5590 break; 5591 if (i < NFS4_MAXNUM_ATTRS) { 5592 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)( 5593 NFS4ATTR_SETIT, &sarg, &ntov.na[i]); 5594 if (error == 0) { 5595 *resp |= FATTR4_ACL_MASK; 5596 } else if (error == ENOTSUP) { 5597 (void) rfs4_verify_attr(&sarg, resp, &ntov); 5598 status = NFS4ERR_ATTRNOTSUPP; 5599 goto done; 5600 } 5601 } else { 5602 NFS4_DEBUG(rfs4_debug, 5603 (CE_NOTE, "do_rfs4_op_setattr: " 5604 "unable to find ACL in fattr4")); 5605 error = EINVAL; 5606 } 5607 } 5608 5609 if (error) { 5610 /* check if a monitor detected a delegation conflict */ 5611 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 5612 status = NFS4ERR_DELAY; 5613 else 5614 status = puterrno4(error); 5615 5616 /* 5617 * Set the response bitmap when setattr failed. 5618 * If VOP_SETATTR partially succeeded, test by doing a 5619 * VOP_GETATTR on the object and comparing the data 5620 * to the setattr arguments. 5621 */ 5622 (void) rfs4_verify_attr(&sarg, resp, &ntov); 5623 } else { 5624 /* 5625 * Force modified metadata out to stable storage. 5626 */ 5627 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 5628 /* 5629 * Set response bitmap 5630 */ 5631 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp); 5632 } 5633 5634 /* Return early and already have a NFSv4 error */ 5635 done: 5636 /* 5637 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr 5638 * conversion sets both readable and writeable NFS4 attrs 5639 * for AT_MTIME and AT_ATIME. The line below masks out 5640 * unrequested attrs from the setattr result bitmap. This 5641 * is placed after the done: label to catch the ATTRNOTSUP 5642 * case. 
5643 */ 5644 *resp &= fattrp->attrmask; 5645 5646 if (in_crit) 5647 nbl_end_crit(vp); 5648 5649 nfs4_ntov_table_free(&ntov, &sarg); 5650 5651 return (status); 5652 } 5653 5654 /* ARGSUSED */ 5655 static void 5656 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5657 struct compound_state *cs) 5658 { 5659 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr; 5660 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr; 5661 bslabel_t *clabel; 5662 5663 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs, 5664 SETATTR4args *, args); 5665 5666 if (cs->vp == NULL) { 5667 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5668 goto out; 5669 } 5670 5671 /* 5672 * If there is an unshared filesystem mounted on this vnode, 5673 * do not allow to setattr on this vnode. 5674 */ 5675 if (vn_ismntpt(cs->vp)) { 5676 *cs->statusp = resp->status = NFS4ERR_ACCESS; 5677 goto out; 5678 } 5679 5680 resp->attrsset = 0; 5681 5682 if (rdonly4(req, cs)) { 5683 *cs->statusp = resp->status = NFS4ERR_ROFS; 5684 goto out; 5685 } 5686 5687 /* check label before setting attributes */ 5688 if (is_system_labeled()) { 5689 ASSERT(req->rq_label != NULL); 5690 clabel = req->rq_label; 5691 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *, 5692 "got client label from request(1)", 5693 struct svc_req *, req); 5694 if (!blequal(&l_admin_low->tsl_label, clabel)) { 5695 if (!do_rfs_label_check(clabel, cs->vp, 5696 EQUALITY_CHECK, cs->exi)) { 5697 *cs->statusp = resp->status = NFS4ERR_ACCESS; 5698 goto out; 5699 } 5700 } 5701 } 5702 5703 get_stateid4(cs, &args->stateid); 5704 *cs->statusp = resp->status = 5705 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs, 5706 &args->stateid); 5707 5708 out: 5709 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs, 5710 SETATTR4res *, resp); 5711 } 5712 5713 /* ARGSUSED */ 5714 static void 5715 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5716 struct compound_state *cs) 
5717 { 5718 /* 5719 * verify and nverify are exactly the same, except that nverify 5720 * succeeds when some argument changed, and verify succeeds when 5721 * when none changed. 5722 */ 5723 5724 VERIFY4args *args = &argop->nfs_argop4_u.opverify; 5725 VERIFY4res *resp = &resop->nfs_resop4_u.opverify; 5726 5727 int error; 5728 struct nfs4_svgetit_arg sarg; 5729 struct statvfs64 sb; 5730 struct nfs4_ntov_table ntov; 5731 5732 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs, 5733 VERIFY4args *, args); 5734 5735 if (cs->vp == NULL) { 5736 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5737 goto out; 5738 } 5739 5740 sarg.sbp = &sb; 5741 sarg.is_referral = B_FALSE; 5742 nfs4_ntov_table_init(&ntov); 5743 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs, 5744 &sarg, &ntov, NFS4ATTR_VERIT); 5745 if (resp->status != NFS4_OK) { 5746 /* 5747 * do_rfs4_set_attrs will try to verify systemwide attrs, 5748 * so could return -1 for "no match". 5749 */ 5750 if (resp->status == -1) 5751 resp->status = NFS4ERR_NOT_SAME; 5752 goto done; 5753 } 5754 error = rfs4_verify_attr(&sarg, NULL, &ntov); 5755 switch (error) { 5756 case 0: 5757 resp->status = NFS4_OK; 5758 break; 5759 case -1: 5760 resp->status = NFS4ERR_NOT_SAME; 5761 break; 5762 default: 5763 resp->status = puterrno4(error); 5764 break; 5765 } 5766 done: 5767 *cs->statusp = resp->status; 5768 nfs4_ntov_table_free(&ntov, &sarg); 5769 out: 5770 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs, 5771 VERIFY4res *, resp); 5772 } 5773 5774 /* ARGSUSED */ 5775 static void 5776 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5777 struct compound_state *cs) 5778 { 5779 /* 5780 * verify and nverify are exactly the same, except that nverify 5781 * succeeds when some argument changed, and verify succeeds when 5782 * when none changed. 
5783 */ 5784 5785 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify; 5786 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify; 5787 5788 int error; 5789 struct nfs4_svgetit_arg sarg; 5790 struct statvfs64 sb; 5791 struct nfs4_ntov_table ntov; 5792 5793 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs, 5794 NVERIFY4args *, args); 5795 5796 if (cs->vp == NULL) { 5797 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5798 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs, 5799 NVERIFY4res *, resp); 5800 return; 5801 } 5802 sarg.sbp = &sb; 5803 sarg.is_referral = B_FALSE; 5804 nfs4_ntov_table_init(&ntov); 5805 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs, 5806 &sarg, &ntov, NFS4ATTR_VERIT); 5807 if (resp->status != NFS4_OK) { 5808 /* 5809 * do_rfs4_set_attrs will try to verify systemwide attrs, 5810 * so could return -1 for "no match". 5811 */ 5812 if (resp->status == -1) 5813 resp->status = NFS4_OK; 5814 goto done; 5815 } 5816 error = rfs4_verify_attr(&sarg, NULL, &ntov); 5817 switch (error) { 5818 case 0: 5819 resp->status = NFS4ERR_SAME; 5820 break; 5821 case -1: 5822 resp->status = NFS4_OK; 5823 break; 5824 default: 5825 resp->status = puterrno4(error); 5826 break; 5827 } 5828 done: 5829 *cs->statusp = resp->status; 5830 nfs4_ntov_table_free(&ntov, &sarg); 5831 5832 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs, 5833 NVERIFY4res *, resp); 5834 } 5835 5836 /* 5837 * XXX - This should live in an NFS header file. 
 */
#define	MAX_IOVECS	12

/*
 * WRITE operation.
 *
 * Validates the current filehandle, the cached access decision and the
 * stateid, checks for a non-blocking mandatory lock conflict, and then
 * verifies the export is writable, the vnode is a regular file, the
 * caller may write to it and it is not subject to mandatory locking.
 * The request payload (mblk chain, RDMA chunk list or flat buffer) is
 * described by an iovec array and written with do_io().  The result
 * status is stored in both resp->status and *cs->statusp.
 */
/* ARGSUSED */
static void
rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	WRITE4args *args = &argop->nfs_argop4_u.opwrite;
	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
	int error;
	vnode_t *vp;
	struct vattr bva;
	u_offset_t rlimit;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];	/* on-stack array for small requests */
	struct iovec *iovp;
	int iovcnt;
	int ioflag;
	cred_t *savecred, *cr;
	bool_t *deleg = &cs->deleg;
	nfsstat4 stat;
	int in_crit = 0;		/* set once nbl_start_crit() is held */
	caller_context_t ct;
	nfs4_srv_t *nsrv4;

	DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
	    WRITE4args *, args);

	vp = cs->vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	get_stateid4(cs, &args->stateid);

	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
	    deleg, TRUE, &ct, cs)) != NFS4_OK) {
		*cs->statusp = resp->status = stat;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE,
		    args->offset, args->data_len, 0, &ct)) {
			*cs->statusp = resp->status = NFS4ERR_LOCKED;
			goto out;
		}
	}

	cr = cs->cr;
	bva.va_mask = AT_MODE | AT_UID;
	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	if (vp->v_type != VREG) {
		*cs->statusp = resp->status =
		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
		goto out;
	}

	/* The VOP_ACCESS() check is skipped when the caller owns the file. */
	if (crgetuid(cr) != bva.va_uid &&
	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (MANDLOCK(vp, bva.va_mode)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	nsrv4 = nfs4_get_srv();
	/* A zero-length write succeeds without touching the file. */
	if (args->data_len == 0) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->count = 0;
		resp->committed = args->stable;
		resp->writeverf = nsrv4->write4verf;
		goto out;
	}

	if (args->mblk != NULL) {
		mblk_t *m;
		uint_t bytes, round_len;

		/*
		 * Count the mblks needed to cover the XDR-rounded data
		 * length; one iovec is used per mblk.
		 */
		iovcnt = 0;
		bytes = 0;
		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
		for (m = args->mblk;
		    m != NULL && bytes < round_len;
		    m = m->b_cont) {
			iovcnt++;
			bytes += MBLKL(m);
		}
#ifdef DEBUG
		/* should have ended on an mblk boundary */
		if (bytes != round_len) {
			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
			    bytes, round_len, args->data_len);
			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
			    (void *)args->mblk, (void *)m);
			ASSERT(bytes == round_len);
		}
#endif
		/* Fall back to a heap array when the stack one is too small. */
		if (iovcnt <= MAX_IOVECS) {
			iovp = iov;
		} else {
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(args->mblk, iovcnt, iovp);
	} else if (args->rlist != NULL) {
		/* Data referenced through the request's chunk list. */
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
		iovp->iov_len = args->data_len;
	} else {
		/* Data held in a single flat buffer. */
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = args->data_val;
		iovp->iov_len = args->data_len;
	}

	uio.uio_iov = iovp;
	uio.uio_iovcnt = iovcnt;

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_DEFAULT;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->data_len;
	uio.uio_llimit = curproc->p_fsz_ctl;
	/* Clamp the transfer to what remains below the file size limit. */
	rlimit = uio.uio_llimit - args->offset;
	if (rlimit < (u_offset_t)uio.uio_resid)
		uio.uio_resid = (int)rlimit;

	/* Map the requested stability level onto do_io() flags. */
	if (args->stable == UNSTABLE4)
		ioflag = 0;
	else if (args->stable == FILE_SYNC4)
		ioflag = FSYNC;
	else if (args->stable == DATA_SYNC4)
		ioflag = FDSYNC;
	else {
		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	/*
	 * We're changing creds because VM may fault and we need
	 * the cred of the current thread to be used if quota
	 * checking is enabled.
	 */
	savecred = curthread->t_cred;
	curthread->t_cred = cr;
	error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
	curthread->t_cred = savecred;

	if (iovp != iov)
		kmem_free(iovp, sizeof (*iovp) * iovcnt);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	*cs->statusp = resp->status = NFS4_OK;
	resp->count = args->data_len - uio.uio_resid;

	/* Any synchronous request (FSYNC or FDSYNC) reports FILE_SYNC4. */
	if (ioflag == 0)
		resp->committed = UNSTABLE4;
	else
		resp->committed = FILE_SYNC4;

	resp->writeverf = nsrv4->write4verf;

out:
	if (in_crit)
		nbl_end_crit(vp);

	DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
	    WRITE4res *, resp);
}

/*
 * Return 1 when opnum lies within the overall NFSv4 operation range and
 * does not exceed the last operation defined for the compound's minor
 * version (0, 1 or 2); return 0 otherwise.  For any other minor version
 * only the overall range is checked.
 */
static inline int
rfs4_opnum_in_range(const compound_state_t *cs, int opnum)
{
	if (opnum < FIRST_NFS4_OP || opnum > LAST_NFS4_OP)
		return (0);
	else if (cs->minorversion == 0 && opnum > LAST_NFS40_OP)
		return (0);
	else if (cs->minorversion == 1 && opnum > LAST_NFS41_OP)
		return (0);
	else if (cs->minorversion == 2 && opnum > LAST_NFS42_OP)
		return (0);
	return (1);
}

/*
 * Execute a COMPOUND request.
 *
 * Copies the request tag into the reply, obtains the caller's
 * credentials, allocates one result slot per operation and dispatches
 * the operations in order through rfsv4disptab while cs->cont stays
 * set.  Processing stops at the first operation that leaves
 * *cs->statusp different from NFS4_OK, and the result array is then
 * shrunk to the operations actually executed.
 *
 * If rv is non-NULL it is set to 1 when sec_svc_getcred() fails (after
 * svcerr_badcred() has been called) and to 0 otherwise.
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, compound_state_t *cs,
    struct svc_req *req, int *rv)
{
	uint_t i;
	cred_t *cr;
	nfs4_srv_t *nsrv4;
	nfs_export_t *ne = nfs_get_export();

	if (rv != NULL)
		*rv = 0;
	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_len = args->tag.utf8string_len;
	if (args->tag.utf8string_len != 0) {
		resp->tag.utf8string_val =
		    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
		bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
		    resp->tag.utf8string_len);
	} else {
		resp->tag.utf8string_val = NULL;
	}

	cs->statusp = &resp->status;
	cs->req = req;
	cs->minorversion = args->minorversion;
	resp->array = NULL;
	resp->array_len = 0;

	/* An empty compound is trivially successful. */
	if (args->array_len == 0) {
		resp->status = NFS4_OK;
		return;
	}

	cr = svc_xprt_cred(req->rq_xprt);
	ASSERT(cr != NULL);

	if (sec_svc_getcred(req, cr, &cs->principal, &cs->nfsflavor) == 0) {
		/* Fire both probes so start/done remain paired. */
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    cs, COMPOUND4args *, args);
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    cs, COMPOUND4res *, resp);
		svcerr_badcred(req->rq_xprt);
		if (rv != NULL)
			*rv = 1;
		return;
	}

	resp->array_len = args->array_len;
	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
	    KM_SLEEP);

	cs->op_len = args->array_len;
	cs->basecr = cr;
	nsrv4 = nfs4_get_srv();

	DTRACE_NFSV4_2(compound__start, struct compound_state *, cs,
	    COMPOUND4args *, args);

	/*
	 * For now, NFS4 compound processing must be protected by
	 * exported_lock because it can access more than one exportinfo
	 * per compound and share/unshare can now change multiple
	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
	 * per proc (excluding public exinfo), and exi_count design
	 * is sufficient to protect concurrent execution of NFS2/3
	 * ops along with unexport.  This lock will be removed as
	 * part of the NFSv4 phase 2 namespace redesign work.
	 */
	rw_enter(&ne->exported_lock, RW_READER);

	/*
	 * If this is the first compound we've seen, we need to start all
	 * new instances' grace periods.
	 */
	if (nsrv4->seen_first_compound == 0) {
		rfs4_grace_start_new(nsrv4);
		/*
		 * This must be set after rfs4_grace_start_new(), otherwise
		 * another thread could proceed past here before the former
		 * is finished.
		 */
		nsrv4->seen_first_compound = 1;
	}

	for (i = 0; i < args->array_len && cs->cont; i++) {
		nfs_argop4 *argop;
		nfs_resop4 *resop;
		uint_t op;
		kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];

		argop = &args->array[i];
		resop = &resp->array[i];
		resop->resop = argop->argop;
		op = (uint_t)resop->resop;

		cs->op_pos = i;
		if (op < rfsv4disp_cnt && rfs4_opnum_in_range(cs, op)) {
			/*
			 * Count the individual ops here; NULL and COMPOUND
			 * are counted in common_dispatch()
			 */
			stat[op].value.ui64++;

			NFS4_DEBUG(rfs4_debug > 1,
			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
			(*rfsv4disptab[op].dis_proc)(argop, resop, req, cs);
			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
			    rfs4_op_string[op], *cs->statusp));
			if (*cs->statusp != NFS4_OK)
				cs->cont = FALSE;
			if (rfsv4disptab[op].dis_flags & OP_CLEAR_STATEID)
				cs->cs_flags &= ~RFS4_CURRENT_STATEID;
		} else {
			/*
			 * This is effectively dead code since XDR code
			 * will have already returned BADXDR if op doesn't
			 * decode to legal value.  This only done for a
			 * day when XDR code doesn't verify v4 opcodes.
			 */
			op = OP_ILLEGAL;
			stat[OP_ILLEGAL_IDX].value.ui64++;

			rfs4_op_illegal(argop, resop, req, cs);
			cs->cont = FALSE;
		}

		/*
		 * If not at last op, and if we are to stop, then
		 * compact the results array.
		 */
		if ((i + 1) < args->array_len && !cs->cont) {
			nfs_resop4 *new_res = kmem_alloc(
			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
			bcopy(resp->array,
			    new_res, (i+1) * sizeof (nfs_resop4));
			kmem_free(resp->array,
			    args->array_len * sizeof (nfs_resop4));

			resp->array_len =  i + 1;
			resp->array = new_res;
		}
	}

	rw_exit(&ne->exported_lock);

	DTRACE_NFSV4_2(compound__done, struct compound_state *, cs,
	    COMPOUND4res *, resp);

	/*
	 * done with this compound request, free the label
	 */

	if (req->rq_label != NULL) {
		kmem_free(req->rq_label, sizeof (bslabel_t));
		req->rq_label = NULL;
	}
}

/*
 * XXX because of what appears to be duplicate calls to rfs4_compound_free
 * XXX zero out the tag and array values. Need to investigate why the
 * XXX calls occur, but at least prevent the panic for now.
6224 */ 6225 void 6226 rfs4_compound_free(COMPOUND4res *resp) 6227 { 6228 uint_t i; 6229 6230 if (resp->tag.utf8string_val) { 6231 UTF8STRING_FREE(resp->tag) 6232 } 6233 6234 for (i = 0; i < resp->array_len; i++) { 6235 nfs_resop4 *resop; 6236 uint_t op; 6237 6238 resop = &resp->array[i]; 6239 op = (uint_t)resop->resop; 6240 if (op < rfsv4disp_cnt) { 6241 (*rfsv4disptab[op].dis_resfree)(resop); 6242 } 6243 } 6244 if (resp->array != NULL) { 6245 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4)); 6246 } 6247 } 6248 6249 /* 6250 * Check if entire requst is idempotent 6251 */ 6252 bool_t 6253 rfs4_idempotent_req(const COMPOUND4args *args) 6254 { 6255 int i; 6256 6257 for (i = 0; i < args->array_len; i++) { 6258 uint_t op; 6259 6260 op = (uint_t)args->array[i].argop; 6261 6262 if (op >= rfsv4disp_cnt || 6263 !(rfsv4disptab[op].dis_flags & OP_IDEMPOTENT)) { 6264 return (FALSE); 6265 } 6266 } 6267 return (TRUE); 6268 } 6269 6270 nfsstat4 6271 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp) 6272 { 6273 nfsstat4 e; 6274 6275 rfs4_dbe_lock(cp->rc_dbe); 6276 6277 if (cp->rc_sysidt != LM_NOSYSID) { 6278 *sp = cp->rc_sysidt; 6279 e = NFS4_OK; 6280 6281 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) { 6282 *sp = cp->rc_sysidt; 6283 e = NFS4_OK; 6284 6285 NFS4_DEBUG(rfs4_debug, (CE_NOTE, 6286 "rfs4_client_sysid: allocated 0x%x\n", *sp)); 6287 } else 6288 e = NFS4ERR_DELAY; 6289 6290 rfs4_dbe_unlock(cp->rc_dbe); 6291 return (e); 6292 } 6293 6294 #if defined(DEBUG) && ! 
defined(lint) 6295 static void lock_print(char *str, int operation, struct flock64 *flk) 6296 { 6297 char *op, *type; 6298 6299 switch (operation) { 6300 case F_GETLK: op = "F_GETLK"; 6301 break; 6302 case F_SETLK: op = "F_SETLK"; 6303 break; 6304 case F_SETLK_NBMAND: op = "F_SETLK_NBMAND"; 6305 break; 6306 default: op = "F_UNKNOWN"; 6307 break; 6308 } 6309 switch (flk->l_type) { 6310 case F_UNLCK: type = "F_UNLCK"; 6311 break; 6312 case F_RDLCK: type = "F_RDLCK"; 6313 break; 6314 case F_WRLCK: type = "F_WRLCK"; 6315 break; 6316 default: type = "F_UNKNOWN"; 6317 break; 6318 } 6319 6320 ASSERT(flk->l_whence == 0); 6321 cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d", 6322 str, op, type, (longlong_t)flk->l_start, 6323 flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid); 6324 } 6325 6326 #define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f) 6327 #else 6328 #define LOCK_PRINT(d, s, t, f) 6329 #endif 6330 6331 /*ARGSUSED*/ 6332 static bool_t 6333 creds_ok(cred_set_t *cr_set, struct svc_req *req, struct compound_state *cs) 6334 { 6335 return (TRUE); 6336 } 6337 6338 /* 6339 * Look up the pathname using the vp in cs as the directory vnode. 
6340 * cs->vp will be the vnode for the file on success 6341 */ 6342 6343 static nfsstat4 6344 rfs4_lookup(component4 *component, struct svc_req *req, 6345 struct compound_state *cs) 6346 { 6347 char *nm; 6348 uint32_t len; 6349 nfsstat4 status; 6350 struct sockaddr *ca; 6351 char *name; 6352 6353 if (cs->vp == NULL) { 6354 return (NFS4ERR_NOFILEHANDLE); 6355 } 6356 if (cs->vp->v_type != VDIR) { 6357 return (NFS4ERR_NOTDIR); 6358 } 6359 6360 status = utf8_dir_verify(component); 6361 if (status != NFS4_OK) 6362 return (status); 6363 6364 nm = utf8_to_fn(component, &len, NULL); 6365 if (nm == NULL) { 6366 return (NFS4ERR_INVAL); 6367 } 6368 6369 if (len > MAXNAMELEN) { 6370 kmem_free(nm, len); 6371 return (NFS4ERR_NAMETOOLONG); 6372 } 6373 6374 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 6375 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 6376 MAXPATHLEN + 1); 6377 6378 if (name == NULL) { 6379 kmem_free(nm, len); 6380 return (NFS4ERR_INVAL); 6381 } 6382 6383 status = do_rfs4_op_lookup(name, req, cs); 6384 6385 if (name != nm) 6386 kmem_free(name, MAXPATHLEN + 1); 6387 6388 kmem_free(nm, len); 6389 6390 return (status); 6391 } 6392 6393 static nfsstat4 6394 rfs4_lookupfile(component4 *component, struct svc_req *req, 6395 struct compound_state *cs, uint32_t access, change_info4 *cinfo) 6396 { 6397 nfsstat4 status; 6398 vnode_t *dvp = cs->vp; 6399 vattr_t bva, ava, fva; 6400 int error; 6401 6402 /* Get "before" change value */ 6403 bva.va_mask = AT_CTIME|AT_SEQ; 6404 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL); 6405 if (error) 6406 return (puterrno4(error)); 6407 6408 /* rfs4_lookup may VN_RELE directory */ 6409 VN_HOLD(dvp); 6410 6411 status = rfs4_lookup(component, req, cs); 6412 if (status != NFS4_OK) { 6413 VN_RELE(dvp); 6414 return (status); 6415 } 6416 6417 /* 6418 * Get "after" change value, if it fails, simply return the 6419 * before value. 
6420 */ 6421 ava.va_mask = AT_CTIME|AT_SEQ; 6422 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) { 6423 ava.va_ctime = bva.va_ctime; 6424 ava.va_seq = 0; 6425 } 6426 VN_RELE(dvp); 6427 6428 /* 6429 * Validate the file is a file 6430 */ 6431 fva.va_mask = AT_TYPE|AT_MODE; 6432 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL); 6433 if (error) 6434 return (puterrno4(error)); 6435 6436 if (fva.va_type != VREG) { 6437 if (fva.va_type == VDIR) 6438 return (NFS4ERR_ISDIR); 6439 if (fva.va_type == VLNK) 6440 return (NFS4ERR_SYMLINK); 6441 return (NFS4ERR_INVAL); 6442 } 6443 6444 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime); 6445 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime); 6446 6447 /* 6448 * It is undefined if VOP_LOOKUP will change va_seq, so 6449 * cinfo.atomic = TRUE only if we have 6450 * non-zero va_seq's, and they have not changed. 6451 */ 6452 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq) 6453 cinfo->atomic = TRUE; 6454 else 6455 cinfo->atomic = FALSE; 6456 6457 /* Check for mandatory locking */ 6458 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode); 6459 return (check_open_access(access, cs, req)); 6460 } 6461 6462 static nfsstat4 6463 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode, 6464 cred_t *cr, vnode_t **vpp, bool_t *created) 6465 { 6466 int error; 6467 nfsstat4 status = NFS4_OK; 6468 vattr_t va; 6469 6470 tryagain: 6471 6472 /* 6473 * The file open mode used is VWRITE. If the client needs 6474 * some other semantic, then it should do the access checking 6475 * itself. It would have been nice to have the file open mode 6476 * passed as part of the arguments. 6477 */ 6478 6479 *created = TRUE; 6480 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL); 6481 6482 if (error) { 6483 *created = FALSE; 6484 6485 /* 6486 * If we got something other than file already exists 6487 * then just return this error. Otherwise, we got 6488 * EEXIST. 
If we were doing a GUARDED create, then 6489 * just return this error. Otherwise, we need to 6490 * make sure that this wasn't a duplicate of an 6491 * exclusive create request. 6492 * 6493 * The assumption is made that a non-exclusive create 6494 * request will never return EEXIST. 6495 */ 6496 6497 if (error != EEXIST || mode == GUARDED4) { 6498 status = puterrno4(error); 6499 return (status); 6500 } 6501 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr, 6502 NULL, NULL, NULL); 6503 6504 if (error) { 6505 /* 6506 * We couldn't find the file that we thought that 6507 * we just created. So, we'll just try creating 6508 * it again. 6509 */ 6510 if (error == ENOENT) 6511 goto tryagain; 6512 6513 status = puterrno4(error); 6514 return (status); 6515 } 6516 6517 if (mode == UNCHECKED4) { 6518 /* existing object must be regular file */ 6519 if ((*vpp)->v_type != VREG) { 6520 if ((*vpp)->v_type == VDIR) 6521 status = NFS4ERR_ISDIR; 6522 else if ((*vpp)->v_type == VLNK) 6523 status = NFS4ERR_SYMLINK; 6524 else 6525 status = NFS4ERR_INVAL; 6526 VN_RELE(*vpp); 6527 return (status); 6528 } 6529 6530 return (NFS4_OK); 6531 } 6532 6533 /* Check for duplicate request */ 6534 va.va_mask = AT_MTIME; 6535 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL); 6536 if (!error) { 6537 /* We found the file */ 6538 const timestruc_t *mtime = &vap->va_mtime; 6539 6540 if (va.va_mtime.tv_sec != mtime->tv_sec || 6541 va.va_mtime.tv_nsec != mtime->tv_nsec) { 6542 /* but its not our creation */ 6543 VN_RELE(*vpp); 6544 return (NFS4ERR_EXIST); 6545 } 6546 *created = TRUE; /* retrans of create == created */ 6547 return (NFS4_OK); 6548 } 6549 VN_RELE(*vpp); 6550 return (NFS4ERR_EXIST); 6551 } 6552 6553 return (NFS4_OK); 6554 } 6555 6556 static nfsstat4 6557 check_open_access(uint32_t access, struct compound_state *cs, 6558 struct svc_req *req) 6559 { 6560 int error; 6561 vnode_t *vp; 6562 bool_t readonly; 6563 cred_t *cr = cs->cr; 6564 6565 /* For now we don't allow mandatory locking as per V2/V3 */ 
6566 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) { 6567 return (NFS4ERR_ACCESS); 6568 } 6569 6570 vp = cs->vp; 6571 ASSERT(cr != NULL && vp->v_type == VREG); 6572 6573 /* 6574 * If the file system is exported read only and we are trying 6575 * to open for write, then return NFS4ERR_ROFS 6576 */ 6577 6578 readonly = rdonly4(req, cs); 6579 6580 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly) 6581 return (NFS4ERR_ROFS); 6582 6583 if (access & OPEN4_SHARE_ACCESS_READ) { 6584 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) && 6585 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) { 6586 return (NFS4ERR_ACCESS); 6587 } 6588 } 6589 6590 if (access & OPEN4_SHARE_ACCESS_WRITE) { 6591 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 6592 if (error) 6593 return (NFS4ERR_ACCESS); 6594 } 6595 6596 return (NFS4_OK); 6597 } 6598 6599 static void 6600 rfs4_verifier_to_mtime(verifier4 v, timestruc_t *mtime) 6601 { 6602 timespec32_t *time = (timespec32_t *)&v; 6603 6604 /* 6605 * Ensure no time overflows. Assumes underlying 6606 * filesystem supports at least 32 bits. 6607 * Truncate nsec to usec resolution to allow valid 6608 * compares even if the underlying filesystem truncates. 
6609 */ 6610 mtime->tv_sec = time->tv_sec % TIME32_MAX; 6611 mtime->tv_nsec = (time->tv_nsec / 1000) * 1000; 6612 } 6613 6614 static nfsstat4 6615 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs, 6616 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid) 6617 { 6618 struct nfs4_svgetit_arg sarg; 6619 struct nfs4_ntov_table ntov; 6620 6621 bool_t ntov_table_init = FALSE; 6622 struct statvfs64 sb; 6623 nfsstat4 status; 6624 vnode_t *vp; 6625 vattr_t bva, ava, iva, cva, *vap; 6626 vnode_t *dvp; 6627 char *nm = NULL; 6628 uint_t buflen; 6629 bool_t created; 6630 bool_t setsize = FALSE; 6631 len_t reqsize; 6632 int error; 6633 bool_t trunc; 6634 caller_context_t ct; 6635 component4 *component; 6636 bslabel_t *clabel; 6637 struct sockaddr *ca; 6638 char *name = NULL; 6639 fattr4 *fattr = NULL; 6640 6641 ASSERT(*attrset == 0); 6642 6643 sarg.sbp = &sb; 6644 sarg.is_referral = B_FALSE; 6645 6646 dvp = cs->vp; 6647 6648 /* Check if the file system is read only */ 6649 if (rdonly4(req, cs)) 6650 return (NFS4ERR_ROFS); 6651 6652 /* check the label of including directory */ 6653 if (is_system_labeled()) { 6654 ASSERT(req->rq_label != NULL); 6655 clabel = req->rq_label; 6656 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *, 6657 "got client label from request(1)", 6658 struct svc_req *, req); 6659 if (!blequal(&l_admin_low->tsl_label, clabel)) { 6660 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK, 6661 cs->exi)) { 6662 return (NFS4ERR_ACCESS); 6663 } 6664 } 6665 } 6666 6667 if ((args->mode == EXCLUSIVE4 || args->mode == EXCLUSIVE4_1) && 6668 dvp->v_flag & V_XATTRDIR) { 6669 /* prohibit EXCL create of named attributes */ 6670 return (NFS4ERR_INVAL); 6671 } 6672 6673 /* 6674 * Get the last component of path name in nm. cs will reference 6675 * the including directory on success. 
6676 */ 6677 component = &args->claim.open_claim4_u.file; 6678 status = utf8_dir_verify(component); 6679 if (status != NFS4_OK) 6680 return (status); 6681 6682 nm = utf8_to_fn(component, &buflen, NULL); 6683 6684 if (nm == NULL) 6685 return (NFS4ERR_RESOURCE); 6686 6687 if (buflen > MAXNAMELEN) { 6688 kmem_free(nm, buflen); 6689 return (NFS4ERR_NAMETOOLONG); 6690 } 6691 6692 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ; 6693 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL); 6694 if (error) { 6695 kmem_free(nm, buflen); 6696 return (puterrno4(error)); 6697 } 6698 6699 if (bva.va_type != VDIR) { 6700 kmem_free(nm, buflen); 6701 return (NFS4ERR_NOTDIR); 6702 } 6703 6704 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime) 6705 6706 switch (args->mode) { 6707 case GUARDED4: 6708 /*FALLTHROUGH*/ 6709 case UNCHECKED4: 6710 case EXCLUSIVE4_1: 6711 nfs4_ntov_table_init(&ntov); 6712 ntov_table_init = TRUE; 6713 6714 if (args->mode == EXCLUSIVE4_1) 6715 fattr = &args->createhow4_u.ch_createboth.cva_attrs; 6716 else 6717 fattr = &args->createhow4_u.createattrs; 6718 6719 status = do_rfs4_set_attrs(attrset, 6720 fattr, 6721 cs, &sarg, &ntov, NFS4ATTR_SETIT); 6722 6723 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) && 6724 sarg.vap->va_type != VREG) { 6725 if (sarg.vap->va_type == VDIR) 6726 status = NFS4ERR_ISDIR; 6727 else if (sarg.vap->va_type == VLNK) 6728 status = NFS4ERR_SYMLINK; 6729 else 6730 status = NFS4ERR_INVAL; 6731 } 6732 6733 if (status != NFS4_OK) { 6734 kmem_free(nm, buflen); 6735 nfs4_ntov_table_free(&ntov, &sarg); 6736 *attrset = 0; 6737 return (status); 6738 } 6739 6740 vap = sarg.vap; 6741 vap->va_type = VREG; 6742 vap->va_mask |= AT_TYPE; 6743 6744 if ((vap->va_mask & AT_MODE) == 0) { 6745 vap->va_mask |= AT_MODE; 6746 vap->va_mode = (mode_t)0600; 6747 } 6748 6749 if (vap->va_mask & AT_SIZE) { 6750 6751 /* Disallow create with a non-zero size */ 6752 6753 if ((reqsize = sarg.vap->va_size) != 0) { 6754 kmem_free(nm, buflen); 6755 nfs4_ntov_table_free(&ntov, 
&sarg); 6756 *attrset = 0; 6757 return (NFS4ERR_INVAL); 6758 } 6759 setsize = TRUE; 6760 } 6761 if (args->mode == EXCLUSIVE4_1) { 6762 rfs4_verifier_to_mtime( 6763 args->createhow4_u.ch_createboth.cva_verf, 6764 &vap->va_mtime); 6765 /* attrset will be set later */ 6766 fattr->attrmask |= FATTR4_TIME_MODIFY_MASK; 6767 vap->va_mask |= AT_MTIME; 6768 } 6769 break; 6770 6771 case EXCLUSIVE4: 6772 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE; 6773 cva.va_type = VREG; 6774 cva.va_mode = (mode_t)0; 6775 6776 rfs4_verifier_to_mtime(args->createhow4_u.createverf, 6777 &cva.va_mtime); 6778 6779 vap = &cva; 6780 6781 /* 6782 * For EXCL create, attrset is set to the server attr 6783 * used to cache the client's verifier. 6784 */ 6785 *attrset = FATTR4_TIME_MODIFY_MASK; 6786 break; 6787 } 6788 6789 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 6790 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 6791 MAXPATHLEN + 1); 6792 6793 if (name == NULL) { 6794 kmem_free(nm, buflen); 6795 return (NFS4ERR_SERVERFAULT); 6796 } 6797 6798 status = create_vnode(dvp, name, vap, args->mode, 6799 cs->cr, &vp, &created); 6800 if (nm != name) 6801 kmem_free(name, MAXPATHLEN + 1); 6802 kmem_free(nm, buflen); 6803 6804 if (status != NFS4_OK) { 6805 if (ntov_table_init) 6806 nfs4_ntov_table_free(&ntov, &sarg); 6807 *attrset = 0; 6808 return (status); 6809 } 6810 6811 trunc = (setsize && !created); 6812 6813 if (args->mode != EXCLUSIVE4) { 6814 bitmap4 createmask = fattr->attrmask; 6815 6816 /* 6817 * True verification that object was created with correct 6818 * attrs is impossible. The attrs could have been changed 6819 * immediately after object creation. If attributes did 6820 * not verify, the only recourse for the server is to 6821 * destroy the object. Maybe if some attrs (like gid) 6822 * are set incorrectly, the object should be destroyed; 6823 * however, seems bad as a default policy. 
Do we really 6824 * want to destroy an object over one of the times not 6825 * verifying correctly? For these reasons, the server 6826 * currently sets bits in attrset for createattrs 6827 * that were set; however, no verification is done. 6828 * 6829 * vmask_to_nmask accounts for vattr bits set on create 6830 * [do_rfs4_set_attrs() only sets resp bits for 6831 * non-vattr/vfs bits.] 6832 * Mask off any bits we set by default so as not to return 6833 * more attrset bits than were requested in createattrs 6834 */ 6835 if (created) { 6836 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset); 6837 *attrset &= createmask; 6838 } else { 6839 /* 6840 * We did not create the vnode (we tried but it 6841 * already existed). In this case, the only createattr 6842 * that the spec allows the server to set is size, 6843 * and even then, it can only be set if it is 0. 6844 */ 6845 *attrset = 0; 6846 if (trunc) 6847 *attrset = FATTR4_SIZE_MASK; 6848 } 6849 } 6850 if (ntov_table_init) 6851 nfs4_ntov_table_free(&ntov, &sarg); 6852 6853 /* 6854 * Get the initial "after" sequence number, if it fails, 6855 * set to zero, time to before. 6856 */ 6857 iva.va_mask = AT_CTIME|AT_SEQ; 6858 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) { 6859 iva.va_seq = 0; 6860 iva.va_ctime = bva.va_ctime; 6861 } 6862 6863 /* 6864 * create_vnode attempts to create the file exclusive, 6865 * if it already exists the VOP_CREATE will fail and 6866 * may not increase va_seq. It is atomic if 6867 * we haven't changed the directory, but if it has changed 6868 * we don't know what changed it. 6869 */ 6870 if (!created) { 6871 if (bva.va_seq && iva.va_seq && 6872 bva.va_seq == iva.va_seq) 6873 cinfo->atomic = TRUE; 6874 else 6875 cinfo->atomic = FALSE; 6876 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime); 6877 } else { 6878 /* 6879 * The entry was created, we need to sync the 6880 * directory metadata. 
6881 */ 6882 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 6883 6884 /* 6885 * Get "after" change value, if it fails, simply return the 6886 * before value. 6887 */ 6888 ava.va_mask = AT_CTIME|AT_SEQ; 6889 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) { 6890 ava.va_ctime = bva.va_ctime; 6891 ava.va_seq = 0; 6892 } 6893 6894 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime); 6895 6896 /* 6897 * The cinfo->atomic = TRUE only if we have 6898 * non-zero va_seq's, and it has incremented by exactly one 6899 * during the create_vnode and it didn't 6900 * change during the VOP_FSYNC. 6901 */ 6902 if (bva.va_seq && iva.va_seq && ava.va_seq && 6903 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq) 6904 cinfo->atomic = TRUE; 6905 else 6906 cinfo->atomic = FALSE; 6907 } 6908 6909 /* Check for mandatory locking and that the size gets set. */ 6910 cva.va_mask = AT_MODE; 6911 if (setsize) 6912 cva.va_mask |= AT_SIZE; 6913 6914 /* Assume the worst */ 6915 cs->mandlock = TRUE; 6916 6917 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) { 6918 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode); 6919 6920 /* 6921 * Truncate the file if necessary; this would be 6922 * the case for create over an existing file. 6923 */ 6924 6925 if (trunc) { 6926 int in_crit = 0; 6927 rfs4_file_t *fp; 6928 nfs4_srv_t *nsrv4; 6929 bool_t create = FALSE; 6930 6931 /* 6932 * We are writing over an existing file. 6933 * Check to see if we need to recall a delegation. 
6934 */ 6935 nsrv4 = nfs4_get_srv(); 6936 rfs4_hold_deleg_policy(nsrv4); 6937 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) { 6938 if (rfs4_check_delegated_byfp(FWRITE, fp, 6939 (reqsize == 0), FALSE, FALSE, &clientid)) { 6940 rfs4_file_rele(fp); 6941 rfs4_rele_deleg_policy(nsrv4); 6942 VN_RELE(vp); 6943 *attrset = 0; 6944 return (NFS4ERR_DELAY); 6945 } 6946 rfs4_file_rele(fp); 6947 } 6948 rfs4_rele_deleg_policy(nsrv4); 6949 6950 if (nbl_need_check(vp)) { 6951 in_crit = 1; 6952 6953 ASSERT(reqsize == 0); 6954 6955 nbl_start_crit(vp, RW_READER); 6956 if (nbl_conflict(vp, NBL_WRITE, 0, 6957 cva.va_size, 0, NULL)) { 6958 in_crit = 0; 6959 nbl_end_crit(vp); 6960 VN_RELE(vp); 6961 *attrset = 0; 6962 return (NFS4ERR_ACCESS); 6963 } 6964 } 6965 ct.cc_sysid = 0; 6966 ct.cc_pid = 0; 6967 ct.cc_caller_id = nfs4_srv_caller_id; 6968 ct.cc_flags = CC_DONTBLOCK; 6969 6970 cva.va_mask = AT_SIZE; 6971 cva.va_size = reqsize; 6972 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct); 6973 if (in_crit) 6974 nbl_end_crit(vp); 6975 } 6976 } 6977 6978 error = makefh4(&cs->fh, vp, cs->exi); 6979 6980 /* 6981 * Force modified data and metadata out to stable storage. 
6982 */ 6983 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL); 6984 6985 if (error) { 6986 VN_RELE(vp); 6987 *attrset = 0; 6988 return (puterrno4(error)); 6989 } 6990 6991 /* if parent dir is attrdir, set namedattr fh flag */ 6992 if (dvp->v_flag & V_XATTRDIR) 6993 set_fh4_flag(&cs->fh, FH4_NAMEDATTR); 6994 6995 if (cs->vp) 6996 VN_RELE(cs->vp); 6997 6998 cs->vp = vp; 6999 7000 /* 7001 * if we did not create the file, we will need to check 7002 * the access bits on the file 7003 */ 7004 7005 if (!created) { 7006 if (setsize) 7007 args->share_access |= OPEN4_SHARE_ACCESS_WRITE; 7008 status = check_open_access(args->share_access, cs, req); 7009 if (status != NFS4_OK) 7010 *attrset = 0; 7011 } 7012 return (status); 7013 } 7014 7015 static void 7016 close_expired_state(rfs4_entry_t u_entry) 7017 { 7018 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 7019 7020 if (sp->rs_closed) 7021 return; 7022 7023 /* not expired ? */ 7024 if (gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access 7025 <= rfs4_lease_time) 7026 return; 7027 7028 rfs4_state_close(sp, TRUE, TRUE, CRED()); 7029 rfs4_dbe_invalidate(sp->rs_dbe); 7030 } 7031 7032 /*ARGSUSED*/ 7033 static void 7034 rfs4_do_open(struct compound_state *cs, struct svc_req *req, 7035 rfs4_openowner_t *oo, delegreq_t deleg, 7036 uint32_t access, uint32_t deny, 7037 OPEN4res *resp, int deleg_cur) 7038 { 7039 /* XXX Currently not using req */ 7040 rfs4_state_t *sp; 7041 rfs4_file_t *fp; 7042 bool_t screate = TRUE; 7043 bool_t fcreate = TRUE; 7044 uint32_t open_a, share_a; 7045 uint32_t open_d, share_d; 7046 rfs4_deleg_state_t *dsp; 7047 sysid_t sysid; 7048 nfsstat4 status; 7049 caller_context_t ct; 7050 int fflags = 0; 7051 int recall = 0; 7052 int err; 7053 int first_open; 7054 int tries = 0; 7055 7056 /* get the file struct and hold a lock on it during initial open */ 7057 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate); 7058 if (fp == NULL) { 7059 resp->status = NFS4ERR_RESOURCE; 7060 DTRACE_PROBE1(nfss__e__do__open1, 
nfsstat4, resp->status); 7061 return; 7062 } 7063 7064 sp = rfs4_findstate_by_owner_file(oo, fp, &screate); 7065 if (sp == NULL) { 7066 resp->status = NFS4ERR_RESOURCE; 7067 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status); 7068 /* No need to keep any reference */ 7069 rw_exit(&fp->rf_file_rwlock); 7070 rfs4_file_rele(fp); 7071 return; 7072 } 7073 7074 /* try to get the sysid before continuing */ 7075 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) { 7076 resp->status = status; 7077 rfs4_file_rele(fp); 7078 /* Not a fully formed open; "close" it */ 7079 if (screate == TRUE) 7080 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7081 rfs4_state_rele(sp); 7082 return; 7083 } 7084 7085 /* Calculate the fflags for this OPEN. */ 7086 if (access & OPEN4_SHARE_ACCESS_READ) 7087 fflags |= FREAD; 7088 if (access & OPEN4_SHARE_ACCESS_WRITE) 7089 fflags |= FWRITE; 7090 7091 again: 7092 rfs4_dbe_lock(sp->rs_dbe); 7093 7094 /* 7095 * Calculate the new deny and access mode that this open is adding to 7096 * the file for this open owner; 7097 */ 7098 open_d = (deny & ~sp->rs_open_deny); 7099 open_a = (access & ~sp->rs_open_access); 7100 7101 /* 7102 * Calculate the new share access and share deny modes that this open 7103 * is adding to the file for this open owner; 7104 */ 7105 share_a = (access & ~sp->rs_share_access); 7106 share_d = (deny & ~sp->rs_share_deny); 7107 7108 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0; 7109 7110 /* 7111 * Check to see the client has already sent an open for this 7112 * open owner on this file with the same share/deny modes. 7113 * If so, we don't need to check for a conflict and we don't 7114 * need to add another shrlock. If not, then we need to 7115 * check for conflicts in deny and access before checking for 7116 * conflicts in delegation. We don't want to recall a 7117 * delegation based on an open that will eventually fail based 7118 * on shares modes. 
7119 */ 7120 7121 if (share_a || share_d) { 7122 if ((err = rfs4_share(sp, access, deny)) != 0) { 7123 rfs4_dbe_unlock(sp->rs_dbe); 7124 if (err == NFS4ERR_SHARE_DENIED && ++tries < 2) { 7125 /* 7126 * Cleanup recently expired (not yet cleaned by 7127 * reaper thread) and re-try. 7128 */ 7129 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 7130 7131 rfs4_dbsearch_cb(nsrv4->rfs4_state_file_idx, 7132 sp->rs_finfo, rfs4_lookup_exp_state_max, 7133 close_expired_state); 7134 goto again; 7135 } 7136 7137 resp->status = err; 7138 7139 rfs4_file_rele(fp); 7140 /* Not a fully formed open; "close" it */ 7141 if (screate == TRUE) 7142 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7143 rfs4_state_rele(sp); 7144 return; 7145 } 7146 } 7147 7148 rfs4_dbe_lock(fp->rf_dbe); 7149 7150 /* 7151 * Check to see if this file is delegated and if so, if a 7152 * recall needs to be done. 7153 */ 7154 if (rfs4_check_recall(sp, access)) { 7155 rfs4_dbe_unlock(fp->rf_dbe); 7156 rfs4_dbe_unlock(sp->rs_dbe); 7157 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client); 7158 delay(NFS4_DELEGATION_CONFLICT_DELAY); 7159 rfs4_dbe_lock(sp->rs_dbe); 7160 7161 /* if state closed while lock was dropped */ 7162 if (sp->rs_closed) { 7163 if (share_a || share_d) 7164 (void) rfs4_unshare(sp); 7165 rfs4_dbe_unlock(sp->rs_dbe); 7166 rfs4_file_rele(fp); 7167 /* Not a fully formed open; "close" it */ 7168 if (screate == TRUE) 7169 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7170 rfs4_state_rele(sp); 7171 resp->status = NFS4ERR_OLD_STATEID; 7172 return; 7173 } 7174 7175 rfs4_dbe_lock(fp->rf_dbe); 7176 /* Let's see if the delegation was returned */ 7177 if (rfs4_check_recall(sp, access)) { 7178 rfs4_dbe_unlock(fp->rf_dbe); 7179 if (share_a || share_d) 7180 (void) rfs4_unshare(sp); 7181 rfs4_dbe_unlock(sp->rs_dbe); 7182 rfs4_file_rele(fp); 7183 rfs4_update_lease(sp->rs_owner->ro_client); 7184 7185 /* Not a fully formed open; "close" it */ 7186 if (screate == TRUE) 7187 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7188 
rfs4_state_rele(sp); 7189 resp->status = NFS4ERR_DELAY; 7190 return; 7191 } 7192 } 7193 /* 7194 * the share check passed and any delegation conflict has been 7195 * taken care of, now call vop_open. 7196 * if this is the first open then call vop_open with fflags. 7197 * if not, call vn_open_upgrade with just the upgrade flags. 7198 * 7199 * if the file has been opened already, it will have the current 7200 * access mode in the state struct. if it has no share access, then 7201 * this is a new open. 7202 * 7203 * However, if this is open with CLAIM_DLEGATE_CUR, then don't 7204 * call VOP_OPEN(), just do the open upgrade. 7205 */ 7206 if (first_open && !deleg_cur) { 7207 ct.cc_sysid = sysid; 7208 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe); 7209 ct.cc_caller_id = nfs4_srv_caller_id; 7210 ct.cc_flags = CC_DONTBLOCK; 7211 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct); 7212 if (err) { 7213 rfs4_dbe_unlock(fp->rf_dbe); 7214 if (share_a || share_d) 7215 (void) rfs4_unshare(sp); 7216 rfs4_dbe_unlock(sp->rs_dbe); 7217 rfs4_file_rele(fp); 7218 7219 /* Not a fully formed open; "close" it */ 7220 if (screate == TRUE) 7221 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7222 rfs4_state_rele(sp); 7223 /* check if a monitor detected a delegation conflict */ 7224 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 7225 resp->status = NFS4ERR_DELAY; 7226 else 7227 resp->status = NFS4ERR_SERVERFAULT; 7228 return; 7229 } 7230 } else { /* open upgrade */ 7231 /* 7232 * calculate the fflags for the new mode that is being added 7233 * by this upgrade. 
7234 */ 7235 fflags = 0; 7236 if (open_a & OPEN4_SHARE_ACCESS_READ) 7237 fflags |= FREAD; 7238 if (open_a & OPEN4_SHARE_ACCESS_WRITE) 7239 fflags |= FWRITE; 7240 vn_open_upgrade(cs->vp, fflags); 7241 } 7242 sp->rs_open_access |= access; 7243 sp->rs_open_deny |= deny; 7244 7245 if (open_d & OPEN4_SHARE_DENY_READ) 7246 fp->rf_deny_read++; 7247 if (open_d & OPEN4_SHARE_DENY_WRITE) 7248 fp->rf_deny_write++; 7249 fp->rf_share_deny |= deny; 7250 7251 if (open_a & OPEN4_SHARE_ACCESS_READ) 7252 fp->rf_access_read++; 7253 if (open_a & OPEN4_SHARE_ACCESS_WRITE) 7254 fp->rf_access_write++; 7255 fp->rf_share_access |= access; 7256 7257 /* 7258 * Check for delegation here. if the deleg argument is not 7259 * DELEG_ANY, then this is a reclaim from a client and 7260 * we must honor the delegation requested. If necessary we can 7261 * set the recall flag. 7262 */ 7263 7264 dsp = rfs4_grant_delegation(deleg, sp, &recall); 7265 7266 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE); 7267 7268 next_stateid(&sp->rs_stateid); 7269 7270 resp->stateid = sp->rs_stateid.stateid; 7271 7272 rfs4_dbe_unlock(fp->rf_dbe); 7273 rfs4_dbe_unlock(sp->rs_dbe); 7274 7275 if (dsp) { 7276 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall); 7277 rfs4_deleg_state_rele(dsp); 7278 } 7279 7280 rfs4_file_rele(fp); 7281 rfs4_state_rele(sp); 7282 7283 resp->status = NFS4_OK; 7284 } 7285 7286 /*ARGSUSED*/ 7287 static void 7288 rfs4_do_openfh(struct compound_state *cs, struct svc_req *req, OPEN4args *args, 7289 rfs4_openowner_t *oo, OPEN4res *resp) 7290 { 7291 /* cs->vp and cs->fh have been updated by putfh. 
*/ 7292 rfs4_do_open(cs, req, oo, DELEG_ANY, 7293 (args->share_access & 0xff), args->share_deny, resp, 0); 7294 } 7295 7296 /*ARGSUSED*/ 7297 static void 7298 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req, 7299 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 7300 { 7301 change_info4 *cinfo = &resp->cinfo; 7302 bitmap4 *attrset = &resp->attrset; 7303 7304 if (args->opentype == OPEN4_NOCREATE) 7305 resp->status = rfs4_lookupfile(&args->claim.open_claim4_u.file, 7306 req, cs, args->share_access, cinfo); 7307 else { 7308 /* inhibit delegation grants during exclusive create */ 7309 7310 if (args->mode == EXCLUSIVE4) 7311 rfs4_disable_delegation(); 7312 7313 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset, 7314 oo->ro_client->rc_clientid); 7315 } 7316 7317 if (resp->status == NFS4_OK) { 7318 7319 /* cs->vp cs->fh now reference the desired file */ 7320 7321 rfs4_do_open(cs, req, oo, 7322 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY, 7323 args->share_access, args->share_deny, resp, 0); 7324 7325 /* 7326 * If rfs4_createfile set attrset, we must 7327 * clear this attrset before the response is copied. 
7328 */ 7329 if (resp->status != NFS4_OK && resp->attrset) { 7330 resp->attrset = 0; 7331 } 7332 } 7333 else 7334 *cs->statusp = resp->status; 7335 7336 if (args->mode == EXCLUSIVE4) 7337 rfs4_enable_delegation(); 7338 } 7339 7340 /*ARGSUSED*/ 7341 static void 7342 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req, 7343 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 7344 { 7345 change_info4 *cinfo = &resp->cinfo; 7346 vattr_t va; 7347 vtype_t v_type = cs->vp->v_type; 7348 int error = 0; 7349 7350 /* Verify that we have a regular file */ 7351 if (v_type != VREG) { 7352 if (v_type == VDIR) 7353 resp->status = NFS4ERR_ISDIR; 7354 else if (v_type == VLNK) 7355 resp->status = NFS4ERR_SYMLINK; 7356 else 7357 resp->status = NFS4ERR_INVAL; 7358 return; 7359 } 7360 7361 va.va_mask = AT_MODE|AT_UID; 7362 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL); 7363 if (error) { 7364 resp->status = puterrno4(error); 7365 return; 7366 } 7367 7368 cs->mandlock = MANDLOCK(cs->vp, va.va_mode); 7369 7370 /* 7371 * Check if we have access to the file, Note the the file 7372 * could have originally been open UNCHECKED or GUARDED 7373 * with mode bits that will now fail, but there is nothing 7374 * we can really do about that except in the case that the 7375 * owner of the file is the one requesting the open. 
7376 */ 7377 if (crgetuid(cs->cr) != va.va_uid) { 7378 resp->status = check_open_access(args->share_access, cs, req); 7379 if (resp->status != NFS4_OK) { 7380 return; 7381 } 7382 } 7383 7384 /* 7385 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero 7386 */ 7387 cinfo->before = 0; 7388 cinfo->after = 0; 7389 cinfo->atomic = FALSE; 7390 7391 rfs4_do_open(cs, req, oo, 7392 NFS4_DELEG4TYPE2REQTYPE(args->claim.open_claim4_u.delegate_type), 7393 args->share_access, args->share_deny, resp, 0); 7394 } 7395 7396 static void 7397 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req, 7398 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 7399 { 7400 int error; 7401 nfsstat4 status; 7402 stateid4 stateid = 7403 args->claim.open_claim4_u.delegate_cur_info.delegate_stateid; 7404 rfs4_deleg_state_t *dsp; 7405 7406 /* 7407 * Find the state info from the stateid and confirm that the 7408 * file is delegated. If the state openowner is the same as 7409 * the supplied openowner we're done. If not, get the file 7410 * info from the found state info. Use that file info to 7411 * create the state for this lock owner. Note solaris doen't 7412 * really need the pathname to find the file. We may want to 7413 * lookup the pathname and make sure that the vp exist and 7414 * matches the vp in the file structure. However it is 7415 * possible that the pathname nolonger exists (local process 7416 * unlinks the file), so this may not be that useful. 7417 */ 7418 7419 status = rfs4_get_deleg_state(&stateid, &dsp); 7420 if (status != NFS4_OK) { 7421 resp->status = status; 7422 return; 7423 } 7424 7425 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE); 7426 7427 /* 7428 * New lock owner, create state. 
Since this was probably called 7429 * in response to a CB_RECALL we set deleg to DELEG_NONE 7430 */ 7431 7432 ASSERT(cs->vp != NULL); 7433 VN_RELE(cs->vp); 7434 VN_HOLD(dsp->rds_finfo->rf_vp); 7435 cs->vp = dsp->rds_finfo->rf_vp; 7436 7437 error = makefh4(&cs->fh, cs->vp, cs->exi); 7438 if (error != 0) { 7439 rfs4_deleg_state_rele(dsp); 7440 *cs->statusp = resp->status = puterrno4(error); 7441 return; 7442 } 7443 7444 /* Mark progress for delegation returns */ 7445 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec(); 7446 rfs4_deleg_state_rele(dsp); 7447 rfs4_do_open(cs, req, oo, DELEG_NONE, 7448 args->share_access, args->share_deny, resp, 1); 7449 } 7450 7451 /*ARGSUSED*/ 7452 static void 7453 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req, 7454 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 7455 { 7456 /* 7457 * Lookup the pathname, it must already exist since this file 7458 * was delegated. 7459 * 7460 * Find the file and state info for this vp and open owner pair. 7461 * check that they are in fact delegated. 7462 * check that the state access and deny modes are the same. 7463 * 7464 * Return the delgation possibly seting the recall flag. 
7465 */ 7466 rfs4_file_t *fp; 7467 rfs4_state_t *sp; 7468 bool_t create = FALSE; 7469 bool_t dcreate = FALSE; 7470 rfs4_deleg_state_t *dsp; 7471 nfsace4 *ace; 7472 7473 /* Note we ignore oflags */ 7474 resp->status = rfs4_lookupfile( 7475 &args->claim.open_claim4_u.file_delegate_prev, 7476 req, cs, args->share_access, &resp->cinfo); 7477 7478 if (resp->status != NFS4_OK) { 7479 return; 7480 } 7481 7482 /* get the file struct and hold a lock on it during initial open */ 7483 fp = rfs4_findfile_withlock(cs->vp, NULL, &create); 7484 if (fp == NULL) { 7485 resp->status = NFS4ERR_RESOURCE; 7486 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status); 7487 return; 7488 } 7489 7490 sp = rfs4_findstate_by_owner_file(oo, fp, &create); 7491 if (sp == NULL) { 7492 resp->status = NFS4ERR_SERVERFAULT; 7493 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status); 7494 rw_exit(&fp->rf_file_rwlock); 7495 rfs4_file_rele(fp); 7496 return; 7497 } 7498 7499 rfs4_dbe_lock(sp->rs_dbe); 7500 rfs4_dbe_lock(fp->rf_dbe); 7501 if (args->share_access != sp->rs_share_access || 7502 args->share_deny != sp->rs_share_deny || 7503 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 7504 NFS4_DEBUG(rfs4_debug, 7505 (CE_NOTE, "rfs4_do_opendelprev: state mixup")); 7506 rfs4_dbe_unlock(fp->rf_dbe); 7507 rfs4_dbe_unlock(sp->rs_dbe); 7508 rfs4_file_rele(fp); 7509 rfs4_state_rele(sp); 7510 resp->status = NFS4ERR_SERVERFAULT; 7511 return; 7512 } 7513 rfs4_dbe_unlock(fp->rf_dbe); 7514 rfs4_dbe_unlock(sp->rs_dbe); 7515 7516 dsp = rfs4_finddeleg(sp, &dcreate); 7517 if (dsp == NULL) { 7518 rfs4_state_rele(sp); 7519 rfs4_file_rele(fp); 7520 resp->status = NFS4ERR_SERVERFAULT; 7521 return; 7522 } 7523 7524 next_stateid(&sp->rs_stateid); 7525 7526 resp->stateid = sp->rs_stateid.stateid; 7527 7528 resp->delegation.delegation_type = dsp->rds_dtype; 7529 7530 if (dsp->rds_dtype == OPEN_DELEGATE_READ) { 7531 open_read_delegation4 *rv = 7532 &resp->delegation.open_delegation4_u.read; 7533 7534 
rv->stateid = dsp->rds_delegid.stateid; 7535 rv->recall = FALSE; /* no policy in place to set to TRUE */ 7536 ace = &rv->permissions; 7537 } else { 7538 open_write_delegation4 *rv = 7539 &resp->delegation.open_delegation4_u.write; 7540 7541 rv->stateid = dsp->rds_delegid.stateid; 7542 rv->recall = FALSE; /* no policy in place to set to TRUE */ 7543 ace = &rv->permissions; 7544 rv->space_limit.limitby = NFS_LIMIT_SIZE; 7545 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX; 7546 } 7547 7548 /* XXX For now */ 7549 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE; 7550 ace->flag = 0; 7551 ace->access_mask = 0; 7552 ace->who.utf8string_len = 0; 7553 ace->who.utf8string_val = 0; 7554 7555 rfs4_deleg_state_rele(dsp); 7556 rfs4_state_rele(sp); 7557 rfs4_file_rele(fp); 7558 } 7559 7560 typedef enum { 7561 NFS4_CHKSEQ_OKAY = 0, 7562 NFS4_CHKSEQ_REPLAY = 1, 7563 NFS4_CHKSEQ_BAD = 2 7564 } rfs4_chkseq_t; 7565 7566 /* 7567 * Generic function for sequence number checks. 7568 */ 7569 static rfs4_chkseq_t 7570 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop, 7571 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres) 7572 { 7573 /* Same sequence ids and matching operations? 
*/ 7574 if (seqid == rqst_seq && resop->resop == lastop->resop) { 7575 if (copyres == TRUE) { 7576 rfs4_free_reply(resop); 7577 rfs4_copy_reply(resop, lastop); 7578 } 7579 NFS4_DEBUG(rfs4_debug, (CE_NOTE, 7580 "Replayed SEQID %d\n", seqid)); 7581 return (NFS4_CHKSEQ_REPLAY); 7582 } 7583 7584 /* If the incoming sequence is not the next expected then it is bad */ 7585 if (rqst_seq != seqid + 1) { 7586 if (rqst_seq == seqid) { 7587 NFS4_DEBUG(rfs4_debug, 7588 (CE_NOTE, "BAD SEQID: Replayed sequence id " 7589 "but last op was %d current op is %d\n", 7590 lastop->resop, resop->resop)); 7591 return (NFS4_CHKSEQ_BAD); 7592 } 7593 NFS4_DEBUG(rfs4_debug, 7594 (CE_NOTE, "BAD SEQID: got %u expecting %u\n", 7595 rqst_seq, seqid)); 7596 return (NFS4_CHKSEQ_BAD); 7597 } 7598 7599 /* Everything okay -- next expected */ 7600 return (NFS4_CHKSEQ_OKAY); 7601 } 7602 7603 7604 static rfs4_chkseq_t 7605 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop, 7606 const compound_state_t *cs) 7607 { 7608 rfs4_chkseq_t rc; 7609 7610 if (rfs4_has_session(cs)) 7611 return (NFS4_CHKSEQ_OKAY); 7612 7613 rfs4_dbe_lock(op->ro_dbe); 7614 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop, 7615 TRUE); 7616 rfs4_dbe_unlock(op->ro_dbe); 7617 7618 if (rc == NFS4_CHKSEQ_OKAY) 7619 rfs4_update_lease(op->ro_client); 7620 7621 return (rc); 7622 } 7623 7624 static rfs4_chkseq_t 7625 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop) 7626 { 7627 rfs4_chkseq_t rc; 7628 7629 rfs4_dbe_lock(op->ro_dbe); 7630 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, 7631 olo_seqid, resop, FALSE); 7632 rfs4_dbe_unlock(op->ro_dbe); 7633 7634 return (rc); 7635 } 7636 7637 static rfs4_chkseq_t 7638 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop) 7639 { 7640 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY; 7641 7642 rfs4_dbe_lock(lsp->rls_dbe); 7643 if (!lsp->rls_skip_seqid_check) 7644 rc = rfs4_check_seqid(lsp->rls_seqid, 
&lsp->rls_reply, seqid, 7645 resop, TRUE); 7646 rfs4_dbe_unlock(lsp->rls_dbe); 7647 7648 return (rc); 7649 } 7650 7651 static void 7652 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop, 7653 struct svc_req *req, struct compound_state *cs) 7654 { 7655 OPEN4args *args = &argop->nfs_argop4_u.opopen; 7656 OPEN4res *resp = &resop->nfs_resop4_u.opopen; 7657 open_owner4 *owner = &args->owner; 7658 open_claim_type4 claim = args->claim.claim; 7659 rfs4_client_t *cp; 7660 rfs4_openowner_t *oo; 7661 bool_t create; 7662 bool_t replay = FALSE; 7663 int can_reclaim; 7664 7665 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs, 7666 OPEN4args *, args); 7667 7668 if (cs->vp == NULL) { 7669 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 7670 goto end; 7671 } 7672 7673 /* rfc5661 section 18.16.3 */ 7674 if (rfs4_has_session(cs)) 7675 owner->clientid = cs->client->rc_clientid; 7676 7677 /* 7678 * Need to check clientid and lease expiration first based on 7679 * error ordering and incrementing sequence id. 7680 */ 7681 cp = rfs4_findclient_by_id(owner->clientid, FALSE); 7682 if (cp == NULL) { 7683 *cs->statusp = resp->status = 7684 rfs4_check_clientid(&owner->clientid, 0); 7685 goto end; 7686 } 7687 7688 if (rfs4_lease_expired(cp)) { 7689 rfs4_client_close(cp); 7690 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 7691 goto end; 7692 } 7693 can_reclaim = cp->rc_can_reclaim; 7694 7695 /* 7696 * RFC8881 18.51.3 7697 * If non-reclaim locking operations are done before the 7698 * RECLAIM_COMPLETE, error NFS4ERR_GRACE will be returned 7699 */ 7700 if (rfs4_has_session(cs) && !cp->rc_reclaim_completed && 7701 claim != CLAIM_PREVIOUS) { 7702 rfs4_client_rele(cp); 7703 *cs->statusp = resp->status = NFS4ERR_GRACE; 7704 goto end; 7705 } 7706 7707 /* 7708 * Find the open_owner for use from this point forward. Take 7709 * care in updating the sequence id based on the type of error 7710 * being returned. 
7711 */ 7712 retry: 7713 create = TRUE; 7714 oo = rfs4_findopenowner(owner, &create, args->seqid); 7715 if (oo == NULL) { 7716 *cs->statusp = resp->status = NFS4ERR_RESOURCE; 7717 rfs4_client_rele(cp); 7718 goto end; 7719 } 7720 7721 /* 7722 * OPEN_CONFIRM must not be implemented in v4.1 7723 */ 7724 if (rfs4_has_session(cs)) { 7725 oo->ro_need_confirm = FALSE; 7726 } 7727 7728 /* Hold off access to the sequence space while the open is done */ 7729 /* Workaround to avoid deadlock */ 7730 if (!rfs4_has_session(cs)) 7731 rfs4_sw_enter(&oo->ro_sw); 7732 7733 /* 7734 * If the open_owner existed before at the server, then check 7735 * the sequence id. 7736 */ 7737 if (!create && !oo->ro_postpone_confirm) { 7738 switch (rfs4_check_open_seqid(args->seqid, oo, resop, cs)) { 7739 case NFS4_CHKSEQ_BAD: 7740 ASSERT(!rfs4_has_session(cs)); 7741 if ((args->seqid > oo->ro_open_seqid) && 7742 oo->ro_need_confirm) { 7743 rfs4_free_opens(oo, TRUE, FALSE); 7744 rfs4_sw_exit(&oo->ro_sw); 7745 rfs4_openowner_rele(oo); 7746 goto retry; 7747 } 7748 resp->status = NFS4ERR_BAD_SEQID; 7749 goto out; 7750 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */ 7751 replay = TRUE; 7752 goto out; 7753 default: 7754 break; 7755 } 7756 7757 /* 7758 * Sequence was ok and open owner exists 7759 * check to see if we have yet to see an 7760 * open_confirm. 7761 */ 7762 if (oo->ro_need_confirm) { 7763 rfs4_free_opens(oo, TRUE, FALSE); 7764 ASSERT(!rfs4_has_session(cs)); 7765 rfs4_sw_exit(&oo->ro_sw); 7766 rfs4_openowner_rele(oo); 7767 goto retry; 7768 } 7769 } 7770 /* Grace only applies to regular-type OPENs */ 7771 if (rfs4_clnt_in_grace(cp) && 7772 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR || 7773 claim == CLAIM_FH)) { 7774 *cs->statusp = resp->status = NFS4ERR_GRACE; 7775 goto out; 7776 } 7777 7778 /* 7779 * If previous state at the server existed then can_reclaim 7780 * will be set. If not reply NFS4ERR_NO_GRACE to the 7781 * client. 
7782 */ 7783 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) { 7784 *cs->statusp = resp->status = NFS4ERR_NO_GRACE; 7785 goto out; 7786 } 7787 7788 7789 /* 7790 * Reject the open if the client has missed the grace period 7791 */ 7792 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) { 7793 *cs->statusp = resp->status = NFS4ERR_NO_GRACE; 7794 goto out; 7795 } 7796 7797 /* Couple of up-front bookkeeping items */ 7798 if (oo->ro_need_confirm) { 7799 /* 7800 * If this is a reclaim OPEN then we should not ask 7801 * for a confirmation of the open_owner per the 7802 * protocol specification. 7803 */ 7804 if (claim == CLAIM_PREVIOUS) 7805 oo->ro_need_confirm = FALSE; 7806 else 7807 resp->rflags |= OPEN4_RESULT_CONFIRM; 7808 } 7809 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX; 7810 7811 /* 7812 * If there is an unshared filesystem mounted on this vnode, 7813 * do not allow to open/create in this directory. 7814 */ 7815 if (vn_ismntpt(cs->vp)) { 7816 *cs->statusp = resp->status = NFS4ERR_ACCESS; 7817 goto out; 7818 } 7819 7820 /* 7821 * access must READ, WRITE, or BOTH. No access is invalid. 7822 * deny can be READ, WRITE, BOTH, or NONE. 7823 * bits not defined for access/deny are invalid. 7824 */ 7825 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) || 7826 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) || 7827 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) { 7828 *cs->statusp = resp->status = NFS4ERR_INVAL; 7829 goto out; 7830 } 7831 7832 7833 /* 7834 * make sure attrset is zero before response is built. 
7835 */ 7836 resp->attrset = 0; 7837 7838 switch (claim) { 7839 case CLAIM_NULL: 7840 rfs4_do_opennull(cs, req, args, oo, resp); 7841 break; 7842 case CLAIM_PREVIOUS: 7843 rfs4_do_openprev(cs, req, args, oo, resp); 7844 break; 7845 case CLAIM_DELEGATE_CUR: 7846 rfs4_do_opendelcur(cs, req, args, oo, resp); 7847 break; 7848 case CLAIM_DELEGATE_PREV: 7849 rfs4_do_opendelprev(cs, req, args, oo, resp); 7850 break; 7851 case CLAIM_FH: 7852 rfs4_do_openfh(cs, req, args, oo, resp); 7853 break; 7854 default: 7855 resp->status = NFS4ERR_INVAL; 7856 break; 7857 } 7858 7859 out: 7860 rfs4_client_rele(cp); 7861 7862 /* Catch sequence id handling here to make it a little easier */ 7863 switch (resp->status) { 7864 case NFS4ERR_BADXDR: 7865 case NFS4ERR_BAD_SEQID: 7866 case NFS4ERR_BAD_STATEID: 7867 case NFS4ERR_NOFILEHANDLE: 7868 case NFS4ERR_RESOURCE: 7869 case NFS4ERR_STALE_CLIENTID: 7870 case NFS4ERR_STALE_STATEID: 7871 /* 7872 * The protocol states that if any of these errors are 7873 * being returned, the sequence id should not be 7874 * incremented. Any other return requires an 7875 * increment. 7876 */ 7877 break; 7878 default: 7879 /* Always update the lease in this case */ 7880 rfs4_update_lease(oo->ro_client); 7881 7882 /* Regular response - copy the result */ 7883 if (!replay) 7884 rfs4_update_open_resp(oo, resop, &cs->fh); 7885 7886 /* 7887 * REPLAY case: Only if the previous response was OK 7888 * do we copy the filehandle. If not OK, no 7889 * filehandle to copy. 7890 */ 7891 if (replay == TRUE && 7892 resp->status == NFS4_OK && 7893 oo->ro_reply_fh.nfs_fh4_val) { 7894 /* 7895 * If this is a replay, we must restore the 7896 * current filehandle/vp to that of what was 7897 * returned originally. Try our best to do 7898 * it. 
7899 */ 7900 nfs_fh4_fmt_t *fh_fmtp = 7901 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val; 7902 7903 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, 7904 (fid_t *)&fh_fmtp->fh4_xlen, NULL); 7905 7906 if (cs->exi == NULL) { 7907 resp->status = NFS4ERR_STALE; 7908 goto finish; 7909 } 7910 7911 VN_RELE(cs->vp); 7912 7913 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi, 7914 &resp->status); 7915 7916 if (cs->vp == NULL) 7917 goto finish; 7918 7919 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh); 7920 } 7921 7922 /* 7923 * If this was a replay, no need to update the 7924 * sequence id. If the open_owner was not created on 7925 * this pass, then update. The first use of an 7926 * open_owner will not bump the sequence id. 7927 */ 7928 if (replay == FALSE && !create) 7929 rfs4_update_open_sequence(oo); 7930 /* 7931 * If the client is receiving an error and the 7932 * open_owner needs to be confirmed, there is no way 7933 * to notify the client of this fact ignoring the fact 7934 * that the server has no method of returning a 7935 * stateid to confirm. Therefore, the server needs to 7936 * mark this open_owner in a way as to avoid the 7937 * sequence id checking the next time the client uses 7938 * this open_owner. 7939 */ 7940 if (resp->status != NFS4_OK && oo->ro_need_confirm) 7941 oo->ro_postpone_confirm = TRUE; 7942 /* 7943 * If OK response then clear the postpone flag and 7944 * reset the sequence id to keep in sync with the 7945 * client. 
7946 */ 7947 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) { 7948 oo->ro_postpone_confirm = FALSE; 7949 oo->ro_open_seqid = args->seqid; 7950 } 7951 break; 7952 } 7953 7954 finish: 7955 *cs->statusp = resp->status; 7956 7957 if (!rfs4_has_session(cs)) 7958 rfs4_sw_exit(&oo->ro_sw); 7959 rfs4_openowner_rele(oo); 7960 7961 put_stateid4(cs, &resp->stateid); 7962 end: 7963 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs, 7964 OPEN4res *, resp); 7965 } 7966 7967 /*ARGSUSED*/ 7968 void 7969 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop, 7970 struct svc_req *req, struct compound_state *cs) 7971 { 7972 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm; 7973 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm; 7974 rfs4_state_t *sp; 7975 nfsstat4 status; 7976 7977 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs, 7978 OPEN_CONFIRM4args *, args); 7979 7980 ASSERT(!rfs4_has_session(cs)); 7981 7982 if (cs->vp == NULL) { 7983 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 7984 goto out; 7985 } 7986 7987 if (cs->vp->v_type != VREG) { 7988 *cs->statusp = resp->status = 7989 cs->vp->v_type == VDIR ? 
NFS4ERR_ISDIR : NFS4ERR_INVAL; 7990 return; 7991 } 7992 7993 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID); 7994 if (status != NFS4_OK) { 7995 *cs->statusp = resp->status = status; 7996 goto out; 7997 } 7998 7999 /* Ensure specified filehandle matches */ 8000 if (cs->vp != sp->rs_finfo->rf_vp) { 8001 rfs4_state_rele(sp); 8002 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8003 goto out; 8004 } 8005 8006 /* hold off other access to open_owner while we tinker */ 8007 rfs4_sw_enter(&sp->rs_owner->ro_sw); 8008 8009 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) { 8010 case NFS4_CHECK_STATEID_OKAY: 8011 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8012 resop, cs) != 0) { 8013 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8014 break; 8015 } 8016 /* 8017 * If it is the appropriate stateid and determined to 8018 * be "OKAY" then this means that the stateid does not 8019 * need to be confirmed and the client is in error for 8020 * sending an OPEN_CONFIRM. 8021 */ 8022 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8023 break; 8024 case NFS4_CHECK_STATEID_OLD: 8025 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8026 break; 8027 case NFS4_CHECK_STATEID_BAD: 8028 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8029 break; 8030 case NFS4_CHECK_STATEID_EXPIRED: 8031 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 8032 break; 8033 case NFS4_CHECK_STATEID_CLOSED: 8034 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8035 break; 8036 case NFS4_CHECK_STATEID_REPLAY: 8037 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8038 resop, cs)) { 8039 case NFS4_CHKSEQ_OKAY: 8040 /* 8041 * This is replayed stateid; if seqid matches 8042 * next expected, then client is using wrong seqid. 
8043 */ 8044 /* fall through */ 8045 case NFS4_CHKSEQ_BAD: 8046 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8047 break; 8048 case NFS4_CHKSEQ_REPLAY: 8049 /* 8050 * Note this case is the duplicate case so 8051 * resp->status is already set. 8052 */ 8053 *cs->statusp = resp->status; 8054 rfs4_update_lease(sp->rs_owner->ro_client); 8055 break; 8056 } 8057 break; 8058 case NFS4_CHECK_STATEID_UNCONFIRMED: 8059 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8060 resop, cs) != NFS4_CHKSEQ_OKAY) { 8061 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8062 break; 8063 } 8064 *cs->statusp = resp->status = NFS4_OK; 8065 8066 next_stateid(&sp->rs_stateid); 8067 resp->open_stateid = sp->rs_stateid.stateid; 8068 sp->rs_owner->ro_need_confirm = FALSE; 8069 rfs4_update_lease(sp->rs_owner->ro_client); 8070 rfs4_update_open_sequence(sp->rs_owner); 8071 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 8072 break; 8073 default: 8074 ASSERT(FALSE); 8075 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 8076 break; 8077 } 8078 rfs4_sw_exit(&sp->rs_owner->ro_sw); 8079 rfs4_state_rele(sp); 8080 8081 out: 8082 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs, 8083 OPEN_CONFIRM4res *, resp); 8084 } 8085 8086 /*ARGSUSED*/ 8087 void 8088 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop, 8089 struct svc_req *req, struct compound_state *cs) 8090 { 8091 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade; 8092 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade; 8093 uint32_t access = args->share_access; 8094 uint32_t deny = args->share_deny; 8095 nfsstat4 status; 8096 rfs4_state_t *sp; 8097 rfs4_file_t *fp; 8098 int fflags = 0; 8099 8100 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs, 8101 OPEN_DOWNGRADE4args *, args); 8102 8103 if (cs->vp == NULL) { 8104 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 8105 goto out; 8106 } 8107 8108 if (cs->vp->v_type != VREG) { 8109 *cs->statusp = resp->status = 
NFS4ERR_INVAL; 8110 return; 8111 } 8112 8113 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID); 8114 if (status != NFS4_OK) { 8115 *cs->statusp = resp->status = status; 8116 goto out; 8117 } 8118 8119 /* Ensure specified filehandle matches */ 8120 if (cs->vp != sp->rs_finfo->rf_vp) { 8121 rfs4_state_rele(sp); 8122 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8123 goto out; 8124 } 8125 8126 /* hold off other access to open_owner while we tinker */ 8127 rfs4_sw_enter(&sp->rs_owner->ro_sw); 8128 8129 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) { 8130 case NFS4_CHECK_STATEID_OKAY: 8131 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8132 resop, cs) != NFS4_CHKSEQ_OKAY) { 8133 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8134 goto end; 8135 } 8136 break; 8137 case NFS4_CHECK_STATEID_OLD: 8138 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8139 goto end; 8140 case NFS4_CHECK_STATEID_BAD: 8141 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8142 goto end; 8143 case NFS4_CHECK_STATEID_EXPIRED: 8144 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 8145 goto end; 8146 case NFS4_CHECK_STATEID_CLOSED: 8147 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8148 goto end; 8149 case NFS4_CHECK_STATEID_UNCONFIRMED: 8150 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8151 goto end; 8152 case NFS4_CHECK_STATEID_REPLAY: 8153 ASSERT(!rfs4_has_session(cs)); 8154 8155 /* Check the sequence id for the open owner */ 8156 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8157 resop, cs)) { 8158 case NFS4_CHKSEQ_OKAY: 8159 /* 8160 * This is replayed stateid; if seqid matches 8161 * next expected, then client is using wrong seqid. 8162 */ 8163 /* fall through */ 8164 case NFS4_CHKSEQ_BAD: 8165 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8166 goto end; 8167 case NFS4_CHKSEQ_REPLAY: 8168 /* 8169 * Note this case is the duplicate case so 8170 * resp->status is already set. 
8171 */ 8172 *cs->statusp = resp->status; 8173 rfs4_update_lease(sp->rs_owner->ro_client); 8174 goto end; 8175 } 8176 break; 8177 default: 8178 ASSERT(FALSE); 8179 break; 8180 } 8181 8182 rfs4_dbe_lock(sp->rs_dbe); 8183 /* 8184 * Check that the new access modes and deny modes are valid. 8185 * Check that no invalid bits are set. 8186 */ 8187 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) || 8188 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) { 8189 *cs->statusp = resp->status = NFS4ERR_INVAL; 8190 rfs4_update_open_sequence(sp->rs_owner); 8191 rfs4_dbe_unlock(sp->rs_dbe); 8192 goto end; 8193 } 8194 8195 /* 8196 * The new modes must be a subset of the current modes and 8197 * the access must specify at least one mode. To test that 8198 * the new mode is a subset of the current modes we bitwise 8199 * AND them together and check that the result equals the new 8200 * mode. For example: 8201 * New mode, access == R and current mode, sp->rs_open_access == RW 8202 * access & sp->rs_open_access == R == access, so the new access mode 8203 * is valid. Consider access == RW, sp->rs_open_access = R 8204 * access & sp->rs_open_access == R != access, so the new access mode 8205 * is invalid. 8206 */ 8207 if ((access & sp->rs_open_access) != access || 8208 (deny & sp->rs_open_deny) != deny || 8209 (access & 8210 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) { 8211 *cs->statusp = resp->status = NFS4ERR_INVAL; 8212 rfs4_update_open_sequence(sp->rs_owner); 8213 rfs4_dbe_unlock(sp->rs_dbe); 8214 goto end; 8215 } 8216 8217 /* 8218 * Release any share locks associated with this stateID. 8219 * Strictly speaking, this violates the spec because the 8220 * spec effectively requires that open downgrade be atomic. 8221 * At present, fs_shrlock does not have this capability. 
8222 */ 8223 (void) rfs4_unshare(sp); 8224 8225 status = rfs4_share(sp, access, deny); 8226 if (status != NFS4_OK) { 8227 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 8228 rfs4_update_open_sequence(sp->rs_owner); 8229 rfs4_dbe_unlock(sp->rs_dbe); 8230 goto end; 8231 } 8232 8233 fp = sp->rs_finfo; 8234 rfs4_dbe_lock(fp->rf_dbe); 8235 8236 /* 8237 * If the current mode has deny read and the new mode 8238 * does not, decrement the number of deny read mode bits 8239 * and if it goes to zero turn off the deny read bit 8240 * on the file. 8241 */ 8242 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) && 8243 (deny & OPEN4_SHARE_DENY_READ) == 0) { 8244 fp->rf_deny_read--; 8245 if (fp->rf_deny_read == 0) 8246 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ; 8247 } 8248 8249 /* 8250 * If the current mode has deny write and the new mode 8251 * does not, decrement the number of deny write mode bits 8252 * and if it goes to zero turn off the deny write bit 8253 * on the file. 8254 */ 8255 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) && 8256 (deny & OPEN4_SHARE_DENY_WRITE) == 0) { 8257 fp->rf_deny_write--; 8258 if (fp->rf_deny_write == 0) 8259 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE; 8260 } 8261 8262 /* 8263 * If the current mode has access read and the new mode 8264 * does not, decrement the number of access read mode bits 8265 * and if it goes to zero turn off the access read bit 8266 * on the file. set fflags to FREAD for the call to 8267 * vn_open_downgrade(). 8268 */ 8269 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) && 8270 (access & OPEN4_SHARE_ACCESS_READ) == 0) { 8271 fp->rf_access_read--; 8272 if (fp->rf_access_read == 0) 8273 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ; 8274 fflags |= FREAD; 8275 } 8276 8277 /* 8278 * If the current mode has access write and the new mode 8279 * does not, decrement the number of access write mode bits 8280 * and if it goes to zero turn off the access write bit 8281 * on the file. 
set fflags to FWRITE for the call to 8282 * vn_open_downgrade(). 8283 */ 8284 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) && 8285 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) { 8286 fp->rf_access_write--; 8287 if (fp->rf_access_write == 0) 8288 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE; 8289 fflags |= FWRITE; 8290 } 8291 8292 /* Check that the file is still accessible */ 8293 ASSERT(fp->rf_share_access); 8294 8295 rfs4_dbe_unlock(fp->rf_dbe); 8296 8297 /* now set the new open access and deny modes */ 8298 sp->rs_open_access = access; 8299 sp->rs_open_deny = deny; 8300 8301 /* 8302 * we successfully downgraded the share lock, now we need to downgrade 8303 * the open. it is possible that the downgrade was only for a deny 8304 * mode and we have nothing else to do. 8305 */ 8306 if ((fflags & (FREAD|FWRITE)) != 0) 8307 vn_open_downgrade(cs->vp, fflags); 8308 8309 /* Update the stateid */ 8310 next_stateid(&sp->rs_stateid); 8311 resp->open_stateid = sp->rs_stateid.stateid; 8312 8313 rfs4_dbe_unlock(sp->rs_dbe); 8314 8315 *cs->statusp = resp->status = NFS4_OK; 8316 /* Update the lease */ 8317 rfs4_update_lease(sp->rs_owner->ro_client); 8318 /* And the sequence */ 8319 rfs4_update_open_sequence(sp->rs_owner); 8320 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 8321 8322 end: 8323 rfs4_sw_exit(&sp->rs_owner->ro_sw); 8324 rfs4_state_rele(sp); 8325 out: 8326 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs, 8327 OPEN_DOWNGRADE4res *, resp); 8328 } 8329 8330 static void * 8331 memstr(const void *s1, const char *s2, size_t n) 8332 { 8333 size_t l = strlen(s2); 8334 char *p = (char *)s1; 8335 8336 while (n >= l) { 8337 if (bcmp(p, s2, l) == 0) 8338 return (p); 8339 p++; 8340 n--; 8341 } 8342 8343 return (NULL); 8344 } 8345 8346 /* 8347 * The logic behind this function is detailed in the NFSv4 RFC in the 8348 * SETCLIENTID operation description under IMPLEMENTATION. 
Refer to 8349 * that section for explicit guidance to server behavior for 8350 * SETCLIENTID. 8351 */ 8352 void 8353 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop, 8354 struct svc_req *req, struct compound_state *cs) 8355 { 8356 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid; 8357 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid; 8358 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed; 8359 rfs4_clntip_t *ci; 8360 bool_t create; 8361 char *addr, *netid; 8362 int len; 8363 8364 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs, 8365 SETCLIENTID4args *, args); 8366 retry: 8367 newcp = cp_confirmed = cp_unconfirmed = NULL; 8368 8369 /* 8370 * Save the caller's IP address 8371 */ 8372 args->client.cl_addr = 8373 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 8374 8375 /* 8376 * Record if it is a Solaris client that cannot handle referrals. 8377 */ 8378 if (memstr(args->client.id_val, "Solaris", args->client.id_len) && 8379 !memstr(args->client.id_val, "+referrals", args->client.id_len)) { 8380 /* Add a "yes, it's downrev" record */ 8381 create = TRUE; 8382 ci = rfs4_find_clntip(args->client.cl_addr, &create); 8383 ASSERT(ci != NULL); 8384 rfs4_dbe_rele(ci->ri_dbe); 8385 } else { 8386 /* Remove any previous record */ 8387 rfs4_invalidate_clntip(args->client.cl_addr); 8388 } 8389 8390 /* 8391 * In search of an EXISTING client matching the incoming 8392 * request to establish a new client identifier at the server 8393 */ 8394 create = TRUE; 8395 cp = rfs4_findclient(&args->client, &create, NULL); 8396 8397 /* Should never happen */ 8398 ASSERT(cp != NULL); 8399 8400 if (cp == NULL) { 8401 *cs->statusp = res->status = NFS4ERR_SERVERFAULT; 8402 goto out; 8403 } 8404 8405 /* 8406 * Easiest case. Client identifier is newly created and is 8407 * unconfirmed. Also note that for this case, no other 8408 * entries exist for the client identifier. Nothing else to 8409 * check. Just setup the response and respond. 
8410 */ 8411 if (create) { 8412 *cs->statusp = res->status = NFS4_OK; 8413 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid; 8414 res->SETCLIENTID4res_u.resok4.setclientid_confirm = 8415 cp->rc_confirm_verf; 8416 /* Setup callback information; CB_NULL confirmation later */ 8417 rfs4_client_setcb(cp, &args->callback, args->callback_ident); 8418 8419 rfs4_client_rele(cp); 8420 goto out; 8421 } 8422 8423 /* 8424 * An existing, confirmed client may exist but it may not have 8425 * been active for at least one lease period. If so, then 8426 * "close" the client and create a new client identifier 8427 */ 8428 if (rfs4_lease_expired(cp)) { 8429 rfs4_client_close(cp); 8430 goto retry; 8431 } 8432 8433 if (cp->rc_need_confirm == TRUE) 8434 cp_unconfirmed = cp; 8435 else 8436 cp_confirmed = cp; 8437 8438 cp = NULL; 8439 8440 /* 8441 * We have a confirmed client, now check for an 8442 * unconfimred entry 8443 */ 8444 if (cp_confirmed) { 8445 /* If creds don't match then client identifier is inuse */ 8446 if (!creds_ok(&cp_confirmed->rc_cr_set, req, cs)) { 8447 rfs4_cbinfo_t *cbp; 8448 /* 8449 * Some one else has established this client 8450 * id. Try and say * who they are. We will use 8451 * the call back address supplied by * the 8452 * first client. 
8453 */ 8454 *cs->statusp = res->status = NFS4ERR_CLID_INUSE; 8455 8456 addr = netid = NULL; 8457 8458 cbp = &cp_confirmed->rc_cbinfo; 8459 if (cbp->cb_callback.cb_location.r_addr && 8460 cbp->cb_callback.cb_location.r_netid) { 8461 cb_client4 *cbcp = &cbp->cb_callback; 8462 8463 len = strlen(cbcp->cb_location.r_addr)+1; 8464 addr = kmem_alloc(len, KM_SLEEP); 8465 bcopy(cbcp->cb_location.r_addr, addr, len); 8466 len = strlen(cbcp->cb_location.r_netid)+1; 8467 netid = kmem_alloc(len, KM_SLEEP); 8468 bcopy(cbcp->cb_location.r_netid, netid, len); 8469 } 8470 8471 res->SETCLIENTID4res_u.client_using.r_addr = addr; 8472 res->SETCLIENTID4res_u.client_using.r_netid = netid; 8473 8474 rfs4_client_rele(cp_confirmed); 8475 } 8476 8477 /* 8478 * Confirmed, creds match, and verifier matches; must 8479 * be an update of the callback info 8480 */ 8481 if (cp_confirmed->rc_nfs_client.verifier == 8482 args->client.verifier) { 8483 /* Setup callback information */ 8484 rfs4_client_setcb(cp_confirmed, &args->callback, 8485 args->callback_ident); 8486 8487 /* everything okay -- move ahead */ 8488 *cs->statusp = res->status = NFS4_OK; 8489 res->SETCLIENTID4res_u.resok4.clientid = 8490 cp_confirmed->rc_clientid; 8491 8492 /* update the confirm_verifier and return it */ 8493 rfs4_client_scv_next(cp_confirmed); 8494 res->SETCLIENTID4res_u.resok4.setclientid_confirm = 8495 cp_confirmed->rc_confirm_verf; 8496 8497 rfs4_client_rele(cp_confirmed); 8498 goto out; 8499 } 8500 8501 /* 8502 * Creds match but the verifier doesn't. Must search 8503 * for an unconfirmed client that would be replaced by 8504 * this request. 8505 */ 8506 create = FALSE; 8507 cp_unconfirmed = rfs4_findclient(&args->client, &create, 8508 cp_confirmed); 8509 } 8510 8511 /* 8512 * At this point, we have taken care of the brand new client 8513 * struct, INUSE case, update of an existing, and confirmed 8514 * client struct. 
8515 */ 8516 8517 /* 8518 * check to see if things have changed while we originally 8519 * picked up the client struct. If they have, then return and 8520 * retry the processing of this SETCLIENTID request. 8521 */ 8522 if (cp_unconfirmed) { 8523 rfs4_dbe_lock(cp_unconfirmed->rc_dbe); 8524 if (!cp_unconfirmed->rc_need_confirm) { 8525 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe); 8526 rfs4_client_rele(cp_unconfirmed); 8527 if (cp_confirmed) 8528 rfs4_client_rele(cp_confirmed); 8529 goto retry; 8530 } 8531 /* do away with the old unconfirmed one */ 8532 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe); 8533 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe); 8534 rfs4_client_rele(cp_unconfirmed); 8535 cp_unconfirmed = NULL; 8536 } 8537 8538 /* 8539 * This search will temporarily hide the confirmed client 8540 * struct while a new client struct is created as the 8541 * unconfirmed one. 8542 */ 8543 create = TRUE; 8544 newcp = rfs4_findclient(&args->client, &create, cp_confirmed); 8545 8546 ASSERT(newcp != NULL); 8547 8548 if (newcp == NULL) { 8549 *cs->statusp = res->status = NFS4ERR_SERVERFAULT; 8550 rfs4_client_rele(cp_confirmed); 8551 goto out; 8552 } 8553 8554 /* 8555 * If one was not created, then a similar request must be in 8556 * process so release and start over with this one 8557 */ 8558 if (create != TRUE) { 8559 rfs4_client_rele(newcp); 8560 if (cp_confirmed) 8561 rfs4_client_rele(cp_confirmed); 8562 goto retry; 8563 } 8564 8565 *cs->statusp = res->status = NFS4_OK; 8566 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid; 8567 res->SETCLIENTID4res_u.resok4.setclientid_confirm = 8568 newcp->rc_confirm_verf; 8569 /* Setup callback information; CB_NULL confirmation later */ 8570 rfs4_client_setcb(newcp, &args->callback, args->callback_ident); 8571 8572 newcp->rc_cp_confirmed = cp_confirmed; 8573 8574 rfs4_client_rele(newcp); 8575 8576 out: 8577 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs, 8578 SETCLIENTID4res *, res); 8579 } 8580 8581 /*ARGSUSED*/ 
8582 void 8583 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop, 8584 struct svc_req *req, struct compound_state *cs) 8585 { 8586 SETCLIENTID_CONFIRM4args *args = 8587 &argop->nfs_argop4_u.opsetclientid_confirm; 8588 SETCLIENTID_CONFIRM4res *res = 8589 &resop->nfs_resop4_u.opsetclientid_confirm; 8590 rfs4_client_t *cp, *cptoclose = NULL; 8591 nfs4_srv_t *nsrv4; 8592 8593 DTRACE_NFSV4_2(op__setclientid__confirm__start, 8594 struct compound_state *, cs, 8595 SETCLIENTID_CONFIRM4args *, args); 8596 8597 nsrv4 = nfs4_get_srv(); 8598 *cs->statusp = res->status = NFS4_OK; 8599 8600 cp = rfs4_findclient_by_id(args->clientid, TRUE); 8601 8602 if (cp == NULL) { 8603 *cs->statusp = res->status = 8604 rfs4_check_clientid(&args->clientid, 1); 8605 goto out; 8606 } 8607 8608 if (!creds_ok(&cp->rc_cr_set, req, cs)) { 8609 *cs->statusp = res->status = NFS4ERR_CLID_INUSE; 8610 rfs4_client_rele(cp); 8611 goto out; 8612 } 8613 8614 /* If the verifier doesn't match, the record doesn't match */ 8615 if (cp->rc_confirm_verf != args->setclientid_confirm) { 8616 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID; 8617 rfs4_client_rele(cp); 8618 goto out; 8619 } 8620 8621 rfs4_dbe_lock(cp->rc_dbe); 8622 cp->rc_need_confirm = FALSE; 8623 if (cp->rc_cp_confirmed) { 8624 cptoclose = cp->rc_cp_confirmed; 8625 cptoclose->rc_ss_remove = 1; 8626 cp->rc_cp_confirmed = NULL; 8627 } 8628 8629 /* 8630 * Update the client's associated server instance, if it's changed 8631 * since the client was created. 8632 */ 8633 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst) 8634 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst); 8635 8636 /* 8637 * Record clientid in stable storage. 8638 * Must be done after server instance has been assigned. 
8639 */ 8640 rfs4_ss_clid(nsrv4, cp); 8641 8642 rfs4_dbe_unlock(cp->rc_dbe); 8643 8644 if (cptoclose) 8645 /* don't need to rele, client_close does it */ 8646 rfs4_client_close(cptoclose); 8647 8648 /* If needed, initiate CB_NULL call for callback path */ 8649 rfs4_deleg_cb_check(cp); 8650 rfs4_update_lease(cp); 8651 8652 /* 8653 * Check to see if client can perform reclaims 8654 */ 8655 rfs4_ss_chkclid(nsrv4, cp); 8656 8657 rfs4_client_rele(cp); 8658 8659 out: 8660 DTRACE_NFSV4_2(op__setclientid__confirm__done, 8661 struct compound_state *, cs, 8662 SETCLIENTID_CONFIRM4 *, res); 8663 } 8664 8665 extern stateid4 invalid_stateid; 8666 8667 /*ARGSUSED*/ 8668 void 8669 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop, 8670 struct svc_req *req, struct compound_state *cs) 8671 { 8672 CLOSE4args *args = &argop->nfs_argop4_u.opclose; 8673 CLOSE4res *resp = &resop->nfs_resop4_u.opclose; 8674 rfs4_state_t *sp; 8675 nfsstat4 status; 8676 8677 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs, 8678 CLOSE4args *, args); 8679 8680 if (cs->vp == NULL) { 8681 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 8682 goto out; 8683 } 8684 8685 get_stateid4(cs, &args->open_stateid); 8686 8687 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID); 8688 if (status != NFS4_OK) { 8689 *cs->statusp = resp->status = status; 8690 goto out; 8691 } 8692 8693 /* Ensure specified filehandle matches */ 8694 if (cs->vp != sp->rs_finfo->rf_vp) { 8695 rfs4_state_rele(sp); 8696 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8697 goto out; 8698 } 8699 8700 /* hold off other access to open_owner while we tinker */ 8701 rfs4_sw_enter(&sp->rs_owner->ro_sw); 8702 8703 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) { 8704 case NFS4_CHECK_STATEID_OKAY: 8705 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8706 resop, cs) != NFS4_CHKSEQ_OKAY) { 8707 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8708 goto end; 8709 } 8710 break; 8711 case 
NFS4_CHECK_STATEID_OLD: 8712 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8713 goto end; 8714 case NFS4_CHECK_STATEID_BAD: 8715 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8716 goto end; 8717 case NFS4_CHECK_STATEID_EXPIRED: 8718 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 8719 goto end; 8720 case NFS4_CHECK_STATEID_CLOSED: 8721 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8722 goto end; 8723 case NFS4_CHECK_STATEID_UNCONFIRMED: 8724 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8725 goto end; 8726 case NFS4_CHECK_STATEID_REPLAY: 8727 ASSERT(!rfs4_has_session(cs)); 8728 8729 /* Check the sequence id for the open owner */ 8730 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8731 resop, cs)) { 8732 case NFS4_CHKSEQ_OKAY: 8733 /* 8734 * This is replayed stateid; if seqid matches 8735 * next expected, then client is using wrong seqid. 8736 */ 8737 /* FALL THROUGH */ 8738 case NFS4_CHKSEQ_BAD: 8739 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8740 goto end; 8741 case NFS4_CHKSEQ_REPLAY: 8742 /* 8743 * Note this case is the duplicate case so 8744 * resp->status is already set. 8745 */ 8746 *cs->statusp = resp->status; 8747 rfs4_update_lease(sp->rs_owner->ro_client); 8748 goto end; 8749 } 8750 break; 8751 default: 8752 ASSERT(FALSE); 8753 break; 8754 } 8755 8756 rfs4_dbe_lock(sp->rs_dbe); 8757 8758 /* Update the stateid. 
*/ 8759 next_stateid(&sp->rs_stateid); 8760 rfs4_dbe_unlock(sp->rs_dbe); 8761 8762 rfs4_update_lease(sp->rs_owner->ro_client); 8763 rfs4_update_open_sequence(sp->rs_owner); 8764 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 8765 8766 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 8767 8768 /* See RFC8881 section 18.2.4, and RFC7530 section 16.2.5 */ 8769 resp->open_stateid = invalid_stateid; 8770 *cs->statusp = resp->status = status; 8771 8772 end: 8773 rfs4_sw_exit(&sp->rs_owner->ro_sw); 8774 rfs4_state_rele(sp); 8775 out: 8776 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs, 8777 CLOSE4res *, resp); 8778 } 8779 8780 /* 8781 * Manage the counts on the file struct and close all file locks 8782 */ 8783 /*ARGSUSED*/ 8784 void 8785 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr, 8786 bool_t close_of_client) 8787 { 8788 rfs4_file_t *fp = sp->rs_finfo; 8789 rfs4_lo_state_t *lsp; 8790 int fflags = 0; 8791 8792 /* 8793 * If this call is part of the larger closing down of client 8794 * state then it is just easier to release all locks 8795 * associated with this client instead of going through each 8796 * individual file and cleaning locks there. 8797 */ 8798 if (close_of_client) { 8799 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE && 8800 !list_is_empty(&sp->rs_lostatelist) && 8801 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) { 8802 /* Is the PxFS kernel module loaded? */ 8803 if (lm_remove_file_locks != NULL) { 8804 int new_sysid; 8805 8806 /* Encode the cluster nodeid in new sysid */ 8807 new_sysid = sp->rs_owner->ro_client->rc_sysidt; 8808 lm_set_nlmid_flk(&new_sysid); 8809 8810 /* 8811 * This PxFS routine removes file locks for a 8812 * client over all nodes of a cluster. 
8813 */ 8814 NFS4_DEBUG(rfs4_debug, (CE_NOTE, 8815 "lm_remove_file_locks(sysid=0x%x)\n", 8816 new_sysid)); 8817 (*lm_remove_file_locks)(new_sysid); 8818 } else { 8819 struct flock64 flk; 8820 8821 /* Release all locks for this client */ 8822 flk.l_type = F_UNLKSYS; 8823 flk.l_whence = 0; 8824 flk.l_start = 0; 8825 flk.l_len = 0; 8826 flk.l_sysid = 8827 sp->rs_owner->ro_client->rc_sysidt; 8828 flk.l_pid = 0; 8829 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK, 8830 &flk, F_REMOTELOCK | FREAD | FWRITE, 8831 (u_offset_t)0, NULL, CRED(), NULL); 8832 } 8833 8834 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE; 8835 } 8836 } 8837 8838 /* 8839 * Release all locks on this file by this lock owner or at 8840 * least mark the locks as having been released 8841 */ 8842 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL; 8843 lsp = list_next(&sp->rs_lostatelist, lsp)) { 8844 lsp->rls_locks_cleaned = TRUE; 8845 8846 /* Was this already taken care of above? */ 8847 if (!close_of_client && 8848 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) 8849 (void) cleanlocks(sp->rs_finfo->rf_vp, 8850 lsp->rls_locker->rl_pid, 8851 lsp->rls_locker->rl_client->rc_sysidt); 8852 } 8853 8854 /* 8855 * Release any shrlocks associated with this open state ID. 8856 * This must be done before the rfs4_state gets marked closed. 8857 */ 8858 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) 8859 (void) rfs4_unshare(sp); 8860 8861 if (sp->rs_open_access) { 8862 rfs4_dbe_lock(fp->rf_dbe); 8863 8864 /* 8865 * Decrement the count for each access and deny bit that this 8866 * state has contributed to the file. 8867 * If the file counts go to zero 8868 * clear the appropriate bit in the appropriate mask. 
8869 */ 8870 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) { 8871 fp->rf_access_read--; 8872 fflags |= FREAD; 8873 if (fp->rf_access_read == 0) 8874 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ; 8875 } 8876 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) { 8877 fp->rf_access_write--; 8878 fflags |= FWRITE; 8879 if (fp->rf_access_write == 0) 8880 fp->rf_share_access &= 8881 ~OPEN4_SHARE_ACCESS_WRITE; 8882 } 8883 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) { 8884 fp->rf_deny_read--; 8885 if (fp->rf_deny_read == 0) 8886 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ; 8887 } 8888 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) { 8889 fp->rf_deny_write--; 8890 if (fp->rf_deny_write == 0) 8891 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE; 8892 } 8893 8894 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL); 8895 8896 rfs4_dbe_unlock(fp->rf_dbe); 8897 8898 sp->rs_open_access = 0; 8899 sp->rs_open_deny = 0; 8900 } 8901 } 8902 8903 /* 8904 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure. 8905 */ 8906 static nfsstat4 8907 lock_denied(LOCK4denied *dp, struct flock64 *flk) 8908 { 8909 rfs4_lockowner_t *lo; 8910 rfs4_client_t *cp; 8911 uint32_t len; 8912 8913 lo = rfs4_findlockowner_by_pid(flk->l_pid); 8914 if (lo != NULL) { 8915 cp = lo->rl_client; 8916 if (rfs4_lease_expired(cp)) { 8917 rfs4_lockowner_rele(lo); 8918 rfs4_dbe_hold(cp->rc_dbe); 8919 rfs4_client_close(cp); 8920 return (NFS4ERR_EXPIRED); 8921 } 8922 dp->owner.clientid = lo->rl_owner.clientid; 8923 len = lo->rl_owner.owner_len; 8924 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP); 8925 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len); 8926 dp->owner.owner_len = len; 8927 rfs4_lockowner_rele(lo); 8928 goto finish; 8929 } 8930 8931 /* 8932 * Its not a NFS4 lock. We take advantage that the upper 32 bits 8933 * of the client id contain the boot time for a NFS4 lock. 
So we 8934 * fabricate and identity by setting clientid to the sysid, and 8935 * the lock owner to the pid. 8936 */ 8937 dp->owner.clientid = flk->l_sysid; 8938 len = sizeof (pid_t); 8939 dp->owner.owner_len = len; 8940 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP); 8941 bcopy(&flk->l_pid, dp->owner.owner_val, len); 8942 finish: 8943 dp->offset = flk->l_start; 8944 dp->length = flk->l_len; 8945 8946 if (flk->l_type == F_RDLCK) 8947 dp->locktype = READ_LT; 8948 else if (flk->l_type == F_WRLCK) 8949 dp->locktype = WRITE_LT; 8950 else 8951 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */ 8952 8953 return (NFS4_OK); 8954 } 8955 8956 /* 8957 * The NFSv4.0 LOCK operation does not support the blocking lock (at the 8958 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a 8959 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared 8960 * for that (obviously); they are sending the LOCK requests with some delays 8961 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the 8962 * locking and delay implementation at the client side. 8963 * 8964 * To make the life of the clients easier, the NFSv4.0 server tries to do some 8965 * fast retries on its own (the for loop below) in a hope the lock will be 8966 * available soon. And if not, the client won't need to resend the LOCK 8967 * requests so fast to check the lock availability. This basically saves some 8968 * network traffic and tries to make sure the client gets the lock ASAP. 8969 */ 8970 static int 8971 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred) 8972 { 8973 int error; 8974 struct flock64 flk; 8975 int i; 8976 clock_t delaytime; 8977 int cmd; 8978 int spin_cnt = 0; 8979 8980 cmd = nbl_need_check(vp) ? 
F_SETLK_NBMAND : F_SETLK; 8981 retry: 8982 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay); 8983 8984 for (i = 0; i < rfs4_maxlock_tries; i++) { 8985 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock); 8986 error = VOP_FRLOCK(vp, cmd, 8987 flock, flag, (u_offset_t)0, NULL, cred, NULL); 8988 8989 if (error != EAGAIN && error != EACCES) 8990 break; 8991 8992 if (i < rfs4_maxlock_tries - 1) { 8993 delay(delaytime); 8994 delaytime *= 2; 8995 } 8996 } 8997 8998 if (error == EAGAIN || error == EACCES) { 8999 /* Get the owner of the lock */ 9000 flk = *flock; 9001 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk); 9002 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred, 9003 NULL) == 0) { 9004 /* 9005 * There's a race inherent in the current VOP_FRLOCK 9006 * design where: 9007 * a: "other guy" takes a lock that conflicts with a 9008 * lock we want 9009 * b: we attempt to take our lock (non-blocking) and 9010 * the attempt fails. 9011 * c: "other guy" releases the conflicting lock 9012 * d: we ask what lock conflicts with the lock we want, 9013 * getting F_UNLCK (no lock blocks us) 9014 * 9015 * If we retry the non-blocking lock attempt in this 9016 * case (restart at step 'b') there's some possibility 9017 * that many such attempts might fail. However a test 9018 * designed to actually provoke this race shows that 9019 * the vast majority of cases require no retry, and 9020 * only a few took as many as three retries. Here's 9021 * the test outcome: 9022 * 9023 * number of retries how many times we needed 9024 * that many retries 9025 * 0 79461 9026 * 1 862 9027 * 2 49 9028 * 3 5 9029 * 9030 * Given those empirical results, we arbitrarily limit 9031 * the retry count to ten. 9032 * 9033 * If we actually make to ten retries and give up, 9034 * nothing catastrophic happens, but we're unable to 9035 * return the information about the conflicting lock to 9036 * the NFS client. That's an acceptable trade off vs. 9037 * letting this retry loop run forever. 
9038 */ 9039 if (flk.l_type == F_UNLCK) { 9040 if (spin_cnt++ < 10) { 9041 /* No longer locked, retry */ 9042 goto retry; 9043 } 9044 } else { 9045 *flock = flk; 9046 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)", 9047 F_GETLK, &flk); 9048 } 9049 } 9050 } 9051 9052 return (error); 9053 } 9054 9055 /*ARGSUSED*/ 9056 static nfsstat4 9057 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype, 9058 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop) 9059 { 9060 nfsstat4 status; 9061 rfs4_lockowner_t *lo = lsp->rls_locker; 9062 rfs4_state_t *sp = lsp->rls_state; 9063 struct flock64 flock; 9064 int16_t ltype; 9065 int flag; 9066 int error; 9067 sysid_t sysid; 9068 LOCK4res *lres; 9069 vnode_t *vp; 9070 9071 if (rfs4_lease_expired(lo->rl_client)) { 9072 return (NFS4ERR_EXPIRED); 9073 } 9074 9075 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK) 9076 return (status); 9077 9078 /* Check for zero length. To lock to end of file use all ones for V4 */ 9079 if (length == 0) 9080 return (NFS4ERR_INVAL); 9081 else if (length == (length4)(~0)) 9082 length = 0; /* Posix to end of file */ 9083 9084 retry: 9085 rfs4_dbe_lock(sp->rs_dbe); 9086 if (sp->rs_closed == TRUE) { 9087 rfs4_dbe_unlock(sp->rs_dbe); 9088 return (NFS4ERR_OLD_STATEID); 9089 } 9090 9091 if (resop->resop != OP_LOCKU) { 9092 switch (locktype) { 9093 case READ_LT: 9094 case READW_LT: 9095 if ((sp->rs_share_access 9096 & OPEN4_SHARE_ACCESS_READ) == 0) { 9097 rfs4_dbe_unlock(sp->rs_dbe); 9098 9099 return (NFS4ERR_OPENMODE); 9100 } 9101 ltype = F_RDLCK; 9102 break; 9103 case WRITE_LT: 9104 case WRITEW_LT: 9105 if ((sp->rs_share_access 9106 & OPEN4_SHARE_ACCESS_WRITE) == 0) { 9107 rfs4_dbe_unlock(sp->rs_dbe); 9108 9109 return (NFS4ERR_OPENMODE); 9110 } 9111 ltype = F_WRLCK; 9112 break; 9113 } 9114 } else 9115 ltype = F_UNLCK; 9116 9117 flock.l_type = ltype; 9118 flock.l_whence = 0; /* SEEK_SET */ 9119 flock.l_start = offset; 9120 flock.l_len = length; 9121 flock.l_sysid = sysid; 9122 
flock.l_pid = lsp->rls_locker->rl_pid; 9123 9124 /* Note that length4 is uint64_t but l_len and l_start are off64_t */ 9125 if (flock.l_len < 0 || flock.l_start < 0) { 9126 rfs4_dbe_unlock(sp->rs_dbe); 9127 return (NFS4ERR_INVAL); 9128 } 9129 9130 /* 9131 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and 9132 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE. 9133 */ 9134 flag = (int)sp->rs_share_access | F_REMOTELOCK; 9135 9136 vp = sp->rs_finfo->rf_vp; 9137 VN_HOLD(vp); 9138 9139 /* 9140 * We need to unlock sp before we call the underlying filesystem to 9141 * acquire the file lock. 9142 */ 9143 rfs4_dbe_unlock(sp->rs_dbe); 9144 9145 error = setlock(vp, &flock, flag, cred); 9146 9147 /* 9148 * Make sure the file is still open. In a case the file was closed in 9149 * the meantime, clean the lock we acquired using the setlock() call 9150 * above, and return the appropriate error. 9151 */ 9152 rfs4_dbe_lock(sp->rs_dbe); 9153 if (sp->rs_closed == TRUE) { 9154 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid); 9155 rfs4_dbe_unlock(sp->rs_dbe); 9156 9157 VN_RELE(vp); 9158 9159 return (NFS4ERR_OLD_STATEID); 9160 } 9161 rfs4_dbe_unlock(sp->rs_dbe); 9162 9163 VN_RELE(vp); 9164 9165 if (error == 0) { 9166 rfs4_dbe_lock(lsp->rls_dbe); 9167 next_stateid(&lsp->rls_lockid); 9168 rfs4_dbe_unlock(lsp->rls_dbe); 9169 } 9170 9171 /* 9172 * N.B. We map error values to nfsv4 errors. This is differrent 9173 * than puterrno4 routine. 
9174 */ 9175 switch (error) { 9176 case 0: 9177 status = NFS4_OK; 9178 break; 9179 case EAGAIN: 9180 case EACCES: /* Old value */ 9181 /* Can only get here if op is OP_LOCK */ 9182 ASSERT(resop->resop == OP_LOCK); 9183 lres = &resop->nfs_resop4_u.oplock; 9184 status = NFS4ERR_DENIED; 9185 if (lock_denied(&lres->LOCK4res_u.denied, &flock) 9186 == NFS4ERR_EXPIRED) 9187 goto retry; 9188 break; 9189 case ENOLCK: 9190 status = NFS4ERR_DELAY; 9191 break; 9192 case EOVERFLOW: 9193 status = NFS4ERR_INVAL; 9194 break; 9195 case EINVAL: 9196 status = NFS4ERR_NOTSUPP; 9197 break; 9198 default: 9199 status = NFS4ERR_SERVERFAULT; 9200 break; 9201 } 9202 9203 return (status); 9204 } 9205 9206 /*ARGSUSED*/ 9207 void 9208 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop, 9209 struct svc_req *req, struct compound_state *cs) 9210 { 9211 LOCK4args *args = &argop->nfs_argop4_u.oplock; 9212 LOCK4res *resp = &resop->nfs_resop4_u.oplock; 9213 nfsstat4 status; 9214 stateid4 *stateid; 9215 rfs4_lockowner_t *lo; 9216 rfs4_client_t *cp; 9217 rfs4_state_t *sp = NULL; 9218 rfs4_lo_state_t *lsp = NULL; 9219 bool_t ls_sw_held = FALSE; 9220 bool_t create = TRUE; 9221 bool_t lcreate = TRUE; 9222 bool_t dup_lock = FALSE; 9223 int rc; 9224 9225 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs, 9226 LOCK4args *, args); 9227 9228 if (cs->vp == NULL) { 9229 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 9230 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9231 cs, LOCK4res *, resp); 9232 return; 9233 } 9234 9235 if (args->locker.new_lock_owner) { 9236 /* Create a new lockowner for this instance */ 9237 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner; 9238 9239 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner")); 9240 9241 stateid = &olo->open_stateid; 9242 get_stateid4(cs, stateid); 9243 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID); 9244 if (status != NFS4_OK) { 9245 NFS4_DEBUG(rfs4_debug, 9246 (CE_NOTE, "Get state failed in lock %d", status)); 
9247 *cs->statusp = resp->status = status; 9248 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9249 cs, LOCK4res *, resp); 9250 return; 9251 } 9252 9253 /* Ensure specified filehandle matches */ 9254 if (cs->vp != sp->rs_finfo->rf_vp) { 9255 rfs4_state_rele(sp); 9256 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9257 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9258 cs, LOCK4res *, resp); 9259 return; 9260 } 9261 9262 /* hold off other access to open_owner while we tinker */ 9263 rfs4_sw_enter(&sp->rs_owner->ro_sw); 9264 9265 switch (rc = rfs4_check_stateid_seqid(sp, stateid, cs)) { 9266 case NFS4_CHECK_STATEID_OLD: 9267 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9268 goto end; 9269 case NFS4_CHECK_STATEID_BAD: 9270 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9271 goto end; 9272 case NFS4_CHECK_STATEID_EXPIRED: 9273 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 9274 goto end; 9275 case NFS4_CHECK_STATEID_UNCONFIRMED: 9276 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9277 goto end; 9278 case NFS4_CHECK_STATEID_CLOSED: 9279 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9280 goto end; 9281 case NFS4_CHECK_STATEID_OKAY: 9282 if (rfs4_has_session(cs)) 9283 break; 9284 /* FALLTHROUGH */ 9285 case NFS4_CHECK_STATEID_REPLAY: 9286 ASSERT(!rfs4_has_session(cs)); 9287 9288 switch (rfs4_check_olo_seqid(olo->open_seqid, 9289 sp->rs_owner, resop)) { 9290 case NFS4_CHKSEQ_OKAY: 9291 if (rc == NFS4_CHECK_STATEID_OKAY) 9292 break; 9293 /* 9294 * This is replayed stateid; if seqid 9295 * matches next expected, then client 9296 * is using wrong seqid. 9297 */ 9298 /* FALLTHROUGH */ 9299 case NFS4_CHKSEQ_BAD: 9300 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9301 goto end; 9302 case NFS4_CHKSEQ_REPLAY: 9303 /* This is a duplicate LOCK request */ 9304 dup_lock = TRUE; 9305 9306 /* 9307 * For a duplicate we do not want to 9308 * create a new lockowner as it should 9309 * already exist. 9310 * Turn off the lockowner create flag. 
9311 */ 9312 lcreate = FALSE; 9313 } 9314 break; 9315 } 9316 9317 /* 9318 * See RFC 8881 18.10.3. MUST be ignored by the server: 9319 * The clientid field of the lock_owner field of the 9320 * open_owner field (locker.open_owner.lock_owner.clientid). 9321 */ 9322 if (rfs4_has_session(cs)) 9323 olo->lock_owner.clientid = cs->client->rc_clientid; 9324 9325 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate); 9326 if (lo == NULL) { 9327 NFS4_DEBUG(rfs4_debug, 9328 (CE_NOTE, "rfs4_op_lock: no lock owner")); 9329 *cs->statusp = resp->status = NFS4ERR_RESOURCE; 9330 goto end; 9331 } 9332 9333 lsp = rfs4_findlo_state_by_owner(lo, sp, &create); 9334 if (lsp == NULL) { 9335 rfs4_update_lease(sp->rs_owner->ro_client); 9336 /* 9337 * Only update theh open_seqid if this is not 9338 * a duplicate request 9339 */ 9340 if (dup_lock == FALSE) { 9341 rfs4_update_open_sequence(sp->rs_owner); 9342 } 9343 9344 NFS4_DEBUG(rfs4_debug, 9345 (CE_NOTE, "rfs4_op_lock: no state")); 9346 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 9347 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 9348 rfs4_lockowner_rele(lo); 9349 goto end; 9350 } 9351 9352 /* 9353 * This is the new_lock_owner branch and the client is 9354 * supposed to be associating a new lock_owner with 9355 * the open file at this point. If we find that a 9356 * lock_owner/state association already exists and a 9357 * successful LOCK request was returned to the client, 9358 * an error is returned to the client since this is 9359 * not appropriate. The client should be using the 9360 * existing lock_owner branch. 
9361 */ 9362 if (!rfs4_has_session(cs) && !dup_lock && !create) { 9363 if (lsp->rls_lock_completed == TRUE) { 9364 *cs->statusp = 9365 resp->status = NFS4ERR_BAD_SEQID; 9366 rfs4_lockowner_rele(lo); 9367 goto end; 9368 } 9369 } 9370 9371 rfs4_update_lease(sp->rs_owner->ro_client); 9372 9373 /* 9374 * Only update theh open_seqid if this is not 9375 * a duplicate request 9376 */ 9377 if (dup_lock == FALSE) { 9378 rfs4_update_open_sequence(sp->rs_owner); 9379 } 9380 9381 /* 9382 * If this is a duplicate lock request, just copy the 9383 * previously saved reply and return. 9384 */ 9385 if (dup_lock == TRUE) { 9386 /* verify that lock_seqid's match */ 9387 if (lsp->rls_seqid != olo->lock_seqid) { 9388 NFS4_DEBUG(rfs4_debug, 9389 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad" 9390 "lsp->seqid=%d old->seqid=%d", 9391 lsp->rls_seqid, olo->lock_seqid)); 9392 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9393 } else { 9394 rfs4_copy_reply(resop, &lsp->rls_reply); 9395 /* 9396 * Make sure to copy the just 9397 * retrieved reply status into the 9398 * overall compound status 9399 */ 9400 *cs->statusp = resp->status; 9401 } 9402 rfs4_lockowner_rele(lo); 9403 goto end; 9404 } 9405 9406 rfs4_dbe_lock(lsp->rls_dbe); 9407 9408 /* Make sure to update the lock sequence id */ 9409 lsp->rls_seqid = olo->lock_seqid; 9410 9411 NFS4_DEBUG(rfs4_debug, 9412 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid)); 9413 9414 /* 9415 * This is used to signify the newly created lockowner 9416 * stateid and its sequence number. The checks for 9417 * sequence number and increment don't occur on the 9418 * very first lock request for a lockowner. 
9419 */ 9420 lsp->rls_skip_seqid_check = TRUE; 9421 9422 /* hold off other access to lsp while we tinker */ 9423 rfs4_sw_enter(&lsp->rls_sw); 9424 ls_sw_held = TRUE; 9425 9426 rfs4_dbe_unlock(lsp->rls_dbe); 9427 9428 rfs4_lockowner_rele(lo); 9429 } else { 9430 stateid = &args->locker.locker4_u.lock_owner.lock_stateid; 9431 /* get lsp and hold the lock on the underlying file struct */ 9432 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) 9433 != NFS4_OK) { 9434 *cs->statusp = resp->status = status; 9435 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9436 cs, LOCK4res *, resp); 9437 return; 9438 } 9439 create = FALSE; /* We didn't create lsp */ 9440 9441 /* Ensure specified filehandle matches */ 9442 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) { 9443 rfs4_lo_state_rele(lsp, TRUE); 9444 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9445 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9446 cs, LOCK4res *, resp); 9447 return; 9448 } 9449 9450 /* hold off other access to lsp while we tinker */ 9451 rfs4_sw_enter(&lsp->rls_sw); 9452 ls_sw_held = TRUE; 9453 9454 switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) { 9455 /* 9456 * The stateid looks like it was okay (expected to be 9457 * the next one) 9458 */ 9459 case NFS4_CHECK_STATEID_OKAY: 9460 if (rfs4_has_session(cs)) 9461 break; 9462 9463 /* 9464 * The sequence id is now checked. Determine 9465 * if this is a replay or if it is in the 9466 * expected (next) sequence. In the case of a 9467 * replay, there are two replay conditions 9468 * that may occur. The first is the normal 9469 * condition where a LOCK is done with a 9470 * NFS4_OK response and the stateid is 9471 * updated. That case is handled below when 9472 * the stateid is identified as a REPLAY. The 9473 * second is the case where an error is 9474 * returned, like NFS4ERR_DENIED, and the 9475 * sequence number is updated but the stateid 9476 * is not updated. This second case is dealt 9477 * with here. 
So it may seem odd that the 9478 * stateid is okay but the sequence id is a 9479 * replay but it is okay. 9480 */ 9481 switch (rfs4_check_lock_seqid( 9482 args->locker.locker4_u.lock_owner.lock_seqid, 9483 lsp, resop)) { 9484 case NFS4_CHKSEQ_REPLAY: 9485 if (resp->status != NFS4_OK) { 9486 /* 9487 * Here is our replay and need 9488 * to verify that the last 9489 * response was an error. 9490 */ 9491 *cs->statusp = resp->status; 9492 goto end; 9493 } 9494 /* 9495 * This is done since the sequence id 9496 * looked like a replay but it didn't 9497 * pass our check so a BAD_SEQID is 9498 * returned as a result. 9499 */ 9500 /*FALLTHROUGH*/ 9501 case NFS4_CHKSEQ_BAD: 9502 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9503 goto end; 9504 case NFS4_CHKSEQ_OKAY: 9505 /* Everything looks okay move ahead */ 9506 break; 9507 } 9508 break; 9509 case NFS4_CHECK_STATEID_OLD: 9510 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9511 goto end; 9512 case NFS4_CHECK_STATEID_BAD: 9513 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9514 goto end; 9515 case NFS4_CHECK_STATEID_EXPIRED: 9516 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 9517 goto end; 9518 case NFS4_CHECK_STATEID_CLOSED: 9519 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9520 goto end; 9521 case NFS4_CHECK_STATEID_REPLAY: 9522 ASSERT(!rfs4_has_session(cs)); 9523 9524 switch (rfs4_check_lock_seqid( 9525 args->locker.locker4_u.lock_owner.lock_seqid, 9526 lsp, resop)) { 9527 case NFS4_CHKSEQ_OKAY: 9528 /* 9529 * This is a replayed stateid; if 9530 * seqid matches the next expected, 9531 * then client is using wrong seqid. 
9532 */ 9533 case NFS4_CHKSEQ_BAD: 9534 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9535 goto end; 9536 case NFS4_CHKSEQ_REPLAY: 9537 rfs4_update_lease(lsp->rls_locker->rl_client); 9538 *cs->statusp = status = resp->status; 9539 goto end; 9540 } 9541 break; 9542 default: 9543 ASSERT(FALSE); 9544 break; 9545 } 9546 9547 rfs4_update_lock_sequence(lsp); 9548 rfs4_update_lease(lsp->rls_locker->rl_client); 9549 } 9550 9551 /* 9552 * NFS4 only allows locking on regular files, so 9553 * verify type of object. 9554 */ 9555 if (cs->vp->v_type != VREG) { 9556 if (cs->vp->v_type == VDIR) 9557 status = NFS4ERR_ISDIR; 9558 else 9559 status = NFS4ERR_INVAL; 9560 goto out; 9561 } 9562 9563 cp = lsp->rls_state->rs_owner->ro_client; 9564 9565 if (rfs4_clnt_in_grace(cp) && !args->reclaim) { 9566 status = NFS4ERR_GRACE; 9567 goto out; 9568 } 9569 9570 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) { 9571 status = NFS4ERR_NO_GRACE; 9572 goto out; 9573 } 9574 9575 if (!rfs4_clnt_in_grace(cp) && args->reclaim) { 9576 status = NFS4ERR_NO_GRACE; 9577 goto out; 9578 } 9579 9580 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) 9581 cs->deleg = TRUE; 9582 9583 status = rfs4_do_lock(lsp, args->locktype, 9584 args->offset, args->length, cs->cr, resop); 9585 9586 out: 9587 lsp->rls_skip_seqid_check = FALSE; 9588 9589 *cs->statusp = resp->status = status; 9590 9591 if (status == NFS4_OK) { 9592 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid; 9593 lsp->rls_lock_completed = TRUE; 9594 9595 put_stateid4(cs, &resp->LOCK4res_u.lock_stateid); 9596 } 9597 /* 9598 * Only update the "OPEN" response here if this was a new 9599 * lock_owner 9600 */ 9601 if (sp) 9602 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 9603 9604 rfs4_update_lock_resp(lsp, resop); 9605 9606 end: 9607 if (lsp) { 9608 if (ls_sw_held) 9609 rfs4_sw_exit(&lsp->rls_sw); 9610 /* 9611 * If an sp obtained, then the lsp does not represent 9612 * a lock on the file struct. 
9613 */ 9614 if (sp != NULL) 9615 rfs4_lo_state_rele(lsp, FALSE); 9616 else 9617 rfs4_lo_state_rele(lsp, TRUE); 9618 } 9619 if (sp) { 9620 rfs4_sw_exit(&sp->rs_owner->ro_sw); 9621 rfs4_state_rele(sp); 9622 } 9623 9624 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs, 9625 LOCK4res *, resp); 9626 } 9627 9628 /* free function for LOCK/LOCKT */ 9629 static void 9630 lock_denied_free(nfs_resop4 *resop) 9631 { 9632 LOCK4denied *dp = NULL; 9633 9634 switch (resop->resop) { 9635 case OP_LOCK: 9636 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED) 9637 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied; 9638 break; 9639 case OP_LOCKT: 9640 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED) 9641 dp = &resop->nfs_resop4_u.oplockt.denied; 9642 break; 9643 default: 9644 break; 9645 } 9646 9647 if (dp) 9648 kmem_free(dp->owner.owner_val, dp->owner.owner_len); 9649 } 9650 9651 /*ARGSUSED*/ 9652 void 9653 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop, 9654 struct svc_req *req, struct compound_state *cs) 9655 { 9656 LOCKU4args *args = &argop->nfs_argop4_u.oplocku; 9657 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku; 9658 nfsstat4 status; 9659 stateid4 *stateid = &args->lock_stateid; 9660 rfs4_lo_state_t *lsp; 9661 9662 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs, 9663 LOCKU4args *, args); 9664 9665 if (cs->vp == NULL) { 9666 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 9667 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9668 LOCKU4res *, resp); 9669 return; 9670 } 9671 9672 get_stateid4(cs, stateid); 9673 9674 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) { 9675 *cs->statusp = resp->status = status; 9676 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9677 LOCKU4res *, resp); 9678 return; 9679 } 9680 9681 /* Ensure specified filehandle matches */ 9682 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) { 9683 rfs4_lo_state_rele(lsp, TRUE); 9684 *cs->statusp = resp->status = 
NFS4ERR_BAD_STATEID; 9685 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9686 LOCKU4res *, resp); 9687 return; 9688 } 9689 9690 /* hold off other access to lsp while we tinker */ 9691 rfs4_sw_enter(&lsp->rls_sw); 9692 9693 switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) { 9694 case NFS4_CHECK_STATEID_OKAY: 9695 if (rfs4_has_session(cs)) 9696 break; 9697 9698 if (rfs4_check_lock_seqid(args->seqid, lsp, resop) 9699 != NFS4_CHKSEQ_OKAY) { 9700 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9701 goto end; 9702 } 9703 break; 9704 case NFS4_CHECK_STATEID_OLD: 9705 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9706 goto end; 9707 case NFS4_CHECK_STATEID_BAD: 9708 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9709 goto end; 9710 case NFS4_CHECK_STATEID_EXPIRED: 9711 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 9712 goto end; 9713 case NFS4_CHECK_STATEID_CLOSED: 9714 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9715 goto end; 9716 case NFS4_CHECK_STATEID_REPLAY: 9717 ASSERT(!rfs4_has_session(cs)); 9718 9719 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) { 9720 case NFS4_CHKSEQ_OKAY: 9721 /* 9722 * This is a replayed stateid; if 9723 * seqid matches the next expected, 9724 * then client is using wrong seqid. 9725 */ 9726 case NFS4_CHKSEQ_BAD: 9727 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9728 goto end; 9729 case NFS4_CHKSEQ_REPLAY: 9730 rfs4_update_lease(lsp->rls_locker->rl_client); 9731 *cs->statusp = status = resp->status; 9732 goto end; 9733 } 9734 break; 9735 default: 9736 ASSERT(FALSE); 9737 break; 9738 } 9739 9740 rfs4_update_lock_sequence(lsp); 9741 rfs4_update_lease(lsp->rls_locker->rl_client); 9742 9743 /* 9744 * NFS4 only allows locking on regular files, so 9745 * verify type of object. 
9746 */ 9747 if (cs->vp->v_type != VREG) { 9748 if (cs->vp->v_type == VDIR) 9749 status = NFS4ERR_ISDIR; 9750 else 9751 status = NFS4ERR_INVAL; 9752 goto out; 9753 } 9754 9755 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) { 9756 status = NFS4ERR_GRACE; 9757 goto out; 9758 } 9759 9760 status = rfs4_do_lock(lsp, args->locktype, 9761 args->offset, args->length, cs->cr, resop); 9762 9763 out: 9764 *cs->statusp = resp->status = status; 9765 9766 if (status == NFS4_OK) 9767 resp->lock_stateid = lsp->rls_lockid.stateid; 9768 9769 rfs4_update_lock_resp(lsp, resop); 9770 9771 end: 9772 rfs4_sw_exit(&lsp->rls_sw); 9773 rfs4_lo_state_rele(lsp, TRUE); 9774 9775 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9776 LOCKU4res *, resp); 9777 } 9778 9779 /* 9780 * LOCKT is a best effort routine, the client can not be guaranteed that 9781 * the status return is still in effect by the time the reply is received. 9782 * They are numerous race conditions in this routine, but we are not required 9783 * and can not be accurate. 9784 */ 9785 /*ARGSUSED*/ 9786 void 9787 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop, 9788 struct svc_req *req, struct compound_state *cs) 9789 { 9790 LOCKT4args *args = &argop->nfs_argop4_u.oplockt; 9791 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt; 9792 rfs4_lockowner_t *lo; 9793 rfs4_client_t *cp; 9794 bool_t create = FALSE; 9795 struct flock64 flk; 9796 int error; 9797 int flag = FREAD | FWRITE; 9798 int ltype; 9799 length4 posix_length; 9800 sysid_t sysid; 9801 pid_t pid; 9802 9803 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs, 9804 LOCKT4args *, args); 9805 9806 if (cs->vp == NULL) { 9807 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 9808 goto out; 9809 } 9810 9811 /* 9812 * NFS4 only allows locking on regular files, so 9813 * verify type of object. 
9814 */ 9815 if (cs->vp->v_type != VREG) { 9816 if (cs->vp->v_type == VDIR) 9817 *cs->statusp = resp->status = NFS4ERR_ISDIR; 9818 else 9819 *cs->statusp = resp->status = NFS4ERR_INVAL; 9820 goto out; 9821 } 9822 9823 /* 9824 * Check out the clientid to ensure the server knows about it 9825 * so that we correctly inform the client of a server reboot. 9826 */ 9827 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE)) 9828 == NULL) { 9829 *cs->statusp = resp->status = 9830 rfs4_check_clientid(&args->owner.clientid, 0); 9831 goto out; 9832 } 9833 if (rfs4_lease_expired(cp)) { 9834 rfs4_client_close(cp); 9835 /* 9836 * Protocol doesn't allow returning NFS4ERR_STALE as 9837 * other operations do on this check so STALE_CLIENTID 9838 * is returned instead 9839 */ 9840 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID; 9841 goto out; 9842 } 9843 9844 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) { 9845 *cs->statusp = resp->status = NFS4ERR_GRACE; 9846 rfs4_client_rele(cp); 9847 goto out; 9848 } 9849 rfs4_client_rele(cp); 9850 9851 resp->status = NFS4_OK; 9852 9853 switch (args->locktype) { 9854 case READ_LT: 9855 case READW_LT: 9856 ltype = F_RDLCK; 9857 break; 9858 case WRITE_LT: 9859 case WRITEW_LT: 9860 ltype = F_WRLCK; 9861 break; 9862 } 9863 9864 posix_length = args->length; 9865 /* Check for zero length. To lock to end of file use all ones for V4 */ 9866 if (posix_length == 0) { 9867 *cs->statusp = resp->status = NFS4ERR_INVAL; 9868 goto out; 9869 } else if (posix_length == (length4)(~0)) { 9870 posix_length = 0; /* Posix to end of file */ 9871 } 9872 9873 /* 9874 * See RFC 8881 18.11.3: 9875 * The clientid field of the owner MAY be set to any value 9876 * by the client and MUST be ignored by the server. 
9877 */ 9878 if (rfs4_has_session(cs)) 9879 args->owner.clientid = cs->client->rc_clientid; 9880 9881 /* Find or create a lockowner */ 9882 lo = rfs4_findlockowner(&args->owner, &create); 9883 9884 if (lo) { 9885 pid = lo->rl_pid; 9886 if ((resp->status = 9887 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK) 9888 goto err; 9889 } else { 9890 pid = 0; 9891 sysid = lockt_sysid; 9892 } 9893 retry: 9894 flk.l_type = ltype; 9895 flk.l_whence = 0; /* SEEK_SET */ 9896 flk.l_start = args->offset; 9897 flk.l_len = posix_length; 9898 flk.l_sysid = sysid; 9899 flk.l_pid = pid; 9900 flag |= F_REMOTELOCK; 9901 9902 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk); 9903 9904 /* Note that length4 is uint64_t but l_len and l_start are off64_t */ 9905 if (flk.l_len < 0 || flk.l_start < 0) { 9906 resp->status = NFS4ERR_INVAL; 9907 goto err; 9908 } 9909 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0, 9910 NULL, cs->cr, NULL); 9911 9912 /* 9913 * N.B. We map error values to nfsv4 errors. This is differrent 9914 * than puterrno4 routine. 
9915 */ 9916 switch (error) { 9917 case 0: 9918 if (flk.l_type == F_UNLCK) 9919 resp->status = NFS4_OK; 9920 else { 9921 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED) 9922 goto retry; 9923 resp->status = NFS4ERR_DENIED; 9924 } 9925 break; 9926 case EOVERFLOW: 9927 resp->status = NFS4ERR_INVAL; 9928 break; 9929 case EINVAL: 9930 resp->status = NFS4ERR_NOTSUPP; 9931 break; 9932 default: 9933 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)", 9934 error); 9935 resp->status = NFS4ERR_SERVERFAULT; 9936 break; 9937 } 9938 9939 err: 9940 if (lo) 9941 rfs4_lockowner_rele(lo); 9942 *cs->statusp = resp->status; 9943 out: 9944 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs, 9945 LOCKT4res *, resp); 9946 } 9947 9948 int 9949 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny) 9950 { 9951 int err; 9952 int cmd; 9953 vnode_t *vp; 9954 struct shrlock shr; 9955 struct shr_locowner shr_loco; 9956 int fflags = 0; 9957 9958 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 9959 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID); 9960 9961 if (sp->rs_closed) 9962 return (NFS4ERR_OLD_STATEID); 9963 9964 vp = sp->rs_finfo->rf_vp; 9965 ASSERT(vp); 9966 9967 shr.s_access = shr.s_deny = 0; 9968 9969 if (access & OPEN4_SHARE_ACCESS_READ) { 9970 fflags |= FREAD; 9971 shr.s_access |= F_RDACC; 9972 } 9973 if (access & OPEN4_SHARE_ACCESS_WRITE) { 9974 fflags |= FWRITE; 9975 shr.s_access |= F_WRACC; 9976 } 9977 ASSERT(shr.s_access); 9978 9979 if (deny & OPEN4_SHARE_DENY_READ) 9980 shr.s_deny |= F_RDDNY; 9981 if (deny & OPEN4_SHARE_DENY_WRITE) 9982 shr.s_deny |= F_WRDNY; 9983 9984 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe); 9985 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt; 9986 shr_loco.sl_pid = shr.s_pid; 9987 shr_loco.sl_id = shr.s_sysid; 9988 shr.s_owner = (caddr_t)&shr_loco; 9989 shr.s_own_len = sizeof (shr_loco); 9990 9991 cmd = nbl_need_check(vp) ? 
F_SHARE_NBMAND : F_SHARE; 9992 9993 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL); 9994 if (err != 0) { 9995 if (err == EAGAIN) 9996 err = NFS4ERR_SHARE_DENIED; 9997 else 9998 err = puterrno4(err); 9999 return (err); 10000 } 10001 10002 sp->rs_share_access |= access; 10003 sp->rs_share_deny |= deny; 10004 10005 return (0); 10006 } 10007 10008 int 10009 rfs4_unshare(rfs4_state_t *sp) 10010 { 10011 int err; 10012 struct shrlock shr; 10013 struct shr_locowner shr_loco; 10014 10015 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 10016 10017 if (sp->rs_closed || sp->rs_share_access == 0) 10018 return (0); 10019 10020 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID); 10021 ASSERT(sp->rs_finfo->rf_vp); 10022 10023 shr.s_access = shr.s_deny = 0; 10024 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe); 10025 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt; 10026 shr_loco.sl_pid = shr.s_pid; 10027 shr_loco.sl_id = shr.s_sysid; 10028 shr.s_owner = (caddr_t)&shr_loco; 10029 shr.s_own_len = sizeof (shr_loco); 10030 10031 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(), 10032 NULL); 10033 if (err != 0) { 10034 err = puterrno4(err); 10035 return (err); 10036 } 10037 10038 sp->rs_share_access = 0; 10039 sp->rs_share_deny = 0; 10040 10041 return (0); 10042 10043 } 10044 10045 static int 10046 rdma_setup_read_data4(READ4args *args, READ4res *rok) 10047 { 10048 struct clist *wcl; 10049 count4 count = rok->data_len; 10050 int wlist_len; 10051 10052 wcl = args->wlist; 10053 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { 10054 return (FALSE); 10055 } 10056 wcl = args->wlist; 10057 rok->wlist_len = wlist_len; 10058 rok->wlist = wcl; 10059 return (TRUE); 10060 } 10061 10062 /* tunable to disable server referrals */ 10063 int rfs4_no_referrals = 0; 10064 10065 /* 10066 * Find an NFS record in reparse point data. 10067 * Returns 0 for success and <0 or an errno value on failure. 
10068 */ 10069 int 10070 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap) 10071 { 10072 int err; 10073 char *stype, *val; 10074 nvlist_t *nvl; 10075 nvpair_t *curr; 10076 10077 if ((nvl = reparse_init()) == NULL) 10078 return (-1); 10079 10080 if ((err = reparse_vnode_parse(vp, nvl)) != 0) { 10081 reparse_free(nvl); 10082 return (err); 10083 } 10084 10085 curr = NULL; 10086 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) { 10087 if ((stype = nvpair_name(curr)) == NULL) { 10088 reparse_free(nvl); 10089 return (-2); 10090 } 10091 if (strncasecmp(stype, "NFS", 3) == 0) 10092 break; 10093 } 10094 10095 if ((curr == NULL) || 10096 (nvpair_value_string(curr, &val))) { 10097 reparse_free(nvl); 10098 return (-3); 10099 } 10100 *nvlp = nvl; 10101 *svcp = stype; 10102 *datap = val; 10103 return (0); 10104 } 10105 10106 int 10107 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr) 10108 { 10109 nvlist_t *nvl; 10110 char *s, *d; 10111 10112 if (rfs4_no_referrals != 0) 10113 return (B_FALSE); 10114 10115 if (vn_is_reparse(vp, cr, NULL) == B_FALSE) 10116 return (B_FALSE); 10117 10118 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0) 10119 return (B_FALSE); 10120 10121 reparse_free(nvl); 10122 10123 return (B_TRUE); 10124 } 10125 10126 /* 10127 * There is a user-level copy of this routine in ref_subr.c. 10128 * Changes should be kept in sync. 
10129 */ 10130 static int 10131 nfs4_create_components(char *path, component4 *comp4) 10132 { 10133 int slen, plen, ncomp; 10134 char *ori_path, *nxtc, buf[MAXNAMELEN]; 10135 10136 if (path == NULL) 10137 return (0); 10138 10139 plen = strlen(path) + 1; /* include the terminator */ 10140 ori_path = path; 10141 ncomp = 0; 10142 10143 /* count number of components in the path */ 10144 for (nxtc = path; nxtc < ori_path + plen; nxtc++) { 10145 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') { 10146 if ((slen = nxtc - path) == 0) { 10147 path = nxtc + 1; 10148 continue; 10149 } 10150 10151 if (comp4 != NULL) { 10152 bcopy(path, buf, slen); 10153 buf[slen] = '\0'; 10154 (void) str_to_utf8(buf, &comp4[ncomp]); 10155 } 10156 10157 ncomp++; /* 1 valid component */ 10158 path = nxtc + 1; 10159 } 10160 if (*nxtc == '\0' || *nxtc == '\n') 10161 break; 10162 } 10163 10164 return (ncomp); 10165 } 10166 10167 /* 10168 * There is a user-level copy of this routine in ref_subr.c. 10169 * Changes should be kept in sync. 
10170 */ 10171 static int 10172 make_pathname4(char *path, pathname4 *pathname) 10173 { 10174 int ncomp; 10175 component4 *comp4; 10176 10177 if (pathname == NULL) 10178 return (0); 10179 10180 if (path == NULL) { 10181 pathname->pathname4_val = NULL; 10182 pathname->pathname4_len = 0; 10183 return (0); 10184 } 10185 10186 /* count number of components to alloc buffer */ 10187 if ((ncomp = nfs4_create_components(path, NULL)) == 0) { 10188 pathname->pathname4_val = NULL; 10189 pathname->pathname4_len = 0; 10190 return (0); 10191 } 10192 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP); 10193 10194 /* copy components into allocated buffer */ 10195 ncomp = nfs4_create_components(path, comp4); 10196 10197 pathname->pathname4_val = comp4; 10198 pathname->pathname4_len = ncomp; 10199 10200 return (ncomp); 10201 } 10202 10203 #define xdr_fs_locations4 xdr_fattr4_fs_locations 10204 10205 fs_locations4 * 10206 fetch_referral(vnode_t *vp, cred_t *cr) 10207 { 10208 nvlist_t *nvl; 10209 char *stype, *sdata; 10210 fs_locations4 *result; 10211 char buf[1024]; 10212 size_t bufsize; 10213 XDR xdr; 10214 int err; 10215 10216 /* 10217 * Check attrs to ensure it's a reparse point 10218 */ 10219 if (vn_is_reparse(vp, cr, NULL) == B_FALSE) 10220 return (NULL); 10221 10222 /* 10223 * Look for an NFS record and get the type and data 10224 */ 10225 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0) 10226 return (NULL); 10227 10228 /* 10229 * With the type and data, upcall to get the referral 10230 */ 10231 bufsize = sizeof (buf); 10232 bzero(buf, sizeof (buf)); 10233 err = reparse_kderef((const char *)stype, (const char *)sdata, 10234 buf, &bufsize); 10235 reparse_free(nvl); 10236 10237 DTRACE_PROBE4(nfs4serv__func__referral__upcall, 10238 char *, stype, char *, sdata, char *, buf, int, err); 10239 if (err) { 10240 cmn_err(CE_NOTE, 10241 "reparsed daemon not running: unable to get referral (%d)", 10242 err); 10243 return (NULL); 10244 } 10245 10246 /* 10247 * We get an 
XDR'ed record back from the kderef call 10248 */ 10249 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE); 10250 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP); 10251 err = xdr_fs_locations4(&xdr, result); 10252 XDR_DESTROY(&xdr); 10253 if (err != TRUE) { 10254 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail, 10255 int, err); 10256 return (NULL); 10257 } 10258 10259 /* 10260 * Look at path to recover fs_root, ignoring the leading '/' 10261 */ 10262 (void) make_pathname4(vp->v_path, &result->fs_root); 10263 10264 return (result); 10265 } 10266 10267 char * 10268 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz) 10269 { 10270 fs_locations4 *fsl; 10271 fs_location4 *fs; 10272 char *server, *path, *symbuf; 10273 static char *prefix = "/net/"; 10274 int i, size, npaths; 10275 uint_t len; 10276 10277 /* Get the referral */ 10278 if ((fsl = fetch_referral(vp, cr)) == NULL) 10279 return (NULL); 10280 10281 /* Deal with only the first location and first server */ 10282 fs = &fsl->locations_val[0]; 10283 server = utf8_to_str(&fs->server_val[0], &len, NULL); 10284 if (server == NULL) { 10285 rfs4_free_fs_locations4(fsl); 10286 kmem_free(fsl, sizeof (fs_locations4)); 10287 return (NULL); 10288 } 10289 10290 /* Figure out size for "/net/" + host + /path/path/path + NULL */ 10291 size = strlen(prefix) + len; 10292 for (i = 0; i < fs->rootpath.pathname4_len; i++) 10293 size += fs->rootpath.pathname4_val[i].utf8string_len + 1; 10294 10295 /* Allocate the symlink buffer and fill it */ 10296 symbuf = kmem_zalloc(size, KM_SLEEP); 10297 (void) strcat(symbuf, prefix); 10298 (void) strcat(symbuf, server); 10299 kmem_free(server, len); 10300 10301 npaths = 0; 10302 for (i = 0; i < fs->rootpath.pathname4_len; i++) { 10303 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL); 10304 if (path == NULL) 10305 continue; 10306 (void) strcat(symbuf, "/"); 10307 (void) strcat(symbuf, path); 10308 npaths++; 10309 kmem_free(path, len); 10310 } 10311 10312 
rfs4_free_fs_locations4(fsl); 10313 kmem_free(fsl, sizeof (fs_locations4)); 10314 10315 if (strsz != NULL) 10316 *strsz = size; 10317 return (symbuf); 10318 } 10319 10320 /* 10321 * Check to see if we have a downrev Solaris client, so that we 10322 * can send it a symlink instead of a referral. 10323 */ 10324 int 10325 client_is_downrev(struct svc_req *req) 10326 { 10327 struct sockaddr *ca; 10328 rfs4_clntip_t *ci; 10329 bool_t create = FALSE; 10330 int is_downrev; 10331 10332 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 10333 ASSERT(ca); 10334 ci = rfs4_find_clntip(ca, &create); 10335 if (ci == NULL) 10336 return (0); 10337 is_downrev = ci->ri_no_referrals; 10338 rfs4_dbe_rele(ci->ri_dbe); 10339 return (is_downrev); 10340 } 10341 10342 /* 10343 * Do the main work of handling HA-NFSv4 Resource Group failover on 10344 * Sun Cluster. 10345 * We need to detect whether any RG admin paths have been added or removed, 10346 * and adjust resources accordingly. 10347 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In 10348 * order to scale, the list and array of paths need to be held in more 10349 * suitable data structures. 10350 */ 10351 static void 10352 hanfsv4_failover(nfs4_srv_t *nsrv4) 10353 { 10354 int i, start_grace, numadded_paths = 0; 10355 char **added_paths = NULL; 10356 rfs4_dss_path_t *dss_path; 10357 10358 /* 10359 * Note: currently, dss_pathlist cannot be NULL, since 10360 * it will always include an entry for NFS4_DSS_VAR_DIR. If we 10361 * make the latter dynamically specified too, the following will 10362 * need to be adjusted. 10363 */ 10364 10365 /* 10366 * First, look for removed paths: RGs that have been failed-over 10367 * away from this node. 10368 * Walk the "currently-serving" dss_pathlist and, for each 10369 * path, check if it is on the "passed-in" rfs4_dss_newpaths array 10370 * from nfsd. If not, that RG path has been removed. 
10371 * 10372 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed 10373 * any duplicates. 10374 */ 10375 dss_path = nsrv4->dss_pathlist; 10376 do { 10377 int found = 0; 10378 char *path = dss_path->path; 10379 10380 /* used only for non-HA so may not be removed */ 10381 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) { 10382 dss_path = dss_path->next; 10383 continue; 10384 } 10385 10386 for (i = 0; i < rfs4_dss_numnewpaths; i++) { 10387 int cmpret; 10388 char *newpath = rfs4_dss_newpaths[i]; 10389 10390 /* 10391 * Since nfsd has sorted rfs4_dss_newpaths for us, 10392 * once the return from strcmp is negative we know 10393 * we've passed the point where "path" should be, 10394 * and can stop searching: "path" has been removed. 10395 */ 10396 cmpret = strcmp(path, newpath); 10397 if (cmpret < 0) 10398 break; 10399 if (cmpret == 0) { 10400 found = 1; 10401 break; 10402 } 10403 } 10404 10405 if (found == 0) { 10406 unsigned index = dss_path->index; 10407 rfs4_servinst_t *sip = dss_path->sip; 10408 rfs4_dss_path_t *path_next = dss_path->next; 10409 10410 /* 10411 * This path has been removed. 10412 * We must clear out the servinst reference to 10413 * it, since it's now owned by another 10414 * node: we should not attempt to touch it. 10415 */ 10416 ASSERT(dss_path == sip->dss_paths[index]); 10417 sip->dss_paths[index] = NULL; 10418 10419 /* remove from "currently-serving" list, and destroy */ 10420 remque(dss_path); 10421 /* allow for NUL */ 10422 kmem_free(dss_path->path, strlen(dss_path->path) + 1); 10423 kmem_free(dss_path, sizeof (rfs4_dss_path_t)); 10424 10425 dss_path = path_next; 10426 } else { 10427 /* path was found; not removed */ 10428 dss_path = dss_path->next; 10429 } 10430 } while (dss_path != nsrv4->dss_pathlist); 10431 10432 /* 10433 * Now, look for added paths: RGs that have been failed-over 10434 * to this node. 
10435 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and, 10436 * for each path, check if it is on the "currently-serving" 10437 * dss_pathlist. If not, that RG path has been added. 10438 * 10439 * Note: we don't do duplicate detection here; nfsd does that for us. 10440 * 10441 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us 10442 * an upper bound for the size needed for added_paths[numadded_paths]. 10443 */ 10444 10445 /* probably more space than we need, but guaranteed to be enough */ 10446 if (rfs4_dss_numnewpaths > 0) { 10447 size_t sz = rfs4_dss_numnewpaths * sizeof (char *); 10448 added_paths = kmem_zalloc(sz, KM_SLEEP); 10449 } 10450 10451 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */ 10452 for (i = 0; i < rfs4_dss_numnewpaths; i++) { 10453 int found = 0; 10454 char *newpath = rfs4_dss_newpaths[i]; 10455 10456 dss_path = nsrv4->dss_pathlist; 10457 do { 10458 char *path = dss_path->path; 10459 10460 /* used only for non-HA */ 10461 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) { 10462 dss_path = dss_path->next; 10463 continue; 10464 } 10465 10466 if (strncmp(path, newpath, strlen(path)) == 0) { 10467 found = 1; 10468 break; 10469 } 10470 10471 dss_path = dss_path->next; 10472 } while (dss_path != nsrv4->dss_pathlist); 10473 10474 if (found == 0) { 10475 added_paths[numadded_paths] = newpath; 10476 numadded_paths++; 10477 } 10478 } 10479 10480 /* did we find any added paths? 
*/ 10481 if (numadded_paths > 0) { 10482 10483 /* create a new server instance, and start its grace period */ 10484 start_grace = 1; 10485 /* CSTYLED */ 10486 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths); 10487 10488 /* read in the stable storage state from these paths */ 10489 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths); 10490 10491 /* 10492 * Multiple failovers during a grace period will cause 10493 * clients of the same resource group to be partitioned 10494 * into different server instances, with different 10495 * grace periods. Since clients of the same resource 10496 * group must be subject to the same grace period, 10497 * we need to reset all currently active grace periods. 10498 */ 10499 rfs4_grace_reset_all(nsrv4); 10500 } 10501 10502 if (rfs4_dss_numnewpaths > 0) 10503 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *)); 10504 } 10505