/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
 * All Rights Reserved
 */

/*
 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 * Copyright 2019 Nexenta Systems, Inc.
 * Copyright 2019 Nexenta by DDN, Inc.
 * Copyright 2021 Racktop Systems, Inc.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/statvfs.h>
#include <sys/kmem.h>
#include <sys/dirent.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/systeminfo.h>
#include <sys/flock.h>
#include <sys/pathname.h>
#include <sys/nbmlock.h>
#include <sys/share.h>
#include <sys/atomic.h>
#include <sys/policy.h>
#include <sys/fem.h>
#include <sys/sdt.h>
#include <sys/ddi.h>
#include <sys/zone.h>

#include <fs/fs_reparse.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/svc.h>

#include <nfs/nfs.h>
#include <nfs/nfssys.h>
#include <nfs/export.h>
#include <nfs/nfs_cmd.h>
#include <nfs/lm.h>
#include <nfs/nfs4.h>
#include <nfs/nfs4_drc.h>

#include <sys/strsubr.h>
#include <sys/strsun.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>

#include <sys/tsol/label.h>
#include <sys/tsol/tndb.h>

#define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define	RFS4_LOCK_DELAY 10	/* Milliseconds */
static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
#define	RFS4_LOOKUP_EXP_STATE_MAX 8 /* Limit of loop to clean expired states */
static int rfs4_lookup_exp_state_max = RFS4_LOOKUP_EXP_STATE_MAX;
/* End of Tunables */

static int rdma_setup_read_data4(READ4args *, READ4res *);

/*
 * Used to bump the stateid4.seqid value and show changes in the stateid
 */
#define	next_stateid(sp) (++(sp)->bits.chgseq)

/*
 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 *	This is used to return NFS4ERR_TOOSMALL when clients specify
 *	maxcount that isn't large enough to hold the smallest possible
 *	XDR encoded dirent.
 *
 *	    sizeof cookie (8 bytes) +
 *	    sizeof name_len (4 bytes) +
 *	    sizeof smallest (padded) name (4 bytes) +
 *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 *	    sizeof attrlist4_len (4 bytes) +
 *	    sizeof next boolean (4 bytes)
 *
 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 * the smallest possible entry4 (assumes no attrs requested).
 *	sizeof nfsstat4 (4 bytes) +
 *	sizeof verifier4 (8 bytes) +
 *	sizeof entry4list bool (4 bytes) +
 *	sizeof entry4	(36 bytes) +
 *	sizeof eof bool (4 bytes)
 *
 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 *	VOP_READDIR.  Its value is the size of the maximum possible dirent
 *	for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 *	required for a given name length.  MAXNAMELEN is the maximum
 *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 *	macros are to allow for . and .. entries -- just a minor tweak to try
 *	and guarantee that buffer we give to VOP_READDIR will be large enough
 *	to hold ., .., and the largest possible solaris dirent64.
 */
#define	RFS4_MINLEN_ENTRY4 36
#define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
#define	RFS4_MINLEN_RDDIR_BUF \
	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))

/*
 * It would be better to pad to 4 bytes since that's what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we're given.  Dircount is only a hint anyway.  Currently the
 * solaris kernel is ASCII only, so there's no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 *
 */
#define	DIRENT64_TO_DIRCOUNT(dp) \
	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))


static sysid_t		lockt_sysid;	/* dummy sysid for all LOCKT calls */

u_longlong_t	nfs4_srv_caller_id;
uint_t		nfs4_srv_vkey = 0;

void	rfs4_init_compound_state(struct compound_state *);

static void	nullfree(caddr_t);
static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *, struct compound_state *);
static void	rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *, struct compound_state *);
static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_getattr_free(nfs_resop4 *);
static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_getfh_free(nfs_resop4 *);
static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	lock_denied_free(nfs_resop4 *);
static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
		    struct svc_req *req, struct compound_state *cs);
static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *, struct compound_state *);
static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *, struct compound_state *);
static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_read_free(nfs_resop4 *);
static void	rfs4_op_readdir_free(nfs_resop4 *resop);
static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_readlink_free(nfs_resop4 *);
static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *, struct compound_state *);
static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *, struct compound_state *);
static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *req, struct compound_state *);
static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_secinfo_free(nfs_resop4 *);

void rfs4x_op_exchange_id(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs);
void rfs4x_exchange_id_free(nfs_resop4 *);

void rfs4x_op_create_session(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs);

void rfs4x_op_destroy_session(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, compound_state_t *cs);

void rfs4x_op_sequence(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs);

void rfs4x_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, compound_state_t *cs);

void rfs4x_op_destroy_clientid(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, compound_state_t *cs);

void rfs4x_op_bind_conn_to_session(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, compound_state_t *cs);

void rfs4x_op_secinfo_noname(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, compound_state_t *cs);
void rfs4x_op_free_stateid(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, compound_state_t *cs);

static nfsstat4 check_open_access(uint32_t, struct compound_state *,
		    struct svc_req *);
nfsstat4	rfs4_client_sysid(rfs4_client_t *, sysid_t *);
void		rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);

/*
 * translation table for attrs
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;
	uint8_t amap[NFS4_MAXNUM_ATTRS];
	int attrcnt;
	bool_t vfsstat;
};

static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
		    struct nfs4_svgetit_arg *sargp);

static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);

static void	hanfsv4_failover(nfs4_srv_t *);

/*
 * FEM templates for read and write delegations; created in rfs4_srvrinit()
 * from nfs4_rd_deleg_tmpl / nfs4_wr_deleg_tmpl and freed in rfs4_srvrfini().
 */
fem_t		*deleg_rdops;
fem_t		*deleg_wrops;

/*
 * NFS4 op dispatch table
 */

struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* OP_IDEMPOTENT, etc... */
};

/* Bit flags stored in rfsv4disp.dis_flags */
#define	OP_IDEMPOTENT	(1 << 0)
#define	OP_CLEAR_STATEID	(1 << 1)

/*
 * The table is positional: the comment above each entry gives the operation
 * number whose slot that entry occupies, so entries must not be reordered,
 * inserted or removed without keeping the numbering contiguous.  The DEBUG
 * rfs4_op_string[] table below lists names in the same order.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, OP_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, OP_CLEAR_STATEID},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, OP_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, OP_CLEAR_STATEID},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, OP_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, OP_IDEMPOTENT},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, OP_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, OP_IDEMPOTENT},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, OP_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, OP_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, OP_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, OP_IDEMPOTENT},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, OP_IDEMPOTENT},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, OP_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},

	/*
	 * NFSv4.1 operations
	 */

	/* OP_BACKCHANNEL_CTL = 40 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_BIND_CONN_TO_SESSION = 41 */
	{rfs4x_op_bind_conn_to_session, nullfree, 0},

	/* OP_EXCHANGE_ID = 42 */
	{rfs4x_op_exchange_id, rfs4x_exchange_id_free, 0},

	/* OP_CREATE_SESSION = 43 */
	{rfs4x_op_create_session, nullfree, 0},

	/* OP_DESTROY_SESSION = 44 */
	{rfs4x_op_destroy_session, nullfree, 0},

	/* OP_FREE_STATEID = 45 */
	{rfs4x_op_free_stateid, nullfree, 0},

	/* OP_GET_DIR_DELEGATION = 46 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_GETDEVICEINFO = 47 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_GETDEVICELIST = 48 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_LAYOUTCOMMIT = 49 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_LAYOUTGET = 50 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_LAYOUTRETURN = 51 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_SECINFO_NO_NAME = 52 */
	{rfs4x_op_secinfo_noname, rfs4_op_secinfo_free, 0},

	/* OP_SEQUENCE = 53 */
	{rfs4x_op_sequence, nullfree, 0},

	/* OP_SET_SSV = 54 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_TEST_STATEID = 55 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_WANT_DELEGATION = 56 */
	{rfs4_op_notsup, nullfree, 0},

	/* OP_DESTROY_CLIENTID = 57 */
	{rfs4x_op_destroy_clientid, nullfree, 0},

	/* OP_RECLAIM_COMPLETE = 58 */
	{rfs4x_op_reclaim_complete, nullfree, 0},
};

/* Number of entries in rfsv4disptab[] */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/* One past the last valid table index; also the "rfs4_op_illegal" name slot */
#define	OP_ILLEGAL_IDX (rfsv4disp_cnt)

#ifdef DEBUG

int		rfs4_fillone_debug = 0;
int		rfs4_no_stub_access = 1;
int		rfs4_rddir_debug = 0;

/*
 * Names in the same order as rfsv4disptab[], followed by one extra
 * trailing entry for the illegal operation.
 */
static char    *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	"rfs4_op_setclient_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	/* NFSv4.1 */
	"backchannel_ctl",
	"bind_conn_to_session",
	"exchange_id",
	"create_session",
	"destroy_session",
	"free_stateid",
	"get_dir_delegation",
	"getdeviceinfo",
	"getdevicelist",
	"layoutcommit",
	"layoutget",
	"layoutreturn",
	"secinfo_no_name",
	"sequence",
	"set_ssv",
	"test_stateid",
	"want_delegation",
	"destroy_clientid",
	"reclaim_complete",
	"rfs4_op_illegal"
};

#endif

void	rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);

extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);

extern void	rfs4_free_fs_locations4(fs_locations4 *);

/* Step from one dirent64 to the next using the record length it carries */
#ifdef	nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))

/* Vnode operations intercepted on a file with a read delegation */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
	NULL,			NULL
};
/* Vnode operations intercepted on a file with a write delegation */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
	NULL,			NULL
};


/*
 * Return the nfs4_srv_t stored in the nfs_globals_t that nfs_srv_getzg()
 * hands back.  ASSERTs that it has been set (see rfs4_srv_zone_init()).
 */
nfs4_srv_t *
nfs4_get_srv(void)
{
	nfs_globals_t *ng = nfs_srv_getzg();
	nfs4_srv_t *srv = ng->nfs4_srv;
	ASSERT(srv != NULL);
	return (srv);
}

void 629 rfs4_srv_zone_init(nfs_globals_t *ng) 630 { 631 nfs4_srv_t *nsrv4; 632 timespec32_t verf; 633 634 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP); 635 636 /* 637 * The following algorithm attempts to find a unique verifier 638 * to be used as the write verifier returned from the server 639 * to the client. It is important that this verifier change 640 * whenever the server reboots. Of secondary importance, it 641 * is important for the verifier to be unique between two 642 * different servers. 643 * 644 * Thus, an attempt is made to use the system hostid and the 645 * current time in seconds when the nfssrv kernel module is 646 * loaded. It is assumed that an NFS server will not be able 647 * to boot and then to reboot in less than a second. If the 648 * hostid has not been set, then the current high resolution 649 * time is used. This will ensure different verifiers each 650 * time the server reboots and minimize the chances that two 651 * different servers will have the same verifier. 652 * XXX - this is broken on LP64 kernels. 
653 */ 654 verf.tv_sec = (time_t)zone_get_hostid(NULL); 655 if (verf.tv_sec != 0) { 656 verf.tv_nsec = gethrestime_sec(); 657 } else { 658 timespec_t tverf; 659 660 gethrestime(&tverf); 661 verf.tv_sec = (time_t)tverf.tv_sec; 662 verf.tv_nsec = tverf.tv_nsec; 663 } 664 nsrv4->write4verf = *(uint64_t *)&verf; 665 666 /* Used to manage create/destroy of server state */ 667 nsrv4->nfs4_server_state = NULL; 668 nsrv4->nfs4_cur_servinst = NULL; 669 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE; 670 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL); 671 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL); 672 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL); 673 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL); 674 675 ng->nfs4_srv = nsrv4; 676 } 677 678 void 679 rfs4_srv_zone_fini(nfs_globals_t *ng) 680 { 681 nfs4_srv_t *nsrv4 = ng->nfs4_srv; 682 683 ng->nfs4_srv = NULL; 684 685 mutex_destroy(&nsrv4->deleg_lock); 686 mutex_destroy(&nsrv4->state_lock); 687 mutex_destroy(&nsrv4->servinst_lock); 688 rw_destroy(&nsrv4->deleg_policy_lock); 689 690 kmem_free(nsrv4, sizeof (*nsrv4)); 691 } 692 693 void 694 rfs4_srvrinit(void) 695 { 696 extern void rfs4_attr_init(); 697 698 rfs4_attr_init(); 699 700 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) { 701 rfs4_disable_delegation(); 702 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl, 703 &deleg_wrops) != 0) { 704 rfs4_disable_delegation(); 705 fem_free(deleg_rdops); 706 } 707 708 nfs4_srv_caller_id = fs_new_caller_id(); 709 lockt_sysid = lm_alloc_sysidt(); 710 vsd_create(&nfs4_srv_vkey, NULL); 711 rfs4_state_g_init(); 712 } 713 714 void 715 rfs4_srvrfini(void) 716 { 717 if (lockt_sysid != LM_NOSYSID) { 718 lm_free_sysidt(lockt_sysid); 719 lockt_sysid = LM_NOSYSID; 720 } 721 722 rfs4_state_g_fini(); 723 724 fem_free(deleg_rdops); 725 fem_free(deleg_wrops); 726 } 727 728 void 729 rfs4_do_server_start(int server_upordown, int srv_delegation, 730 nfs4_minor_t 
nfs4_minor_max, int cluster_booted) 731 { 732 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 733 734 /* Is this a warm start? */ 735 if (server_upordown == NFS_SERVER_QUIESCED) { 736 cmn_err(CE_NOTE, "nfs4_srv: " 737 "server was previously quiesced; " 738 "existing NFSv4 state will be re-used"); 739 740 /* 741 * HA-NFSv4: this is also the signal 742 * that a Resource Group failover has 743 * occurred. 744 */ 745 if (cluster_booted) 746 hanfsv4_failover(nsrv4); 747 } else { 748 /* Cold start */ 749 nsrv4->rfs4_start_time = 0; 750 rfs4_state_zone_init(nsrv4); 751 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max, 752 nfs4_drc_hash); 753 754 /* 755 * The nfsd service was started with the -s option 756 * we need to pull in any state from the paths indicated. 757 */ 758 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) { 759 /* read in the stable storage state from these paths */ 760 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths, 761 rfs4_dss_newpaths); 762 } 763 } 764 765 nsrv4->nfs4_minor_max = nfs4_minor_max; 766 767 /* Check if delegation is to be enabled */ 768 if (srv_delegation != FALSE) 769 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE); 770 } 771 772 void 773 rfs4_init_compound_state(struct compound_state *cs) 774 { 775 bzero(cs, sizeof (*cs)); 776 cs->cont = TRUE; 777 cs->access = CS_ACCESS_DENIED; 778 cs->deleg = FALSE; 779 cs->mandlock = FALSE; 780 cs->fh.nfs_fh4_val = cs->fhbuf; 781 } 782 783 /* Do cleanup of the compound_state */ 784 void 785 rfs4_fini_compound_state(struct compound_state *cs) 786 { 787 if (cs->vp) { 788 VN_RELE(cs->vp); 789 } 790 if (cs->saved_vp) { 791 VN_RELE(cs->saved_vp); 792 } 793 if (cs->cr) { 794 crfree(cs->cr); 795 } 796 if (cs->saved_fh.nfs_fh4_val) { 797 kmem_free(cs->saved_fh.nfs_fh4_val, NFS4_FHSIZE); 798 } 799 if (cs->sp) { 800 rfs4x_session_rele(cs->sp); 801 } 802 } 803 804 void 805 rfs4_grace_start(rfs4_servinst_t *sip) 806 { 807 rw_enter(&sip->rwlock, RW_WRITER); 808 sip->start_time = nfs_sys_uptime(); 809 sip->grace_period = 
rfs4_grace_period; 810 rw_exit(&sip->rwlock); 811 } 812 813 /* 814 * returns true if the instance's grace period has never been started 815 */ 816 int 817 rfs4_servinst_grace_new(rfs4_servinst_t *sip) 818 { 819 time_t start_time; 820 821 rw_enter(&sip->rwlock, RW_READER); 822 start_time = sip->start_time; 823 rw_exit(&sip->rwlock); 824 825 return (start_time == 0); 826 } 827 828 /* 829 * Indicates if server instance is within the 830 * grace period. 831 */ 832 int 833 rfs4_servinst_in_grace(rfs4_servinst_t *sip) 834 { 835 time_t grace_expiry; 836 837 /* All clients called reclaim-complete */ 838 if (sip->nreclaim == 0 || sip->grace_period == 0) 839 return (0); 840 841 rw_enter(&sip->rwlock, RW_READER); 842 grace_expiry = sip->start_time + sip->grace_period; 843 rw_exit(&sip->rwlock); 844 845 if (nfs_sys_uptime() < grace_expiry) 846 return (1); 847 848 /* Once grace period ends, optimize next calls */ 849 sip->grace_period = 0; 850 return (0); 851 } 852 853 int 854 rfs4_clnt_in_grace(rfs4_client_t *cp) 855 { 856 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0); 857 858 return (rfs4_servinst_in_grace(cp->rc_server_instance)); 859 } 860 861 /* 862 * reset all currently active grace periods 863 */ 864 void 865 rfs4_grace_reset_all(nfs4_srv_t *nsrv4) 866 { 867 rfs4_servinst_t *sip; 868 869 mutex_enter(&nsrv4->servinst_lock); 870 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) 871 if (rfs4_servinst_in_grace(sip)) 872 rfs4_grace_start(sip); 873 mutex_exit(&nsrv4->servinst_lock); 874 } 875 876 /* 877 * start any new instances' grace periods 878 */ 879 void 880 rfs4_grace_start_new(nfs4_srv_t *nsrv4) 881 { 882 rfs4_servinst_t *sip; 883 884 mutex_enter(&nsrv4->servinst_lock); 885 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) 886 if (rfs4_servinst_grace_new(sip)) 887 rfs4_grace_start(sip); 888 mutex_exit(&nsrv4->servinst_lock); 889 } 890 891 static rfs4_dss_path_t * 892 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip, 893 char *path, 
unsigned index) 894 { 895 size_t len; 896 rfs4_dss_path_t *dss_path; 897 898 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP); 899 900 /* 901 * Take a copy of the string, since the original may be overwritten. 902 * Sadly, no strdup() in the kernel. 903 */ 904 /* allow for NUL */ 905 len = strlen(path) + 1; 906 dss_path->path = kmem_alloc(len, KM_SLEEP); 907 (void) strlcpy(dss_path->path, path, len); 908 909 /* associate with servinst */ 910 dss_path->sip = sip; 911 dss_path->index = index; 912 913 /* 914 * Add to list of served paths. 915 * No locking required, as we're only ever called at startup. 916 */ 917 if (nsrv4->dss_pathlist == NULL) { 918 /* this is the first dss_path_t */ 919 920 /* needed for insque/remque */ 921 dss_path->next = dss_path->prev = dss_path; 922 923 nsrv4->dss_pathlist = dss_path; 924 } else { 925 insque(dss_path, nsrv4->dss_pathlist); 926 } 927 928 return (dss_path); 929 } 930 931 /* 932 * Create a new server instance, and make it the currently active instance. 933 * Note that starting the grace period too early will reduce the clients' 934 * recovery window. 935 */ 936 void 937 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace, 938 int dss_npaths, char **dss_paths) 939 { 940 unsigned i; 941 rfs4_servinst_t *sip; 942 rfs4_oldstate_t *oldstate; 943 944 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP); 945 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL); 946 947 sip->nreclaim = 0; 948 sip->start_time = (time_t)0; 949 sip->grace_period = (time_t)0; 950 sip->next = NULL; 951 sip->prev = NULL; 952 953 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL); 954 /* 955 * This initial dummy entry is required to setup for insque/remque. 956 * It must be skipped over whenever the list is traversed. 
957 */ 958 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP); 959 /* insque/remque require initial list entry to be self-terminated */ 960 oldstate->next = oldstate; 961 oldstate->prev = oldstate; 962 sip->oldstate = oldstate; 963 964 965 sip->dss_npaths = dss_npaths; 966 sip->dss_paths = kmem_alloc(dss_npaths * 967 sizeof (rfs4_dss_path_t *), KM_SLEEP); 968 969 for (i = 0; i < dss_npaths; i++) { 970 sip->dss_paths[i] = 971 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i); 972 } 973 974 mutex_enter(&nsrv4->servinst_lock); 975 if (nsrv4->nfs4_cur_servinst != NULL) { 976 /* add to linked list */ 977 sip->prev = nsrv4->nfs4_cur_servinst; 978 nsrv4->nfs4_cur_servinst->next = sip; 979 } 980 if (start_grace) 981 rfs4_grace_start(sip); 982 /* make the new instance "current" */ 983 nsrv4->nfs4_cur_servinst = sip; 984 985 mutex_exit(&nsrv4->servinst_lock); 986 } 987 988 /* 989 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy 990 * all instances directly. 991 */ 992 void 993 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4) 994 { 995 rfs4_servinst_t *sip, *prev, *current; 996 #ifdef DEBUG 997 int n = 0; 998 #endif 999 1000 mutex_enter(&nsrv4->servinst_lock); 1001 ASSERT(nsrv4->nfs4_cur_servinst != NULL); 1002 current = nsrv4->nfs4_cur_servinst; 1003 nsrv4->nfs4_cur_servinst = NULL; 1004 for (sip = current; sip != NULL; sip = prev) { 1005 prev = sip->prev; 1006 rw_destroy(&sip->rwlock); 1007 if (sip->oldstate) 1008 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t)); 1009 if (sip->dss_paths) { 1010 int i = sip->dss_npaths; 1011 1012 while (i > 0) { 1013 i--; 1014 if (sip->dss_paths[i] != NULL) { 1015 char *path = sip->dss_paths[i]->path; 1016 1017 if (path != NULL) { 1018 kmem_free(path, 1019 strlen(path) + 1); 1020 } 1021 kmem_free(sip->dss_paths[i], 1022 sizeof (rfs4_dss_path_t)); 1023 } 1024 } 1025 kmem_free(sip->dss_paths, 1026 sip->dss_npaths * sizeof (rfs4_dss_path_t *)); 1027 } 1028 kmem_free(sip, sizeof (rfs4_servinst_t)); 1029 #ifdef DEBUG 
1030 n++; 1031 #endif 1032 } 1033 mutex_exit(&nsrv4->servinst_lock); 1034 } 1035 1036 /* 1037 * Assign the current server instance to a client_t. 1038 * Should be called with cp->rc_dbe held. 1039 */ 1040 void 1041 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp, 1042 rfs4_servinst_t *sip) 1043 { 1044 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0); 1045 1046 /* 1047 * The lock ensures that if the current instance is in the process 1048 * of changing, we will see the new one. 1049 */ 1050 mutex_enter(&nsrv4->servinst_lock); 1051 cp->rc_server_instance = sip; 1052 mutex_exit(&nsrv4->servinst_lock); 1053 } 1054 1055 rfs4_servinst_t * 1056 rfs4_servinst(rfs4_client_t *cp) 1057 { 1058 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0); 1059 1060 return (cp->rc_server_instance); 1061 } 1062 1063 /* ARGSUSED */ 1064 static void 1065 nullfree(caddr_t resop) 1066 { 1067 } 1068 1069 /* 1070 * This is a fall-through for invalid or not implemented (yet) ops 1071 */ 1072 /* ARGSUSED */ 1073 static void 1074 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1075 struct compound_state *cs) 1076 { 1077 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL; 1078 } 1079 1080 /* 1081 * Check if the security flavor, nfsnum, is in the flavor_list. 1082 */ 1083 bool_t 1084 in_flavor_list(int nfsnum, int *flavor_list, int count) 1085 { 1086 int i; 1087 1088 for (i = 0; i < count; i++) { 1089 if (nfsnum == flavor_list[i]) 1090 return (TRUE); 1091 } 1092 return (FALSE); 1093 } 1094 1095 /* 1096 * Used by rfs4_op_secinfo to get the security information from the 1097 * export structure associated with the component. 
1098 */ 1099 /* ARGSUSED */ 1100 nfsstat4 1101 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) 1102 { 1103 int error, different_export = 0; 1104 vnode_t *dvp, *vp; 1105 struct exportinfo *exi; 1106 fid_t fid; 1107 uint_t count, i; 1108 secinfo4 *resok_val; 1109 struct secinfo *secp; 1110 seconfig_t *si; 1111 bool_t did_traverse = FALSE; 1112 int dotdot, walk; 1113 nfs_export_t *ne = nfs_get_export(); 1114 1115 dvp = cs->vp; 1116 exi = cs->exi; 1117 ASSERT(exi != NULL); 1118 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0'); 1119 1120 /* 1121 * If dotdotting, then need to check whether it's above the 1122 * root of a filesystem, or above an export point. 1123 */ 1124 if (dotdot) { 1125 vnode_t *zone_rootvp = ne->exi_root->exi_vp; 1126 1127 ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid); 1128 /* 1129 * If dotdotting at the root of a filesystem, then 1130 * need to traverse back to the mounted-on filesystem 1131 * and do the dotdot lookup there. 1132 */ 1133 if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) { 1134 1135 /* 1136 * If at the system root, then can 1137 * go up no further. 1138 */ 1139 if (VN_CMP(dvp, zone_rootvp)) 1140 return (puterrno4(ENOENT)); 1141 1142 /* 1143 * Traverse back to the mounted-on filesystem 1144 */ 1145 dvp = untraverse(dvp, zone_rootvp); 1146 1147 /* 1148 * Set the different_export flag so we remember 1149 * to pick up a new exportinfo entry for 1150 * this new filesystem. 1151 */ 1152 different_export = 1; 1153 } else { 1154 1155 /* 1156 * If dotdotting above an export point then set 1157 * the different_export to get new export info. 1158 */ 1159 different_export = nfs_exported(exi, dvp); 1160 } 1161 } 1162 1163 /* 1164 * Get the vnode for the component "nm". 
1165 */ 1166 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr, 1167 NULL, NULL, NULL); 1168 if (error) 1169 return (puterrno4(error)); 1170 1171 /* 1172 * If the vnode is in a pseudo filesystem, or if the security flavor 1173 * used in the request is valid but not an explicitly shared flavor, 1174 * or the access bit indicates that this is a limited access, 1175 * check whether this vnode is visible. 1176 */ 1177 if (!different_export && 1178 (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) || 1179 cs->access & CS_ACCESS_LIMITED)) { 1180 if (! nfs_visible(exi, vp, &different_export)) { 1181 VN_RELE(vp); 1182 return (puterrno4(ENOENT)); 1183 } 1184 } 1185 1186 /* 1187 * If it's a mountpoint, then traverse it. 1188 */ 1189 if (vn_ismntpt(vp)) { 1190 if ((error = traverse(&vp)) != 0) { 1191 VN_RELE(vp); 1192 return (puterrno4(error)); 1193 } 1194 /* remember that we had to traverse mountpoint */ 1195 did_traverse = TRUE; 1196 different_export = 1; 1197 } else if (vp->v_vfsp != dvp->v_vfsp) { 1198 /* 1199 * If vp isn't a mountpoint and the vfs ptrs aren't the same, 1200 * then vp is probably an LOFS object. We don't need the 1201 * realvp, we just need to know that we might have crossed 1202 * a server fs boundary and need to call checkexport4. 1203 * (LOFS lookup hides server fs mountpoints, and actually calls 1204 * traverse) 1205 */ 1206 different_export = 1; 1207 } 1208 1209 /* 1210 * Get the export information for it. 1211 */ 1212 if (different_export) { 1213 1214 bzero(&fid, sizeof (fid)); 1215 fid.fid_len = MAXFIDSZ; 1216 error = vop_fid_pseudo(vp, &fid); 1217 if (error) { 1218 VN_RELE(vp); 1219 return (puterrno4(error)); 1220 } 1221 1222 /* We'll need to reassign "exi". 
*/ 1223 if (dotdot) 1224 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE); 1225 else 1226 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp); 1227 1228 if (exi == NULL) { 1229 if (did_traverse == TRUE) { 1230 /* 1231 * If this vnode is a mounted-on vnode, 1232 * but the mounted-on file system is not 1233 * exported, send back the secinfo for 1234 * the exported node that the mounted-on 1235 * vnode lives in. 1236 */ 1237 exi = cs->exi; 1238 } else { 1239 VN_RELE(vp); 1240 return (puterrno4(EACCES)); 1241 } 1242 } 1243 } 1244 ASSERT(exi != NULL); 1245 1246 1247 /* 1248 * Create the secinfo result based on the security information 1249 * from the exportinfo structure (exi). 1250 * 1251 * Return all flavors for a pseudo node. 1252 * For a real export node, return the flavor that the client 1253 * has access with. 1254 */ 1255 ASSERT(RW_LOCK_HELD(&ne->exported_lock)); 1256 if (PSEUDO(exi)) { 1257 count = exi->exi_export.ex_seccnt; /* total sec count */ 1258 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP); 1259 secp = exi->exi_export.ex_secinfo; 1260 1261 for (i = 0; i < count; i++) { 1262 si = &secp[i].s_secinfo; 1263 resok_val[i].flavor = si->sc_rpcnum; 1264 if (resok_val[i].flavor == RPCSEC_GSS) { 1265 rpcsec_gss_info *info; 1266 1267 info = &resok_val[i].flavor_info; 1268 info->qop = si->sc_qop; 1269 info->service = (rpc_gss_svc_t)si->sc_service; 1270 1271 /* get oid opaque data */ 1272 info->oid.sec_oid4_len = 1273 si->sc_gss_mech_type->length; 1274 info->oid.sec_oid4_val = kmem_alloc( 1275 si->sc_gss_mech_type->length, KM_SLEEP); 1276 bcopy( 1277 si->sc_gss_mech_type->elements, 1278 info->oid.sec_oid4_val, 1279 info->oid.sec_oid4_len); 1280 } 1281 } 1282 resp->SECINFO4resok_len = count; 1283 resp->SECINFO4resok_val = resok_val; 1284 } else { 1285 int ret_cnt = 0, k = 0; 1286 int *flavor_list; 1287 1288 count = exi->exi_export.ex_seccnt; /* total sec count */ 1289 secp = exi->exi_export.ex_secinfo; 1290 1291 flavor_list = kmem_alloc(count * 
sizeof (int), KM_SLEEP); 1292 /* find out which flavors to return */ 1293 for (i = 0; i < count; i ++) { 1294 int access, flavor, perm; 1295 1296 flavor = secp[i].s_secinfo.sc_nfsnum; 1297 perm = secp[i].s_flags; 1298 1299 access = nfsauth4_secinfo_access(exi, cs->req, 1300 flavor, perm, cs->basecr); 1301 1302 if (! (access & NFSAUTH_DENIED) && 1303 ! (access & NFSAUTH_WRONGSEC)) { 1304 flavor_list[ret_cnt] = flavor; 1305 ret_cnt++; 1306 } 1307 } 1308 1309 /* Create the returning SECINFO value */ 1310 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP); 1311 1312 for (i = 0; i < count; i++) { 1313 /* 1314 * If the flavor is in the flavor list, 1315 * fill in resok_val. 1316 */ 1317 si = &secp[i].s_secinfo; 1318 if (in_flavor_list(si->sc_nfsnum, 1319 flavor_list, ret_cnt)) { 1320 resok_val[k].flavor = si->sc_rpcnum; 1321 if (resok_val[k].flavor == RPCSEC_GSS) { 1322 rpcsec_gss_info *info; 1323 1324 info = &resok_val[k].flavor_info; 1325 info->qop = si->sc_qop; 1326 info->service = (rpc_gss_svc_t) 1327 si->sc_service; 1328 1329 /* get oid opaque data */ 1330 info->oid.sec_oid4_len = 1331 si->sc_gss_mech_type->length; 1332 info->oid.sec_oid4_val = kmem_alloc( 1333 si->sc_gss_mech_type->length, 1334 KM_SLEEP); 1335 bcopy(si->sc_gss_mech_type->elements, 1336 info->oid.sec_oid4_val, 1337 info->oid.sec_oid4_len); 1338 } 1339 k++; 1340 } 1341 if (k >= ret_cnt) 1342 break; 1343 } 1344 resp->SECINFO4resok_len = ret_cnt; 1345 resp->SECINFO4resok_val = resok_val; 1346 kmem_free(flavor_list, count * sizeof (int)); 1347 } 1348 1349 VN_RELE(vp); 1350 return (NFS4_OK); 1351 } 1352 1353 /* 1354 * SECINFO (Operation 33): Obtain required security information on 1355 * the component name in the format of (security-mechanism-oid, qop, service) 1356 * triplets. 
1357 */ 1358 /* ARGSUSED */ 1359 static void 1360 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1361 struct compound_state *cs) 1362 { 1363 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo; 1364 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo; 1365 utf8string *utfnm = &args->name; 1366 uint_t len; 1367 char *nm; 1368 struct sockaddr *ca; 1369 char *name = NULL; 1370 nfsstat4 status = NFS4_OK; 1371 1372 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs, 1373 SECINFO4args *, args); 1374 1375 /* 1376 * Current file handle (cfh) should have been set before getting 1377 * into this function. If not, return error. 1378 */ 1379 if (cs->vp == NULL) { 1380 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1381 goto out; 1382 } 1383 1384 if (cs->vp->v_type != VDIR) { 1385 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 1386 goto out; 1387 } 1388 1389 /* 1390 * Verify the component name. If failed, error out, but 1391 * do not error out if the component name is a "..". 1392 * SECINFO will return its parents secinfo data for SECINFO "..". 1393 */ 1394 status = utf8_dir_verify(utfnm); 1395 if (status != NFS4_OK) { 1396 if (utfnm->utf8string_len != 2 || 1397 utfnm->utf8string_val[0] != '.' 
|| 1398 utfnm->utf8string_val[1] != '.') { 1399 *cs->statusp = resp->status = status; 1400 goto out; 1401 } 1402 } 1403 1404 nm = utf8_to_str(utfnm, &len, NULL); 1405 if (nm == NULL) { 1406 *cs->statusp = resp->status = NFS4ERR_INVAL; 1407 goto out; 1408 } 1409 1410 if (len > MAXNAMELEN) { 1411 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1412 kmem_free(nm, len); 1413 goto out; 1414 } 1415 1416 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1417 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 1418 MAXPATHLEN + 1); 1419 1420 if (name == NULL) { 1421 *cs->statusp = resp->status = NFS4ERR_INVAL; 1422 kmem_free(nm, len); 1423 goto out; 1424 } 1425 1426 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp); 1427 1428 if (resp->status == NFS4_OK && rfs4_has_session(cs)) { 1429 /* 1430 * See rfc 5661 section 2.6.3.1.1.8 and 18.29.3 1431 * 1432 * 2.6.3.1.1.8 1433 * SECINFO and SECINFO_NO_NAME consume the current 1434 * filehandle (note that this is a change from NFSv4.0). 1435 * 1436 * 18.29.3 1437 * On success, the current filehandle is consumed (see 1438 * Section 2.6.3.1.1.8), and if the next operation after 1439 * SECINFO tries to use the current filehandle, that 1440 * operation will fail with the status 1441 * NFS4ERR_NOFILEHANDLE. 1442 */ 1443 VN_RELE(cs->vp); 1444 cs->vp = NULL; 1445 } 1446 1447 if (name != nm) 1448 kmem_free(name, MAXPATHLEN + 1); 1449 kmem_free(nm, len); 1450 1451 out: 1452 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs, 1453 SECINFO4res *, resp); 1454 } 1455 1456 /* 1457 * Free SECINFO result. 1458 */ 1459 /* ARGSUSED */ 1460 static void 1461 rfs4_op_secinfo_free(nfs_resop4 *resop) 1462 { 1463 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo; 1464 int count, i; 1465 secinfo4 *resok_val; 1466 1467 /* If this is not an Ok result, nothing to free. 
*/ 1468 if (resp->status != NFS4_OK) { 1469 return; 1470 } 1471 1472 count = resp->SECINFO4resok_len; 1473 resok_val = resp->SECINFO4resok_val; 1474 1475 for (i = 0; i < count; i++) { 1476 if (resok_val[i].flavor == RPCSEC_GSS) { 1477 rpcsec_gss_info *info; 1478 1479 info = &resok_val[i].flavor_info; 1480 kmem_free(info->oid.sec_oid4_val, 1481 info->oid.sec_oid4_len); 1482 } 1483 } 1484 kmem_free(resok_val, count * sizeof (secinfo4)); 1485 resp->SECINFO4resok_len = 0; 1486 resp->SECINFO4resok_val = NULL; 1487 } 1488 1489 /* ARGSUSED */ 1490 static void 1491 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1492 struct compound_state *cs) 1493 { 1494 ACCESS4args *args = &argop->nfs_argop4_u.opaccess; 1495 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess; 1496 int error; 1497 vnode_t *vp; 1498 struct vattr va; 1499 int checkwriteperm; 1500 cred_t *cr = cs->cr; 1501 bslabel_t *clabel, *slabel; 1502 ts_label_t *tslabel; 1503 boolean_t admin_low_client; 1504 1505 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs, 1506 ACCESS4args *, args); 1507 1508 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */ 1509 if (cs->access == CS_ACCESS_DENIED) { 1510 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1511 goto out; 1512 } 1513 #endif 1514 if (cs->vp == NULL) { 1515 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1516 goto out; 1517 } 1518 1519 ASSERT(cr != NULL); 1520 1521 vp = cs->vp; 1522 1523 /* 1524 * If the file system is exported read only, it is not appropriate 1525 * to check write permissions for regular files and directories. 1526 * Special files are interpreted by the client, so the underlying 1527 * permissions are sent back to the client for interpretation. 
1528 */ 1529 if (rdonly4(req, cs) && 1530 (vp->v_type == VREG || vp->v_type == VDIR)) 1531 checkwriteperm = 0; 1532 else 1533 checkwriteperm = 1; 1534 1535 /* 1536 * XXX 1537 * We need the mode so that we can correctly determine access 1538 * permissions relative to a mandatory lock file. Access to 1539 * mandatory lock files is denied on the server, so it might 1540 * as well be reflected to the server during the open. 1541 */ 1542 va.va_mask = AT_MODE; 1543 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1544 if (error) { 1545 *cs->statusp = resp->status = puterrno4(error); 1546 goto out; 1547 } 1548 resp->access = 0; 1549 resp->supported = 0; 1550 1551 if (is_system_labeled()) { 1552 ASSERT(req->rq_label != NULL); 1553 clabel = req->rq_label; 1554 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *, 1555 "got client label from request(1)", 1556 struct svc_req *, req); 1557 if (!blequal(&l_admin_low->tsl_label, clabel)) { 1558 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) { 1559 *cs->statusp = resp->status = puterrno4(EACCES); 1560 goto out; 1561 } 1562 slabel = label2bslabel(tslabel); 1563 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel, 1564 char *, "got server label(1) for vp(2)", 1565 bslabel_t *, slabel, vnode_t *, vp); 1566 1567 admin_low_client = B_FALSE; 1568 } else 1569 admin_low_client = B_TRUE; 1570 } 1571 1572 if (args->access & ACCESS4_READ) { 1573 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 1574 if (!error && !MANDLOCK(vp, va.va_mode) && 1575 (!is_system_labeled() || admin_low_client || 1576 bldominates(clabel, slabel))) 1577 resp->access |= ACCESS4_READ; 1578 resp->supported |= ACCESS4_READ; 1579 } 1580 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) { 1581 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 1582 if (!error && (!is_system_labeled() || admin_low_client || 1583 bldominates(clabel, slabel))) 1584 resp->access |= ACCESS4_LOOKUP; 1585 resp->supported |= ACCESS4_LOOKUP; 1586 } 1587 if (checkwriteperm && 1588 (args->access 
& (ACCESS4_MODIFY|ACCESS4_EXTEND))) { 1589 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 1590 if (!error && !MANDLOCK(vp, va.va_mode) && 1591 (!is_system_labeled() || admin_low_client || 1592 blequal(clabel, slabel))) 1593 resp->access |= 1594 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND)); 1595 resp->supported |= 1596 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND); 1597 } 1598 1599 if (checkwriteperm && 1600 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) { 1601 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 1602 if (!error && (!is_system_labeled() || admin_low_client || 1603 blequal(clabel, slabel))) 1604 resp->access |= ACCESS4_DELETE; 1605 resp->supported |= ACCESS4_DELETE; 1606 } 1607 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) { 1608 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 1609 if (!error && !MANDLOCK(vp, va.va_mode) && 1610 (!is_system_labeled() || admin_low_client || 1611 bldominates(clabel, slabel))) 1612 resp->access |= ACCESS4_EXECUTE; 1613 resp->supported |= ACCESS4_EXECUTE; 1614 } 1615 1616 if (is_system_labeled() && !admin_low_client) 1617 label_rele(tslabel); 1618 1619 *cs->statusp = resp->status = NFS4_OK; 1620 out: 1621 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs, 1622 ACCESS4res *, resp); 1623 } 1624 1625 /* ARGSUSED */ 1626 static void 1627 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1628 struct compound_state *cs) 1629 { 1630 COMMIT4args *args = &argop->nfs_argop4_u.opcommit; 1631 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit; 1632 int error; 1633 vnode_t *vp = cs->vp; 1634 cred_t *cr = cs->cr; 1635 vattr_t va; 1636 nfs4_srv_t *nsrv4; 1637 1638 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs, 1639 COMMIT4args *, args); 1640 1641 if (vp == NULL) { 1642 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1643 goto out; 1644 } 1645 if (cs->access == CS_ACCESS_DENIED) { 1646 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1647 goto out; 1648 } 1649 1650 if 
(args->offset + args->count < args->offset) { 1651 *cs->statusp = resp->status = NFS4ERR_INVAL; 1652 goto out; 1653 } 1654 1655 va.va_mask = AT_UID; 1656 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1657 1658 /* 1659 * If we can't get the attributes, then we can't do the 1660 * right access checking. So, we'll fail the request. 1661 */ 1662 if (error) { 1663 *cs->statusp = resp->status = puterrno4(error); 1664 goto out; 1665 } 1666 if (rdonly4(req, cs)) { 1667 *cs->statusp = resp->status = NFS4ERR_ROFS; 1668 goto out; 1669 } 1670 1671 if (vp->v_type != VREG) { 1672 if (vp->v_type == VDIR) 1673 resp->status = NFS4ERR_ISDIR; 1674 else 1675 resp->status = NFS4ERR_INVAL; 1676 *cs->statusp = resp->status; 1677 goto out; 1678 } 1679 1680 if (crgetuid(cr) != va.va_uid && 1681 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) { 1682 *cs->statusp = resp->status = puterrno4(error); 1683 goto out; 1684 } 1685 1686 error = VOP_FSYNC(vp, FSYNC, cr, NULL); 1687 1688 if (error) { 1689 *cs->statusp = resp->status = puterrno4(error); 1690 goto out; 1691 } 1692 1693 nsrv4 = nfs4_get_srv(); 1694 *cs->statusp = resp->status = NFS4_OK; 1695 resp->writeverf = nsrv4->write4verf; 1696 out: 1697 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs, 1698 COMMIT4res *, resp); 1699 } 1700 1701 /* 1702 * rfs4_op_mknod is called from rfs4_op_create after all initial verification 1703 * was completed. It does the nfsv4 create for special files. 
1704 */ 1705 /* ARGSUSED */ 1706 static vnode_t * 1707 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req, 1708 struct compound_state *cs, vattr_t *vap, char *nm) 1709 { 1710 int error; 1711 cred_t *cr = cs->cr; 1712 vnode_t *dvp = cs->vp; 1713 vnode_t *vp = NULL; 1714 int mode; 1715 enum vcexcl excl; 1716 1717 switch (args->type) { 1718 case NF4CHR: 1719 case NF4BLK: 1720 if (secpolicy_sys_devices(cr) != 0) { 1721 *cs->statusp = resp->status = NFS4ERR_PERM; 1722 return (NULL); 1723 } 1724 if (args->type == NF4CHR) 1725 vap->va_type = VCHR; 1726 else 1727 vap->va_type = VBLK; 1728 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1, 1729 args->ftype4_u.devdata.specdata2); 1730 vap->va_mask |= AT_RDEV; 1731 break; 1732 case NF4SOCK: 1733 vap->va_type = VSOCK; 1734 break; 1735 case NF4FIFO: 1736 vap->va_type = VFIFO; 1737 break; 1738 default: 1739 *cs->statusp = resp->status = NFS4ERR_BADTYPE; 1740 return (NULL); 1741 } 1742 1743 /* 1744 * Must specify the mode. 1745 */ 1746 if (!(vap->va_mask & AT_MODE)) { 1747 *cs->statusp = resp->status = NFS4ERR_INVAL; 1748 return (NULL); 1749 } 1750 1751 excl = EXCL; 1752 1753 mode = 0; 1754 1755 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL); 1756 if (error) { 1757 *cs->statusp = resp->status = puterrno4(error); 1758 return (NULL); 1759 } 1760 return (vp); 1761 } 1762 1763 /* 1764 * nfsv4 create is used to create non-regular files. For regular files, 1765 * use nfsv4 open. 
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
	int error;
	struct vattr bva, iva, iva2, ava, *vap;
	cred_t *cr = cs->cr;
	vnode_t *dvp = cs->vp;
	vnode_t *vp = NULL;
	vnode_t *realvp;
	char *nm, *lnm;
	uint_t len, llen;
	int syncval = 0;
	struct nfs4_svgetit_arg sarg;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	nfsstat4 status;
	struct sockaddr *ca;
	char *name = NULL;
	char *lname = NULL;

	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
	    CREATE4args *, args);

	resp->attrset = 0;

	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to create an object in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Verify that type is correct */
	switch (args->type) {
	case NF4LNK:
	case NF4BLK:
	case NF4CHR:
	case NF4SOCK:
	case NF4FIFO:
	case NF4DIR:
		break;
	default:
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		goto out;
	};

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}
	status = utf8_dir_verify(&args->objname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	/*
	 * Name of newly created object
	 */
	nm = utf8_to_fn(&args->objname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	/*
	 * From here on every exit path frees "nm" and, when it is a
	 * different buffer, the MAXPATHLEN + 1 byte "name" as well.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	resp->attrset = 0;

	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	nfs4_ntov_table_init(&ntov);

	/*
	 * Decode createattrs; once this has been called, every exit path
	 * also releases the table with nfs4_ntov_table_free().
	 */
	status = do_rfs4_set_attrs(&resp->attrset,
	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);

	/* a create with no attributes at all is rejected */
	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
		status = NFS4ERR_INVAL;

	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/* Get "before" change value */
	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

	vap = sarg.vap;

	/*
	 * Set the default initial values for attributes when the parent
	 * directory does not have the VSUID/VSGID bit set and they have
	 * not been specified in createattrs.
	 */
	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
		vap->va_uid = crgetuid(cr);
		vap->va_mask |= AT_UID;
	}
	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
		vap->va_gid = crgetgid(cr);
		vap->va_mask |= AT_GID;
	}

	/*
	 * Create the object.  Each case leaves the new vnode in "vp" and
	 * the directory's post-create sequence number in iva.va_seq.
	 */
	vap->va_mask |= AT_TYPE;
	switch (args->type) {
	case NF4DIR:
		vap->va_type = VDIR;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}
		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;
		break;
	case NF4LNK:
		vap->va_type = VLNK;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}

		/*
		 * symlink names must be treated as data
		 */
		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
		    &llen, NULL);

		if (lnm == NULL) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		if (llen > MAXPATHLEN) {
			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		lname = nfscmd_convname(ca, cs->exi, lnm,
		    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

		if (lname == NULL) {
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
		/* the link text is not needed past the VOP_SYMLINK call */
		if (lname != lnm)
			kmem_free(lname, MAXPATHLEN + 1);
		kmem_free(lnm, llen);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		/* VOP_SYMLINK is not given &vp, so look the link up */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
		if (error)
			break;

		/*
		 * va_seq is not safe over VOP calls, check it again
		 * if it has changed zero out iva to force atomic = FALSE.
		 */
		iva2.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
		    iva2.va_seq != iva.va_seq)
			iva.va_seq = 0;
		break;
	default:
		/*
		 * probably a special file.
		 */
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0600;	/* default: owner rw only */
			vap->va_mask |= AT_MODE;
		}
		syncval = FNODSYNC;
		/*
		 * We know this will only generate one VOP call
		 */
		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);

		/* do_rfs4_op_mknod() has set the status on failure */
		if (vp == NULL) {
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		break;
	}
	if (name != nm)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	if (resp->status != NFS4_OK) {
		if (vp != NULL)
			VN_RELE(vp);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/*
	 * Finish setup of cinfo response, "before" value already set.
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	ava.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
		ava.va_ctime = bva.va_ctime;
		ava.va_seq = 0;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

	/*
	 * True verification that object was created with correct
	 * attrs is impossible.  The attrs could have been changed
	 * immediately after object creation.  If attributes did
	 * not verify, the only recourse for the server is to
	 * destroy the object.  Maybe if some attrs (like gid)
	 * are set incorrectly, the object should be destroyed;
	 * however, seems bad as a default policy.  Do we really
	 * want to destroy an object over one of the times not
	 * verifying correctly?  For these reasons, the server
	 * currently sets bits in attrset for createattrs
	 * that were set; however, no verification is done.
	 *
	 * vmask_to_nmask accounts for vattr bits set on create
	 *	[do_rfs4_set_attrs() only sets resp bits for
	 *	 non-vattr/vfs bits.]
	 * Mask off any bits set by default so as not to return
	 * more attrset bits than were requested in createattrs
	 */
	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
	resp->attrset &= args->createattrs.attrmask;
	nfs4_ntov_table_free(&ntov, &sarg);

	/* the new object becomes the current filehandle */
	error = makefh4(&cs->fh, vp, cs->exi);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * The cinfo.atomic = TRUE only if we got no errors, we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the creation and it didn't change during the VOP_LOOKUP
	 * or VOP_FSYNC.
	 */
	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	/*
	 * Force modified metadata out to stable storage.
	 *
	 * if a underlying vp exists, pass it to VOP_FSYNC
	 */
	if (VOP_REALVP(vp, &realvp, NULL) == 0)
		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
	else
		(void) VOP_FSYNC(vp, syncval, cr, NULL);

	/* only a makefh4() failure can get here with a bad status */
	if (resp->status != NFS4_OK) {
		VN_RELE(vp);
		goto out;
	}
	/* swap the hold on the old current vnode for the new object */
	if (cs->vp)
		VN_RELE(cs->vp);

	cs->vp = vp;
	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
	    CREATE4res *, resp);
}

/*
 * DELEGPURGE: answered through rfs4_op_inval(); only the DTrace start/done
 * probes are specific to this operation.
 */
/*ARGSUSED*/
static void
rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
	    DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);

	rfs4_op_inval(argop, resop, req, cs);

	DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
	    DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
}

/*ARGSUSED*/
static void
rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
	rfs4_deleg_state_t *dsp;
	nfsstat4 status;

	DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
	    DELEGRETURN4args *, args);

	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
	resp->status = *cs->statusp = status;
	if (status != NFS4_OK)
		goto out;

	/* Ensure specified filehandle matches */
	if (cs->vp != dsp->rds_finfo->rf_vp) {
		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
	} else
		rfs4_return_deleg(dsp, FALSE);

	rfs4_update_lease(dsp->rds_client);

	rfs4_deleg_state_rele(dsp);
out:
	DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
	    DELEGRETURN4res
*, resp); 2191 } 2192 2193 /* 2194 * Check to see if a given "flavor" is an explicitly shared flavor. 2195 * The assumption of this routine is the "flavor" is already a valid 2196 * flavor in the secinfo list of "exi". 2197 * 2198 * e.g. 2199 * # share -o sec=flavor1 /export 2200 * # share -o sec=flavor2 /export/home 2201 * 2202 * flavor2 is not an explicitly shared flavor for /export, 2203 * however it is in the secinfo list for /export thru the 2204 * server namespace setup. 2205 */ 2206 int 2207 is_exported_sec(int flavor, struct exportinfo *exi) 2208 { 2209 int i; 2210 struct secinfo *sp; 2211 2212 sp = exi->exi_export.ex_secinfo; 2213 for (i = 0; i < exi->exi_export.ex_seccnt; i++) { 2214 if (flavor == sp[i].s_secinfo.sc_nfsnum || 2215 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) { 2216 return (SEC_REF_EXPORTED(&sp[i])); 2217 } 2218 } 2219 2220 /* Should not reach this point based on the assumption */ 2221 return (0); 2222 } 2223 2224 /* 2225 * Check if the security flavor used in the request matches what is 2226 * required at the export point or at the root pseudo node (exi_root). 2227 * 2228 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise. 2229 * 2230 */ 2231 static int 2232 secinfo_match_or_authnone(struct compound_state *cs) 2233 { 2234 int i; 2235 struct secinfo *sp; 2236 2237 /* 2238 * Check cs->nfsflavor (from the request) against 2239 * the current export data in cs->exi. 2240 */ 2241 sp = cs->exi->exi_export.ex_secinfo; 2242 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) { 2243 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum || 2244 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) 2245 return (1); 2246 } 2247 2248 return (0); 2249 } 2250 2251 /* 2252 * Check the access authority for the client and return the correct error. 
2253 */ 2254 nfsstat4 2255 call_checkauth4(struct compound_state *cs, struct svc_req *req) 2256 { 2257 int authres; 2258 2259 /* 2260 * First, check if the security flavor used in the request 2261 * are among the flavors set in the server namespace. 2262 */ 2263 if (!secinfo_match_or_authnone(cs)) { 2264 *cs->statusp = NFS4ERR_WRONGSEC; 2265 return (*cs->statusp); 2266 } 2267 2268 authres = checkauth4(cs, req); 2269 2270 if (authres > 0) { 2271 *cs->statusp = NFS4_OK; 2272 if (! (cs->access & CS_ACCESS_LIMITED)) 2273 cs->access = CS_ACCESS_OK; 2274 } else if (authres == 0) { 2275 *cs->statusp = NFS4ERR_ACCESS; 2276 } else if (authres == -2) { 2277 *cs->statusp = NFS4ERR_WRONGSEC; 2278 } else { 2279 *cs->statusp = NFS4ERR_DELAY; 2280 } 2281 return (*cs->statusp); 2282 } 2283 2284 /* 2285 * bitmap4_to_attrmask is called by getattr and readdir. 2286 * It sets up the vattr mask and determines whether vfsstat call is needed 2287 * based on the input bitmap. 2288 * Returns nfsv4 status. 2289 */ 2290 static nfsstat4 2291 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp) 2292 { 2293 int i; 2294 uint_t va_mask; 2295 struct statvfs64 *sbp = sargp->sbp; 2296 2297 sargp->sbp = NULL; 2298 sargp->flag = 0; 2299 sargp->rdattr_error = NFS4_OK; 2300 sargp->mntdfid_set = FALSE; 2301 if (sargp->cs->vp) 2302 sargp->xattr = get_fh4_flag(&sargp->cs->fh, 2303 FH4_ATTRDIR | FH4_NAMEDATTR); 2304 else 2305 sargp->xattr = 0; 2306 2307 /* 2308 * Set rdattr_error_req to true if return error per 2309 * failed entry rather than fail the readdir. 
2310 */ 2311 if (breq & FATTR4_RDATTR_ERROR_MASK) 2312 sargp->rdattr_error_req = 1; 2313 else 2314 sargp->rdattr_error_req = 0; 2315 2316 /* 2317 * generate the va_mask 2318 * Handle the easy cases first 2319 */ 2320 switch (breq) { 2321 case NFS4_NTOV_ATTR_MASK: 2322 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK; 2323 return (NFS4_OK); 2324 2325 case NFS4_FS_ATTR_MASK: 2326 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK; 2327 sargp->sbp = sbp; 2328 return (NFS4_OK); 2329 2330 case NFS4_NTOV_ATTR_CACHE_MASK: 2331 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK; 2332 return (NFS4_OK); 2333 2334 case FATTR4_LEASE_TIME_MASK: 2335 sargp->vap->va_mask = 0; 2336 return (NFS4_OK); 2337 2338 default: 2339 va_mask = 0; 2340 for (i = 0; i < nfs4_ntov_map_size; i++) { 2341 if ((breq & nfs4_ntov_map[i].fbit) && 2342 nfs4_ntov_map[i].vbit) 2343 va_mask |= nfs4_ntov_map[i].vbit; 2344 } 2345 2346 /* 2347 * Check is vfsstat is needed 2348 */ 2349 if (breq & NFS4_FS_ATTR_MASK) 2350 sargp->sbp = sbp; 2351 2352 sargp->vap->va_mask = va_mask; 2353 return (NFS4_OK); 2354 } 2355 /* NOTREACHED */ 2356 } 2357 2358 /* 2359 * bitmap4_get_sysattrs is called by getattr and readdir. 2360 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs. 2361 * Returns nfsv4 status. 
2362 */ 2363 static nfsstat4 2364 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp) 2365 { 2366 int error; 2367 struct compound_state *cs = sargp->cs; 2368 vnode_t *vp = cs->vp; 2369 2370 if (sargp->sbp != NULL) { 2371 error = VFS_STATVFS(vp->v_vfsp, sargp->sbp); 2372 if (error != 0) { 2373 sargp->sbp = NULL; /* to identify error */ 2374 return (puterrno4(error)); 2375 } 2376 } 2377 2378 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr)); 2379 } 2380 2381 static void 2382 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp) 2383 { 2384 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size, 2385 KM_SLEEP); 2386 ntovp->attrcnt = 0; 2387 ntovp->vfsstat = FALSE; 2388 } 2389 2390 static void 2391 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp, 2392 struct nfs4_svgetit_arg *sargp) 2393 { 2394 int i; 2395 union nfs4_attr_u *na; 2396 uint8_t *amap; 2397 2398 /* 2399 * XXX Should do the same checks for whether the bit is set 2400 */ 2401 for (i = 0, na = ntovp->na, amap = ntovp->amap; 2402 i < ntovp->attrcnt; i++, na++, amap++) { 2403 (void) (*nfs4_ntov_map[*amap].sv_getit)( 2404 NFS4ATTR_FREEIT, sargp, na); 2405 } 2406 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) { 2407 /* 2408 * xdr_free for getattr will be done later 2409 */ 2410 for (i = 0, na = ntovp->na, amap = ntovp->amap; 2411 i < ntovp->attrcnt; i++, na++, amap++) { 2412 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na); 2413 } 2414 } 2415 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size); 2416 } 2417 2418 /* 2419 * do_rfs4_op_getattr gets the system attrs and converts into fattr4. 
2420 */ 2421 static nfsstat4 2422 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp, 2423 struct nfs4_svgetit_arg *sargp) 2424 { 2425 int error = 0; 2426 int i, k; 2427 struct nfs4_ntov_table ntov; 2428 XDR xdr; 2429 ulong_t xdr_size; 2430 char *xdr_attrs; 2431 nfsstat4 status = NFS4_OK; 2432 nfsstat4 prev_rdattr_error = sargp->rdattr_error; 2433 union nfs4_attr_u *na; 2434 uint8_t *amap; 2435 2436 sargp->op = NFS4ATTR_GETIT; 2437 sargp->flag = 0; 2438 2439 fattrp->attrmask = 0; 2440 /* if no bits requested, then return empty fattr4 */ 2441 if (breq == 0) { 2442 fattrp->attrlist4_len = 0; 2443 fattrp->attrlist4 = NULL; 2444 return (NFS4_OK); 2445 } 2446 2447 /* 2448 * return NFS4ERR_INVAL when client requests write-only attrs 2449 */ 2450 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK)) 2451 return (NFS4ERR_INVAL); 2452 2453 nfs4_ntov_table_init(&ntov); 2454 na = ntov.na; 2455 amap = ntov.amap; 2456 2457 /* 2458 * Now loop to get or verify the attrs 2459 */ 2460 for (i = 0; i < nfs4_ntov_map_size; i++) { 2461 if (breq & nfs4_ntov_map[i].fbit) { 2462 if ((*nfs4_ntov_map[i].sv_getit)( 2463 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) { 2464 2465 error = (*nfs4_ntov_map[i].sv_getit)( 2466 NFS4ATTR_GETIT, sargp, na); 2467 2468 /* 2469 * Possible error values: 2470 * >0 if sv_getit failed to 2471 * get the attr; 0 if succeeded; 2472 * <0 if rdattr_error and the 2473 * attribute cannot be returned. 2474 */ 2475 if (error && !(sargp->rdattr_error_req)) 2476 goto done; 2477 /* 2478 * If error then just for entry 2479 */ 2480 if (error == 0) { 2481 fattrp->attrmask |= 2482 nfs4_ntov_map[i].fbit; 2483 *amap++ = 2484 (uint8_t)nfs4_ntov_map[i].nval; 2485 na++; 2486 (ntov.attrcnt)++; 2487 } else if ((error > 0) && 2488 (sargp->rdattr_error == NFS4_OK)) { 2489 sargp->rdattr_error = puterrno4(error); 2490 } 2491 error = 0; 2492 } 2493 } 2494 } 2495 2496 /* 2497 * If rdattr_error was set after the return value for it was assigned, 2498 * update it. 
2499 */ 2500 if (prev_rdattr_error != sargp->rdattr_error) { 2501 na = ntov.na; 2502 amap = ntov.amap; 2503 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) { 2504 k = *amap; 2505 if (k < FATTR4_RDATTR_ERROR) { 2506 continue; 2507 } 2508 if ((k == FATTR4_RDATTR_ERROR) && 2509 ((*nfs4_ntov_map[k].sv_getit)( 2510 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) { 2511 2512 (void) (*nfs4_ntov_map[k].sv_getit)( 2513 NFS4ATTR_GETIT, sargp, na); 2514 } 2515 break; 2516 } 2517 } 2518 2519 xdr_size = 0; 2520 na = ntov.na; 2521 amap = ntov.amap; 2522 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) { 2523 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na); 2524 } 2525 2526 fattrp->attrlist4_len = xdr_size; 2527 if (xdr_size) { 2528 /* freed by rfs4_op_getattr_free() */ 2529 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP); 2530 2531 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE); 2532 2533 na = ntov.na; 2534 amap = ntov.amap; 2535 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) { 2536 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) { 2537 DTRACE_PROBE1(nfss__e__getattr4_encfail, 2538 int, *amap); 2539 status = NFS4ERR_SERVERFAULT; 2540 break; 2541 } 2542 } 2543 /* xdrmem_destroy(&xdrs); */ /* NO-OP */ 2544 } else { 2545 fattrp->attrlist4 = NULL; 2546 } 2547 done: 2548 2549 nfs4_ntov_table_free(&ntov, sargp); 2550 2551 if (error != 0) 2552 status = puterrno4(error); 2553 2554 return (status); 2555 } 2556 2557 /* ARGSUSED */ 2558 static void 2559 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2560 struct compound_state *cs) 2561 { 2562 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr; 2563 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr; 2564 struct nfs4_svgetit_arg sarg; 2565 struct statvfs64 sb; 2566 nfsstat4 status; 2567 2568 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs, 2569 GETATTR4args *, args); 2570 2571 if (cs->vp == NULL) { 2572 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2573 goto out; 2574 } 
2575 2576 if (cs->access == CS_ACCESS_DENIED) { 2577 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2578 goto out; 2579 } 2580 2581 sarg.sbp = &sb; 2582 sarg.cs = cs; 2583 sarg.is_referral = B_FALSE; 2584 2585 status = bitmap4_to_attrmask(args->attr_request, &sarg); 2586 if (status == NFS4_OK) { 2587 2588 status = bitmap4_get_sysattrs(&sarg); 2589 if (status == NFS4_OK) { 2590 2591 /* Is this a referral? */ 2592 if (vn_is_nfs_reparse(cs->vp, cs->cr)) { 2593 /* Older V4 Solaris client sees a link */ 2594 if (client_is_downrev(req)) 2595 sarg.vap->va_type = VLNK; 2596 else 2597 sarg.is_referral = B_TRUE; 2598 } 2599 2600 status = do_rfs4_op_getattr(args->attr_request, 2601 &resp->obj_attributes, &sarg); 2602 } 2603 } 2604 *cs->statusp = resp->status = status; 2605 out: 2606 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs, 2607 GETATTR4res *, resp); 2608 } 2609 2610 static void 2611 rfs4_op_getattr_free(nfs_resop4 *resop) 2612 { 2613 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr; 2614 2615 nfs4_fattr4_free(&resp->obj_attributes); 2616 } 2617 2618 /* ARGSUSED */ 2619 static void 2620 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2621 struct compound_state *cs) 2622 { 2623 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh; 2624 2625 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs); 2626 2627 if (cs->vp == NULL) { 2628 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2629 goto out; 2630 } 2631 if (cs->access == CS_ACCESS_DENIED) { 2632 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2633 goto out; 2634 } 2635 2636 /* check for reparse point at the share point */ 2637 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) { 2638 /* it's all bad */ 2639 cs->exi->exi_moved = 1; 2640 *cs->statusp = resp->status = NFS4ERR_MOVED; 2641 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved, 2642 vnode_t *, cs->vp, char *, "rfs4_op_getfh"); 2643 return; 2644 } 2645 2646 /* check for reparse point at vp */ 2647 
if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) { 2648 /* it's not all bad */ 2649 *cs->statusp = resp->status = NFS4ERR_MOVED; 2650 DTRACE_PROBE2(nfs4serv__func__referral__moved, 2651 vnode_t *, cs->vp, char *, "rfs4_op_getfh"); 2652 return; 2653 } 2654 2655 resp->object.nfs_fh4_val = 2656 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP); 2657 nfs_fh4_copy(&cs->fh, &resp->object); 2658 *cs->statusp = resp->status = NFS4_OK; 2659 out: 2660 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs, 2661 GETFH4res *, resp); 2662 } 2663 2664 static void 2665 rfs4_op_getfh_free(nfs_resop4 *resop) 2666 { 2667 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh; 2668 2669 if (resp->status == NFS4_OK && 2670 resp->object.nfs_fh4_val != NULL) { 2671 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len); 2672 resp->object.nfs_fh4_val = NULL; 2673 resp->object.nfs_fh4_len = 0; 2674 } 2675 } 2676 2677 /* 2678 * illegal: args: void 2679 * res : status (NFS4ERR_OP_ILLEGAL) 2680 */ 2681 /* ARGSUSED */ 2682 static void 2683 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop, 2684 struct svc_req *req, struct compound_state *cs) 2685 { 2686 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal; 2687 2688 resop->resop = OP_ILLEGAL; 2689 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL; 2690 } 2691 2692 /* ARGSUSED */ 2693 static void 2694 rfs4_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2695 struct compound_state *cs) 2696 { 2697 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP; 2698 } 2699 2700 /* 2701 * link: args: SAVED_FH: file, CURRENT_FH: target directory 2702 * res: status. 
If success - CURRENT_FH unchanged, return change_info 2703 */ 2704 /* ARGSUSED */ 2705 static void 2706 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2707 struct compound_state *cs) 2708 { 2709 LINK4args *args = &argop->nfs_argop4_u.oplink; 2710 LINK4res *resp = &resop->nfs_resop4_u.oplink; 2711 int error; 2712 vnode_t *vp; 2713 vnode_t *dvp; 2714 struct vattr bdva, idva, adva; 2715 char *nm; 2716 uint_t len; 2717 struct sockaddr *ca; 2718 char *name = NULL; 2719 nfsstat4 status; 2720 2721 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs, 2722 LINK4args *, args); 2723 2724 /* SAVED_FH: source object */ 2725 vp = cs->saved_vp; 2726 if (vp == NULL) { 2727 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2728 goto out; 2729 } 2730 2731 /* CURRENT_FH: target directory */ 2732 dvp = cs->vp; 2733 if (dvp == NULL) { 2734 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2735 goto out; 2736 } 2737 2738 /* 2739 * If there is a non-shared filesystem mounted on this vnode, 2740 * do not allow to link any file in this directory. 
2741 */ 2742 if (vn_ismntpt(dvp)) { 2743 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2744 goto out; 2745 } 2746 2747 if (cs->access == CS_ACCESS_DENIED) { 2748 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2749 goto out; 2750 } 2751 2752 /* Check source object's type validity */ 2753 if (vp->v_type == VDIR) { 2754 *cs->statusp = resp->status = NFS4ERR_ISDIR; 2755 goto out; 2756 } 2757 2758 /* Check target directory's type */ 2759 if (dvp->v_type != VDIR) { 2760 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 2761 goto out; 2762 } 2763 2764 if (cs->saved_exi != cs->exi) { 2765 *cs->statusp = resp->status = NFS4ERR_XDEV; 2766 goto out; 2767 } 2768 2769 status = utf8_dir_verify(&args->newname); 2770 if (status != NFS4_OK) { 2771 *cs->statusp = resp->status = status; 2772 goto out; 2773 } 2774 2775 nm = utf8_to_fn(&args->newname, &len, NULL); 2776 if (nm == NULL) { 2777 *cs->statusp = resp->status = NFS4ERR_INVAL; 2778 goto out; 2779 } 2780 2781 if (len > MAXNAMELEN) { 2782 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 2783 kmem_free(nm, len); 2784 goto out; 2785 } 2786 2787 if (rdonly4(req, cs)) { 2788 *cs->statusp = resp->status = NFS4ERR_ROFS; 2789 kmem_free(nm, len); 2790 goto out; 2791 } 2792 2793 /* Get "before" change value */ 2794 bdva.va_mask = AT_CTIME|AT_SEQ; 2795 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL); 2796 if (error) { 2797 *cs->statusp = resp->status = puterrno4(error); 2798 kmem_free(nm, len); 2799 goto out; 2800 } 2801 2802 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2803 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 2804 MAXPATHLEN + 1); 2805 2806 if (name == NULL) { 2807 *cs->statusp = resp->status = NFS4ERR_INVAL; 2808 kmem_free(nm, len); 2809 goto out; 2810 } 2811 2812 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime) 2813 2814 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0); 2815 2816 if (nm != name) 2817 kmem_free(name, MAXPATHLEN + 1); 2818 kmem_free(nm, len); 2819 2820 /* 2821 * Get the 
initial "after" sequence number, if it fails, set to zero 2822 */ 2823 idva.va_mask = AT_SEQ; 2824 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL)) 2825 idva.va_seq = 0; 2826 2827 /* 2828 * Force modified data and metadata out to stable storage. 2829 */ 2830 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL); 2831 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 2832 2833 if (error) { 2834 *cs->statusp = resp->status = puterrno4(error); 2835 goto out; 2836 } 2837 2838 /* 2839 * Get "after" change value, if it fails, simply return the 2840 * before value. 2841 */ 2842 adva.va_mask = AT_CTIME|AT_SEQ; 2843 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) { 2844 adva.va_ctime = bdva.va_ctime; 2845 adva.va_seq = 0; 2846 } 2847 2848 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime) 2849 2850 /* 2851 * The cinfo.atomic = TRUE only if we have 2852 * non-zero va_seq's, and it has incremented by exactly one 2853 * during the VOP_LINK and it didn't change during the VOP_FSYNC. 2854 */ 2855 if (bdva.va_seq && idva.va_seq && adva.va_seq && 2856 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq) 2857 resp->cinfo.atomic = TRUE; 2858 else 2859 resp->cinfo.atomic = FALSE; 2860 2861 *cs->statusp = resp->status = NFS4_OK; 2862 out: 2863 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs, 2864 LINK4res *, resp); 2865 } 2866 2867 /* 2868 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work. 2869 */ 2870 2871 /* ARGSUSED */ 2872 static nfsstat4 2873 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs) 2874 { 2875 int error; 2876 int different_export = 0; 2877 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL; 2878 struct exportinfo *exi = NULL, *pre_exi = NULL; 2879 nfsstat4 stat; 2880 fid_t fid; 2881 int attrdir, dotdot, walk; 2882 bool_t is_newvp = FALSE; 2883 2884 if (cs->vp->v_flag & V_XATTRDIR) { 2885 attrdir = 1; 2886 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR)); 2887 } else { 2888 attrdir = 0; 2889 ASSERT(! 
get_fh4_flag(&cs->fh, FH4_ATTRDIR)); 2890 } 2891 2892 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0'); 2893 2894 /* 2895 * If dotdotting, then need to check whether it's 2896 * above the root of a filesystem, or above an 2897 * export point. 2898 */ 2899 if (dotdot) { 2900 vnode_t *zone_rootvp; 2901 2902 ASSERT(cs->exi != NULL); 2903 zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp; 2904 /* 2905 * If dotdotting at the root of a filesystem, then 2906 * need to traverse back to the mounted-on filesystem 2907 * and do the dotdot lookup there. 2908 */ 2909 if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) { 2910 2911 /* 2912 * If at the system root, then can 2913 * go up no further. 2914 */ 2915 if (VN_CMP(cs->vp, zone_rootvp)) 2916 return (puterrno4(ENOENT)); 2917 2918 /* 2919 * Traverse back to the mounted-on filesystem 2920 */ 2921 cs->vp = untraverse(cs->vp, zone_rootvp); 2922 2923 /* 2924 * Set the different_export flag so we remember 2925 * to pick up a new exportinfo entry for 2926 * this new filesystem. 2927 */ 2928 different_export = 1; 2929 } else { 2930 2931 /* 2932 * If dotdotting above an export point then set 2933 * the different_export to get new export info. 2934 */ 2935 different_export = nfs_exported(cs->exi, cs->vp); 2936 } 2937 } 2938 2939 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr, 2940 NULL, NULL, NULL); 2941 if (error) 2942 return (puterrno4(error)); 2943 2944 /* 2945 * If the vnode is in a pseudo filesystem, check whether it is visible. 2946 * 2947 * XXX if the vnode is a symlink and it is not visible in 2948 * a pseudo filesystem, return ENOENT (not following symlink). 2949 * V4 client can not mount such symlink. This is a regression 2950 * from V2/V3. 2951 * 2952 * In the same exported filesystem, if the security flavor used 2953 * is not an explicitly shared flavor, limit the view to the visible 2954 * list entries only. 
This is not a WRONGSEC case because it's already 2955 * checked via PUTROOTFH/PUTPUBFH or PUTFH. 2956 */ 2957 if (!different_export && 2958 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) || 2959 cs->access & CS_ACCESS_LIMITED)) { 2960 if (! nfs_visible(cs->exi, vp, &different_export)) { 2961 VN_RELE(vp); 2962 return (puterrno4(ENOENT)); 2963 } 2964 } 2965 2966 /* 2967 * If it's a mountpoint, then traverse it. 2968 */ 2969 if (vn_ismntpt(vp)) { 2970 pre_exi = cs->exi; /* save pre-traversed exportinfo */ 2971 pre_tvp = vp; /* save pre-traversed vnode */ 2972 2973 /* 2974 * hold pre_tvp to counteract rele by traverse. We will 2975 * need pre_tvp below if checkexport4 fails 2976 */ 2977 VN_HOLD(pre_tvp); 2978 if ((error = traverse(&vp)) != 0) { 2979 VN_RELE(vp); 2980 VN_RELE(pre_tvp); 2981 return (puterrno4(error)); 2982 } 2983 different_export = 1; 2984 } else if (vp->v_vfsp != cs->vp->v_vfsp) { 2985 /* 2986 * The vfsp comparison is to handle the case where 2987 * a LOFS mount is shared. lo_lookup traverses mount points, 2988 * and NFS is unaware of local fs transistions because 2989 * v_vfsmountedhere isn't set. For this special LOFS case, 2990 * the dir and the obj returned by lookup will have different 2991 * vfs ptrs. 2992 */ 2993 different_export = 1; 2994 } 2995 2996 if (different_export) { 2997 2998 bzero(&fid, sizeof (fid)); 2999 fid.fid_len = MAXFIDSZ; 3000 error = vop_fid_pseudo(vp, &fid); 3001 if (error) { 3002 VN_RELE(vp); 3003 if (pre_tvp) 3004 VN_RELE(pre_tvp); 3005 return (puterrno4(error)); 3006 } 3007 3008 if (dotdot) 3009 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE); 3010 else 3011 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp); 3012 3013 if (exi == NULL) { 3014 if (pre_tvp) { 3015 /* 3016 * If this vnode is a mounted-on vnode, 3017 * but the mounted-on file system is not 3018 * exported, send back the filehandle for 3019 * the mounted-on vnode, not the root of 3020 * the mounted-on file system. 
3021 */ 3022 VN_RELE(vp); 3023 vp = pre_tvp; 3024 exi = pre_exi; 3025 } else { 3026 VN_RELE(vp); 3027 return (puterrno4(EACCES)); 3028 } 3029 } else if (pre_tvp) { 3030 /* we're done with pre_tvp now. release extra hold */ 3031 VN_RELE(pre_tvp); 3032 } 3033 3034 cs->exi = exi; 3035 3036 /* 3037 * Now we do a checkauth4. The reason is that 3038 * this client/user may not have access to the new 3039 * exported file system, and if they do, 3040 * the client/user may be mapped to a different uid. 3041 * 3042 * We start with a new cr, because the checkauth4 done 3043 * in the PUT*FH operation over wrote the cred's uid, 3044 * gid, etc, and we want the real thing before calling 3045 * checkauth4() 3046 */ 3047 crfree(cs->cr); 3048 cs->cr = crdup(cs->basecr); 3049 3050 oldvp = cs->vp; 3051 cs->vp = vp; 3052 is_newvp = TRUE; 3053 3054 stat = call_checkauth4(cs, req); 3055 if (stat != NFS4_OK) { 3056 VN_RELE(cs->vp); 3057 cs->vp = oldvp; 3058 return (stat); 3059 } 3060 } 3061 3062 /* 3063 * After various NFS checks, do a label check on the path 3064 * component. The label on this path should either be the 3065 * global zone's label or a zone's label. We are only 3066 * interested in the zone's label because exported files 3067 * in global zone is accessible (though read-only) to 3068 * clients. The exportability/visibility check is already 3069 * done before reaching this code. 3070 */ 3071 if (is_system_labeled()) { 3072 bslabel_t *clabel; 3073 3074 ASSERT(req->rq_label != NULL); 3075 clabel = req->rq_label; 3076 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *, 3077 "got client label from request(1)", struct svc_req *, req); 3078 3079 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3080 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 3081 cs->exi)) { 3082 error = EACCES; 3083 goto err_out; 3084 } 3085 } else { 3086 /* 3087 * We grant access to admin_low label clients 3088 * only if the client is trusted, i.e. also 3089 * running Solaris Trusted Extension. 
3090 */ 3091 struct sockaddr *ca; 3092 int addr_type; 3093 void *ipaddr; 3094 tsol_tpc_t *tp; 3095 3096 ca = (struct sockaddr *)svc_getrpccaller( 3097 req->rq_xprt)->buf; 3098 if (ca->sa_family == AF_INET) { 3099 addr_type = IPV4_VERSION; 3100 ipaddr = &((struct sockaddr_in *)ca)->sin_addr; 3101 } else if (ca->sa_family == AF_INET6) { 3102 addr_type = IPV6_VERSION; 3103 ipaddr = &((struct sockaddr_in6 *) 3104 ca)->sin6_addr; 3105 } 3106 tp = find_tpc(ipaddr, addr_type, B_FALSE); 3107 if (tp == NULL || tp->tpc_tp.tp_doi != 3108 l_admin_low->tsl_doi || tp->tpc_tp.host_type != 3109 SUN_CIPSO) { 3110 if (tp != NULL) 3111 TPC_RELE(tp); 3112 error = EACCES; 3113 goto err_out; 3114 } 3115 TPC_RELE(tp); 3116 } 3117 } 3118 3119 error = makefh4(&cs->fh, vp, cs->exi); 3120 3121 err_out: 3122 if (error) { 3123 if (is_newvp) { 3124 VN_RELE(cs->vp); 3125 cs->vp = oldvp; 3126 } else 3127 VN_RELE(vp); 3128 return (puterrno4(error)); 3129 } 3130 3131 if (!is_newvp) { 3132 if (cs->vp) 3133 VN_RELE(cs->vp); 3134 cs->vp = vp; 3135 } else if (oldvp) 3136 VN_RELE(oldvp); 3137 3138 /* 3139 * if did lookup on attrdir and didn't lookup .., set named 3140 * attr fh flag 3141 */ 3142 if (attrdir && ! 
dotdot) 3143 set_fh4_flag(&cs->fh, FH4_NAMEDATTR); 3144 3145 /* Assume false for now, open proc will set this */ 3146 cs->mandlock = FALSE; 3147 3148 return (NFS4_OK); 3149 } 3150 3151 /* ARGSUSED */ 3152 static void 3153 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3154 struct compound_state *cs) 3155 { 3156 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup; 3157 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup; 3158 char *nm; 3159 uint_t len; 3160 struct sockaddr *ca; 3161 char *name = NULL; 3162 nfsstat4 status; 3163 3164 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs, 3165 LOOKUP4args *, args); 3166 3167 if (cs->vp == NULL) { 3168 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3169 goto out; 3170 } 3171 3172 if (cs->vp->v_type == VLNK) { 3173 *cs->statusp = resp->status = NFS4ERR_SYMLINK; 3174 goto out; 3175 } 3176 3177 if (cs->vp->v_type != VDIR) { 3178 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 3179 goto out; 3180 } 3181 3182 status = utf8_dir_verify(&args->objname); 3183 if (status != NFS4_OK) { 3184 *cs->statusp = resp->status = status; 3185 goto out; 3186 } 3187 3188 nm = utf8_to_str(&args->objname, &len, NULL); 3189 if (nm == NULL) { 3190 *cs->statusp = resp->status = NFS4ERR_INVAL; 3191 goto out; 3192 } 3193 3194 if (len > MAXNAMELEN) { 3195 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 3196 kmem_free(nm, len); 3197 goto out; 3198 } 3199 3200 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 3201 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 3202 MAXPATHLEN + 1); 3203 3204 if (name == NULL) { 3205 *cs->statusp = resp->status = NFS4ERR_INVAL; 3206 kmem_free(nm, len); 3207 goto out; 3208 } 3209 3210 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs); 3211 3212 if (name != nm) 3213 kmem_free(name, MAXPATHLEN + 1); 3214 kmem_free(nm, len); 3215 3216 out: 3217 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs, 3218 LOOKUP4res *, resp); 3219 } 3220 3221 /* 
ARGSUSED */
static void
rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;

	/*
	 * lookupp: look up ".." relative to the current filehandle, which
	 * must be a directory.  The work is done by do_rfs4_op_lookup().
	 */
	DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	if (cs->vp->v_type == VLNK) {
		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
		goto out;
	}

	if (cs->vp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}

	*cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);

	/*
	 * From NFSV4 Specification, LOOKUPP should not check for
	 * NFS4ERR_WRONGSEC. Return NFS4_OK instead.
	 *
	 * NOTE(review): when do_rfs4_op_lookup() fails its call_checkauth4()
	 * step it puts the previous vnode back into cs->vp before returning,
	 * so NFS4_OK here appears to leave the current vnode unchanged --
	 * confirm that this is the intended result.
	 */
	if (resp->status == NFS4ERR_WRONGSEC) {
		*cs->statusp = resp->status = NFS4_OK;
	}

out:
	DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
	    LOOKUPP4res *, resp);
}


/*ARGSUSED2*/
static void
rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
	OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
	vnode_t *avp = NULL;
	int lookup_flags = LOOKUP_XATTR, error;
	int exp_ro = 0;

	/*
	 * openattr: replace the current filehandle with the extended
	 * attribute directory of the object it names.  On any failure
	 * cs->vp and cs->fh are left as they were.
	 */
	DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
	    OPENATTR4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * The file system must either be flagged VFS_XATTR or have the
	 * VFSFT_SYSATTR_VIEWS feature; otherwise the operation is not
	 * supported.
	 */
	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
	    !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
		*cs->statusp = resp->status = puterrno4(ENOTSUP);
		goto out;
	}

	/*
	 * If file system supports passing ACE mask to VOP_ACCESS then
	 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
	 *
	 * The legacy form only fails when read, write and execute access
	 * are all denied; "error" is then a 0/1 truth value, not an errno.
	 */

	if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
		error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
		    V_ACE_MASK, cs->cr, NULL);
	else
		error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
		    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
		    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));

	if (error) {
		*cs->statusp = resp->status = puterrno4(EACCES);
		goto out;
	}

	/*
	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
	 * the file system is exported read-only -- regardless of
	 * createdir flag.  Otherwise the attrdir would be created
	 * (assuming server fs isn't mounted readonly locally).  If
	 * VOP_LOOKUP returns ENOENT in this case, the error will
	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
	 * because specfs has no VOP_LOOKUP op, so the macro would
	 * return ENOSYS.  EINVAL is returned by all (current)
	 * Solaris file system implementations when any of their
	 * restrictions are violated (xattr(dir) can't have xattrdir).
	 * Returning NOTSUPP is more appropriate in this case
	 * because the object will never be able to have an attrdir.
	 */
	/* rdonly4() is only consulted (and exp_ro set) when createdir is set */
	if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
		lookup_flags |= CREATE_XATTR_DIR;

	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
	    NULL, NULL, NULL);

	if (error) {
		if (error == ENOENT && args->createdir && exp_ro)
			*cs->statusp = resp->status = puterrno4(EROFS);
		else if (error == EINVAL || error == ENOSYS)
			*cs->statusp = resp->status = puterrno4(ENOTSUP);
		else
			*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	ASSERT(avp->v_flag & V_XATTRDIR);

	error = makefh4(&cs->fh, avp, cs->exi);

	if (error) {
		/* drop the hold the lookup took on the attrdir */
		VN_RELE(avp);
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	/* the attrdir becomes the current vnode; release the old one */
	VN_RELE(cs->vp);
	cs->vp = avp;

	/*
	 * There is no requirement for an attrdir fh flag
	 * because the attrdir has a vnode flag to distinguish
	 * it from regular (non-xattr) directories.  The
	 * FH4_ATTRDIR flag is set for future sanity checks.
	 */
	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
	*cs->statusp = resp->status = NFS4_OK;

out:
	DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
	    OPENATTR4res *, resp);
}

static int
do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
    caller_context_t *ct)
{
	int error;
	int i;
	clock_t delaytime;

	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);

	/*
	 * Don't block on mandatory locks.  If this routine returns
	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3374 */ 3375 uio->uio_fmode = FNONBLOCK; 3376 3377 for (i = 0; i < rfs4_maxlock_tries; i++) { 3378 3379 3380 if (direction == FREAD) { 3381 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct); 3382 error = VOP_READ(vp, uio, ioflag, cred, ct); 3383 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct); 3384 } else { 3385 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct); 3386 error = VOP_WRITE(vp, uio, ioflag, cred, ct); 3387 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct); 3388 } 3389 3390 if (error != EAGAIN) 3391 break; 3392 3393 if (i < rfs4_maxlock_tries - 1) { 3394 delay(delaytime); 3395 delaytime *= 2; 3396 } 3397 } 3398 3399 return (error); 3400 } 3401 3402 /* ARGSUSED */ 3403 static void 3404 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3405 struct compound_state *cs) 3406 { 3407 READ4args *args = &argop->nfs_argop4_u.opread; 3408 READ4res *resp = &resop->nfs_resop4_u.opread; 3409 int error; 3410 int verror; 3411 vnode_t *vp; 3412 struct vattr va; 3413 struct iovec iov, *iovp = NULL; 3414 int iovcnt; 3415 struct uio uio; 3416 u_offset_t offset; 3417 bool_t *deleg = &cs->deleg; 3418 nfsstat4 stat; 3419 int in_crit = 0; 3420 mblk_t *mp = NULL; 3421 int alloc_err = 0; 3422 int rdma_used = 0; 3423 int loaned_buffers; 3424 caller_context_t ct; 3425 struct uio *uiop; 3426 3427 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs, 3428 READ4args, args); 3429 3430 vp = cs->vp; 3431 if (vp == NULL) { 3432 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3433 goto out; 3434 } 3435 if (cs->access == CS_ACCESS_DENIED) { 3436 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3437 goto out; 3438 } 3439 3440 get_stateid4(cs, &args->stateid); 3441 3442 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE, 3443 deleg, TRUE, &ct, cs)) != NFS4_OK) { 3444 *cs->statusp = resp->status = stat; 3445 goto out; 3446 } 3447 3448 /* 3449 * Enter the critical region before calling VOP_RWLOCK 3450 * to avoid a deadlock with write requests. 
3451 */ 3452 if (nbl_need_check(vp)) { 3453 nbl_start_crit(vp, RW_READER); 3454 in_crit = 1; 3455 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0, 3456 &ct)) { 3457 *cs->statusp = resp->status = NFS4ERR_LOCKED; 3458 goto out; 3459 } 3460 } 3461 3462 if (args->wlist) { 3463 if (args->count > clist_len(args->wlist)) { 3464 *cs->statusp = resp->status = NFS4ERR_INVAL; 3465 goto out; 3466 } 3467 rdma_used = 1; 3468 } 3469 3470 /* use loaned buffers for TCP */ 3471 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0; 3472 3473 va.va_mask = AT_MODE|AT_SIZE|AT_UID; 3474 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct); 3475 3476 /* 3477 * If we can't get the attributes, then we can't do the 3478 * right access checking. So, we'll fail the request. 3479 */ 3480 if (verror) { 3481 *cs->statusp = resp->status = puterrno4(verror); 3482 goto out; 3483 } 3484 3485 if (vp->v_type != VREG) { 3486 *cs->statusp = resp->status = 3487 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL); 3488 goto out; 3489 } 3490 3491 if (crgetuid(cs->cr) != va.va_uid && 3492 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) && 3493 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) { 3494 *cs->statusp = resp->status = puterrno4(error); 3495 goto out; 3496 } 3497 3498 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */ 3499 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3500 goto out; 3501 } 3502 3503 offset = args->offset; 3504 if (offset >= va.va_size) { 3505 *cs->statusp = resp->status = NFS4_OK; 3506 resp->eof = TRUE; 3507 resp->data_len = 0; 3508 resp->data_val = NULL; 3509 resp->mblk = NULL; 3510 /* RDMA */ 3511 resp->wlist = args->wlist; 3512 resp->wlist_len = resp->data_len; 3513 *cs->statusp = resp->status = NFS4_OK; 3514 if (resp->wlist) 3515 clist_zero_len(resp->wlist); 3516 goto out; 3517 } 3518 3519 if (args->count == 0) { 3520 *cs->statusp = resp->status = NFS4_OK; 3521 resp->eof = FALSE; 3522 resp->data_len = 0; 3523 resp->data_val = NULL; 3524 
resp->mblk = NULL; 3525 /* RDMA */ 3526 resp->wlist = args->wlist; 3527 resp->wlist_len = resp->data_len; 3528 if (resp->wlist) 3529 clist_zero_len(resp->wlist); 3530 goto out; 3531 } 3532 3533 /* 3534 * Do not allocate memory more than maximum allowed 3535 * transfer size 3536 */ 3537 if (args->count > rfs4_tsize(req)) 3538 args->count = rfs4_tsize(req); 3539 3540 if (loaned_buffers) { 3541 uiop = (uio_t *)rfs_setup_xuio(vp); 3542 ASSERT(uiop != NULL); 3543 uiop->uio_segflg = UIO_SYSSPACE; 3544 uiop->uio_loffset = args->offset; 3545 uiop->uio_resid = args->count; 3546 3547 /* Jump to do the read if successful */ 3548 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) { 3549 /* 3550 * Need to hold the vnode until after VOP_RETZCBUF() 3551 * is called. 3552 */ 3553 VN_HOLD(vp); 3554 goto doio_read; 3555 } 3556 3557 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int, 3558 uiop->uio_loffset, int, uiop->uio_resid); 3559 3560 uiop->uio_extflg = 0; 3561 3562 /* failure to setup for zero copy */ 3563 rfs_free_xuio((void *)uiop); 3564 loaned_buffers = 0; 3565 } 3566 3567 /* 3568 * If returning data via RDMA Write, then grab the chunk list. If we 3569 * aren't returning READ data w/RDMA_WRITE, then grab a mblk. 3570 */ 3571 if (rdma_used) { 3572 mp = NULL; 3573 (void) rdma_get_wchunk(req, &iov, args->wlist); 3574 uio.uio_iov = &iov; 3575 uio.uio_iovcnt = 1; 3576 } else { 3577 /* 3578 * mp will contain the data to be sent out in the read reply. 3579 * It will be freed after the reply has been sent. 
3580 */ 3581 mp = rfs_read_alloc(args->count, &iovp, &iovcnt); 3582 ASSERT(mp != NULL); 3583 ASSERT(alloc_err == 0); 3584 uio.uio_iov = iovp; 3585 uio.uio_iovcnt = iovcnt; 3586 } 3587 3588 uio.uio_segflg = UIO_SYSSPACE; 3589 uio.uio_extflg = UIO_COPY_CACHED; 3590 uio.uio_loffset = args->offset; 3591 uio.uio_resid = args->count; 3592 uiop = &uio; 3593 3594 doio_read: 3595 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct); 3596 3597 va.va_mask = AT_SIZE; 3598 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct); 3599 3600 if (error) { 3601 if (mp) 3602 freemsg(mp); 3603 *cs->statusp = resp->status = puterrno4(error); 3604 goto out; 3605 } 3606 3607 /* make mblk using zc buffers */ 3608 if (loaned_buffers) { 3609 mp = uio_to_mblk(uiop); 3610 ASSERT(mp != NULL); 3611 } 3612 3613 *cs->statusp = resp->status = NFS4_OK; 3614 3615 ASSERT(uiop->uio_resid >= 0); 3616 resp->data_len = args->count - uiop->uio_resid; 3617 if (mp) { 3618 resp->data_val = (char *)mp->b_datap->db_base; 3619 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers); 3620 } else { 3621 resp->data_val = (caddr_t)iov.iov_base; 3622 } 3623 3624 resp->mblk = mp; 3625 3626 if (!verror && offset + resp->data_len == va.va_size) 3627 resp->eof = TRUE; 3628 else 3629 resp->eof = FALSE; 3630 3631 if (rdma_used) { 3632 if (!rdma_setup_read_data4(args, resp)) { 3633 *cs->statusp = resp->status = NFS4ERR_INVAL; 3634 } 3635 } else { 3636 resp->wlist = NULL; 3637 } 3638 3639 out: 3640 if (in_crit) 3641 nbl_end_crit(vp); 3642 3643 if (iovp != NULL) 3644 kmem_free(iovp, iovcnt * sizeof (struct iovec)); 3645 3646 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs, 3647 READ4res *, resp); 3648 } 3649 3650 static void 3651 rfs4_op_read_free(nfs_resop4 *resop) 3652 { 3653 READ4res *resp = &resop->nfs_resop4_u.opread; 3654 3655 if (resp->status == NFS4_OK && resp->mblk != NULL) { 3656 freemsg(resp->mblk); 3657 resp->mblk = NULL; 3658 resp->data_val = NULL; 3659 resp->data_len = 0; 3660 } 3661 } 3662 3663 static void 3664 
rfs4_op_readdir_free(nfs_resop4 * resop) 3665 { 3666 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir; 3667 3668 if (resp->status == NFS4_OK && resp->mblk != NULL) { 3669 freeb(resp->mblk); 3670 resp->mblk = NULL; 3671 resp->data_len = 0; 3672 } 3673 } 3674 3675 3676 /* ARGSUSED */ 3677 static void 3678 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 3679 struct compound_state *cs) 3680 { 3681 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh; 3682 int error; 3683 vnode_t *vp; 3684 struct exportinfo *exi, *sav_exi; 3685 nfs_fh4_fmt_t *fh_fmtp; 3686 nfs_export_t *ne = nfs_get_export(); 3687 3688 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs); 3689 3690 if (cs->vp) { 3691 VN_RELE(cs->vp); 3692 cs->vp = NULL; 3693 } 3694 3695 if (cs->cr) 3696 crfree(cs->cr); 3697 3698 cs->cr = crdup(cs->basecr); 3699 3700 vp = ne->exi_public->exi_vp; 3701 if (vp == NULL) { 3702 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 3703 goto out; 3704 } 3705 3706 error = makefh4(&cs->fh, vp, ne->exi_public); 3707 if (error != 0) { 3708 *cs->statusp = resp->status = puterrno4(error); 3709 goto out; 3710 } 3711 sav_exi = cs->exi; 3712 if (ne->exi_public == ne->exi_root) { 3713 /* 3714 * No filesystem is actually shared public, so we default 3715 * to exi_root. In this case, we must check whether root 3716 * is exported. 3717 */ 3718 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val; 3719 3720 /* 3721 * if root filesystem is exported, the exportinfo struct that we 3722 * should use is what checkexport4 returns, because root_exi is 3723 * actually a mostly empty struct. 3724 */ 3725 exi = checkexport4(&fh_fmtp->fh4_fsid, 3726 (fid_t *)&fh_fmtp->fh4_xlen, NULL); 3727 cs->exi = ((exi != NULL) ? 
exi : ne->exi_public); 3728 } else { 3729 /* 3730 * it's a properly shared filesystem 3731 */ 3732 cs->exi = ne->exi_public; 3733 } 3734 3735 if (is_system_labeled()) { 3736 bslabel_t *clabel; 3737 3738 ASSERT(req->rq_label != NULL); 3739 clabel = req->rq_label; 3740 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *, 3741 "got client label from request(1)", 3742 struct svc_req *, req); 3743 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3744 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 3745 cs->exi)) { 3746 *cs->statusp = resp->status = 3747 NFS4ERR_SERVERFAULT; 3748 goto out; 3749 } 3750 } 3751 } 3752 3753 VN_HOLD(vp); 3754 cs->vp = vp; 3755 3756 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 3757 VN_RELE(cs->vp); 3758 cs->vp = NULL; 3759 cs->exi = sav_exi; 3760 goto out; 3761 } 3762 3763 *cs->statusp = resp->status = NFS4_OK; 3764 out: 3765 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs, 3766 PUTPUBFH4res *, resp); 3767 } 3768 3769 /* 3770 * XXX - issue with put*fh operations. Suppose /export/home is exported. 3771 * Suppose an NFS client goes to mount /export/home/joe. If /export, home, 3772 * or joe have restrictive search permissions, then we shouldn't let 3773 * the client get a file handle. This is easy to enforce. However, we 3774 * don't know what security flavor should be used until we resolve the 3775 * path name. Another complication is uid mapping. If root is 3776 * the user, then it will be mapped to the anonymous user by default, 3777 * but we won't know that till we've resolved the path name. And we won't 3778 * know what the anonymous user is. 3779 * Luckily, SECINFO is specified to take a full filename. 3780 * So what we will have to in rfs4_op_lookup is check that flavor of 3781 * the target object matches that of the request, and if root was the 3782 * caller, check for the root= and anon= options, and if necessary, 3783 * repeat the lookup using the right cred_t. But that's not done yet. 
3784 */ 3785 /* ARGSUSED */ 3786 static void 3787 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3788 struct compound_state *cs) 3789 { 3790 PUTFH4args *args = &argop->nfs_argop4_u.opputfh; 3791 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh; 3792 nfs_fh4_fmt_t *fh_fmtp; 3793 3794 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs, 3795 PUTFH4args *, args); 3796 3797 if (cs->vp) { 3798 VN_RELE(cs->vp); 3799 cs->vp = NULL; 3800 } 3801 3802 if (cs->cr) { 3803 crfree(cs->cr); 3804 cs->cr = NULL; 3805 } 3806 3807 3808 if (args->object.nfs_fh4_len < NFS_FH4_LEN) { 3809 *cs->statusp = resp->status = NFS4ERR_BADHANDLE; 3810 goto out; 3811 } 3812 3813 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val; 3814 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen, 3815 NULL); 3816 3817 if (cs->exi == NULL) { 3818 *cs->statusp = resp->status = NFS4ERR_STALE; 3819 goto out; 3820 } 3821 3822 cs->cr = crdup(cs->basecr); 3823 3824 ASSERT(cs->cr != NULL); 3825 3826 if (! 
(cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) { 3827 *cs->statusp = resp->status; 3828 goto out; 3829 } 3830 3831 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 3832 VN_RELE(cs->vp); 3833 cs->vp = NULL; 3834 goto out; 3835 } 3836 3837 nfs_fh4_copy(&args->object, &cs->fh); 3838 *cs->statusp = resp->status = NFS4_OK; 3839 cs->deleg = FALSE; 3840 3841 out: 3842 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs, 3843 PUTFH4res *, resp); 3844 } 3845 3846 /* ARGSUSED */ 3847 static void 3848 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3849 struct compound_state *cs) 3850 { 3851 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh; 3852 int error; 3853 fid_t fid; 3854 struct exportinfo *exi, *sav_exi; 3855 3856 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs); 3857 3858 if (cs->vp) { 3859 VN_RELE(cs->vp); 3860 cs->vp = NULL; 3861 } 3862 3863 if (cs->cr) 3864 crfree(cs->cr); 3865 3866 cs->cr = crdup(cs->basecr); 3867 3868 /* 3869 * Using rootdir, the system root vnode, 3870 * get its fid. 3871 */ 3872 bzero(&fid, sizeof (fid)); 3873 fid.fid_len = MAXFIDSZ; 3874 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid); 3875 if (error != 0) { 3876 *cs->statusp = resp->status = puterrno4(error); 3877 goto out; 3878 } 3879 3880 /* 3881 * Then use the root fsid & fid it to find out if it's exported 3882 * 3883 * If the server root isn't exported directly, then 3884 * it should at least be a pseudo export based on 3885 * one or more exports further down in the server's 3886 * file tree. 3887 */ 3888 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL); 3889 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) { 3890 NFS4_DEBUG(rfs4_debug, 3891 (CE_WARN, "rfs4_op_putrootfh: export check failure")); 3892 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 3893 goto out; 3894 } 3895 3896 /* 3897 * Now make a filehandle based on the root 3898 * export and root vnode. 
3899 */ 3900 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi); 3901 if (error != 0) { 3902 *cs->statusp = resp->status = puterrno4(error); 3903 goto out; 3904 } 3905 3906 sav_exi = cs->exi; 3907 cs->exi = exi; 3908 3909 VN_HOLD(ZONE_ROOTVP()); 3910 cs->vp = ZONE_ROOTVP(); 3911 3912 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 3913 VN_RELE(cs->vp); 3914 cs->vp = NULL; 3915 cs->exi = sav_exi; 3916 goto out; 3917 } 3918 3919 *cs->statusp = resp->status = NFS4_OK; 3920 cs->deleg = FALSE; 3921 out: 3922 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs, 3923 PUTROOTFH4res *, resp); 3924 } 3925 3926 /* 3927 * readlink: args: CURRENT_FH. 3928 * res: status. If success - CURRENT_FH unchanged, return linktext. 3929 */ 3930 3931 /* ARGSUSED */ 3932 static void 3933 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3934 struct compound_state *cs) 3935 { 3936 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink; 3937 int error; 3938 vnode_t *vp; 3939 struct iovec iov; 3940 struct vattr va; 3941 struct uio uio; 3942 char *data; 3943 struct sockaddr *ca; 3944 char *name = NULL; 3945 int is_referral; 3946 3947 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs); 3948 3949 /* CURRENT_FH: directory */ 3950 vp = cs->vp; 3951 if (vp == NULL) { 3952 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3953 goto out; 3954 } 3955 3956 if (cs->access == CS_ACCESS_DENIED) { 3957 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3958 goto out; 3959 } 3960 3961 /* Is it a referral? 
*/ 3962 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) { 3963 3964 is_referral = 1; 3965 3966 } else { 3967 3968 is_referral = 0; 3969 3970 if (vp->v_type == VDIR) { 3971 *cs->statusp = resp->status = NFS4ERR_ISDIR; 3972 goto out; 3973 } 3974 3975 if (vp->v_type != VLNK) { 3976 *cs->statusp = resp->status = NFS4ERR_INVAL; 3977 goto out; 3978 } 3979 3980 } 3981 3982 va.va_mask = AT_MODE; 3983 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL); 3984 if (error) { 3985 *cs->statusp = resp->status = puterrno4(error); 3986 goto out; 3987 } 3988 3989 if (MANDLOCK(vp, va.va_mode)) { 3990 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3991 goto out; 3992 } 3993 3994 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP); 3995 3996 if (is_referral) { 3997 char *s; 3998 size_t strsz; 3999 kstat_named_t *stat = 4000 cs->exi->exi_ne->ne_globals->svstat[NFS_V4]; 4001 4002 /* Get an artificial symlink based on a referral */ 4003 s = build_symlink(vp, cs->cr, &strsz); 4004 stat[NFS_REFERLINKS].value.ui64++; 4005 DTRACE_PROBE2(nfs4serv__func__referral__reflink, 4006 vnode_t *, vp, char *, s); 4007 if (s == NULL) 4008 error = EINVAL; 4009 else { 4010 error = 0; 4011 (void) strlcpy(data, s, MAXPATHLEN + 1); 4012 kmem_free(s, strsz); 4013 } 4014 4015 } else { 4016 4017 iov.iov_base = data; 4018 iov.iov_len = MAXPATHLEN; 4019 uio.uio_iov = &iov; 4020 uio.uio_iovcnt = 1; 4021 uio.uio_segflg = UIO_SYSSPACE; 4022 uio.uio_extflg = UIO_COPY_CACHED; 4023 uio.uio_loffset = 0; 4024 uio.uio_resid = MAXPATHLEN; 4025 4026 error = VOP_READLINK(vp, &uio, cs->cr, NULL); 4027 4028 if (!error) 4029 *(data + MAXPATHLEN - uio.uio_resid) = '\0'; 4030 } 4031 4032 if (error) { 4033 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1); 4034 *cs->statusp = resp->status = puterrno4(error); 4035 goto out; 4036 } 4037 4038 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 4039 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND, 4040 MAXPATHLEN + 1); 4041 4042 if (name == NULL) { 4043 /* 4044 * 
Even though the conversion failed, we return 4045 * something. We just don't translate it. 4046 */ 4047 name = data; 4048 } 4049 4050 /* 4051 * treat link name as data 4052 */ 4053 (void) str_to_utf8(name, (utf8string *)&resp->link); 4054 4055 if (name != data) 4056 kmem_free(name, MAXPATHLEN + 1); 4057 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1); 4058 *cs->statusp = resp->status = NFS4_OK; 4059 4060 out: 4061 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs, 4062 READLINK4res *, resp); 4063 } 4064 4065 static void 4066 rfs4_op_readlink_free(nfs_resop4 *resop) 4067 { 4068 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink; 4069 utf8string *symlink = (utf8string *)&resp->link; 4070 4071 if (symlink->utf8string_val) { 4072 UTF8STRING_FREE(*symlink) 4073 } 4074 } 4075 4076 /* 4077 * release_lockowner: 4078 * Release any state associated with the supplied 4079 * lockowner. Note if any lo_state is holding locks we will not 4080 * rele that lo_state and thus the lockowner will not be destroyed. 4081 * A client using lock after the lock owner stateid has been released 4082 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have 4083 * to reissue the lock with new_lock_owner set to TRUE. 
4084 * args: lock_owner 4085 * res: status 4086 */ 4087 /* ARGSUSED */ 4088 static void 4089 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop, 4090 struct svc_req *req, struct compound_state *cs) 4091 { 4092 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner; 4093 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner; 4094 rfs4_lockowner_t *lo; 4095 rfs4_openowner_t *oo; 4096 rfs4_state_t *sp; 4097 rfs4_lo_state_t *lsp; 4098 rfs4_client_t *cp; 4099 bool_t create = FALSE; 4100 locklist_t *llist; 4101 sysid_t sysid; 4102 4103 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *, 4104 cs, RELEASE_LOCKOWNER4args *, ap); 4105 4106 /* Make sure there is a clientid around for this request */ 4107 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE); 4108 4109 if (cp == NULL) { 4110 *cs->statusp = resp->status = 4111 rfs4_check_clientid(&ap->lock_owner.clientid, 0); 4112 goto out; 4113 } 4114 rfs4_client_rele(cp); 4115 4116 lo = rfs4_findlockowner(&ap->lock_owner, &create); 4117 if (lo == NULL) { 4118 *cs->statusp = resp->status = NFS4_OK; 4119 goto out; 4120 } 4121 ASSERT(lo->rl_client != NULL); 4122 4123 /* 4124 * Check for EXPIRED client. If so will reap state with in a lease 4125 * period or on next set_clientid_confirm step 4126 */ 4127 if (rfs4_lease_expired(lo->rl_client)) { 4128 rfs4_lockowner_rele(lo); 4129 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 4130 goto out; 4131 } 4132 4133 /* 4134 * If no sysid has been assigned, then no locks exist; just return. 4135 */ 4136 rfs4_dbe_lock(lo->rl_client->rc_dbe); 4137 if (lo->rl_client->rc_sysidt == LM_NOSYSID) { 4138 rfs4_lockowner_rele(lo); 4139 rfs4_dbe_unlock(lo->rl_client->rc_dbe); 4140 goto out; 4141 } 4142 4143 sysid = lo->rl_client->rc_sysidt; 4144 rfs4_dbe_unlock(lo->rl_client->rc_dbe); 4145 4146 /* 4147 * Mark the lockowner invalid. 
4148 */ 4149 rfs4_dbe_hide(lo->rl_dbe); 4150 4151 /* 4152 * sysid-pid pair should now not be used since the lockowner is 4153 * invalid. If the client were to instantiate the lockowner again 4154 * it would be assigned a new pid. Thus we can get the list of 4155 * current locks. 4156 */ 4157 4158 llist = flk_get_active_locks(sysid, lo->rl_pid); 4159 /* If we are still holding locks fail */ 4160 if (llist != NULL) { 4161 4162 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD; 4163 4164 flk_free_locklist(llist); 4165 /* 4166 * We need to unhide the lockowner so the client can 4167 * try it again. The bad thing here is if the client 4168 * has a logic error that took it here in the first place 4169 * they probably have lost accounting of the locks that it 4170 * is holding. So we may have dangling state until the 4171 * open owner state is reaped via close. One scenario 4172 * that could possibly occur is that the client has 4173 * sent the unlock request(s) in separate threads 4174 * and has not waited for the replies before sending the 4175 * RELEASE_LOCKOWNER request. Presumably, it would expect 4176 * and deal appropriately with NFS4ERR_LOCKS_HELD, by 4177 * reissuing the request. 4178 */ 4179 rfs4_dbe_unhide(lo->rl_dbe); 4180 rfs4_lockowner_rele(lo); 4181 goto out; 4182 } 4183 4184 /* 4185 * For the corresponding client we need to check each open 4186 * owner for any opens that have lockowner state associated 4187 * with this lockowner. 
4188 */ 4189 4190 rfs4_dbe_lock(lo->rl_client->rc_dbe); 4191 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL; 4192 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) { 4193 4194 rfs4_dbe_lock(oo->ro_dbe); 4195 for (sp = list_head(&oo->ro_statelist); sp != NULL; 4196 sp = list_next(&oo->ro_statelist, sp)) { 4197 4198 rfs4_dbe_lock(sp->rs_dbe); 4199 for (lsp = list_head(&sp->rs_lostatelist); 4200 lsp != NULL; 4201 lsp = list_next(&sp->rs_lostatelist, lsp)) { 4202 if (lsp->rls_locker == lo) { 4203 rfs4_dbe_lock(lsp->rls_dbe); 4204 rfs4_dbe_invalidate(lsp->rls_dbe); 4205 rfs4_dbe_unlock(lsp->rls_dbe); 4206 } 4207 } 4208 rfs4_dbe_unlock(sp->rs_dbe); 4209 } 4210 rfs4_dbe_unlock(oo->ro_dbe); 4211 } 4212 rfs4_dbe_unlock(lo->rl_client->rc_dbe); 4213 4214 rfs4_lockowner_rele(lo); 4215 4216 *cs->statusp = resp->status = NFS4_OK; 4217 4218 out: 4219 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *, 4220 cs, RELEASE_LOCKOWNER4res *, resp); 4221 } 4222 4223 /* 4224 * short utility function to lookup a file and recall the delegation 4225 */ 4226 static rfs4_file_t * 4227 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp, 4228 int *lkup_error, cred_t *cr) 4229 { 4230 vnode_t *vp; 4231 rfs4_file_t *fp = NULL; 4232 bool_t fcreate = FALSE; 4233 int error; 4234 4235 if (vpp) 4236 *vpp = NULL; 4237 4238 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL, 4239 NULL)) == 0) { 4240 if (vp->v_type == VREG) 4241 fp = rfs4_findfile(vp, NULL, &fcreate); 4242 if (vpp) 4243 *vpp = vp; 4244 else 4245 VN_RELE(vp); 4246 } 4247 4248 if (lkup_error) 4249 *lkup_error = error; 4250 4251 return (fp); 4252 } 4253 4254 /* 4255 * remove: args: CURRENT_FH: directory; name. 4256 * res: status. If success - CURRENT_FH unchanged, return change_info 4257 * for directory. 
4258 */ 4259 /* ARGSUSED */ 4260 static void 4261 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 4262 struct compound_state *cs) 4263 { 4264 REMOVE4args *args = &argop->nfs_argop4_u.opremove; 4265 REMOVE4res *resp = &resop->nfs_resop4_u.opremove; 4266 int error; 4267 vnode_t *dvp, *vp; 4268 struct vattr bdva, idva, adva; 4269 char *nm; 4270 uint_t len; 4271 rfs4_file_t *fp; 4272 int in_crit = 0; 4273 bslabel_t *clabel; 4274 struct sockaddr *ca; 4275 char *name = NULL; 4276 nfsstat4 status; 4277 4278 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs, 4279 REMOVE4args *, args); 4280 4281 /* CURRENT_FH: directory */ 4282 dvp = cs->vp; 4283 if (dvp == NULL) { 4284 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4285 goto out; 4286 } 4287 4288 if (cs->access == CS_ACCESS_DENIED) { 4289 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4290 goto out; 4291 } 4292 4293 /* 4294 * If there is an unshared filesystem mounted on this vnode, 4295 * Do not allow to remove anything in this directory. 
4296 */ 4297 if (vn_ismntpt(dvp)) { 4298 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4299 goto out; 4300 } 4301 4302 if (dvp->v_type != VDIR) { 4303 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 4304 goto out; 4305 } 4306 4307 status = utf8_dir_verify(&args->target); 4308 if (status != NFS4_OK) { 4309 *cs->statusp = resp->status = status; 4310 goto out; 4311 } 4312 4313 /* 4314 * Lookup the file so that we can check if it's a directory 4315 */ 4316 nm = utf8_to_fn(&args->target, &len, NULL); 4317 if (nm == NULL) { 4318 *cs->statusp = resp->status = NFS4ERR_INVAL; 4319 goto out; 4320 } 4321 4322 if (len > MAXNAMELEN) { 4323 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 4324 kmem_free(nm, len); 4325 goto out; 4326 } 4327 4328 if (rdonly4(req, cs)) { 4329 *cs->statusp = resp->status = NFS4ERR_ROFS; 4330 kmem_free(nm, len); 4331 goto out; 4332 } 4333 4334 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 4335 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 4336 MAXPATHLEN + 1); 4337 4338 if (name == NULL) { 4339 *cs->statusp = resp->status = NFS4ERR_INVAL; 4340 kmem_free(nm, len); 4341 goto out; 4342 } 4343 4344 /* 4345 * Lookup the file to determine type and while we are see if 4346 * there is a file struct around and check for delegation. 4347 * We don't need to acquire va_seq before this lookup, if 4348 * it causes an update, cinfo.before will not match, which will 4349 * trigger a cache flush even if atomic is TRUE. 
4350 */ 4351 fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr); 4352 if (fp != NULL) { 4353 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE, 4354 NULL)) { 4355 VN_RELE(vp); 4356 rfs4_file_rele(fp); 4357 *cs->statusp = resp->status = NFS4ERR_DELAY; 4358 if (nm != name) 4359 kmem_free(name, MAXPATHLEN + 1); 4360 kmem_free(nm, len); 4361 goto out; 4362 } 4363 } 4364 4365 /* Didn't find anything to remove */ 4366 if (vp == NULL) { 4367 *cs->statusp = resp->status = error; 4368 if (nm != name) 4369 kmem_free(name, MAXPATHLEN + 1); 4370 kmem_free(nm, len); 4371 goto out; 4372 } 4373 4374 if (nbl_need_check(vp)) { 4375 nbl_start_crit(vp, RW_READER); 4376 in_crit = 1; 4377 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) { 4378 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 4379 if (nm != name) 4380 kmem_free(name, MAXPATHLEN + 1); 4381 kmem_free(nm, len); 4382 nbl_end_crit(vp); 4383 VN_RELE(vp); 4384 if (fp) { 4385 rfs4_clear_dont_grant(fp); 4386 rfs4_file_rele(fp); 4387 } 4388 goto out; 4389 } 4390 } 4391 4392 /* check label before allowing removal */ 4393 if (is_system_labeled()) { 4394 ASSERT(req->rq_label != NULL); 4395 clabel = req->rq_label; 4396 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *, 4397 "got client label from request(1)", 4398 struct svc_req *, req); 4399 if (!blequal(&l_admin_low->tsl_label, clabel)) { 4400 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK, 4401 cs->exi)) { 4402 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4403 if (name != nm) 4404 kmem_free(name, MAXPATHLEN + 1); 4405 kmem_free(nm, len); 4406 if (in_crit) 4407 nbl_end_crit(vp); 4408 VN_RELE(vp); 4409 if (fp) { 4410 rfs4_clear_dont_grant(fp); 4411 rfs4_file_rele(fp); 4412 } 4413 goto out; 4414 } 4415 } 4416 } 4417 4418 /* Get dir "before" change value */ 4419 bdva.va_mask = AT_CTIME|AT_SEQ; 4420 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL); 4421 if (error) { 4422 *cs->statusp = resp->status = puterrno4(error); 4423 if (nm != name) 4424 
kmem_free(name, MAXPATHLEN + 1); 4425 kmem_free(nm, len); 4426 if (in_crit) 4427 nbl_end_crit(vp); 4428 VN_RELE(vp); 4429 if (fp) { 4430 rfs4_clear_dont_grant(fp); 4431 rfs4_file_rele(fp); 4432 } 4433 goto out; 4434 } 4435 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime) 4436 4437 /* Actually do the REMOVE operation */ 4438 if (vp->v_type == VDIR) { 4439 /* 4440 * Can't remove a directory that has a mounted-on filesystem. 4441 */ 4442 if (vn_ismntpt(vp)) { 4443 error = EACCES; 4444 } else { 4445 /* 4446 * System V defines rmdir to return EEXIST, 4447 * not ENOTEMPTY, if the directory is not 4448 * empty. A System V NFS server needs to map 4449 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to 4450 * transmit over the wire. 4451 */ 4452 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr, 4453 NULL, 0)) == EEXIST) 4454 error = ENOTEMPTY; 4455 } 4456 } else { 4457 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 && 4458 fp != NULL) { 4459 struct vattr va; 4460 vnode_t *tvp; 4461 4462 rfs4_dbe_lock(fp->rf_dbe); 4463 tvp = fp->rf_vp; 4464 if (tvp) 4465 VN_HOLD(tvp); 4466 rfs4_dbe_unlock(fp->rf_dbe); 4467 4468 if (tvp) { 4469 /* 4470 * This is va_seq safe because we are not 4471 * manipulating dvp. 
4472 */ 4473 va.va_mask = AT_NLINK; 4474 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) && 4475 va.va_nlink == 0) { 4476 /* Remove state on file remove */ 4477 if (in_crit) { 4478 nbl_end_crit(vp); 4479 in_crit = 0; 4480 } 4481 rfs4_close_all_state(fp); 4482 } 4483 VN_RELE(tvp); 4484 } 4485 } 4486 } 4487 4488 if (in_crit) 4489 nbl_end_crit(vp); 4490 VN_RELE(vp); 4491 4492 if (fp) { 4493 rfs4_clear_dont_grant(fp); 4494 rfs4_file_rele(fp); 4495 } 4496 if (nm != name) 4497 kmem_free(name, MAXPATHLEN + 1); 4498 kmem_free(nm, len); 4499 4500 if (error) { 4501 *cs->statusp = resp->status = puterrno4(error); 4502 goto out; 4503 } 4504 4505 /* 4506 * Get the initial "after" sequence number, if it fails, set to zero 4507 */ 4508 idva.va_mask = AT_SEQ; 4509 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL)) 4510 idva.va_seq = 0; 4511 4512 /* 4513 * Force modified data and metadata out to stable storage. 4514 */ 4515 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 4516 4517 /* 4518 * Get "after" change value, if it fails, simply return the 4519 * before value. 4520 */ 4521 adva.va_mask = AT_CTIME|AT_SEQ; 4522 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) { 4523 adva.va_ctime = bdva.va_ctime; 4524 adva.va_seq = 0; 4525 } 4526 4527 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime) 4528 4529 /* 4530 * The cinfo.atomic = TRUE only if we have 4531 * non-zero va_seq's, and it has incremented by exactly one 4532 * during the VOP_REMOVE/RMDIR and it didn't change during 4533 * the VOP_FSYNC. 4534 */ 4535 if (bdva.va_seq && idva.va_seq && adva.va_seq && 4536 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq) 4537 resp->cinfo.atomic = TRUE; 4538 else 4539 resp->cinfo.atomic = FALSE; 4540 4541 *cs->statusp = resp->status = NFS4_OK; 4542 4543 out: 4544 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs, 4545 REMOVE4res *, resp); 4546 } 4547 4548 /* 4549 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory, 4550 * oldname and newname. 4551 * res: status. 
If success - CURRENT_FH unchanged, return change_info 4552 * for both from and target directories. 4553 */ 4554 /* ARGSUSED */ 4555 static void 4556 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 4557 struct compound_state *cs) 4558 { 4559 RENAME4args *args = &argop->nfs_argop4_u.oprename; 4560 RENAME4res *resp = &resop->nfs_resop4_u.oprename; 4561 int error; 4562 vnode_t *odvp; 4563 vnode_t *ndvp; 4564 vnode_t *srcvp, *targvp, *tvp; 4565 struct vattr obdva, oidva, oadva; 4566 struct vattr nbdva, nidva, nadva; 4567 char *onm, *nnm; 4568 uint_t olen, nlen; 4569 rfs4_file_t *fp, *sfp; 4570 int in_crit_src, in_crit_targ; 4571 int fp_rele_grant_hold, sfp_rele_grant_hold; 4572 int unlinked; 4573 bslabel_t *clabel; 4574 struct sockaddr *ca; 4575 char *converted_onm = NULL; 4576 char *converted_nnm = NULL; 4577 nfsstat4 status; 4578 4579 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs, 4580 RENAME4args *, args); 4581 4582 fp = sfp = NULL; 4583 srcvp = targvp = tvp = NULL; 4584 in_crit_src = in_crit_targ = 0; 4585 fp_rele_grant_hold = sfp_rele_grant_hold = 0; 4586 unlinked = 0; 4587 4588 /* CURRENT_FH: target directory */ 4589 ndvp = cs->vp; 4590 if (ndvp == NULL) { 4591 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4592 goto out; 4593 } 4594 4595 /* SAVED_FH: from directory */ 4596 odvp = cs->saved_vp; 4597 if (odvp == NULL) { 4598 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4599 goto out; 4600 } 4601 4602 if (cs->access == CS_ACCESS_DENIED) { 4603 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4604 goto out; 4605 } 4606 4607 /* 4608 * If there is an unshared filesystem mounted on this vnode, 4609 * do not allow to rename objects in this directory. 4610 */ 4611 if (vn_ismntpt(odvp)) { 4612 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4613 goto out; 4614 } 4615 4616 /* 4617 * If there is an unshared filesystem mounted on this vnode, 4618 * do not allow to rename to this directory. 
4619 */ 4620 if (vn_ismntpt(ndvp)) { 4621 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4622 goto out; 4623 } 4624 4625 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) { 4626 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 4627 goto out; 4628 } 4629 4630 if (cs->saved_exi != cs->exi) { 4631 *cs->statusp = resp->status = NFS4ERR_XDEV; 4632 goto out; 4633 } 4634 4635 status = utf8_dir_verify(&args->oldname); 4636 if (status != NFS4_OK) { 4637 *cs->statusp = resp->status = status; 4638 goto out; 4639 } 4640 4641 status = utf8_dir_verify(&args->newname); 4642 if (status != NFS4_OK) { 4643 *cs->statusp = resp->status = status; 4644 goto out; 4645 } 4646 4647 onm = utf8_to_fn(&args->oldname, &olen, NULL); 4648 if (onm == NULL) { 4649 *cs->statusp = resp->status = NFS4ERR_INVAL; 4650 goto out; 4651 } 4652 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 4653 nlen = MAXPATHLEN + 1; 4654 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND, 4655 nlen); 4656 4657 if (converted_onm == NULL) { 4658 *cs->statusp = resp->status = NFS4ERR_INVAL; 4659 kmem_free(onm, olen); 4660 goto out; 4661 } 4662 4663 nnm = utf8_to_fn(&args->newname, &nlen, NULL); 4664 if (nnm == NULL) { 4665 *cs->statusp = resp->status = NFS4ERR_INVAL; 4666 if (onm != converted_onm) 4667 kmem_free(converted_onm, MAXPATHLEN + 1); 4668 kmem_free(onm, olen); 4669 goto out; 4670 } 4671 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND, 4672 MAXPATHLEN + 1); 4673 4674 if (converted_nnm == NULL) { 4675 *cs->statusp = resp->status = NFS4ERR_INVAL; 4676 kmem_free(nnm, nlen); 4677 nnm = NULL; 4678 if (onm != converted_onm) 4679 kmem_free(converted_onm, MAXPATHLEN + 1); 4680 kmem_free(onm, olen); 4681 goto out; 4682 } 4683 4684 4685 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) { 4686 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 4687 kmem_free(onm, olen); 4688 kmem_free(nnm, nlen); 4689 goto out; 4690 } 4691 4692 4693 if (rdonly4(req, cs)) { 4694 *cs->statusp = 
resp->status = NFS4ERR_ROFS; 4695 if (onm != converted_onm) 4696 kmem_free(converted_onm, MAXPATHLEN + 1); 4697 kmem_free(onm, olen); 4698 if (nnm != converted_nnm) 4699 kmem_free(converted_nnm, MAXPATHLEN + 1); 4700 kmem_free(nnm, nlen); 4701 goto out; 4702 } 4703 4704 /* check label of the target dir */ 4705 if (is_system_labeled()) { 4706 ASSERT(req->rq_label != NULL); 4707 clabel = req->rq_label; 4708 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *, 4709 "got client label from request(1)", 4710 struct svc_req *, req); 4711 if (!blequal(&l_admin_low->tsl_label, clabel)) { 4712 if (!do_rfs_label_check(clabel, ndvp, 4713 EQUALITY_CHECK, cs->exi)) { 4714 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4715 goto err_out; 4716 } 4717 } 4718 } 4719 4720 /* 4721 * Is the source a file and have a delegation? 4722 * We don't need to acquire va_seq before these lookups, if 4723 * it causes an update, cinfo.before will not match, which will 4724 * trigger a cache flush even if atomic is TRUE. 4725 */ 4726 sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp, 4727 &error, cs->cr); 4728 if (sfp != NULL) { 4729 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE, 4730 NULL)) { 4731 *cs->statusp = resp->status = NFS4ERR_DELAY; 4732 goto err_out; 4733 } 4734 } 4735 4736 if (srcvp == NULL) { 4737 *cs->statusp = resp->status = puterrno4(error); 4738 if (onm != converted_onm) 4739 kmem_free(converted_onm, MAXPATHLEN + 1); 4740 kmem_free(onm, olen); 4741 if (nnm != converted_nnm) 4742 kmem_free(converted_nnm, MAXPATHLEN + 1); 4743 kmem_free(nnm, nlen); 4744 goto out; 4745 } 4746 4747 sfp_rele_grant_hold = 1; 4748 4749 /* Does the destination exist and a file and have a delegation? 
*/ 4750 fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp, NULL, 4751 cs->cr); 4752 if (fp != NULL) { 4753 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE, 4754 NULL)) { 4755 *cs->statusp = resp->status = NFS4ERR_DELAY; 4756 goto err_out; 4757 } 4758 } 4759 fp_rele_grant_hold = 1; 4760 4761 /* Check for NBMAND lock on both source and target */ 4762 if (nbl_need_check(srcvp)) { 4763 nbl_start_crit(srcvp, RW_READER); 4764 in_crit_src = 1; 4765 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) { 4766 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 4767 goto err_out; 4768 } 4769 } 4770 4771 if (targvp && nbl_need_check(targvp)) { 4772 nbl_start_crit(targvp, RW_READER); 4773 in_crit_targ = 1; 4774 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) { 4775 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 4776 goto err_out; 4777 } 4778 } 4779 4780 /* Get source "before" change value */ 4781 obdva.va_mask = AT_CTIME|AT_SEQ; 4782 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL); 4783 if (!error) { 4784 nbdva.va_mask = AT_CTIME|AT_SEQ; 4785 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL); 4786 } 4787 if (error) { 4788 *cs->statusp = resp->status = puterrno4(error); 4789 goto err_out; 4790 } 4791 4792 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime) 4793 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime) 4794 4795 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr, 4796 NULL, 0); 4797 4798 /* 4799 * If target existed and was unlinked by VOP_RENAME, state will need 4800 * closed. To avoid deadlock, rfs4_close_all_state will be done after 4801 * any necessary nbl_end_crit on srcvp and tgtvp. 
4802 */ 4803 if (error == 0 && fp != NULL) { 4804 rfs4_dbe_lock(fp->rf_dbe); 4805 tvp = fp->rf_vp; 4806 if (tvp) 4807 VN_HOLD(tvp); 4808 rfs4_dbe_unlock(fp->rf_dbe); 4809 4810 if (tvp) { 4811 struct vattr va; 4812 va.va_mask = AT_NLINK; 4813 4814 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) && 4815 va.va_nlink == 0) { 4816 unlinked = 1; 4817 4818 /* DEBUG data */ 4819 if ((srcvp == targvp) || (tvp != targvp)) { 4820 cmn_err(CE_WARN, "rfs4_op_rename: " 4821 "srcvp %p, targvp: %p, tvp: %p", 4822 (void *)srcvp, (void *)targvp, 4823 (void *)tvp); 4824 } 4825 } else { 4826 VN_RELE(tvp); 4827 } 4828 } 4829 } 4830 if (error == 0) 4831 vn_renamepath(ndvp, srcvp, nnm, nlen - 1); 4832 4833 if (in_crit_src) 4834 nbl_end_crit(srcvp); 4835 if (srcvp) 4836 VN_RELE(srcvp); 4837 if (in_crit_targ) 4838 nbl_end_crit(targvp); 4839 if (targvp) 4840 VN_RELE(targvp); 4841 4842 if (unlinked) { 4843 ASSERT(fp != NULL); 4844 ASSERT(tvp != NULL); 4845 4846 /* DEBUG data */ 4847 if (RW_READ_HELD(&tvp->v_nbllock)) { 4848 cmn_err(CE_WARN, "rfs4_op_rename: " 4849 "RW_READ_HELD(%p)", (void *)tvp); 4850 } 4851 4852 /* The file is gone and so should the state */ 4853 rfs4_close_all_state(fp); 4854 VN_RELE(tvp); 4855 } 4856 4857 if (sfp) { 4858 rfs4_clear_dont_grant(sfp); 4859 rfs4_file_rele(sfp); 4860 } 4861 if (fp) { 4862 rfs4_clear_dont_grant(fp); 4863 rfs4_file_rele(fp); 4864 } 4865 4866 if (converted_onm != onm) 4867 kmem_free(converted_onm, MAXPATHLEN + 1); 4868 kmem_free(onm, olen); 4869 if (converted_nnm != nnm) 4870 kmem_free(converted_nnm, MAXPATHLEN + 1); 4871 kmem_free(nnm, nlen); 4872 4873 /* 4874 * Get the initial "after" sequence number, if it fails, set to zero 4875 */ 4876 oidva.va_mask = AT_SEQ; 4877 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL)) 4878 oidva.va_seq = 0; 4879 4880 nidva.va_mask = AT_SEQ; 4881 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL)) 4882 nidva.va_seq = 0; 4883 4884 /* 4885 * Force modified data and metadata out to stable storage. 
4886 */ 4887 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL); 4888 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL); 4889 4890 if (error) { 4891 *cs->statusp = resp->status = puterrno4(error); 4892 goto out; 4893 } 4894 4895 /* 4896 * Get "after" change values, if it fails, simply return the 4897 * before value. 4898 */ 4899 oadva.va_mask = AT_CTIME|AT_SEQ; 4900 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) { 4901 oadva.va_ctime = obdva.va_ctime; 4902 oadva.va_seq = 0; 4903 } 4904 4905 nadva.va_mask = AT_CTIME|AT_SEQ; 4906 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) { 4907 nadva.va_ctime = nbdva.va_ctime; 4908 nadva.va_seq = 0; 4909 } 4910 4911 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime) 4912 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime) 4913 4914 /* 4915 * The cinfo.atomic = TRUE only if we have 4916 * non-zero va_seq's, and it has incremented by exactly one 4917 * during the VOP_RENAME and it didn't change during the VOP_FSYNC. 4918 */ 4919 if (obdva.va_seq && oidva.va_seq && oadva.va_seq && 4920 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq) 4921 resp->source_cinfo.atomic = TRUE; 4922 else 4923 resp->source_cinfo.atomic = FALSE; 4924 4925 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq && 4926 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq) 4927 resp->target_cinfo.atomic = TRUE; 4928 else 4929 resp->target_cinfo.atomic = FALSE; 4930 4931 #ifdef VOLATILE_FH_TEST 4932 { 4933 extern void add_volrnm_fh(struct exportinfo *, vnode_t *); 4934 4935 /* 4936 * Add the renamed file handle to the volatile rename list 4937 */ 4938 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) { 4939 /* file handles may expire on rename */ 4940 vnode_t *vp; 4941 4942 nnm = utf8_to_fn(&args->newname, &nlen, NULL); 4943 /* 4944 * Already know that nnm will be a valid string 4945 */ 4946 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr, 4947 NULL, NULL, NULL); 4948 kmem_free(nnm, nlen); 4949 if (!error) { 4950 
add_volrnm_fh(cs->exi, vp); 4951 VN_RELE(vp); 4952 } 4953 } 4954 } 4955 #endif /* VOLATILE_FH_TEST */ 4956 4957 *cs->statusp = resp->status = NFS4_OK; 4958 out: 4959 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs, 4960 RENAME4res *, resp); 4961 return; 4962 4963 err_out: 4964 if (onm != converted_onm) 4965 kmem_free(converted_onm, MAXPATHLEN + 1); 4966 if (onm != NULL) 4967 kmem_free(onm, olen); 4968 if (nnm != converted_nnm) 4969 kmem_free(converted_nnm, MAXPATHLEN + 1); 4970 if (nnm != NULL) 4971 kmem_free(nnm, nlen); 4972 4973 if (in_crit_src) nbl_end_crit(srcvp); 4974 if (in_crit_targ) nbl_end_crit(targvp); 4975 if (targvp) VN_RELE(targvp); 4976 if (srcvp) VN_RELE(srcvp); 4977 if (sfp) { 4978 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp); 4979 rfs4_file_rele(sfp); 4980 } 4981 if (fp) { 4982 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp); 4983 rfs4_file_rele(fp); 4984 } 4985 4986 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs, 4987 RENAME4res *, resp); 4988 } 4989 4990 /* ARGSUSED */ 4991 static void 4992 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 4993 struct compound_state *cs) 4994 { 4995 RENEW4args *args = &argop->nfs_argop4_u.oprenew; 4996 RENEW4res *resp = &resop->nfs_resop4_u.oprenew; 4997 rfs4_client_t *cp; 4998 4999 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs, 5000 RENEW4args *, args); 5001 5002 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) { 5003 *cs->statusp = resp->status = 5004 rfs4_check_clientid(&args->clientid, 0); 5005 goto out; 5006 } 5007 5008 if (rfs4_lease_expired(cp)) { 5009 rfs4_client_rele(cp); 5010 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 5011 goto out; 5012 } 5013 5014 rfs4_update_lease(cp); 5015 5016 mutex_enter(cp->rc_cbinfo.cb_lock); 5017 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) { 5018 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE; 5019 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN; 5020 } else { 5021 
*cs->statusp = resp->status = NFS4_OK; 5022 } 5023 mutex_exit(cp->rc_cbinfo.cb_lock); 5024 5025 rfs4_client_rele(cp); 5026 5027 out: 5028 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs, 5029 RENEW4res *, resp); 5030 } 5031 5032 /* ARGSUSED */ 5033 static void 5034 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 5035 struct compound_state *cs) 5036 { 5037 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh; 5038 5039 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs); 5040 5041 /* No need to check cs->access - we are not accessing any object */ 5042 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) { 5043 *cs->statusp = resp->status = NFS4ERR_RESTOREFH; 5044 goto out; 5045 } 5046 if (cs->vp != NULL) { 5047 VN_RELE(cs->vp); 5048 } 5049 cs->vp = cs->saved_vp; 5050 cs->saved_vp = NULL; 5051 cs->exi = cs->saved_exi; 5052 nfs_fh4_copy(&cs->saved_fh, &cs->fh); 5053 *cs->statusp = resp->status = NFS4_OK; 5054 cs->deleg = FALSE; 5055 5056 if (cs->cs_flags & RFS4_SAVED_STATEID) { 5057 cs->current_stateid = cs->save_stateid; 5058 cs->cs_flags |= RFS4_CURRENT_STATEID; 5059 } 5060 out: 5061 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs, 5062 RESTOREFH4res *, resp); 5063 } 5064 5065 /* ARGSUSED */ 5066 static void 5067 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5068 struct compound_state *cs) 5069 { 5070 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh; 5071 5072 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs); 5073 5074 /* No need to check cs->access - we are not accessing any object */ 5075 if (cs->vp == NULL) { 5076 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5077 goto out; 5078 } 5079 if (cs->saved_vp != NULL) { 5080 VN_RELE(cs->saved_vp); 5081 } 5082 cs->saved_vp = cs->vp; 5083 VN_HOLD(cs->saved_vp); 5084 cs->saved_exi = cs->exi; 5085 /* 5086 * since SAVEFH is fairly rare, don't alloc space for its fh 5087 * unless necessary. 
5088 */ 5089 if (cs->saved_fh.nfs_fh4_val == NULL) { 5090 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP); 5091 } 5092 nfs_fh4_copy(&cs->fh, &cs->saved_fh); 5093 *cs->statusp = resp->status = NFS4_OK; 5094 5095 if (cs->cs_flags & RFS4_CURRENT_STATEID) { 5096 cs->save_stateid = cs->current_stateid; 5097 cs->cs_flags |= RFS4_SAVED_STATEID; 5098 } 5099 out: 5100 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs, 5101 SAVEFH4res *, resp); 5102 } 5103 5104 /* 5105 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to 5106 * return the bitmap of attrs that were set successfully. It is also 5107 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should 5108 * always be called only after rfs4_do_set_attrs(). 5109 * 5110 * Verify that the attributes are same as the expected ones. sargp->vap 5111 * and sargp->sbp contain the input attributes as translated from fattr4. 5112 * 5113 * This function verifies only the attrs that correspond to a vattr or 5114 * vfsstat struct. That is because of the extra step needed to get the 5115 * corresponding system structs. Other attributes have already been set or 5116 * verified by do_rfs4_set_attrs. 5117 * 5118 * Return 0 if all attrs match, -1 if some don't, error if error processing. 5119 */ 5120 static int 5121 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp, 5122 bitmap4 *resp, struct nfs4_ntov_table *ntovp) 5123 { 5124 int error, ret_error = 0; 5125 int i, k; 5126 uint_t sva_mask = sargp->vap->va_mask; 5127 uint_t vbit; 5128 union nfs4_attr_u *na; 5129 uint8_t *amap; 5130 bool_t getsb = ntovp->vfsstat; 5131 5132 if (sva_mask != 0) { 5133 /* 5134 * Okay to overwrite sargp->vap because we verify based 5135 * on the incoming values. 
5136 */ 5137 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0, 5138 sargp->cs->cr, NULL); 5139 if (ret_error) { 5140 if (resp == NULL) 5141 return (ret_error); 5142 /* 5143 * Must return bitmap of successful attrs 5144 */ 5145 sva_mask = 0; /* to prevent checking vap later */ 5146 } else { 5147 /* 5148 * Some file systems clobber va_mask. it is probably 5149 * wrong of them to do so, nonethless we practice 5150 * defensive coding. 5151 * See bug id 4276830. 5152 */ 5153 sargp->vap->va_mask = sva_mask; 5154 } 5155 } 5156 5157 if (getsb) { 5158 /* 5159 * Now get the superblock and loop on the bitmap, as there is 5160 * no simple way of translating from superblock to bitmap4. 5161 */ 5162 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp); 5163 if (ret_error) { 5164 if (resp == NULL) 5165 goto errout; 5166 getsb = FALSE; 5167 } 5168 } 5169 5170 /* 5171 * Now loop and verify each attribute which getattr returned 5172 * whether it's the same as the input. 5173 */ 5174 if (resp == NULL && !getsb && (sva_mask == 0)) 5175 goto errout; 5176 5177 na = ntovp->na; 5178 amap = ntovp->amap; 5179 k = 0; 5180 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) { 5181 k = *amap; 5182 ASSERT(nfs4_ntov_map[k].nval == k); 5183 vbit = nfs4_ntov_map[k].vbit; 5184 5185 /* 5186 * If vattr attribute but VOP_GETATTR failed, or it's 5187 * superblock attribute but VFS_STATVFS failed, skip 5188 */ 5189 if (vbit) { 5190 if ((vbit & sva_mask) == 0) 5191 continue; 5192 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) { 5193 continue; 5194 } 5195 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na); 5196 if (resp != NULL) { 5197 if (error) 5198 ret_error = -1; /* not all match */ 5199 else /* update response bitmap */ 5200 *resp |= nfs4_ntov_map[k].fbit; 5201 continue; 5202 } 5203 if (error) { 5204 ret_error = -1; /* not all match */ 5205 break; 5206 } 5207 } 5208 errout: 5209 return (ret_error); 5210 } 5211 5212 /* 5213 * Decode the attribute to be set/verified. 
If the attr requires a sys op 5214 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't 5215 * call the sv_getit function for it, because the sys op hasn't yet been done. 5216 * Return 0 for success, error code if failed. 5217 * 5218 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free. 5219 */ 5220 static int 5221 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp, 5222 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap) 5223 { 5224 int error = 0; 5225 bool_t set_later; 5226 5227 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit; 5228 5229 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) { 5230 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat; 5231 /* 5232 * don't verify yet if a vattr or sb dependent attr, 5233 * because we don't have their sys values yet. 5234 * Will be done later. 5235 */ 5236 if (! (set_later && (cmd == NFS4ATTR_VERIT))) { 5237 /* 5238 * ACLs are a special case, since setting the MODE 5239 * conflicts with setting the ACL. We delay setting 5240 * the ACL until all other attributes have been set. 5241 * The ACL gets set in do_rfs4_op_setattr(). 5242 */ 5243 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) { 5244 error = (*nfs4_ntov_map[k].sv_getit)(cmd, 5245 sargp, nap); 5246 if (error) { 5247 xdr_free(nfs4_ntov_map[k].xfunc, 5248 (caddr_t)nap); 5249 } 5250 } 5251 } 5252 } else { 5253 #ifdef DEBUG 5254 cmn_err(CE_NOTE, "decode_fattr4_attr: error " 5255 "decoding attribute %d\n", k); 5256 #endif 5257 error = EINVAL; 5258 } 5259 if (!error && resp_bval && !set_later) { 5260 *resp_bval |= nfs4_ntov_map[k].fbit; 5261 } 5262 5263 return (error); 5264 } 5265 5266 /* 5267 * Set vattr based on incoming fattr4 attrs - used by setattr. 5268 * Set response mask. Ignore any values that are not writable vattr attrs. 
5269 */ 5270 static nfsstat4 5271 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs, 5272 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp, 5273 nfs4_attr_cmd_t cmd) 5274 { 5275 int error = 0; 5276 int i; 5277 char *attrs = fattrp->attrlist4; 5278 uint32_t attrslen = fattrp->attrlist4_len; 5279 XDR xdr; 5280 nfsstat4 status = NFS4_OK; 5281 vnode_t *vp = cs->vp; 5282 union nfs4_attr_u *na; 5283 uint8_t *amap; 5284 5285 #ifndef lint 5286 /* 5287 * Make sure that maximum attribute number can be expressed as an 5288 * 8 bit quantity. 5289 */ 5290 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1)); 5291 #endif 5292 5293 if (vp == NULL) { 5294 if (resp) 5295 *resp = 0; 5296 return (NFS4ERR_NOFILEHANDLE); 5297 } 5298 if (cs->access == CS_ACCESS_DENIED) { 5299 if (resp) 5300 *resp = 0; 5301 return (NFS4ERR_ACCESS); 5302 } 5303 5304 sargp->op = cmd; 5305 sargp->cs = cs; 5306 sargp->flag = 0; /* may be set later */ 5307 sargp->vap->va_mask = 0; 5308 sargp->rdattr_error = NFS4_OK; 5309 sargp->rdattr_error_req = FALSE; 5310 /* sargp->sbp is set by the caller */ 5311 5312 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE); 5313 5314 na = ntovp->na; 5315 amap = ntovp->amap; 5316 5317 /* 5318 * The following loop iterates on the nfs4_ntov_map checking 5319 * if the fbit is set in the requested bitmap. 5320 * If set then we process the arguments using the 5321 * rfs4_fattr4 conversion functions to populate the setattr 5322 * vattr and va_mask. Any settable attrs that are not using vattr 5323 * will be set in this loop. 5324 */ 5325 for (i = 0; i < nfs4_ntov_map_size; i++) { 5326 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) { 5327 continue; 5328 } 5329 /* 5330 * If setattr, must be a writable attr. 5331 * If verify/nverify, must be a readable attr. 
5332 */ 5333 if ((error = (*nfs4_ntov_map[i].sv_getit)( 5334 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) { 5335 /* 5336 * Client tries to set/verify an 5337 * unsupported attribute, tries to set 5338 * a read only attr or verify a write 5339 * only one - error! 5340 */ 5341 break; 5342 } 5343 /* 5344 * Decode the attribute to set/verify 5345 */ 5346 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval, 5347 &xdr, resp ? resp : NULL, na); 5348 if (error) 5349 break; 5350 *amap++ = (uint8_t)nfs4_ntov_map[i].nval; 5351 na++; 5352 (ntovp->attrcnt)++; 5353 if (nfs4_ntov_map[i].vfsstat) 5354 ntovp->vfsstat = TRUE; 5355 } 5356 5357 if (error != 0) 5358 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP : 5359 puterrno4(error)); 5360 /* xdrmem_destroy(&xdrs); */ /* NO-OP */ 5361 return (status); 5362 } 5363 5364 static nfsstat4 5365 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs, 5366 stateid4 *stateid) 5367 { 5368 int error = 0; 5369 struct nfs4_svgetit_arg sarg; 5370 bool_t trunc; 5371 5372 nfsstat4 status = NFS4_OK; 5373 cred_t *cr = cs->cr; 5374 vnode_t *vp = cs->vp; 5375 struct nfs4_ntov_table ntov; 5376 struct statvfs64 sb; 5377 struct vattr bva; 5378 struct flock64 bf; 5379 int in_crit = 0; 5380 uint_t saved_mask = 0; 5381 caller_context_t ct; 5382 5383 *resp = 0; 5384 sarg.sbp = &sb; 5385 sarg.is_referral = B_FALSE; 5386 nfs4_ntov_table_init(&ntov); 5387 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov, 5388 NFS4ATTR_SETIT); 5389 if (status != NFS4_OK) { 5390 /* 5391 * failed set attrs 5392 */ 5393 goto done; 5394 } 5395 5396 if ((sarg.vap->va_mask == 0) && 5397 (! (fattrp->attrmask & FATTR4_ACL_MASK))) { 5398 /* 5399 * no further work to be done 5400 */ 5401 goto done; 5402 } 5403 5404 /* 5405 * If we got a request to set the ACL and the MODE, only 5406 * allow changing VSUID, VSGID, and VSVTX. Attempting 5407 * to change any other bits, along with setting an ACL, 5408 * gives NFS4ERR_INVAL. 
5409 */ 5410 if ((fattrp->attrmask & FATTR4_ACL_MASK) && 5411 (fattrp->attrmask & FATTR4_MODE_MASK)) { 5412 vattr_t va; 5413 5414 va.va_mask = AT_MODE; 5415 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL); 5416 if (error) { 5417 status = puterrno4(error); 5418 goto done; 5419 } 5420 if ((sarg.vap->va_mode ^ va.va_mode) & 5421 ~(VSUID | VSGID | VSVTX)) { 5422 status = NFS4ERR_INVAL; 5423 goto done; 5424 } 5425 } 5426 5427 /* Check stateid only if size has been set */ 5428 if (sarg.vap->va_mask & AT_SIZE) { 5429 trunc = (sarg.vap->va_size == 0); 5430 status = rfs4_check_stateid(FWRITE, cs->vp, stateid, 5431 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct, cs); 5432 if (status != NFS4_OK) 5433 goto done; 5434 } else { 5435 ct.cc_sysid = 0; 5436 ct.cc_pid = 0; 5437 ct.cc_caller_id = nfs4_srv_caller_id; 5438 ct.cc_flags = CC_DONTBLOCK; 5439 } 5440 5441 /* XXX start of possible race with delegations */ 5442 5443 /* 5444 * We need to specially handle size changes because it is 5445 * possible for the client to create a file with read-only 5446 * modes, but with the file opened for writing. If the client 5447 * then tries to set the file size, e.g. ftruncate(3C), 5448 * fcntl(F_FREESP), the normal access checking done in 5449 * VOP_SETATTR would prevent the client from doing it even though 5450 * it should be allowed to do so. To get around this, we do the 5451 * access checking for ourselves and use VOP_SPACE which doesn't 5452 * do the access checking. 5453 * Also the client should not be allowed to change the file 5454 * size if there is a conflicting non-blocking mandatory lock in 5455 * the region of the change. 5456 */ 5457 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) { 5458 u_offset_t offset; 5459 ssize_t length; 5460 5461 /* 5462 * ufs_setattr clears AT_SIZE from vap->va_mask, but 5463 * before returning, sarg.vap->va_mask is used to 5464 * generate the setattr reply bitmap. We also clear 5465 * AT_SIZE below before calling VOP_SPACE. 
For both 5466 * of these cases, the va_mask needs to be saved here 5467 * and restored after calling VOP_SETATTR. 5468 */ 5469 saved_mask = sarg.vap->va_mask; 5470 5471 /* 5472 * Check any possible conflict due to NBMAND locks. 5473 * Get into critical region before VOP_GETATTR, so the 5474 * size attribute is valid when checking conflicts. 5475 */ 5476 if (nbl_need_check(vp)) { 5477 nbl_start_crit(vp, RW_READER); 5478 in_crit = 1; 5479 } 5480 5481 bva.va_mask = AT_UID|AT_SIZE; 5482 error = VOP_GETATTR(vp, &bva, 0, cr, &ct); 5483 if (error != 0) { 5484 status = puterrno4(error); 5485 goto done; 5486 } 5487 5488 if (in_crit) { 5489 if (sarg.vap->va_size < bva.va_size) { 5490 offset = sarg.vap->va_size; 5491 length = bva.va_size - sarg.vap->va_size; 5492 } else { 5493 offset = bva.va_size; 5494 length = sarg.vap->va_size - bva.va_size; 5495 } 5496 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 5497 &ct)) { 5498 status = NFS4ERR_LOCKED; 5499 goto done; 5500 } 5501 } 5502 5503 if (crgetuid(cr) == bva.va_uid) { 5504 sarg.vap->va_mask &= ~AT_SIZE; 5505 bf.l_type = F_WRLCK; 5506 bf.l_whence = 0; 5507 bf.l_start = (off64_t)sarg.vap->va_size; 5508 bf.l_len = 0; 5509 bf.l_sysid = 0; 5510 bf.l_pid = 0; 5511 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 5512 (offset_t)sarg.vap->va_size, cr, &ct); 5513 } 5514 } 5515 5516 if (!error && sarg.vap->va_mask != 0) 5517 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct); 5518 5519 /* restore va_mask -- ufs_setattr clears AT_SIZE */ 5520 if (saved_mask & AT_SIZE) 5521 sarg.vap->va_mask |= AT_SIZE; 5522 5523 /* 5524 * If an ACL was being set, it has been delayed until now, 5525 * in order to set the mode (via the VOP_SETATTR() above) first. 5526 */ 5527 if ((! 
error) && (fattrp->attrmask & FATTR4_ACL_MASK)) { 5528 int i; 5529 5530 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++) 5531 if (ntov.amap[i] == FATTR4_ACL) 5532 break; 5533 if (i < NFS4_MAXNUM_ATTRS) { 5534 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)( 5535 NFS4ATTR_SETIT, &sarg, &ntov.na[i]); 5536 if (error == 0) { 5537 *resp |= FATTR4_ACL_MASK; 5538 } else if (error == ENOTSUP) { 5539 (void) rfs4_verify_attr(&sarg, resp, &ntov); 5540 status = NFS4ERR_ATTRNOTSUPP; 5541 goto done; 5542 } 5543 } else { 5544 NFS4_DEBUG(rfs4_debug, 5545 (CE_NOTE, "do_rfs4_op_setattr: " 5546 "unable to find ACL in fattr4")); 5547 error = EINVAL; 5548 } 5549 } 5550 5551 if (error) { 5552 /* check if a monitor detected a delegation conflict */ 5553 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 5554 status = NFS4ERR_DELAY; 5555 else 5556 status = puterrno4(error); 5557 5558 /* 5559 * Set the response bitmap when setattr failed. 5560 * If VOP_SETATTR partially succeeded, test by doing a 5561 * VOP_GETATTR on the object and comparing the data 5562 * to the setattr arguments. 5563 */ 5564 (void) rfs4_verify_attr(&sarg, resp, &ntov); 5565 } else { 5566 /* 5567 * Force modified metadata out to stable storage. 5568 */ 5569 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 5570 /* 5571 * Set response bitmap 5572 */ 5573 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp); 5574 } 5575 5576 /* Return early and already have a NFSv4 error */ 5577 done: 5578 /* 5579 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr 5580 * conversion sets both readable and writeable NFS4 attrs 5581 * for AT_MTIME and AT_ATIME. The line below masks out 5582 * unrequested attrs from the setattr result bitmap. This 5583 * is placed after the done: label to catch the ATTRNOTSUP 5584 * case. 
5585 */ 5586 *resp &= fattrp->attrmask; 5587 5588 if (in_crit) 5589 nbl_end_crit(vp); 5590 5591 nfs4_ntov_table_free(&ntov, &sarg); 5592 5593 return (status); 5594 } 5595 5596 /* ARGSUSED */ 5597 static void 5598 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5599 struct compound_state *cs) 5600 { 5601 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr; 5602 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr; 5603 bslabel_t *clabel; 5604 5605 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs, 5606 SETATTR4args *, args); 5607 5608 if (cs->vp == NULL) { 5609 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5610 goto out; 5611 } 5612 5613 /* 5614 * If there is an unshared filesystem mounted on this vnode, 5615 * do not allow to setattr on this vnode. 5616 */ 5617 if (vn_ismntpt(cs->vp)) { 5618 *cs->statusp = resp->status = NFS4ERR_ACCESS; 5619 goto out; 5620 } 5621 5622 resp->attrsset = 0; 5623 5624 if (rdonly4(req, cs)) { 5625 *cs->statusp = resp->status = NFS4ERR_ROFS; 5626 goto out; 5627 } 5628 5629 /* check label before setting attributes */ 5630 if (is_system_labeled()) { 5631 ASSERT(req->rq_label != NULL); 5632 clabel = req->rq_label; 5633 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *, 5634 "got client label from request(1)", 5635 struct svc_req *, req); 5636 if (!blequal(&l_admin_low->tsl_label, clabel)) { 5637 if (!do_rfs_label_check(clabel, cs->vp, 5638 EQUALITY_CHECK, cs->exi)) { 5639 *cs->statusp = resp->status = NFS4ERR_ACCESS; 5640 goto out; 5641 } 5642 } 5643 } 5644 5645 get_stateid4(cs, &args->stateid); 5646 *cs->statusp = resp->status = 5647 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs, 5648 &args->stateid); 5649 5650 out: 5651 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs, 5652 SETATTR4res *, resp); 5653 } 5654 5655 /* ARGSUSED */ 5656 static void 5657 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5658 struct compound_state *cs) 
5659 { 5660 /* 5661 * verify and nverify are exactly the same, except that nverify 5662 * succeeds when some argument changed, and verify succeeds when 5663 * when none changed. 5664 */ 5665 5666 VERIFY4args *args = &argop->nfs_argop4_u.opverify; 5667 VERIFY4res *resp = &resop->nfs_resop4_u.opverify; 5668 5669 int error; 5670 struct nfs4_svgetit_arg sarg; 5671 struct statvfs64 sb; 5672 struct nfs4_ntov_table ntov; 5673 5674 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs, 5675 VERIFY4args *, args); 5676 5677 if (cs->vp == NULL) { 5678 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5679 goto out; 5680 } 5681 5682 sarg.sbp = &sb; 5683 sarg.is_referral = B_FALSE; 5684 nfs4_ntov_table_init(&ntov); 5685 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs, 5686 &sarg, &ntov, NFS4ATTR_VERIT); 5687 if (resp->status != NFS4_OK) { 5688 /* 5689 * do_rfs4_set_attrs will try to verify systemwide attrs, 5690 * so could return -1 for "no match". 5691 */ 5692 if (resp->status == -1) 5693 resp->status = NFS4ERR_NOT_SAME; 5694 goto done; 5695 } 5696 error = rfs4_verify_attr(&sarg, NULL, &ntov); 5697 switch (error) { 5698 case 0: 5699 resp->status = NFS4_OK; 5700 break; 5701 case -1: 5702 resp->status = NFS4ERR_NOT_SAME; 5703 break; 5704 default: 5705 resp->status = puterrno4(error); 5706 break; 5707 } 5708 done: 5709 *cs->statusp = resp->status; 5710 nfs4_ntov_table_free(&ntov, &sarg); 5711 out: 5712 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs, 5713 VERIFY4res *, resp); 5714 } 5715 5716 /* ARGSUSED */ 5717 static void 5718 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5719 struct compound_state *cs) 5720 { 5721 /* 5722 * verify and nverify are exactly the same, except that nverify 5723 * succeeds when some argument changed, and verify succeeds when 5724 * when none changed. 
5725 */ 5726 5727 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify; 5728 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify; 5729 5730 int error; 5731 struct nfs4_svgetit_arg sarg; 5732 struct statvfs64 sb; 5733 struct nfs4_ntov_table ntov; 5734 5735 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs, 5736 NVERIFY4args *, args); 5737 5738 if (cs->vp == NULL) { 5739 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5740 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs, 5741 NVERIFY4res *, resp); 5742 return; 5743 } 5744 sarg.sbp = &sb; 5745 sarg.is_referral = B_FALSE; 5746 nfs4_ntov_table_init(&ntov); 5747 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs, 5748 &sarg, &ntov, NFS4ATTR_VERIT); 5749 if (resp->status != NFS4_OK) { 5750 /* 5751 * do_rfs4_set_attrs will try to verify systemwide attrs, 5752 * so could return -1 for "no match". 5753 */ 5754 if (resp->status == -1) 5755 resp->status = NFS4_OK; 5756 goto done; 5757 } 5758 error = rfs4_verify_attr(&sarg, NULL, &ntov); 5759 switch (error) { 5760 case 0: 5761 resp->status = NFS4ERR_SAME; 5762 break; 5763 case -1: 5764 resp->status = NFS4_OK; 5765 break; 5766 default: 5767 resp->status = puterrno4(error); 5768 break; 5769 } 5770 done: 5771 *cs->statusp = resp->status; 5772 nfs4_ntov_table_free(&ntov, &sarg); 5773 5774 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs, 5775 NVERIFY4res *, resp); 5776 } 5777 5778 /* 5779 * XXX - This should live in an NFS header file. 
5780 */ 5781 #define MAX_IOVECS 12 5782 5783 /* ARGSUSED */ 5784 static void 5785 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5786 struct compound_state *cs) 5787 { 5788 WRITE4args *args = &argop->nfs_argop4_u.opwrite; 5789 WRITE4res *resp = &resop->nfs_resop4_u.opwrite; 5790 int error; 5791 vnode_t *vp; 5792 struct vattr bva; 5793 u_offset_t rlimit; 5794 struct uio uio; 5795 struct iovec iov[MAX_IOVECS]; 5796 struct iovec *iovp; 5797 int iovcnt; 5798 int ioflag; 5799 cred_t *savecred, *cr; 5800 bool_t *deleg = &cs->deleg; 5801 nfsstat4 stat; 5802 int in_crit = 0; 5803 caller_context_t ct; 5804 nfs4_srv_t *nsrv4; 5805 5806 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs, 5807 WRITE4args *, args); 5808 5809 vp = cs->vp; 5810 if (vp == NULL) { 5811 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5812 goto out; 5813 } 5814 5815 if (cs->access == CS_ACCESS_DENIED) { 5816 *cs->statusp = resp->status = NFS4ERR_ACCESS; 5817 goto out; 5818 } 5819 5820 get_stateid4(cs, &args->stateid); 5821 5822 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE, 5823 deleg, TRUE, &ct, cs)) != NFS4_OK) { 5824 *cs->statusp = resp->status = stat; 5825 goto out; 5826 } 5827 5828 /* 5829 * We have to enter the critical region before calling VOP_RWLOCK 5830 * to avoid a deadlock with ufs. 5831 */ 5832 if (nbl_need_check(vp)) { 5833 nbl_start_crit(vp, RW_READER); 5834 in_crit = 1; 5835 if (nbl_conflict(vp, NBL_WRITE, 5836 args->offset, args->data_len, 0, &ct)) { 5837 *cs->statusp = resp->status = NFS4ERR_LOCKED; 5838 goto out; 5839 } 5840 } 5841 5842 cr = cs->cr; 5843 bva.va_mask = AT_MODE | AT_UID; 5844 error = VOP_GETATTR(vp, &bva, 0, cr, &ct); 5845 5846 /* 5847 * If we can't get the attributes, then we can't do the 5848 * right access checking. So, we'll fail the request. 
5849 */ 5850 if (error) { 5851 *cs->statusp = resp->status = puterrno4(error); 5852 goto out; 5853 } 5854 5855 if (rdonly4(req, cs)) { 5856 *cs->statusp = resp->status = NFS4ERR_ROFS; 5857 goto out; 5858 } 5859 5860 if (vp->v_type != VREG) { 5861 *cs->statusp = resp->status = 5862 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL); 5863 goto out; 5864 } 5865 5866 if (crgetuid(cr) != bva.va_uid && 5867 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) { 5868 *cs->statusp = resp->status = puterrno4(error); 5869 goto out; 5870 } 5871 5872 if (MANDLOCK(vp, bva.va_mode)) { 5873 *cs->statusp = resp->status = NFS4ERR_ACCESS; 5874 goto out; 5875 } 5876 5877 nsrv4 = nfs4_get_srv(); 5878 if (args->data_len == 0) { 5879 *cs->statusp = resp->status = NFS4_OK; 5880 resp->count = 0; 5881 resp->committed = args->stable; 5882 resp->writeverf = nsrv4->write4verf; 5883 goto out; 5884 } 5885 5886 if (args->mblk != NULL) { 5887 mblk_t *m; 5888 uint_t bytes, round_len; 5889 5890 iovcnt = 0; 5891 bytes = 0; 5892 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT); 5893 for (m = args->mblk; 5894 m != NULL && bytes < round_len; 5895 m = m->b_cont) { 5896 iovcnt++; 5897 bytes += MBLKL(m); 5898 } 5899 #ifdef DEBUG 5900 /* should have ended on an mblk boundary */ 5901 if (bytes != round_len) { 5902 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n", 5903 bytes, round_len, args->data_len); 5904 printf("args=%p, args->mblk=%p, m=%p", (void *)args, 5905 (void *)args->mblk, (void *)m); 5906 ASSERT(bytes == round_len); 5907 } 5908 #endif 5909 if (iovcnt <= MAX_IOVECS) { 5910 iovp = iov; 5911 } else { 5912 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP); 5913 } 5914 mblk_to_iov(args->mblk, iovcnt, iovp); 5915 } else if (args->rlist != NULL) { 5916 iovcnt = 1; 5917 iovp = iov; 5918 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3); 5919 iovp->iov_len = args->data_len; 5920 } else { 5921 iovcnt = 1; 5922 iovp = iov; 5923 iovp->iov_base = args->data_val; 5924 iovp->iov_len = 
args->data_len; 5925 } 5926 5927 uio.uio_iov = iovp; 5928 uio.uio_iovcnt = iovcnt; 5929 5930 uio.uio_segflg = UIO_SYSSPACE; 5931 uio.uio_extflg = UIO_COPY_DEFAULT; 5932 uio.uio_loffset = args->offset; 5933 uio.uio_resid = args->data_len; 5934 uio.uio_llimit = curproc->p_fsz_ctl; 5935 rlimit = uio.uio_llimit - args->offset; 5936 if (rlimit < (u_offset_t)uio.uio_resid) 5937 uio.uio_resid = (int)rlimit; 5938 5939 if (args->stable == UNSTABLE4) 5940 ioflag = 0; 5941 else if (args->stable == FILE_SYNC4) 5942 ioflag = FSYNC; 5943 else if (args->stable == DATA_SYNC4) 5944 ioflag = FDSYNC; 5945 else { 5946 if (iovp != iov) 5947 kmem_free(iovp, sizeof (*iovp) * iovcnt); 5948 *cs->statusp = resp->status = NFS4ERR_INVAL; 5949 goto out; 5950 } 5951 5952 /* 5953 * We're changing creds because VM may fault and we need 5954 * the cred of the current thread to be used if quota 5955 * checking is enabled. 5956 */ 5957 savecred = curthread->t_cred; 5958 curthread->t_cred = cr; 5959 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct); 5960 curthread->t_cred = savecred; 5961 5962 if (iovp != iov) 5963 kmem_free(iovp, sizeof (*iovp) * iovcnt); 5964 5965 if (error) { 5966 *cs->statusp = resp->status = puterrno4(error); 5967 goto out; 5968 } 5969 5970 *cs->statusp = resp->status = NFS4_OK; 5971 resp->count = args->data_len - uio.uio_resid; 5972 5973 if (ioflag == 0) 5974 resp->committed = UNSTABLE4; 5975 else 5976 resp->committed = FILE_SYNC4; 5977 5978 resp->writeverf = nsrv4->write4verf; 5979 5980 out: 5981 if (in_crit) 5982 nbl_end_crit(vp); 5983 5984 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs, 5985 WRITE4res *, resp); 5986 } 5987 5988 static inline int 5989 rfs4_opnum_in_range(const compound_state_t *cs, int opnum) 5990 { 5991 if (opnum < FIRST_NFS4_OP || opnum > LAST_NFS4_OP) 5992 return (0); 5993 else if (cs->minorversion == 0 && opnum > LAST_NFS40_OP) 5994 return (0); 5995 else if (cs->minorversion == 1 && opnum > LAST_NFS41_OP) 5996 return (0); 5997 else if 
(cs->minorversion == 2 && opnum > LAST_NFS42_OP) 5998 return (0); 5999 return (1); 6000 } 6001 6002 void 6003 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, compound_state_t *cs, 6004 struct svc_req *req, int *rv) 6005 { 6006 uint_t i; 6007 cred_t *cr; 6008 nfs4_srv_t *nsrv4; 6009 nfs_export_t *ne = nfs_get_export(); 6010 6011 if (rv != NULL) 6012 *rv = 0; 6013 /* 6014 * Form a reply tag by copying over the request tag. 6015 */ 6016 resp->tag.utf8string_len = args->tag.utf8string_len; 6017 if (args->tag.utf8string_len != 0) { 6018 resp->tag.utf8string_val = 6019 kmem_alloc(args->tag.utf8string_len, KM_SLEEP); 6020 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val, 6021 resp->tag.utf8string_len); 6022 } else { 6023 resp->tag.utf8string_val = NULL; 6024 } 6025 6026 cs->statusp = &resp->status; 6027 cs->req = req; 6028 cs->minorversion = args->minorversion; 6029 resp->array = NULL; 6030 resp->array_len = 0; 6031 6032 if (args->array_len == 0) { 6033 resp->status = NFS4_OK; 6034 return; 6035 } 6036 6037 cr = svc_xprt_cred(req->rq_xprt); 6038 ASSERT(cr != NULL); 6039 6040 if (sec_svc_getcred(req, cr, &cs->principal, &cs->nfsflavor) == 0) { 6041 DTRACE_NFSV4_2(compound__start, struct compound_state *, 6042 cs, COMPOUND4args *, args); 6043 DTRACE_NFSV4_2(compound__done, struct compound_state *, 6044 cs, COMPOUND4res *, resp); 6045 svcerr_badcred(req->rq_xprt); 6046 if (rv != NULL) 6047 *rv = 1; 6048 return; 6049 } 6050 6051 resp->array_len = args->array_len; 6052 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4), 6053 KM_SLEEP); 6054 6055 cs->op_len = args->array_len; 6056 cs->basecr = cr; 6057 nsrv4 = nfs4_get_srv(); 6058 6059 DTRACE_NFSV4_2(compound__start, struct compound_state *, cs, 6060 COMPOUND4args *, args); 6061 6062 /* 6063 * For now, NFS4 compound processing must be protected by 6064 * exported_lock because it can access more than one exportinfo 6065 * per compound and share/unshare can now change multiple 6066 * exinfo structs. 
The NFS2/3 code only refs 1 exportinfo 6067 * per proc (excluding public exinfo), and exi_count design 6068 * is sufficient to protect concurrent execution of NFS2/3 6069 * ops along with unexport. This lock will be removed as 6070 * part of the NFSv4 phase 2 namespace redesign work. 6071 */ 6072 rw_enter(&ne->exported_lock, RW_READER); 6073 6074 /* 6075 * If this is the first compound we've seen, we need to start all 6076 * new instances' grace periods. 6077 */ 6078 if (nsrv4->seen_first_compound == 0) { 6079 rfs4_grace_start_new(nsrv4); 6080 /* 6081 * This must be set after rfs4_grace_start_new(), otherwise 6082 * another thread could proceed past here before the former 6083 * is finished. 6084 */ 6085 nsrv4->seen_first_compound = 1; 6086 } 6087 6088 for (i = 0; i < args->array_len && cs->cont; i++) { 6089 nfs_argop4 *argop; 6090 nfs_resop4 *resop; 6091 uint_t op; 6092 kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4]; 6093 6094 argop = &args->array[i]; 6095 resop = &resp->array[i]; 6096 resop->resop = argop->argop; 6097 op = (uint_t)resop->resop; 6098 6099 cs->op_pos = i; 6100 if (op < rfsv4disp_cnt && rfs4_opnum_in_range(cs, op)) { 6101 /* 6102 * Count the individual ops here; NULL and COMPOUND 6103 * are counted in common_dispatch() 6104 */ 6105 stat[op].value.ui64++; 6106 6107 NFS4_DEBUG(rfs4_debug > 1, 6108 (CE_NOTE, "Executing %s", rfs4_op_string[op])); 6109 (*rfsv4disptab[op].dis_proc)(argop, resop, req, cs); 6110 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d", 6111 rfs4_op_string[op], *cs->statusp)); 6112 if (*cs->statusp != NFS4_OK) 6113 cs->cont = FALSE; 6114 if (rfsv4disptab[op].dis_flags & OP_CLEAR_STATEID) 6115 cs->cs_flags &= ~RFS4_CURRENT_STATEID; 6116 } else { 6117 /* 6118 * This is effectively dead code since XDR code 6119 * will have already returned BADXDR if op doesn't 6120 * decode to legal value. This only done for a 6121 * day when XDR code doesn't verify v4 opcodes. 
6122 */ 6123 op = OP_ILLEGAL; 6124 stat[OP_ILLEGAL_IDX].value.ui64++; 6125 6126 rfs4_op_illegal(argop, resop, req, cs); 6127 cs->cont = FALSE; 6128 } 6129 6130 /* 6131 * If not at last op, and if we are to stop, then 6132 * compact the results array. 6133 */ 6134 if ((i + 1) < args->array_len && !cs->cont) { 6135 nfs_resop4 *new_res = kmem_alloc( 6136 (i+1) * sizeof (nfs_resop4), KM_SLEEP); 6137 bcopy(resp->array, 6138 new_res, (i+1) * sizeof (nfs_resop4)); 6139 kmem_free(resp->array, 6140 args->array_len * sizeof (nfs_resop4)); 6141 6142 resp->array_len = i + 1; 6143 resp->array = new_res; 6144 } 6145 } 6146 6147 rw_exit(&ne->exported_lock); 6148 6149 DTRACE_NFSV4_2(compound__done, struct compound_state *, cs, 6150 COMPOUND4res *, resp); 6151 6152 /* 6153 * done with this compound request, free the label 6154 */ 6155 6156 if (req->rq_label != NULL) { 6157 kmem_free(req->rq_label, sizeof (bslabel_t)); 6158 req->rq_label = NULL; 6159 } 6160 } 6161 6162 /* 6163 * XXX because of what appears to be duplicate calls to rfs4_compound_free 6164 * XXX zero out the tag and array values. Need to investigate why the 6165 * XXX calls occur, but at least prevent the panic for now. 
6166 */ 6167 void 6168 rfs4_compound_free(COMPOUND4res *resp) 6169 { 6170 uint_t i; 6171 6172 if (resp->tag.utf8string_val) { 6173 UTF8STRING_FREE(resp->tag) 6174 } 6175 6176 for (i = 0; i < resp->array_len; i++) { 6177 nfs_resop4 *resop; 6178 uint_t op; 6179 6180 resop = &resp->array[i]; 6181 op = (uint_t)resop->resop; 6182 if (op < rfsv4disp_cnt) { 6183 (*rfsv4disptab[op].dis_resfree)(resop); 6184 } 6185 } 6186 if (resp->array != NULL) { 6187 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4)); 6188 } 6189 } 6190 6191 /* 6192 * Check if entire requst is idempotent 6193 */ 6194 bool_t 6195 rfs4_idempotent_req(const COMPOUND4args *args) 6196 { 6197 int i; 6198 6199 for (i = 0; i < args->array_len; i++) { 6200 uint_t op; 6201 6202 op = (uint_t)args->array[i].argop; 6203 6204 if (op >= rfsv4disp_cnt || 6205 !(rfsv4disptab[op].dis_flags & OP_IDEMPOTENT)) { 6206 return (FALSE); 6207 } 6208 } 6209 return (TRUE); 6210 } 6211 6212 nfsstat4 6213 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp) 6214 { 6215 nfsstat4 e; 6216 6217 rfs4_dbe_lock(cp->rc_dbe); 6218 6219 if (cp->rc_sysidt != LM_NOSYSID) { 6220 *sp = cp->rc_sysidt; 6221 e = NFS4_OK; 6222 6223 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) { 6224 *sp = cp->rc_sysidt; 6225 e = NFS4_OK; 6226 6227 NFS4_DEBUG(rfs4_debug, (CE_NOTE, 6228 "rfs4_client_sysid: allocated 0x%x\n", *sp)); 6229 } else 6230 e = NFS4ERR_DELAY; 6231 6232 rfs4_dbe_unlock(cp->rc_dbe); 6233 return (e); 6234 } 6235 6236 #if defined(DEBUG) && ! 
defined(lint) 6237 static void lock_print(char *str, int operation, struct flock64 *flk) 6238 { 6239 char *op, *type; 6240 6241 switch (operation) { 6242 case F_GETLK: op = "F_GETLK"; 6243 break; 6244 case F_SETLK: op = "F_SETLK"; 6245 break; 6246 case F_SETLK_NBMAND: op = "F_SETLK_NBMAND"; 6247 break; 6248 default: op = "F_UNKNOWN"; 6249 break; 6250 } 6251 switch (flk->l_type) { 6252 case F_UNLCK: type = "F_UNLCK"; 6253 break; 6254 case F_RDLCK: type = "F_RDLCK"; 6255 break; 6256 case F_WRLCK: type = "F_WRLCK"; 6257 break; 6258 default: type = "F_UNKNOWN"; 6259 break; 6260 } 6261 6262 ASSERT(flk->l_whence == 0); 6263 cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d", 6264 str, op, type, (longlong_t)flk->l_start, 6265 flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid); 6266 } 6267 6268 #define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f) 6269 #else 6270 #define LOCK_PRINT(d, s, t, f) 6271 #endif 6272 6273 /*ARGSUSED*/ 6274 static bool_t 6275 creds_ok(cred_set_t *cr_set, struct svc_req *req, struct compound_state *cs) 6276 { 6277 return (TRUE); 6278 } 6279 6280 /* 6281 * Look up the pathname using the vp in cs as the directory vnode. 
6282 * cs->vp will be the vnode for the file on success 6283 */ 6284 6285 static nfsstat4 6286 rfs4_lookup(component4 *component, struct svc_req *req, 6287 struct compound_state *cs) 6288 { 6289 char *nm; 6290 uint32_t len; 6291 nfsstat4 status; 6292 struct sockaddr *ca; 6293 char *name; 6294 6295 if (cs->vp == NULL) { 6296 return (NFS4ERR_NOFILEHANDLE); 6297 } 6298 if (cs->vp->v_type != VDIR) { 6299 return (NFS4ERR_NOTDIR); 6300 } 6301 6302 status = utf8_dir_verify(component); 6303 if (status != NFS4_OK) 6304 return (status); 6305 6306 nm = utf8_to_fn(component, &len, NULL); 6307 if (nm == NULL) { 6308 return (NFS4ERR_INVAL); 6309 } 6310 6311 if (len > MAXNAMELEN) { 6312 kmem_free(nm, len); 6313 return (NFS4ERR_NAMETOOLONG); 6314 } 6315 6316 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 6317 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 6318 MAXPATHLEN + 1); 6319 6320 if (name == NULL) { 6321 kmem_free(nm, len); 6322 return (NFS4ERR_INVAL); 6323 } 6324 6325 status = do_rfs4_op_lookup(name, req, cs); 6326 6327 if (name != nm) 6328 kmem_free(name, MAXPATHLEN + 1); 6329 6330 kmem_free(nm, len); 6331 6332 return (status); 6333 } 6334 6335 static nfsstat4 6336 rfs4_lookupfile(component4 *component, struct svc_req *req, 6337 struct compound_state *cs, uint32_t access, change_info4 *cinfo) 6338 { 6339 nfsstat4 status; 6340 vnode_t *dvp = cs->vp; 6341 vattr_t bva, ava, fva; 6342 int error; 6343 6344 /* Get "before" change value */ 6345 bva.va_mask = AT_CTIME|AT_SEQ; 6346 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL); 6347 if (error) 6348 return (puterrno4(error)); 6349 6350 /* rfs4_lookup may VN_RELE directory */ 6351 VN_HOLD(dvp); 6352 6353 status = rfs4_lookup(component, req, cs); 6354 if (status != NFS4_OK) { 6355 VN_RELE(dvp); 6356 return (status); 6357 } 6358 6359 /* 6360 * Get "after" change value, if it fails, simply return the 6361 * before value. 
6362 */ 6363 ava.va_mask = AT_CTIME|AT_SEQ; 6364 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) { 6365 ava.va_ctime = bva.va_ctime; 6366 ava.va_seq = 0; 6367 } 6368 VN_RELE(dvp); 6369 6370 /* 6371 * Validate the file is a file 6372 */ 6373 fva.va_mask = AT_TYPE|AT_MODE; 6374 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL); 6375 if (error) 6376 return (puterrno4(error)); 6377 6378 if (fva.va_type != VREG) { 6379 if (fva.va_type == VDIR) 6380 return (NFS4ERR_ISDIR); 6381 if (fva.va_type == VLNK) 6382 return (NFS4ERR_SYMLINK); 6383 return (NFS4ERR_INVAL); 6384 } 6385 6386 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime); 6387 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime); 6388 6389 /* 6390 * It is undefined if VOP_LOOKUP will change va_seq, so 6391 * cinfo.atomic = TRUE only if we have 6392 * non-zero va_seq's, and they have not changed. 6393 */ 6394 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq) 6395 cinfo->atomic = TRUE; 6396 else 6397 cinfo->atomic = FALSE; 6398 6399 /* Check for mandatory locking */ 6400 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode); 6401 return (check_open_access(access, cs, req)); 6402 } 6403 6404 static nfsstat4 6405 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode, 6406 cred_t *cr, vnode_t **vpp, bool_t *created) 6407 { 6408 int error; 6409 nfsstat4 status = NFS4_OK; 6410 vattr_t va; 6411 6412 tryagain: 6413 6414 /* 6415 * The file open mode used is VWRITE. If the client needs 6416 * some other semantic, then it should do the access checking 6417 * itself. It would have been nice to have the file open mode 6418 * passed as part of the arguments. 6419 */ 6420 6421 *created = TRUE; 6422 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL); 6423 6424 if (error) { 6425 *created = FALSE; 6426 6427 /* 6428 * If we got something other than file already exists 6429 * then just return this error. Otherwise, we got 6430 * EEXIST. 
If we were doing a GUARDED create, then 6431 * just return this error. Otherwise, we need to 6432 * make sure that this wasn't a duplicate of an 6433 * exclusive create request. 6434 * 6435 * The assumption is made that a non-exclusive create 6436 * request will never return EEXIST. 6437 */ 6438 6439 if (error != EEXIST || mode == GUARDED4) { 6440 status = puterrno4(error); 6441 return (status); 6442 } 6443 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr, 6444 NULL, NULL, NULL); 6445 6446 if (error) { 6447 /* 6448 * We couldn't find the file that we thought that 6449 * we just created. So, we'll just try creating 6450 * it again. 6451 */ 6452 if (error == ENOENT) 6453 goto tryagain; 6454 6455 status = puterrno4(error); 6456 return (status); 6457 } 6458 6459 if (mode == UNCHECKED4) { 6460 /* existing object must be regular file */ 6461 if ((*vpp)->v_type != VREG) { 6462 if ((*vpp)->v_type == VDIR) 6463 status = NFS4ERR_ISDIR; 6464 else if ((*vpp)->v_type == VLNK) 6465 status = NFS4ERR_SYMLINK; 6466 else 6467 status = NFS4ERR_INVAL; 6468 VN_RELE(*vpp); 6469 return (status); 6470 } 6471 6472 return (NFS4_OK); 6473 } 6474 6475 /* Check for duplicate request */ 6476 va.va_mask = AT_MTIME; 6477 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL); 6478 if (!error) { 6479 /* We found the file */ 6480 const timestruc_t *mtime = &vap->va_mtime; 6481 6482 if (va.va_mtime.tv_sec != mtime->tv_sec || 6483 va.va_mtime.tv_nsec != mtime->tv_nsec) { 6484 /* but its not our creation */ 6485 VN_RELE(*vpp); 6486 return (NFS4ERR_EXIST); 6487 } 6488 *created = TRUE; /* retrans of create == created */ 6489 return (NFS4_OK); 6490 } 6491 VN_RELE(*vpp); 6492 return (NFS4ERR_EXIST); 6493 } 6494 6495 return (NFS4_OK); 6496 } 6497 6498 static nfsstat4 6499 check_open_access(uint32_t access, struct compound_state *cs, 6500 struct svc_req *req) 6501 { 6502 int error; 6503 vnode_t *vp; 6504 bool_t readonly; 6505 cred_t *cr = cs->cr; 6506 6507 /* For now we don't allow mandatory locking as per V2/V3 */ 
6508 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) { 6509 return (NFS4ERR_ACCESS); 6510 } 6511 6512 vp = cs->vp; 6513 ASSERT(cr != NULL && vp->v_type == VREG); 6514 6515 /* 6516 * If the file system is exported read only and we are trying 6517 * to open for write, then return NFS4ERR_ROFS 6518 */ 6519 6520 readonly = rdonly4(req, cs); 6521 6522 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly) 6523 return (NFS4ERR_ROFS); 6524 6525 if (access & OPEN4_SHARE_ACCESS_READ) { 6526 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) && 6527 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) { 6528 return (NFS4ERR_ACCESS); 6529 } 6530 } 6531 6532 if (access & OPEN4_SHARE_ACCESS_WRITE) { 6533 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 6534 if (error) 6535 return (NFS4ERR_ACCESS); 6536 } 6537 6538 return (NFS4_OK); 6539 } 6540 6541 static void 6542 rfs4_verifier_to_mtime(verifier4 v, timestruc_t *mtime) 6543 { 6544 timespec32_t *time = (timespec32_t *)&v; 6545 6546 /* 6547 * Ensure no time overflows. Assumes underlying 6548 * filesystem supports at least 32 bits. 6549 * Truncate nsec to usec resolution to allow valid 6550 * compares even if the underlying filesystem truncates. 
6551 */ 6552 mtime->tv_sec = time->tv_sec % TIME32_MAX; 6553 mtime->tv_nsec = (time->tv_nsec / 1000) * 1000; 6554 } 6555 6556 static nfsstat4 6557 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs, 6558 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid) 6559 { 6560 struct nfs4_svgetit_arg sarg; 6561 struct nfs4_ntov_table ntov; 6562 6563 bool_t ntov_table_init = FALSE; 6564 struct statvfs64 sb; 6565 nfsstat4 status; 6566 vnode_t *vp; 6567 vattr_t bva, ava, iva, cva, *vap; 6568 vnode_t *dvp; 6569 char *nm = NULL; 6570 uint_t buflen; 6571 bool_t created; 6572 bool_t setsize = FALSE; 6573 len_t reqsize; 6574 int error; 6575 bool_t trunc; 6576 caller_context_t ct; 6577 component4 *component; 6578 bslabel_t *clabel; 6579 struct sockaddr *ca; 6580 char *name = NULL; 6581 fattr4 *fattr = NULL; 6582 6583 ASSERT(*attrset == 0); 6584 6585 sarg.sbp = &sb; 6586 sarg.is_referral = B_FALSE; 6587 6588 dvp = cs->vp; 6589 6590 /* Check if the file system is read only */ 6591 if (rdonly4(req, cs)) 6592 return (NFS4ERR_ROFS); 6593 6594 /* check the label of including directory */ 6595 if (is_system_labeled()) { 6596 ASSERT(req->rq_label != NULL); 6597 clabel = req->rq_label; 6598 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *, 6599 "got client label from request(1)", 6600 struct svc_req *, req); 6601 if (!blequal(&l_admin_low->tsl_label, clabel)) { 6602 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK, 6603 cs->exi)) { 6604 return (NFS4ERR_ACCESS); 6605 } 6606 } 6607 } 6608 6609 if ((args->mode == EXCLUSIVE4 || args->mode == EXCLUSIVE4_1) && 6610 dvp->v_flag & V_XATTRDIR) { 6611 /* prohibit EXCL create of named attributes */ 6612 return (NFS4ERR_INVAL); 6613 } 6614 6615 /* 6616 * Get the last component of path name in nm. cs will reference 6617 * the including directory on success. 
6618 */ 6619 component = &args->claim.open_claim4_u.file; 6620 status = utf8_dir_verify(component); 6621 if (status != NFS4_OK) 6622 return (status); 6623 6624 nm = utf8_to_fn(component, &buflen, NULL); 6625 6626 if (nm == NULL) 6627 return (NFS4ERR_RESOURCE); 6628 6629 if (buflen > MAXNAMELEN) { 6630 kmem_free(nm, buflen); 6631 return (NFS4ERR_NAMETOOLONG); 6632 } 6633 6634 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ; 6635 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL); 6636 if (error) { 6637 kmem_free(nm, buflen); 6638 return (puterrno4(error)); 6639 } 6640 6641 if (bva.va_type != VDIR) { 6642 kmem_free(nm, buflen); 6643 return (NFS4ERR_NOTDIR); 6644 } 6645 6646 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime) 6647 6648 switch (args->mode) { 6649 case GUARDED4: 6650 /*FALLTHROUGH*/ 6651 case UNCHECKED4: 6652 case EXCLUSIVE4_1: 6653 nfs4_ntov_table_init(&ntov); 6654 ntov_table_init = TRUE; 6655 6656 if (args->mode == EXCLUSIVE4_1) 6657 fattr = &args->createhow4_u.ch_createboth.cva_attrs; 6658 else 6659 fattr = &args->createhow4_u.createattrs; 6660 6661 status = do_rfs4_set_attrs(attrset, 6662 fattr, 6663 cs, &sarg, &ntov, NFS4ATTR_SETIT); 6664 6665 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) && 6666 sarg.vap->va_type != VREG) { 6667 if (sarg.vap->va_type == VDIR) 6668 status = NFS4ERR_ISDIR; 6669 else if (sarg.vap->va_type == VLNK) 6670 status = NFS4ERR_SYMLINK; 6671 else 6672 status = NFS4ERR_INVAL; 6673 } 6674 6675 if (status != NFS4_OK) { 6676 kmem_free(nm, buflen); 6677 nfs4_ntov_table_free(&ntov, &sarg); 6678 *attrset = 0; 6679 return (status); 6680 } 6681 6682 vap = sarg.vap; 6683 vap->va_type = VREG; 6684 vap->va_mask |= AT_TYPE; 6685 6686 if ((vap->va_mask & AT_MODE) == 0) { 6687 vap->va_mask |= AT_MODE; 6688 vap->va_mode = (mode_t)0600; 6689 } 6690 6691 if (vap->va_mask & AT_SIZE) { 6692 6693 /* Disallow create with a non-zero size */ 6694 6695 if ((reqsize = sarg.vap->va_size) != 0) { 6696 kmem_free(nm, buflen); 6697 nfs4_ntov_table_free(&ntov, 
&sarg); 6698 *attrset = 0; 6699 return (NFS4ERR_INVAL); 6700 } 6701 setsize = TRUE; 6702 } 6703 if (args->mode == EXCLUSIVE4_1) { 6704 rfs4_verifier_to_mtime( 6705 args->createhow4_u.ch_createboth.cva_verf, 6706 &vap->va_mtime); 6707 /* attrset will be set later */ 6708 fattr->attrmask |= FATTR4_TIME_MODIFY_MASK; 6709 vap->va_mask |= AT_MTIME; 6710 } 6711 break; 6712 6713 case EXCLUSIVE4: 6714 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE; 6715 cva.va_type = VREG; 6716 cva.va_mode = (mode_t)0; 6717 6718 rfs4_verifier_to_mtime(args->createhow4_u.createverf, 6719 &cva.va_mtime); 6720 6721 vap = &cva; 6722 6723 /* 6724 * For EXCL create, attrset is set to the server attr 6725 * used to cache the client's verifier. 6726 */ 6727 *attrset = FATTR4_TIME_MODIFY_MASK; 6728 break; 6729 } 6730 6731 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 6732 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 6733 MAXPATHLEN + 1); 6734 6735 if (name == NULL) { 6736 kmem_free(nm, buflen); 6737 return (NFS4ERR_SERVERFAULT); 6738 } 6739 6740 status = create_vnode(dvp, name, vap, args->mode, 6741 cs->cr, &vp, &created); 6742 if (nm != name) 6743 kmem_free(name, MAXPATHLEN + 1); 6744 kmem_free(nm, buflen); 6745 6746 if (status != NFS4_OK) { 6747 if (ntov_table_init) 6748 nfs4_ntov_table_free(&ntov, &sarg); 6749 *attrset = 0; 6750 return (status); 6751 } 6752 6753 trunc = (setsize && !created); 6754 6755 if (args->mode != EXCLUSIVE4) { 6756 bitmap4 createmask = fattr->attrmask; 6757 6758 /* 6759 * True verification that object was created with correct 6760 * attrs is impossible. The attrs could have been changed 6761 * immediately after object creation. If attributes did 6762 * not verify, the only recourse for the server is to 6763 * destroy the object. Maybe if some attrs (like gid) 6764 * are set incorrectly, the object should be destroyed; 6765 * however, seems bad as a default policy. 
Do we really 6766 * want to destroy an object over one of the times not 6767 * verifying correctly? For these reasons, the server 6768 * currently sets bits in attrset for createattrs 6769 * that were set; however, no verification is done. 6770 * 6771 * vmask_to_nmask accounts for vattr bits set on create 6772 * [do_rfs4_set_attrs() only sets resp bits for 6773 * non-vattr/vfs bits.] 6774 * Mask off any bits we set by default so as not to return 6775 * more attrset bits than were requested in createattrs 6776 */ 6777 if (created) { 6778 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset); 6779 *attrset &= createmask; 6780 } else { 6781 /* 6782 * We did not create the vnode (we tried but it 6783 * already existed). In this case, the only createattr 6784 * that the spec allows the server to set is size, 6785 * and even then, it can only be set if it is 0. 6786 */ 6787 *attrset = 0; 6788 if (trunc) 6789 *attrset = FATTR4_SIZE_MASK; 6790 } 6791 } 6792 if (ntov_table_init) 6793 nfs4_ntov_table_free(&ntov, &sarg); 6794 6795 /* 6796 * Get the initial "after" sequence number, if it fails, 6797 * set to zero, time to before. 6798 */ 6799 iva.va_mask = AT_CTIME|AT_SEQ; 6800 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) { 6801 iva.va_seq = 0; 6802 iva.va_ctime = bva.va_ctime; 6803 } 6804 6805 /* 6806 * create_vnode attempts to create the file exclusive, 6807 * if it already exists the VOP_CREATE will fail and 6808 * may not increase va_seq. It is atomic if 6809 * we haven't changed the directory, but if it has changed 6810 * we don't know what changed it. 6811 */ 6812 if (!created) { 6813 if (bva.va_seq && iva.va_seq && 6814 bva.va_seq == iva.va_seq) 6815 cinfo->atomic = TRUE; 6816 else 6817 cinfo->atomic = FALSE; 6818 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime); 6819 } else { 6820 /* 6821 * The entry was created, we need to sync the 6822 * directory metadata. 
6823 */ 6824 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 6825 6826 /* 6827 * Get "after" change value, if it fails, simply return the 6828 * before value. 6829 */ 6830 ava.va_mask = AT_CTIME|AT_SEQ; 6831 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) { 6832 ava.va_ctime = bva.va_ctime; 6833 ava.va_seq = 0; 6834 } 6835 6836 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime); 6837 6838 /* 6839 * The cinfo->atomic = TRUE only if we have 6840 * non-zero va_seq's, and it has incremented by exactly one 6841 * during the create_vnode and it didn't 6842 * change during the VOP_FSYNC. 6843 */ 6844 if (bva.va_seq && iva.va_seq && ava.va_seq && 6845 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq) 6846 cinfo->atomic = TRUE; 6847 else 6848 cinfo->atomic = FALSE; 6849 } 6850 6851 /* Check for mandatory locking and that the size gets set. */ 6852 cva.va_mask = AT_MODE; 6853 if (setsize) 6854 cva.va_mask |= AT_SIZE; 6855 6856 /* Assume the worst */ 6857 cs->mandlock = TRUE; 6858 6859 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) { 6860 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode); 6861 6862 /* 6863 * Truncate the file if necessary; this would be 6864 * the case for create over an existing file. 6865 */ 6866 6867 if (trunc) { 6868 int in_crit = 0; 6869 rfs4_file_t *fp; 6870 nfs4_srv_t *nsrv4; 6871 bool_t create = FALSE; 6872 6873 /* 6874 * We are writing over an existing file. 6875 * Check to see if we need to recall a delegation. 
6876 */ 6877 nsrv4 = nfs4_get_srv(); 6878 rfs4_hold_deleg_policy(nsrv4); 6879 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) { 6880 if (rfs4_check_delegated_byfp(FWRITE, fp, 6881 (reqsize == 0), FALSE, FALSE, &clientid)) { 6882 rfs4_file_rele(fp); 6883 rfs4_rele_deleg_policy(nsrv4); 6884 VN_RELE(vp); 6885 *attrset = 0; 6886 return (NFS4ERR_DELAY); 6887 } 6888 rfs4_file_rele(fp); 6889 } 6890 rfs4_rele_deleg_policy(nsrv4); 6891 6892 if (nbl_need_check(vp)) { 6893 in_crit = 1; 6894 6895 ASSERT(reqsize == 0); 6896 6897 nbl_start_crit(vp, RW_READER); 6898 if (nbl_conflict(vp, NBL_WRITE, 0, 6899 cva.va_size, 0, NULL)) { 6900 in_crit = 0; 6901 nbl_end_crit(vp); 6902 VN_RELE(vp); 6903 *attrset = 0; 6904 return (NFS4ERR_ACCESS); 6905 } 6906 } 6907 ct.cc_sysid = 0; 6908 ct.cc_pid = 0; 6909 ct.cc_caller_id = nfs4_srv_caller_id; 6910 ct.cc_flags = CC_DONTBLOCK; 6911 6912 cva.va_mask = AT_SIZE; 6913 cva.va_size = reqsize; 6914 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct); 6915 if (in_crit) 6916 nbl_end_crit(vp); 6917 } 6918 } 6919 6920 error = makefh4(&cs->fh, vp, cs->exi); 6921 6922 /* 6923 * Force modified data and metadata out to stable storage. 
6924 */ 6925 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL); 6926 6927 if (error) { 6928 VN_RELE(vp); 6929 *attrset = 0; 6930 return (puterrno4(error)); 6931 } 6932 6933 /* if parent dir is attrdir, set namedattr fh flag */ 6934 if (dvp->v_flag & V_XATTRDIR) 6935 set_fh4_flag(&cs->fh, FH4_NAMEDATTR); 6936 6937 if (cs->vp) 6938 VN_RELE(cs->vp); 6939 6940 cs->vp = vp; 6941 6942 /* 6943 * if we did not create the file, we will need to check 6944 * the access bits on the file 6945 */ 6946 6947 if (!created) { 6948 if (setsize) 6949 args->share_access |= OPEN4_SHARE_ACCESS_WRITE; 6950 status = check_open_access(args->share_access, cs, req); 6951 if (status != NFS4_OK) 6952 *attrset = 0; 6953 } 6954 return (status); 6955 } 6956 6957 static void 6958 close_expired_state(rfs4_entry_t u_entry) 6959 { 6960 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 6961 6962 if (sp->rs_closed) 6963 return; 6964 6965 /* not expired ? */ 6966 if (gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access 6967 <= rfs4_lease_time) 6968 return; 6969 6970 rfs4_state_close(sp, TRUE, TRUE, CRED()); 6971 rfs4_dbe_invalidate(sp->rs_dbe); 6972 } 6973 6974 /*ARGSUSED*/ 6975 static void 6976 rfs4_do_open(struct compound_state *cs, struct svc_req *req, 6977 rfs4_openowner_t *oo, delegreq_t deleg, 6978 uint32_t access, uint32_t deny, 6979 OPEN4res *resp, int deleg_cur) 6980 { 6981 /* XXX Currently not using req */ 6982 rfs4_state_t *sp; 6983 rfs4_file_t *fp; 6984 bool_t screate = TRUE; 6985 bool_t fcreate = TRUE; 6986 uint32_t open_a, share_a; 6987 uint32_t open_d, share_d; 6988 rfs4_deleg_state_t *dsp; 6989 sysid_t sysid; 6990 nfsstat4 status; 6991 caller_context_t ct; 6992 int fflags = 0; 6993 int recall = 0; 6994 int err; 6995 int first_open; 6996 int tries = 0; 6997 6998 /* get the file struct and hold a lock on it during initial open */ 6999 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate); 7000 if (fp == NULL) { 7001 resp->status = NFS4ERR_RESOURCE; 7002 DTRACE_PROBE1(nfss__e__do__open1, 
nfsstat4, resp->status); 7003 return; 7004 } 7005 7006 sp = rfs4_findstate_by_owner_file(oo, fp, &screate); 7007 if (sp == NULL) { 7008 resp->status = NFS4ERR_RESOURCE; 7009 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status); 7010 /* No need to keep any reference */ 7011 rw_exit(&fp->rf_file_rwlock); 7012 rfs4_file_rele(fp); 7013 return; 7014 } 7015 7016 /* try to get the sysid before continuing */ 7017 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) { 7018 resp->status = status; 7019 rfs4_file_rele(fp); 7020 /* Not a fully formed open; "close" it */ 7021 if (screate == TRUE) 7022 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7023 rfs4_state_rele(sp); 7024 return; 7025 } 7026 7027 /* Calculate the fflags for this OPEN. */ 7028 if (access & OPEN4_SHARE_ACCESS_READ) 7029 fflags |= FREAD; 7030 if (access & OPEN4_SHARE_ACCESS_WRITE) 7031 fflags |= FWRITE; 7032 7033 again: 7034 rfs4_dbe_lock(sp->rs_dbe); 7035 7036 /* 7037 * Calculate the new deny and access mode that this open is adding to 7038 * the file for this open owner; 7039 */ 7040 open_d = (deny & ~sp->rs_open_deny); 7041 open_a = (access & ~sp->rs_open_access); 7042 7043 /* 7044 * Calculate the new share access and share deny modes that this open 7045 * is adding to the file for this open owner; 7046 */ 7047 share_a = (access & ~sp->rs_share_access); 7048 share_d = (deny & ~sp->rs_share_deny); 7049 7050 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0; 7051 7052 /* 7053 * Check to see the client has already sent an open for this 7054 * open owner on this file with the same share/deny modes. 7055 * If so, we don't need to check for a conflict and we don't 7056 * need to add another shrlock. If not, then we need to 7057 * check for conflicts in deny and access before checking for 7058 * conflicts in delegation. We don't want to recall a 7059 * delegation based on an open that will eventually fail based 7060 * on shares modes. 
7061 */ 7062 7063 if (share_a || share_d) { 7064 if ((err = rfs4_share(sp, access, deny)) != 0) { 7065 rfs4_dbe_unlock(sp->rs_dbe); 7066 if (err == NFS4ERR_SHARE_DENIED && ++tries < 2) { 7067 /* 7068 * Cleanup recently expired (not yet cleaned by 7069 * reaper thread) and re-try. 7070 */ 7071 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 7072 7073 rfs4_dbsearch_cb(nsrv4->rfs4_state_file_idx, 7074 sp->rs_finfo, rfs4_lookup_exp_state_max, 7075 close_expired_state); 7076 goto again; 7077 } 7078 7079 resp->status = err; 7080 7081 rfs4_file_rele(fp); 7082 /* Not a fully formed open; "close" it */ 7083 if (screate == TRUE) 7084 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7085 rfs4_state_rele(sp); 7086 return; 7087 } 7088 } 7089 7090 rfs4_dbe_lock(fp->rf_dbe); 7091 7092 /* 7093 * Check to see if this file is delegated and if so, if a 7094 * recall needs to be done. 7095 */ 7096 if (rfs4_check_recall(sp, access)) { 7097 rfs4_dbe_unlock(fp->rf_dbe); 7098 rfs4_dbe_unlock(sp->rs_dbe); 7099 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client); 7100 delay(NFS4_DELEGATION_CONFLICT_DELAY); 7101 rfs4_dbe_lock(sp->rs_dbe); 7102 7103 /* if state closed while lock was dropped */ 7104 if (sp->rs_closed) { 7105 if (share_a || share_d) 7106 (void) rfs4_unshare(sp); 7107 rfs4_dbe_unlock(sp->rs_dbe); 7108 rfs4_file_rele(fp); 7109 /* Not a fully formed open; "close" it */ 7110 if (screate == TRUE) 7111 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7112 rfs4_state_rele(sp); 7113 resp->status = NFS4ERR_OLD_STATEID; 7114 return; 7115 } 7116 7117 rfs4_dbe_lock(fp->rf_dbe); 7118 /* Let's see if the delegation was returned */ 7119 if (rfs4_check_recall(sp, access)) { 7120 rfs4_dbe_unlock(fp->rf_dbe); 7121 if (share_a || share_d) 7122 (void) rfs4_unshare(sp); 7123 rfs4_dbe_unlock(sp->rs_dbe); 7124 rfs4_file_rele(fp); 7125 rfs4_update_lease(sp->rs_owner->ro_client); 7126 7127 /* Not a fully formed open; "close" it */ 7128 if (screate == TRUE) 7129 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7130 
rfs4_state_rele(sp); 7131 resp->status = NFS4ERR_DELAY; 7132 return; 7133 } 7134 } 7135 /* 7136 * the share check passed and any delegation conflict has been 7137 * taken care of, now call vop_open. 7138 * if this is the first open then call vop_open with fflags. 7139 * if not, call vn_open_upgrade with just the upgrade flags. 7140 * 7141 * if the file has been opened already, it will have the current 7142 * access mode in the state struct. if it has no share access, then 7143 * this is a new open. 7144 * 7145 * However, if this is open with CLAIM_DLEGATE_CUR, then don't 7146 * call VOP_OPEN(), just do the open upgrade. 7147 */ 7148 if (first_open && !deleg_cur) { 7149 ct.cc_sysid = sysid; 7150 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe); 7151 ct.cc_caller_id = nfs4_srv_caller_id; 7152 ct.cc_flags = CC_DONTBLOCK; 7153 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct); 7154 if (err) { 7155 rfs4_dbe_unlock(fp->rf_dbe); 7156 if (share_a || share_d) 7157 (void) rfs4_unshare(sp); 7158 rfs4_dbe_unlock(sp->rs_dbe); 7159 rfs4_file_rele(fp); 7160 7161 /* Not a fully formed open; "close" it */ 7162 if (screate == TRUE) 7163 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7164 rfs4_state_rele(sp); 7165 /* check if a monitor detected a delegation conflict */ 7166 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 7167 resp->status = NFS4ERR_DELAY; 7168 else 7169 resp->status = NFS4ERR_SERVERFAULT; 7170 return; 7171 } 7172 } else { /* open upgrade */ 7173 /* 7174 * calculate the fflags for the new mode that is being added 7175 * by this upgrade. 
7176 */ 7177 fflags = 0; 7178 if (open_a & OPEN4_SHARE_ACCESS_READ) 7179 fflags |= FREAD; 7180 if (open_a & OPEN4_SHARE_ACCESS_WRITE) 7181 fflags |= FWRITE; 7182 vn_open_upgrade(cs->vp, fflags); 7183 } 7184 sp->rs_open_access |= access; 7185 sp->rs_open_deny |= deny; 7186 7187 if (open_d & OPEN4_SHARE_DENY_READ) 7188 fp->rf_deny_read++; 7189 if (open_d & OPEN4_SHARE_DENY_WRITE) 7190 fp->rf_deny_write++; 7191 fp->rf_share_deny |= deny; 7192 7193 if (open_a & OPEN4_SHARE_ACCESS_READ) 7194 fp->rf_access_read++; 7195 if (open_a & OPEN4_SHARE_ACCESS_WRITE) 7196 fp->rf_access_write++; 7197 fp->rf_share_access |= access; 7198 7199 /* 7200 * Check for delegation here. if the deleg argument is not 7201 * DELEG_ANY, then this is a reclaim from a client and 7202 * we must honor the delegation requested. If necessary we can 7203 * set the recall flag. 7204 */ 7205 7206 dsp = rfs4_grant_delegation(deleg, sp, &recall); 7207 7208 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE); 7209 7210 next_stateid(&sp->rs_stateid); 7211 7212 resp->stateid = sp->rs_stateid.stateid; 7213 7214 rfs4_dbe_unlock(fp->rf_dbe); 7215 rfs4_dbe_unlock(sp->rs_dbe); 7216 7217 if (dsp) { 7218 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall); 7219 rfs4_deleg_state_rele(dsp); 7220 } 7221 7222 rfs4_file_rele(fp); 7223 rfs4_state_rele(sp); 7224 7225 resp->status = NFS4_OK; 7226 } 7227 7228 /*ARGSUSED*/ 7229 static void 7230 rfs4_do_openfh(struct compound_state *cs, struct svc_req *req, OPEN4args *args, 7231 rfs4_openowner_t *oo, OPEN4res *resp) 7232 { 7233 /* cs->vp and cs->fh have been updated by putfh. 
*/ 7234 rfs4_do_open(cs, req, oo, DELEG_ANY, 7235 (args->share_access & 0xff), args->share_deny, resp, 0); 7236 } 7237 7238 /*ARGSUSED*/ 7239 static void 7240 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req, 7241 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 7242 { 7243 change_info4 *cinfo = &resp->cinfo; 7244 bitmap4 *attrset = &resp->attrset; 7245 7246 if (args->opentype == OPEN4_NOCREATE) 7247 resp->status = rfs4_lookupfile(&args->claim.open_claim4_u.file, 7248 req, cs, args->share_access, cinfo); 7249 else { 7250 /* inhibit delegation grants during exclusive create */ 7251 7252 if (args->mode == EXCLUSIVE4) 7253 rfs4_disable_delegation(); 7254 7255 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset, 7256 oo->ro_client->rc_clientid); 7257 } 7258 7259 if (resp->status == NFS4_OK) { 7260 7261 /* cs->vp cs->fh now reference the desired file */ 7262 7263 rfs4_do_open(cs, req, oo, 7264 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY, 7265 args->share_access, args->share_deny, resp, 0); 7266 7267 /* 7268 * If rfs4_createfile set attrset, we must 7269 * clear this attrset before the response is copied. 
7270 */ 7271 if (resp->status != NFS4_OK && resp->attrset) { 7272 resp->attrset = 0; 7273 } 7274 } 7275 else 7276 *cs->statusp = resp->status; 7277 7278 if (args->mode == EXCLUSIVE4) 7279 rfs4_enable_delegation(); 7280 } 7281 7282 /*ARGSUSED*/ 7283 static void 7284 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req, 7285 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 7286 { 7287 change_info4 *cinfo = &resp->cinfo; 7288 vattr_t va; 7289 vtype_t v_type = cs->vp->v_type; 7290 int error = 0; 7291 7292 /* Verify that we have a regular file */ 7293 if (v_type != VREG) { 7294 if (v_type == VDIR) 7295 resp->status = NFS4ERR_ISDIR; 7296 else if (v_type == VLNK) 7297 resp->status = NFS4ERR_SYMLINK; 7298 else 7299 resp->status = NFS4ERR_INVAL; 7300 return; 7301 } 7302 7303 va.va_mask = AT_MODE|AT_UID; 7304 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL); 7305 if (error) { 7306 resp->status = puterrno4(error); 7307 return; 7308 } 7309 7310 cs->mandlock = MANDLOCK(cs->vp, va.va_mode); 7311 7312 /* 7313 * Check if we have access to the file, Note the the file 7314 * could have originally been open UNCHECKED or GUARDED 7315 * with mode bits that will now fail, but there is nothing 7316 * we can really do about that except in the case that the 7317 * owner of the file is the one requesting the open. 
7318 */ 7319 if (crgetuid(cs->cr) != va.va_uid) { 7320 resp->status = check_open_access(args->share_access, cs, req); 7321 if (resp->status != NFS4_OK) { 7322 return; 7323 } 7324 } 7325 7326 /* 7327 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero 7328 */ 7329 cinfo->before = 0; 7330 cinfo->after = 0; 7331 cinfo->atomic = FALSE; 7332 7333 rfs4_do_open(cs, req, oo, 7334 NFS4_DELEG4TYPE2REQTYPE(args->claim.open_claim4_u.delegate_type), 7335 args->share_access, args->share_deny, resp, 0); 7336 } 7337 7338 static void 7339 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req, 7340 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 7341 { 7342 int error; 7343 nfsstat4 status; 7344 stateid4 stateid = 7345 args->claim.open_claim4_u.delegate_cur_info.delegate_stateid; 7346 rfs4_deleg_state_t *dsp; 7347 7348 /* 7349 * Find the state info from the stateid and confirm that the 7350 * file is delegated. If the state openowner is the same as 7351 * the supplied openowner we're done. If not, get the file 7352 * info from the found state info. Use that file info to 7353 * create the state for this lock owner. Note solaris doen't 7354 * really need the pathname to find the file. We may want to 7355 * lookup the pathname and make sure that the vp exist and 7356 * matches the vp in the file structure. However it is 7357 * possible that the pathname nolonger exists (local process 7358 * unlinks the file), so this may not be that useful. 7359 */ 7360 7361 status = rfs4_get_deleg_state(&stateid, &dsp); 7362 if (status != NFS4_OK) { 7363 resp->status = status; 7364 return; 7365 } 7366 7367 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE); 7368 7369 /* 7370 * New lock owner, create state. 
Since this was probably called 7371 * in response to a CB_RECALL we set deleg to DELEG_NONE 7372 */ 7373 7374 ASSERT(cs->vp != NULL); 7375 VN_RELE(cs->vp); 7376 VN_HOLD(dsp->rds_finfo->rf_vp); 7377 cs->vp = dsp->rds_finfo->rf_vp; 7378 7379 error = makefh4(&cs->fh, cs->vp, cs->exi); 7380 if (error != 0) { 7381 rfs4_deleg_state_rele(dsp); 7382 *cs->statusp = resp->status = puterrno4(error); 7383 return; 7384 } 7385 7386 /* Mark progress for delegation returns */ 7387 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec(); 7388 rfs4_deleg_state_rele(dsp); 7389 rfs4_do_open(cs, req, oo, DELEG_NONE, 7390 args->share_access, args->share_deny, resp, 1); 7391 } 7392 7393 /*ARGSUSED*/ 7394 static void 7395 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req, 7396 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 7397 { 7398 /* 7399 * Lookup the pathname, it must already exist since this file 7400 * was delegated. 7401 * 7402 * Find the file and state info for this vp and open owner pair. 7403 * check that they are in fact delegated. 7404 * check that the state access and deny modes are the same. 7405 * 7406 * Return the delgation possibly seting the recall flag. 
7407 */ 7408 rfs4_file_t *fp; 7409 rfs4_state_t *sp; 7410 bool_t create = FALSE; 7411 bool_t dcreate = FALSE; 7412 rfs4_deleg_state_t *dsp; 7413 nfsace4 *ace; 7414 7415 /* Note we ignore oflags */ 7416 resp->status = rfs4_lookupfile( 7417 &args->claim.open_claim4_u.file_delegate_prev, 7418 req, cs, args->share_access, &resp->cinfo); 7419 7420 if (resp->status != NFS4_OK) { 7421 return; 7422 } 7423 7424 /* get the file struct and hold a lock on it during initial open */ 7425 fp = rfs4_findfile_withlock(cs->vp, NULL, &create); 7426 if (fp == NULL) { 7427 resp->status = NFS4ERR_RESOURCE; 7428 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status); 7429 return; 7430 } 7431 7432 sp = rfs4_findstate_by_owner_file(oo, fp, &create); 7433 if (sp == NULL) { 7434 resp->status = NFS4ERR_SERVERFAULT; 7435 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status); 7436 rw_exit(&fp->rf_file_rwlock); 7437 rfs4_file_rele(fp); 7438 return; 7439 } 7440 7441 rfs4_dbe_lock(sp->rs_dbe); 7442 rfs4_dbe_lock(fp->rf_dbe); 7443 if (args->share_access != sp->rs_share_access || 7444 args->share_deny != sp->rs_share_deny || 7445 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 7446 NFS4_DEBUG(rfs4_debug, 7447 (CE_NOTE, "rfs4_do_opendelprev: state mixup")); 7448 rfs4_dbe_unlock(fp->rf_dbe); 7449 rfs4_dbe_unlock(sp->rs_dbe); 7450 rfs4_file_rele(fp); 7451 rfs4_state_rele(sp); 7452 resp->status = NFS4ERR_SERVERFAULT; 7453 return; 7454 } 7455 rfs4_dbe_unlock(fp->rf_dbe); 7456 rfs4_dbe_unlock(sp->rs_dbe); 7457 7458 dsp = rfs4_finddeleg(sp, &dcreate); 7459 if (dsp == NULL) { 7460 rfs4_state_rele(sp); 7461 rfs4_file_rele(fp); 7462 resp->status = NFS4ERR_SERVERFAULT; 7463 return; 7464 } 7465 7466 next_stateid(&sp->rs_stateid); 7467 7468 resp->stateid = sp->rs_stateid.stateid; 7469 7470 resp->delegation.delegation_type = dsp->rds_dtype; 7471 7472 if (dsp->rds_dtype == OPEN_DELEGATE_READ) { 7473 open_read_delegation4 *rv = 7474 &resp->delegation.open_delegation4_u.read; 7475 7476 
rv->stateid = dsp->rds_delegid.stateid; 7477 rv->recall = FALSE; /* no policy in place to set to TRUE */ 7478 ace = &rv->permissions; 7479 } else { 7480 open_write_delegation4 *rv = 7481 &resp->delegation.open_delegation4_u.write; 7482 7483 rv->stateid = dsp->rds_delegid.stateid; 7484 rv->recall = FALSE; /* no policy in place to set to TRUE */ 7485 ace = &rv->permissions; 7486 rv->space_limit.limitby = NFS_LIMIT_SIZE; 7487 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX; 7488 } 7489 7490 /* XXX For now */ 7491 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE; 7492 ace->flag = 0; 7493 ace->access_mask = 0; 7494 ace->who.utf8string_len = 0; 7495 ace->who.utf8string_val = 0; 7496 7497 rfs4_deleg_state_rele(dsp); 7498 rfs4_state_rele(sp); 7499 rfs4_file_rele(fp); 7500 } 7501 7502 typedef enum { 7503 NFS4_CHKSEQ_OKAY = 0, 7504 NFS4_CHKSEQ_REPLAY = 1, 7505 NFS4_CHKSEQ_BAD = 2 7506 } rfs4_chkseq_t; 7507 7508 /* 7509 * Generic function for sequence number checks. 7510 */ 7511 static rfs4_chkseq_t 7512 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop, 7513 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres) 7514 { 7515 /* Same sequence ids and matching operations? 
*/ 7516 if (seqid == rqst_seq && resop->resop == lastop->resop) { 7517 if (copyres == TRUE) { 7518 rfs4_free_reply(resop); 7519 rfs4_copy_reply(resop, lastop); 7520 } 7521 NFS4_DEBUG(rfs4_debug, (CE_NOTE, 7522 "Replayed SEQID %d\n", seqid)); 7523 return (NFS4_CHKSEQ_REPLAY); 7524 } 7525 7526 /* If the incoming sequence is not the next expected then it is bad */ 7527 if (rqst_seq != seqid + 1) { 7528 if (rqst_seq == seqid) { 7529 NFS4_DEBUG(rfs4_debug, 7530 (CE_NOTE, "BAD SEQID: Replayed sequence id " 7531 "but last op was %d current op is %d\n", 7532 lastop->resop, resop->resop)); 7533 return (NFS4_CHKSEQ_BAD); 7534 } 7535 NFS4_DEBUG(rfs4_debug, 7536 (CE_NOTE, "BAD SEQID: got %u expecting %u\n", 7537 rqst_seq, seqid)); 7538 return (NFS4_CHKSEQ_BAD); 7539 } 7540 7541 /* Everything okay -- next expected */ 7542 return (NFS4_CHKSEQ_OKAY); 7543 } 7544 7545 7546 static rfs4_chkseq_t 7547 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop, 7548 const compound_state_t *cs) 7549 { 7550 rfs4_chkseq_t rc; 7551 7552 if (rfs4_has_session(cs)) 7553 return (NFS4_CHKSEQ_OKAY); 7554 7555 rfs4_dbe_lock(op->ro_dbe); 7556 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop, 7557 TRUE); 7558 rfs4_dbe_unlock(op->ro_dbe); 7559 7560 if (rc == NFS4_CHKSEQ_OKAY) 7561 rfs4_update_lease(op->ro_client); 7562 7563 return (rc); 7564 } 7565 7566 static rfs4_chkseq_t 7567 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop) 7568 { 7569 rfs4_chkseq_t rc; 7570 7571 rfs4_dbe_lock(op->ro_dbe); 7572 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, 7573 olo_seqid, resop, FALSE); 7574 rfs4_dbe_unlock(op->ro_dbe); 7575 7576 return (rc); 7577 } 7578 7579 static rfs4_chkseq_t 7580 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop) 7581 { 7582 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY; 7583 7584 rfs4_dbe_lock(lsp->rls_dbe); 7585 if (!lsp->rls_skip_seqid_check) 7586 rc = rfs4_check_seqid(lsp->rls_seqid, 
&lsp->rls_reply, seqid, 7587 resop, TRUE); 7588 rfs4_dbe_unlock(lsp->rls_dbe); 7589 7590 return (rc); 7591 } 7592 7593 static void 7594 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop, 7595 struct svc_req *req, struct compound_state *cs) 7596 { 7597 OPEN4args *args = &argop->nfs_argop4_u.opopen; 7598 OPEN4res *resp = &resop->nfs_resop4_u.opopen; 7599 open_owner4 *owner = &args->owner; 7600 open_claim_type4 claim = args->claim.claim; 7601 rfs4_client_t *cp; 7602 rfs4_openowner_t *oo; 7603 bool_t create; 7604 bool_t replay = FALSE; 7605 int can_reclaim; 7606 7607 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs, 7608 OPEN4args *, args); 7609 7610 if (cs->vp == NULL) { 7611 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 7612 goto end; 7613 } 7614 7615 /* rfc5661 section 18.16.3 */ 7616 if (rfs4_has_session(cs)) 7617 owner->clientid = cs->client->rc_clientid; 7618 7619 /* 7620 * Need to check clientid and lease expiration first based on 7621 * error ordering and incrementing sequence id. 7622 */ 7623 cp = rfs4_findclient_by_id(owner->clientid, FALSE); 7624 if (cp == NULL) { 7625 *cs->statusp = resp->status = 7626 rfs4_check_clientid(&owner->clientid, 0); 7627 goto end; 7628 } 7629 7630 if (rfs4_lease_expired(cp)) { 7631 rfs4_client_close(cp); 7632 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 7633 goto end; 7634 } 7635 can_reclaim = cp->rc_can_reclaim; 7636 7637 /* 7638 * RFC8881 18.51.3 7639 * If non-reclaim locking operations are done before the 7640 * RECLAIM_COMPLETE, error NFS4ERR_GRACE will be returned 7641 */ 7642 if (rfs4_has_session(cs) && !cp->rc_reclaim_completed && 7643 claim != CLAIM_PREVIOUS) { 7644 rfs4_client_rele(cp); 7645 *cs->statusp = resp->status = NFS4ERR_GRACE; 7646 goto end; 7647 } 7648 7649 /* 7650 * Find the open_owner for use from this point forward. Take 7651 * care in updating the sequence id based on the type of error 7652 * being returned. 
7653 */ 7654 retry: 7655 create = TRUE; 7656 oo = rfs4_findopenowner(owner, &create, args->seqid); 7657 if (oo == NULL) { 7658 *cs->statusp = resp->status = NFS4ERR_RESOURCE; 7659 rfs4_client_rele(cp); 7660 goto end; 7661 } 7662 7663 /* 7664 * OPEN_CONFIRM must not be implemented in v4.1 7665 */ 7666 if (rfs4_has_session(cs)) { 7667 oo->ro_need_confirm = FALSE; 7668 } 7669 7670 /* Hold off access to the sequence space while the open is done */ 7671 /* Workaround to avoid deadlock */ 7672 if (!rfs4_has_session(cs)) 7673 rfs4_sw_enter(&oo->ro_sw); 7674 7675 /* 7676 * If the open_owner existed before at the server, then check 7677 * the sequence id. 7678 */ 7679 if (!create && !oo->ro_postpone_confirm) { 7680 switch (rfs4_check_open_seqid(args->seqid, oo, resop, cs)) { 7681 case NFS4_CHKSEQ_BAD: 7682 ASSERT(!rfs4_has_session(cs)); 7683 if ((args->seqid > oo->ro_open_seqid) && 7684 oo->ro_need_confirm) { 7685 rfs4_free_opens(oo, TRUE, FALSE); 7686 rfs4_sw_exit(&oo->ro_sw); 7687 rfs4_openowner_rele(oo); 7688 goto retry; 7689 } 7690 resp->status = NFS4ERR_BAD_SEQID; 7691 goto out; 7692 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */ 7693 replay = TRUE; 7694 goto out; 7695 default: 7696 break; 7697 } 7698 7699 /* 7700 * Sequence was ok and open owner exists 7701 * check to see if we have yet to see an 7702 * open_confirm. 7703 */ 7704 if (oo->ro_need_confirm) { 7705 rfs4_free_opens(oo, TRUE, FALSE); 7706 ASSERT(!rfs4_has_session(cs)); 7707 rfs4_sw_exit(&oo->ro_sw); 7708 rfs4_openowner_rele(oo); 7709 goto retry; 7710 } 7711 } 7712 /* Grace only applies to regular-type OPENs */ 7713 if (rfs4_clnt_in_grace(cp) && 7714 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR || 7715 claim == CLAIM_FH)) { 7716 *cs->statusp = resp->status = NFS4ERR_GRACE; 7717 goto out; 7718 } 7719 7720 /* 7721 * If previous state at the server existed then can_reclaim 7722 * will be set. If not reply NFS4ERR_NO_GRACE to the 7723 * client. 
7724 */ 7725 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) { 7726 *cs->statusp = resp->status = NFS4ERR_NO_GRACE; 7727 goto out; 7728 } 7729 7730 7731 /* 7732 * Reject the open if the client has missed the grace period 7733 */ 7734 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) { 7735 *cs->statusp = resp->status = NFS4ERR_NO_GRACE; 7736 goto out; 7737 } 7738 7739 /* Couple of up-front bookkeeping items */ 7740 if (oo->ro_need_confirm) { 7741 /* 7742 * If this is a reclaim OPEN then we should not ask 7743 * for a confirmation of the open_owner per the 7744 * protocol specification. 7745 */ 7746 if (claim == CLAIM_PREVIOUS) 7747 oo->ro_need_confirm = FALSE; 7748 else 7749 resp->rflags |= OPEN4_RESULT_CONFIRM; 7750 } 7751 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX; 7752 7753 /* 7754 * If there is an unshared filesystem mounted on this vnode, 7755 * do not allow to open/create in this directory. 7756 */ 7757 if (vn_ismntpt(cs->vp)) { 7758 *cs->statusp = resp->status = NFS4ERR_ACCESS; 7759 goto out; 7760 } 7761 7762 /* 7763 * access must READ, WRITE, or BOTH. No access is invalid. 7764 * deny can be READ, WRITE, BOTH, or NONE. 7765 * bits not defined for access/deny are invalid. 7766 */ 7767 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) || 7768 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) || 7769 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) { 7770 *cs->statusp = resp->status = NFS4ERR_INVAL; 7771 goto out; 7772 } 7773 7774 7775 /* 7776 * make sure attrset is zero before response is built. 
7777 */ 7778 resp->attrset = 0; 7779 7780 switch (claim) { 7781 case CLAIM_NULL: 7782 rfs4_do_opennull(cs, req, args, oo, resp); 7783 break; 7784 case CLAIM_PREVIOUS: 7785 rfs4_do_openprev(cs, req, args, oo, resp); 7786 break; 7787 case CLAIM_DELEGATE_CUR: 7788 rfs4_do_opendelcur(cs, req, args, oo, resp); 7789 break; 7790 case CLAIM_DELEGATE_PREV: 7791 rfs4_do_opendelprev(cs, req, args, oo, resp); 7792 break; 7793 case CLAIM_FH: 7794 rfs4_do_openfh(cs, req, args, oo, resp); 7795 break; 7796 default: 7797 resp->status = NFS4ERR_INVAL; 7798 break; 7799 } 7800 7801 out: 7802 rfs4_client_rele(cp); 7803 7804 /* Catch sequence id handling here to make it a little easier */ 7805 switch (resp->status) { 7806 case NFS4ERR_BADXDR: 7807 case NFS4ERR_BAD_SEQID: 7808 case NFS4ERR_BAD_STATEID: 7809 case NFS4ERR_NOFILEHANDLE: 7810 case NFS4ERR_RESOURCE: 7811 case NFS4ERR_STALE_CLIENTID: 7812 case NFS4ERR_STALE_STATEID: 7813 /* 7814 * The protocol states that if any of these errors are 7815 * being returned, the sequence id should not be 7816 * incremented. Any other return requires an 7817 * increment. 7818 */ 7819 break; 7820 default: 7821 /* Always update the lease in this case */ 7822 rfs4_update_lease(oo->ro_client); 7823 7824 /* Regular response - copy the result */ 7825 if (!replay) 7826 rfs4_update_open_resp(oo, resop, &cs->fh); 7827 7828 /* 7829 * REPLAY case: Only if the previous response was OK 7830 * do we copy the filehandle. If not OK, no 7831 * filehandle to copy. 7832 */ 7833 if (replay == TRUE && 7834 resp->status == NFS4_OK && 7835 oo->ro_reply_fh.nfs_fh4_val) { 7836 /* 7837 * If this is a replay, we must restore the 7838 * current filehandle/vp to that of what was 7839 * returned originally. Try our best to do 7840 * it. 
7841 */ 7842 nfs_fh4_fmt_t *fh_fmtp = 7843 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val; 7844 7845 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, 7846 (fid_t *)&fh_fmtp->fh4_xlen, NULL); 7847 7848 if (cs->exi == NULL) { 7849 resp->status = NFS4ERR_STALE; 7850 goto finish; 7851 } 7852 7853 VN_RELE(cs->vp); 7854 7855 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi, 7856 &resp->status); 7857 7858 if (cs->vp == NULL) 7859 goto finish; 7860 7861 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh); 7862 } 7863 7864 /* 7865 * If this was a replay, no need to update the 7866 * sequence id. If the open_owner was not created on 7867 * this pass, then update. The first use of an 7868 * open_owner will not bump the sequence id. 7869 */ 7870 if (replay == FALSE && !create) 7871 rfs4_update_open_sequence(oo); 7872 /* 7873 * If the client is receiving an error and the 7874 * open_owner needs to be confirmed, there is no way 7875 * to notify the client of this fact ignoring the fact 7876 * that the server has no method of returning a 7877 * stateid to confirm. Therefore, the server needs to 7878 * mark this open_owner in a way as to avoid the 7879 * sequence id checking the next time the client uses 7880 * this open_owner. 7881 */ 7882 if (resp->status != NFS4_OK && oo->ro_need_confirm) 7883 oo->ro_postpone_confirm = TRUE; 7884 /* 7885 * If OK response then clear the postpone flag and 7886 * reset the sequence id to keep in sync with the 7887 * client. 
7888 */ 7889 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) { 7890 oo->ro_postpone_confirm = FALSE; 7891 oo->ro_open_seqid = args->seqid; 7892 } 7893 break; 7894 } 7895 7896 finish: 7897 *cs->statusp = resp->status; 7898 7899 if (!rfs4_has_session(cs)) 7900 rfs4_sw_exit(&oo->ro_sw); 7901 rfs4_openowner_rele(oo); 7902 7903 put_stateid4(cs, &resp->stateid); 7904 end: 7905 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs, 7906 OPEN4res *, resp); 7907 } 7908 7909 /*ARGSUSED*/ 7910 void 7911 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop, 7912 struct svc_req *req, struct compound_state *cs) 7913 { 7914 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm; 7915 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm; 7916 rfs4_state_t *sp; 7917 nfsstat4 status; 7918 7919 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs, 7920 OPEN_CONFIRM4args *, args); 7921 7922 ASSERT(!rfs4_has_session(cs)); 7923 7924 if (cs->vp == NULL) { 7925 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 7926 goto out; 7927 } 7928 7929 if (cs->vp->v_type != VREG) { 7930 *cs->statusp = resp->status = 7931 cs->vp->v_type == VDIR ? 
NFS4ERR_ISDIR : NFS4ERR_INVAL; 7932 return; 7933 } 7934 7935 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID); 7936 if (status != NFS4_OK) { 7937 *cs->statusp = resp->status = status; 7938 goto out; 7939 } 7940 7941 /* Ensure specified filehandle matches */ 7942 if (cs->vp != sp->rs_finfo->rf_vp) { 7943 rfs4_state_rele(sp); 7944 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7945 goto out; 7946 } 7947 7948 /* hold off other access to open_owner while we tinker */ 7949 rfs4_sw_enter(&sp->rs_owner->ro_sw); 7950 7951 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) { 7952 case NFS4_CHECK_STATEID_OKAY: 7953 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 7954 resop, cs) != 0) { 7955 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 7956 break; 7957 } 7958 /* 7959 * If it is the appropriate stateid and determined to 7960 * be "OKAY" then this means that the stateid does not 7961 * need to be confirmed and the client is in error for 7962 * sending an OPEN_CONFIRM. 7963 */ 7964 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7965 break; 7966 case NFS4_CHECK_STATEID_OLD: 7967 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 7968 break; 7969 case NFS4_CHECK_STATEID_BAD: 7970 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7971 break; 7972 case NFS4_CHECK_STATEID_EXPIRED: 7973 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 7974 break; 7975 case NFS4_CHECK_STATEID_CLOSED: 7976 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 7977 break; 7978 case NFS4_CHECK_STATEID_REPLAY: 7979 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 7980 resop, cs)) { 7981 case NFS4_CHKSEQ_OKAY: 7982 /* 7983 * This is replayed stateid; if seqid matches 7984 * next expected, then client is using wrong seqid. 
7985 */ 7986 /* fall through */ 7987 case NFS4_CHKSEQ_BAD: 7988 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 7989 break; 7990 case NFS4_CHKSEQ_REPLAY: 7991 /* 7992 * Note this case is the duplicate case so 7993 * resp->status is already set. 7994 */ 7995 *cs->statusp = resp->status; 7996 rfs4_update_lease(sp->rs_owner->ro_client); 7997 break; 7998 } 7999 break; 8000 case NFS4_CHECK_STATEID_UNCONFIRMED: 8001 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8002 resop, cs) != NFS4_CHKSEQ_OKAY) { 8003 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8004 break; 8005 } 8006 *cs->statusp = resp->status = NFS4_OK; 8007 8008 next_stateid(&sp->rs_stateid); 8009 resp->open_stateid = sp->rs_stateid.stateid; 8010 sp->rs_owner->ro_need_confirm = FALSE; 8011 rfs4_update_lease(sp->rs_owner->ro_client); 8012 rfs4_update_open_sequence(sp->rs_owner); 8013 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 8014 break; 8015 default: 8016 ASSERT(FALSE); 8017 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 8018 break; 8019 } 8020 rfs4_sw_exit(&sp->rs_owner->ro_sw); 8021 rfs4_state_rele(sp); 8022 8023 out: 8024 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs, 8025 OPEN_CONFIRM4res *, resp); 8026 } 8027 8028 /*ARGSUSED*/ 8029 void 8030 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop, 8031 struct svc_req *req, struct compound_state *cs) 8032 { 8033 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade; 8034 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade; 8035 uint32_t access = args->share_access; 8036 uint32_t deny = args->share_deny; 8037 nfsstat4 status; 8038 rfs4_state_t *sp; 8039 rfs4_file_t *fp; 8040 int fflags = 0; 8041 8042 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs, 8043 OPEN_DOWNGRADE4args *, args); 8044 8045 if (cs->vp == NULL) { 8046 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 8047 goto out; 8048 } 8049 8050 if (cs->vp->v_type != VREG) { 8051 *cs->statusp = resp->status = 
NFS4ERR_INVAL; 8052 return; 8053 } 8054 8055 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID); 8056 if (status != NFS4_OK) { 8057 *cs->statusp = resp->status = status; 8058 goto out; 8059 } 8060 8061 /* Ensure specified filehandle matches */ 8062 if (cs->vp != sp->rs_finfo->rf_vp) { 8063 rfs4_state_rele(sp); 8064 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8065 goto out; 8066 } 8067 8068 /* hold off other access to open_owner while we tinker */ 8069 rfs4_sw_enter(&sp->rs_owner->ro_sw); 8070 8071 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) { 8072 case NFS4_CHECK_STATEID_OKAY: 8073 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8074 resop, cs) != NFS4_CHKSEQ_OKAY) { 8075 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8076 goto end; 8077 } 8078 break; 8079 case NFS4_CHECK_STATEID_OLD: 8080 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8081 goto end; 8082 case NFS4_CHECK_STATEID_BAD: 8083 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8084 goto end; 8085 case NFS4_CHECK_STATEID_EXPIRED: 8086 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 8087 goto end; 8088 case NFS4_CHECK_STATEID_CLOSED: 8089 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8090 goto end; 8091 case NFS4_CHECK_STATEID_UNCONFIRMED: 8092 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8093 goto end; 8094 case NFS4_CHECK_STATEID_REPLAY: 8095 ASSERT(!rfs4_has_session(cs)); 8096 8097 /* Check the sequence id for the open owner */ 8098 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8099 resop, cs)) { 8100 case NFS4_CHKSEQ_OKAY: 8101 /* 8102 * This is replayed stateid; if seqid matches 8103 * next expected, then client is using wrong seqid. 8104 */ 8105 /* fall through */ 8106 case NFS4_CHKSEQ_BAD: 8107 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8108 goto end; 8109 case NFS4_CHKSEQ_REPLAY: 8110 /* 8111 * Note this case is the duplicate case so 8112 * resp->status is already set. 
8113 */ 8114 *cs->statusp = resp->status; 8115 rfs4_update_lease(sp->rs_owner->ro_client); 8116 goto end; 8117 } 8118 break; 8119 default: 8120 ASSERT(FALSE); 8121 break; 8122 } 8123 8124 rfs4_dbe_lock(sp->rs_dbe); 8125 /* 8126 * Check that the new access modes and deny modes are valid. 8127 * Check that no invalid bits are set. 8128 */ 8129 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) || 8130 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) { 8131 *cs->statusp = resp->status = NFS4ERR_INVAL; 8132 rfs4_update_open_sequence(sp->rs_owner); 8133 rfs4_dbe_unlock(sp->rs_dbe); 8134 goto end; 8135 } 8136 8137 /* 8138 * The new modes must be a subset of the current modes and 8139 * the access must specify at least one mode. To test that 8140 * the new mode is a subset of the current modes we bitwise 8141 * AND them together and check that the result equals the new 8142 * mode. For example: 8143 * New mode, access == R and current mode, sp->rs_open_access == RW 8144 * access & sp->rs_open_access == R == access, so the new access mode 8145 * is valid. Consider access == RW, sp->rs_open_access = R 8146 * access & sp->rs_open_access == R != access, so the new access mode 8147 * is invalid. 8148 */ 8149 if ((access & sp->rs_open_access) != access || 8150 (deny & sp->rs_open_deny) != deny || 8151 (access & 8152 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) { 8153 *cs->statusp = resp->status = NFS4ERR_INVAL; 8154 rfs4_update_open_sequence(sp->rs_owner); 8155 rfs4_dbe_unlock(sp->rs_dbe); 8156 goto end; 8157 } 8158 8159 /* 8160 * Release any share locks associated with this stateID. 8161 * Strictly speaking, this violates the spec because the 8162 * spec effectively requires that open downgrade be atomic. 8163 * At present, fs_shrlock does not have this capability. 
8164 */ 8165 (void) rfs4_unshare(sp); 8166 8167 status = rfs4_share(sp, access, deny); 8168 if (status != NFS4_OK) { 8169 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 8170 rfs4_update_open_sequence(sp->rs_owner); 8171 rfs4_dbe_unlock(sp->rs_dbe); 8172 goto end; 8173 } 8174 8175 fp = sp->rs_finfo; 8176 rfs4_dbe_lock(fp->rf_dbe); 8177 8178 /* 8179 * If the current mode has deny read and the new mode 8180 * does not, decrement the number of deny read mode bits 8181 * and if it goes to zero turn off the deny read bit 8182 * on the file. 8183 */ 8184 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) && 8185 (deny & OPEN4_SHARE_DENY_READ) == 0) { 8186 fp->rf_deny_read--; 8187 if (fp->rf_deny_read == 0) 8188 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ; 8189 } 8190 8191 /* 8192 * If the current mode has deny write and the new mode 8193 * does not, decrement the number of deny write mode bits 8194 * and if it goes to zero turn off the deny write bit 8195 * on the file. 8196 */ 8197 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) && 8198 (deny & OPEN4_SHARE_DENY_WRITE) == 0) { 8199 fp->rf_deny_write--; 8200 if (fp->rf_deny_write == 0) 8201 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE; 8202 } 8203 8204 /* 8205 * If the current mode has access read and the new mode 8206 * does not, decrement the number of access read mode bits 8207 * and if it goes to zero turn off the access read bit 8208 * on the file. set fflags to FREAD for the call to 8209 * vn_open_downgrade(). 8210 */ 8211 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) && 8212 (access & OPEN4_SHARE_ACCESS_READ) == 0) { 8213 fp->rf_access_read--; 8214 if (fp->rf_access_read == 0) 8215 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ; 8216 fflags |= FREAD; 8217 } 8218 8219 /* 8220 * If the current mode has access write and the new mode 8221 * does not, decrement the number of access write mode bits 8222 * and if it goes to zero turn off the access write bit 8223 * on the file. 
set fflags to FWRITE for the call to 8224 * vn_open_downgrade(). 8225 */ 8226 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) && 8227 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) { 8228 fp->rf_access_write--; 8229 if (fp->rf_access_write == 0) 8230 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE; 8231 fflags |= FWRITE; 8232 } 8233 8234 /* Check that the file is still accessible */ 8235 ASSERT(fp->rf_share_access); 8236 8237 rfs4_dbe_unlock(fp->rf_dbe); 8238 8239 /* now set the new open access and deny modes */ 8240 sp->rs_open_access = access; 8241 sp->rs_open_deny = deny; 8242 8243 /* 8244 * we successfully downgraded the share lock, now we need to downgrade 8245 * the open. it is possible that the downgrade was only for a deny 8246 * mode and we have nothing else to do. 8247 */ 8248 if ((fflags & (FREAD|FWRITE)) != 0) 8249 vn_open_downgrade(cs->vp, fflags); 8250 8251 /* Update the stateid */ 8252 next_stateid(&sp->rs_stateid); 8253 resp->open_stateid = sp->rs_stateid.stateid; 8254 8255 rfs4_dbe_unlock(sp->rs_dbe); 8256 8257 *cs->statusp = resp->status = NFS4_OK; 8258 /* Update the lease */ 8259 rfs4_update_lease(sp->rs_owner->ro_client); 8260 /* And the sequence */ 8261 rfs4_update_open_sequence(sp->rs_owner); 8262 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 8263 8264 end: 8265 rfs4_sw_exit(&sp->rs_owner->ro_sw); 8266 rfs4_state_rele(sp); 8267 out: 8268 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs, 8269 OPEN_DOWNGRADE4res *, resp); 8270 } 8271 8272 static void * 8273 memstr(const void *s1, const char *s2, size_t n) 8274 { 8275 size_t l = strlen(s2); 8276 char *p = (char *)s1; 8277 8278 while (n >= l) { 8279 if (bcmp(p, s2, l) == 0) 8280 return (p); 8281 p++; 8282 n--; 8283 } 8284 8285 return (NULL); 8286 } 8287 8288 /* 8289 * The logic behind this function is detailed in the NFSv4 RFC in the 8290 * SETCLIENTID operation description under IMPLEMENTATION. 
Refer to 8291 * that section for explicit guidance to server behavior for 8292 * SETCLIENTID. 8293 */ 8294 void 8295 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop, 8296 struct svc_req *req, struct compound_state *cs) 8297 { 8298 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid; 8299 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid; 8300 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed; 8301 rfs4_clntip_t *ci; 8302 bool_t create; 8303 char *addr, *netid; 8304 int len; 8305 8306 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs, 8307 SETCLIENTID4args *, args); 8308 retry: 8309 newcp = cp_confirmed = cp_unconfirmed = NULL; 8310 8311 /* 8312 * Save the caller's IP address 8313 */ 8314 args->client.cl_addr = 8315 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 8316 8317 /* 8318 * Record if it is a Solaris client that cannot handle referrals. 8319 */ 8320 if (memstr(args->client.id_val, "Solaris", args->client.id_len) && 8321 !memstr(args->client.id_val, "+referrals", args->client.id_len)) { 8322 /* Add a "yes, it's downrev" record */ 8323 create = TRUE; 8324 ci = rfs4_find_clntip(args->client.cl_addr, &create); 8325 ASSERT(ci != NULL); 8326 rfs4_dbe_rele(ci->ri_dbe); 8327 } else { 8328 /* Remove any previous record */ 8329 rfs4_invalidate_clntip(args->client.cl_addr); 8330 } 8331 8332 /* 8333 * In search of an EXISTING client matching the incoming 8334 * request to establish a new client identifier at the server 8335 */ 8336 create = TRUE; 8337 cp = rfs4_findclient(&args->client, &create, NULL); 8338 8339 /* Should never happen */ 8340 ASSERT(cp != NULL); 8341 8342 if (cp == NULL) { 8343 *cs->statusp = res->status = NFS4ERR_SERVERFAULT; 8344 goto out; 8345 } 8346 8347 /* 8348 * Easiest case. Client identifier is newly created and is 8349 * unconfirmed. Also note that for this case, no other 8350 * entries exist for the client identifier. Nothing else to 8351 * check. Just setup the response and respond. 
8352 */ 8353 if (create) { 8354 *cs->statusp = res->status = NFS4_OK; 8355 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid; 8356 res->SETCLIENTID4res_u.resok4.setclientid_confirm = 8357 cp->rc_confirm_verf; 8358 /* Setup callback information; CB_NULL confirmation later */ 8359 rfs4_client_setcb(cp, &args->callback, args->callback_ident); 8360 8361 rfs4_client_rele(cp); 8362 goto out; 8363 } 8364 8365 /* 8366 * An existing, confirmed client may exist but it may not have 8367 * been active for at least one lease period. If so, then 8368 * "close" the client and create a new client identifier 8369 */ 8370 if (rfs4_lease_expired(cp)) { 8371 rfs4_client_close(cp); 8372 goto retry; 8373 } 8374 8375 if (cp->rc_need_confirm == TRUE) 8376 cp_unconfirmed = cp; 8377 else 8378 cp_confirmed = cp; 8379 8380 cp = NULL; 8381 8382 /* 8383 * We have a confirmed client, now check for an 8384 * unconfimred entry 8385 */ 8386 if (cp_confirmed) { 8387 /* If creds don't match then client identifier is inuse */ 8388 if (!creds_ok(&cp_confirmed->rc_cr_set, req, cs)) { 8389 rfs4_cbinfo_t *cbp; 8390 /* 8391 * Some one else has established this client 8392 * id. Try and say * who they are. We will use 8393 * the call back address supplied by * the 8394 * first client. 
8395 */ 8396 *cs->statusp = res->status = NFS4ERR_CLID_INUSE; 8397 8398 addr = netid = NULL; 8399 8400 cbp = &cp_confirmed->rc_cbinfo; 8401 if (cbp->cb_callback.cb_location.r_addr && 8402 cbp->cb_callback.cb_location.r_netid) { 8403 cb_client4 *cbcp = &cbp->cb_callback; 8404 8405 len = strlen(cbcp->cb_location.r_addr)+1; 8406 addr = kmem_alloc(len, KM_SLEEP); 8407 bcopy(cbcp->cb_location.r_addr, addr, len); 8408 len = strlen(cbcp->cb_location.r_netid)+1; 8409 netid = kmem_alloc(len, KM_SLEEP); 8410 bcopy(cbcp->cb_location.r_netid, netid, len); 8411 } 8412 8413 res->SETCLIENTID4res_u.client_using.r_addr = addr; 8414 res->SETCLIENTID4res_u.client_using.r_netid = netid; 8415 8416 rfs4_client_rele(cp_confirmed); 8417 } 8418 8419 /* 8420 * Confirmed, creds match, and verifier matches; must 8421 * be an update of the callback info 8422 */ 8423 if (cp_confirmed->rc_nfs_client.verifier == 8424 args->client.verifier) { 8425 /* Setup callback information */ 8426 rfs4_client_setcb(cp_confirmed, &args->callback, 8427 args->callback_ident); 8428 8429 /* everything okay -- move ahead */ 8430 *cs->statusp = res->status = NFS4_OK; 8431 res->SETCLIENTID4res_u.resok4.clientid = 8432 cp_confirmed->rc_clientid; 8433 8434 /* update the confirm_verifier and return it */ 8435 rfs4_client_scv_next(cp_confirmed); 8436 res->SETCLIENTID4res_u.resok4.setclientid_confirm = 8437 cp_confirmed->rc_confirm_verf; 8438 8439 rfs4_client_rele(cp_confirmed); 8440 goto out; 8441 } 8442 8443 /* 8444 * Creds match but the verifier doesn't. Must search 8445 * for an unconfirmed client that would be replaced by 8446 * this request. 8447 */ 8448 create = FALSE; 8449 cp_unconfirmed = rfs4_findclient(&args->client, &create, 8450 cp_confirmed); 8451 } 8452 8453 /* 8454 * At this point, we have taken care of the brand new client 8455 * struct, INUSE case, update of an existing, and confirmed 8456 * client struct. 
8457 */ 8458 8459 /* 8460 * check to see if things have changed while we originally 8461 * picked up the client struct. If they have, then return and 8462 * retry the processing of this SETCLIENTID request. 8463 */ 8464 if (cp_unconfirmed) { 8465 rfs4_dbe_lock(cp_unconfirmed->rc_dbe); 8466 if (!cp_unconfirmed->rc_need_confirm) { 8467 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe); 8468 rfs4_client_rele(cp_unconfirmed); 8469 if (cp_confirmed) 8470 rfs4_client_rele(cp_confirmed); 8471 goto retry; 8472 } 8473 /* do away with the old unconfirmed one */ 8474 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe); 8475 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe); 8476 rfs4_client_rele(cp_unconfirmed); 8477 cp_unconfirmed = NULL; 8478 } 8479 8480 /* 8481 * This search will temporarily hide the confirmed client 8482 * struct while a new client struct is created as the 8483 * unconfirmed one. 8484 */ 8485 create = TRUE; 8486 newcp = rfs4_findclient(&args->client, &create, cp_confirmed); 8487 8488 ASSERT(newcp != NULL); 8489 8490 if (newcp == NULL) { 8491 *cs->statusp = res->status = NFS4ERR_SERVERFAULT; 8492 rfs4_client_rele(cp_confirmed); 8493 goto out; 8494 } 8495 8496 /* 8497 * If one was not created, then a similar request must be in 8498 * process so release and start over with this one 8499 */ 8500 if (create != TRUE) { 8501 rfs4_client_rele(newcp); 8502 if (cp_confirmed) 8503 rfs4_client_rele(cp_confirmed); 8504 goto retry; 8505 } 8506 8507 *cs->statusp = res->status = NFS4_OK; 8508 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid; 8509 res->SETCLIENTID4res_u.resok4.setclientid_confirm = 8510 newcp->rc_confirm_verf; 8511 /* Setup callback information; CB_NULL confirmation later */ 8512 rfs4_client_setcb(newcp, &args->callback, args->callback_ident); 8513 8514 newcp->rc_cp_confirmed = cp_confirmed; 8515 8516 rfs4_client_rele(newcp); 8517 8518 out: 8519 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs, 8520 SETCLIENTID4res *, res); 8521 } 8522 8523 /*ARGSUSED*/ 
8524 void 8525 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop, 8526 struct svc_req *req, struct compound_state *cs) 8527 { 8528 SETCLIENTID_CONFIRM4args *args = 8529 &argop->nfs_argop4_u.opsetclientid_confirm; 8530 SETCLIENTID_CONFIRM4res *res = 8531 &resop->nfs_resop4_u.opsetclientid_confirm; 8532 rfs4_client_t *cp, *cptoclose = NULL; 8533 nfs4_srv_t *nsrv4; 8534 8535 DTRACE_NFSV4_2(op__setclientid__confirm__start, 8536 struct compound_state *, cs, 8537 SETCLIENTID_CONFIRM4args *, args); 8538 8539 nsrv4 = nfs4_get_srv(); 8540 *cs->statusp = res->status = NFS4_OK; 8541 8542 cp = rfs4_findclient_by_id(args->clientid, TRUE); 8543 8544 if (cp == NULL) { 8545 *cs->statusp = res->status = 8546 rfs4_check_clientid(&args->clientid, 1); 8547 goto out; 8548 } 8549 8550 if (!creds_ok(&cp->rc_cr_set, req, cs)) { 8551 *cs->statusp = res->status = NFS4ERR_CLID_INUSE; 8552 rfs4_client_rele(cp); 8553 goto out; 8554 } 8555 8556 /* If the verifier doesn't match, the record doesn't match */ 8557 if (cp->rc_confirm_verf != args->setclientid_confirm) { 8558 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID; 8559 rfs4_client_rele(cp); 8560 goto out; 8561 } 8562 8563 rfs4_dbe_lock(cp->rc_dbe); 8564 cp->rc_need_confirm = FALSE; 8565 if (cp->rc_cp_confirmed) { 8566 cptoclose = cp->rc_cp_confirmed; 8567 cptoclose->rc_ss_remove = 1; 8568 cp->rc_cp_confirmed = NULL; 8569 } 8570 8571 /* 8572 * Update the client's associated server instance, if it's changed 8573 * since the client was created. 8574 */ 8575 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst) 8576 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst); 8577 8578 /* 8579 * Record clientid in stable storage. 8580 * Must be done after server instance has been assigned. 
8581 */ 8582 rfs4_ss_clid(nsrv4, cp); 8583 8584 rfs4_dbe_unlock(cp->rc_dbe); 8585 8586 if (cptoclose) 8587 /* don't need to rele, client_close does it */ 8588 rfs4_client_close(cptoclose); 8589 8590 /* If needed, initiate CB_NULL call for callback path */ 8591 rfs4_deleg_cb_check(cp); 8592 rfs4_update_lease(cp); 8593 8594 /* 8595 * Check to see if client can perform reclaims 8596 */ 8597 rfs4_ss_chkclid(nsrv4, cp); 8598 8599 rfs4_client_rele(cp); 8600 8601 out: 8602 DTRACE_NFSV4_2(op__setclientid__confirm__done, 8603 struct compound_state *, cs, 8604 SETCLIENTID_CONFIRM4 *, res); 8605 } 8606 8607 extern stateid4 invalid_stateid; 8608 8609 /*ARGSUSED*/ 8610 void 8611 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop, 8612 struct svc_req *req, struct compound_state *cs) 8613 { 8614 CLOSE4args *args = &argop->nfs_argop4_u.opclose; 8615 CLOSE4res *resp = &resop->nfs_resop4_u.opclose; 8616 rfs4_state_t *sp; 8617 nfsstat4 status; 8618 8619 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs, 8620 CLOSE4args *, args); 8621 8622 if (cs->vp == NULL) { 8623 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 8624 goto out; 8625 } 8626 8627 get_stateid4(cs, &args->open_stateid); 8628 8629 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID); 8630 if (status != NFS4_OK) { 8631 *cs->statusp = resp->status = status; 8632 goto out; 8633 } 8634 8635 /* Ensure specified filehandle matches */ 8636 if (cs->vp != sp->rs_finfo->rf_vp) { 8637 rfs4_state_rele(sp); 8638 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8639 goto out; 8640 } 8641 8642 /* hold off other access to open_owner while we tinker */ 8643 rfs4_sw_enter(&sp->rs_owner->ro_sw); 8644 8645 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) { 8646 case NFS4_CHECK_STATEID_OKAY: 8647 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8648 resop, cs) != NFS4_CHKSEQ_OKAY) { 8649 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8650 goto end; 8651 } 8652 break; 8653 case 
NFS4_CHECK_STATEID_OLD: 8654 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8655 goto end; 8656 case NFS4_CHECK_STATEID_BAD: 8657 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8658 goto end; 8659 case NFS4_CHECK_STATEID_EXPIRED: 8660 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 8661 goto end; 8662 case NFS4_CHECK_STATEID_CLOSED: 8663 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8664 goto end; 8665 case NFS4_CHECK_STATEID_UNCONFIRMED: 8666 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8667 goto end; 8668 case NFS4_CHECK_STATEID_REPLAY: 8669 ASSERT(!rfs4_has_session(cs)); 8670 8671 /* Check the sequence id for the open owner */ 8672 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8673 resop, cs)) { 8674 case NFS4_CHKSEQ_OKAY: 8675 /* 8676 * This is replayed stateid; if seqid matches 8677 * next expected, then client is using wrong seqid. 8678 */ 8679 /* FALL THROUGH */ 8680 case NFS4_CHKSEQ_BAD: 8681 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8682 goto end; 8683 case NFS4_CHKSEQ_REPLAY: 8684 /* 8685 * Note this case is the duplicate case so 8686 * resp->status is already set. 8687 */ 8688 *cs->statusp = resp->status; 8689 rfs4_update_lease(sp->rs_owner->ro_client); 8690 goto end; 8691 } 8692 break; 8693 default: 8694 ASSERT(FALSE); 8695 break; 8696 } 8697 8698 rfs4_dbe_lock(sp->rs_dbe); 8699 8700 /* Update the stateid. 
*/ 8701 next_stateid(&sp->rs_stateid); 8702 rfs4_dbe_unlock(sp->rs_dbe); 8703 8704 rfs4_update_lease(sp->rs_owner->ro_client); 8705 rfs4_update_open_sequence(sp->rs_owner); 8706 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 8707 8708 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 8709 8710 /* See RFC8881 section 18.2.4, and RFC7530 section 16.2.5 */ 8711 resp->open_stateid = invalid_stateid; 8712 *cs->statusp = resp->status = status; 8713 8714 end: 8715 rfs4_sw_exit(&sp->rs_owner->ro_sw); 8716 rfs4_state_rele(sp); 8717 out: 8718 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs, 8719 CLOSE4res *, resp); 8720 } 8721 8722 /* 8723 * Manage the counts on the file struct and close all file locks 8724 */ 8725 /*ARGSUSED*/ 8726 void 8727 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr, 8728 bool_t close_of_client) 8729 { 8730 rfs4_file_t *fp = sp->rs_finfo; 8731 rfs4_lo_state_t *lsp; 8732 int fflags = 0; 8733 8734 /* 8735 * If this call is part of the larger closing down of client 8736 * state then it is just easier to release all locks 8737 * associated with this client instead of going through each 8738 * individual file and cleaning locks there. 8739 */ 8740 if (close_of_client) { 8741 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE && 8742 !list_is_empty(&sp->rs_lostatelist) && 8743 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) { 8744 /* Is the PxFS kernel module loaded? */ 8745 if (lm_remove_file_locks != NULL) { 8746 int new_sysid; 8747 8748 /* Encode the cluster nodeid in new sysid */ 8749 new_sysid = sp->rs_owner->ro_client->rc_sysidt; 8750 lm_set_nlmid_flk(&new_sysid); 8751 8752 /* 8753 * This PxFS routine removes file locks for a 8754 * client over all nodes of a cluster. 
8755 */ 8756 NFS4_DEBUG(rfs4_debug, (CE_NOTE, 8757 "lm_remove_file_locks(sysid=0x%x)\n", 8758 new_sysid)); 8759 (*lm_remove_file_locks)(new_sysid); 8760 } else { 8761 struct flock64 flk; 8762 8763 /* Release all locks for this client */ 8764 flk.l_type = F_UNLKSYS; 8765 flk.l_whence = 0; 8766 flk.l_start = 0; 8767 flk.l_len = 0; 8768 flk.l_sysid = 8769 sp->rs_owner->ro_client->rc_sysidt; 8770 flk.l_pid = 0; 8771 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK, 8772 &flk, F_REMOTELOCK | FREAD | FWRITE, 8773 (u_offset_t)0, NULL, CRED(), NULL); 8774 } 8775 8776 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE; 8777 } 8778 } 8779 8780 /* 8781 * Release all locks on this file by this lock owner or at 8782 * least mark the locks as having been released 8783 */ 8784 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL; 8785 lsp = list_next(&sp->rs_lostatelist, lsp)) { 8786 lsp->rls_locks_cleaned = TRUE; 8787 8788 /* Was this already taken care of above? */ 8789 if (!close_of_client && 8790 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) 8791 (void) cleanlocks(sp->rs_finfo->rf_vp, 8792 lsp->rls_locker->rl_pid, 8793 lsp->rls_locker->rl_client->rc_sysidt); 8794 } 8795 8796 /* 8797 * Release any shrlocks associated with this open state ID. 8798 * This must be done before the rfs4_state gets marked closed. 8799 */ 8800 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) 8801 (void) rfs4_unshare(sp); 8802 8803 if (sp->rs_open_access) { 8804 rfs4_dbe_lock(fp->rf_dbe); 8805 8806 /* 8807 * Decrement the count for each access and deny bit that this 8808 * state has contributed to the file. 8809 * If the file counts go to zero 8810 * clear the appropriate bit in the appropriate mask. 
8811 */ 8812 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) { 8813 fp->rf_access_read--; 8814 fflags |= FREAD; 8815 if (fp->rf_access_read == 0) 8816 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ; 8817 } 8818 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) { 8819 fp->rf_access_write--; 8820 fflags |= FWRITE; 8821 if (fp->rf_access_write == 0) 8822 fp->rf_share_access &= 8823 ~OPEN4_SHARE_ACCESS_WRITE; 8824 } 8825 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) { 8826 fp->rf_deny_read--; 8827 if (fp->rf_deny_read == 0) 8828 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ; 8829 } 8830 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) { 8831 fp->rf_deny_write--; 8832 if (fp->rf_deny_write == 0) 8833 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE; 8834 } 8835 8836 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL); 8837 8838 rfs4_dbe_unlock(fp->rf_dbe); 8839 8840 sp->rs_open_access = 0; 8841 sp->rs_open_deny = 0; 8842 } 8843 } 8844 8845 /* 8846 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure. 8847 */ 8848 static nfsstat4 8849 lock_denied(LOCK4denied *dp, struct flock64 *flk) 8850 { 8851 rfs4_lockowner_t *lo; 8852 rfs4_client_t *cp; 8853 uint32_t len; 8854 8855 lo = rfs4_findlockowner_by_pid(flk->l_pid); 8856 if (lo != NULL) { 8857 cp = lo->rl_client; 8858 if (rfs4_lease_expired(cp)) { 8859 rfs4_lockowner_rele(lo); 8860 rfs4_dbe_hold(cp->rc_dbe); 8861 rfs4_client_close(cp); 8862 return (NFS4ERR_EXPIRED); 8863 } 8864 dp->owner.clientid = lo->rl_owner.clientid; 8865 len = lo->rl_owner.owner_len; 8866 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP); 8867 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len); 8868 dp->owner.owner_len = len; 8869 rfs4_lockowner_rele(lo); 8870 goto finish; 8871 } 8872 8873 /* 8874 * Its not a NFS4 lock. We take advantage that the upper 32 bits 8875 * of the client id contain the boot time for a NFS4 lock. 
So we 8876 * fabricate and identity by setting clientid to the sysid, and 8877 * the lock owner to the pid. 8878 */ 8879 dp->owner.clientid = flk->l_sysid; 8880 len = sizeof (pid_t); 8881 dp->owner.owner_len = len; 8882 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP); 8883 bcopy(&flk->l_pid, dp->owner.owner_val, len); 8884 finish: 8885 dp->offset = flk->l_start; 8886 dp->length = flk->l_len; 8887 8888 if (flk->l_type == F_RDLCK) 8889 dp->locktype = READ_LT; 8890 else if (flk->l_type == F_WRLCK) 8891 dp->locktype = WRITE_LT; 8892 else 8893 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */ 8894 8895 return (NFS4_OK); 8896 } 8897 8898 /* 8899 * The NFSv4.0 LOCK operation does not support the blocking lock (at the 8900 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a 8901 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared 8902 * for that (obviously); they are sending the LOCK requests with some delays 8903 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the 8904 * locking and delay implementation at the client side. 8905 * 8906 * To make the life of the clients easier, the NFSv4.0 server tries to do some 8907 * fast retries on its own (the for loop below) in a hope the lock will be 8908 * available soon. And if not, the client won't need to resend the LOCK 8909 * requests so fast to check the lock availability. This basically saves some 8910 * network traffic and tries to make sure the client gets the lock ASAP. 8911 */ 8912 static int 8913 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred) 8914 { 8915 int error; 8916 struct flock64 flk; 8917 int i; 8918 clock_t delaytime; 8919 int cmd; 8920 int spin_cnt = 0; 8921 8922 cmd = nbl_need_check(vp) ? 
F_SETLK_NBMAND : F_SETLK; 8923 retry: 8924 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay); 8925 8926 for (i = 0; i < rfs4_maxlock_tries; i++) { 8927 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock); 8928 error = VOP_FRLOCK(vp, cmd, 8929 flock, flag, (u_offset_t)0, NULL, cred, NULL); 8930 8931 if (error != EAGAIN && error != EACCES) 8932 break; 8933 8934 if (i < rfs4_maxlock_tries - 1) { 8935 delay(delaytime); 8936 delaytime *= 2; 8937 } 8938 } 8939 8940 if (error == EAGAIN || error == EACCES) { 8941 /* Get the owner of the lock */ 8942 flk = *flock; 8943 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk); 8944 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred, 8945 NULL) == 0) { 8946 /* 8947 * There's a race inherent in the current VOP_FRLOCK 8948 * design where: 8949 * a: "other guy" takes a lock that conflicts with a 8950 * lock we want 8951 * b: we attempt to take our lock (non-blocking) and 8952 * the attempt fails. 8953 * c: "other guy" releases the conflicting lock 8954 * d: we ask what lock conflicts with the lock we want, 8955 * getting F_UNLCK (no lock blocks us) 8956 * 8957 * If we retry the non-blocking lock attempt in this 8958 * case (restart at step 'b') there's some possibility 8959 * that many such attempts might fail. However a test 8960 * designed to actually provoke this race shows that 8961 * the vast majority of cases require no retry, and 8962 * only a few took as many as three retries. Here's 8963 * the test outcome: 8964 * 8965 * number of retries how many times we needed 8966 * that many retries 8967 * 0 79461 8968 * 1 862 8969 * 2 49 8970 * 3 5 8971 * 8972 * Given those empirical results, we arbitrarily limit 8973 * the retry count to ten. 8974 * 8975 * If we actually make to ten retries and give up, 8976 * nothing catastrophic happens, but we're unable to 8977 * return the information about the conflicting lock to 8978 * the NFS client. That's an acceptable trade off vs. 8979 * letting this retry loop run forever. 
8980 */ 8981 if (flk.l_type == F_UNLCK) { 8982 if (spin_cnt++ < 10) { 8983 /* No longer locked, retry */ 8984 goto retry; 8985 } 8986 } else { 8987 *flock = flk; 8988 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)", 8989 F_GETLK, &flk); 8990 } 8991 } 8992 } 8993 8994 return (error); 8995 } 8996 8997 /*ARGSUSED*/ 8998 static nfsstat4 8999 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype, 9000 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop) 9001 { 9002 nfsstat4 status; 9003 rfs4_lockowner_t *lo = lsp->rls_locker; 9004 rfs4_state_t *sp = lsp->rls_state; 9005 struct flock64 flock; 9006 int16_t ltype; 9007 int flag; 9008 int error; 9009 sysid_t sysid; 9010 LOCK4res *lres; 9011 vnode_t *vp; 9012 9013 if (rfs4_lease_expired(lo->rl_client)) { 9014 return (NFS4ERR_EXPIRED); 9015 } 9016 9017 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK) 9018 return (status); 9019 9020 /* Check for zero length. To lock to end of file use all ones for V4 */ 9021 if (length == 0) 9022 return (NFS4ERR_INVAL); 9023 else if (length == (length4)(~0)) 9024 length = 0; /* Posix to end of file */ 9025 9026 retry: 9027 rfs4_dbe_lock(sp->rs_dbe); 9028 if (sp->rs_closed == TRUE) { 9029 rfs4_dbe_unlock(sp->rs_dbe); 9030 return (NFS4ERR_OLD_STATEID); 9031 } 9032 9033 if (resop->resop != OP_LOCKU) { 9034 switch (locktype) { 9035 case READ_LT: 9036 case READW_LT: 9037 if ((sp->rs_share_access 9038 & OPEN4_SHARE_ACCESS_READ) == 0) { 9039 rfs4_dbe_unlock(sp->rs_dbe); 9040 9041 return (NFS4ERR_OPENMODE); 9042 } 9043 ltype = F_RDLCK; 9044 break; 9045 case WRITE_LT: 9046 case WRITEW_LT: 9047 if ((sp->rs_share_access 9048 & OPEN4_SHARE_ACCESS_WRITE) == 0) { 9049 rfs4_dbe_unlock(sp->rs_dbe); 9050 9051 return (NFS4ERR_OPENMODE); 9052 } 9053 ltype = F_WRLCK; 9054 break; 9055 } 9056 } else 9057 ltype = F_UNLCK; 9058 9059 flock.l_type = ltype; 9060 flock.l_whence = 0; /* SEEK_SET */ 9061 flock.l_start = offset; 9062 flock.l_len = length; 9063 flock.l_sysid = sysid; 9064 
flock.l_pid = lsp->rls_locker->rl_pid; 9065 9066 /* Note that length4 is uint64_t but l_len and l_start are off64_t */ 9067 if (flock.l_len < 0 || flock.l_start < 0) { 9068 rfs4_dbe_unlock(sp->rs_dbe); 9069 return (NFS4ERR_INVAL); 9070 } 9071 9072 /* 9073 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and 9074 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE. 9075 */ 9076 flag = (int)sp->rs_share_access | F_REMOTELOCK; 9077 9078 vp = sp->rs_finfo->rf_vp; 9079 VN_HOLD(vp); 9080 9081 /* 9082 * We need to unlock sp before we call the underlying filesystem to 9083 * acquire the file lock. 9084 */ 9085 rfs4_dbe_unlock(sp->rs_dbe); 9086 9087 error = setlock(vp, &flock, flag, cred); 9088 9089 /* 9090 * Make sure the file is still open. In a case the file was closed in 9091 * the meantime, clean the lock we acquired using the setlock() call 9092 * above, and return the appropriate error. 9093 */ 9094 rfs4_dbe_lock(sp->rs_dbe); 9095 if (sp->rs_closed == TRUE) { 9096 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid); 9097 rfs4_dbe_unlock(sp->rs_dbe); 9098 9099 VN_RELE(vp); 9100 9101 return (NFS4ERR_OLD_STATEID); 9102 } 9103 rfs4_dbe_unlock(sp->rs_dbe); 9104 9105 VN_RELE(vp); 9106 9107 if (error == 0) { 9108 rfs4_dbe_lock(lsp->rls_dbe); 9109 next_stateid(&lsp->rls_lockid); 9110 rfs4_dbe_unlock(lsp->rls_dbe); 9111 } 9112 9113 /* 9114 * N.B. We map error values to nfsv4 errors. This is differrent 9115 * than puterrno4 routine. 
9116 */ 9117 switch (error) { 9118 case 0: 9119 status = NFS4_OK; 9120 break; 9121 case EAGAIN: 9122 case EACCES: /* Old value */ 9123 /* Can only get here if op is OP_LOCK */ 9124 ASSERT(resop->resop == OP_LOCK); 9125 lres = &resop->nfs_resop4_u.oplock; 9126 status = NFS4ERR_DENIED; 9127 if (lock_denied(&lres->LOCK4res_u.denied, &flock) 9128 == NFS4ERR_EXPIRED) 9129 goto retry; 9130 break; 9131 case ENOLCK: 9132 status = NFS4ERR_DELAY; 9133 break; 9134 case EOVERFLOW: 9135 status = NFS4ERR_INVAL; 9136 break; 9137 case EINVAL: 9138 status = NFS4ERR_NOTSUPP; 9139 break; 9140 default: 9141 status = NFS4ERR_SERVERFAULT; 9142 break; 9143 } 9144 9145 return (status); 9146 } 9147 9148 /*ARGSUSED*/ 9149 void 9150 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop, 9151 struct svc_req *req, struct compound_state *cs) 9152 { 9153 LOCK4args *args = &argop->nfs_argop4_u.oplock; 9154 LOCK4res *resp = &resop->nfs_resop4_u.oplock; 9155 nfsstat4 status; 9156 stateid4 *stateid; 9157 rfs4_lockowner_t *lo; 9158 rfs4_client_t *cp; 9159 rfs4_state_t *sp = NULL; 9160 rfs4_lo_state_t *lsp = NULL; 9161 bool_t ls_sw_held = FALSE; 9162 bool_t create = TRUE; 9163 bool_t lcreate = TRUE; 9164 bool_t dup_lock = FALSE; 9165 int rc; 9166 9167 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs, 9168 LOCK4args *, args); 9169 9170 if (cs->vp == NULL) { 9171 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 9172 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9173 cs, LOCK4res *, resp); 9174 return; 9175 } 9176 9177 if (args->locker.new_lock_owner) { 9178 /* Create a new lockowner for this instance */ 9179 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner; 9180 9181 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner")); 9182 9183 stateid = &olo->open_stateid; 9184 get_stateid4(cs, stateid); 9185 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID); 9186 if (status != NFS4_OK) { 9187 NFS4_DEBUG(rfs4_debug, 9188 (CE_NOTE, "Get state failed in lock %d", status)); 
9189 *cs->statusp = resp->status = status; 9190 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9191 cs, LOCK4res *, resp); 9192 return; 9193 } 9194 9195 /* Ensure specified filehandle matches */ 9196 if (cs->vp != sp->rs_finfo->rf_vp) { 9197 rfs4_state_rele(sp); 9198 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9199 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9200 cs, LOCK4res *, resp); 9201 return; 9202 } 9203 9204 /* hold off other access to open_owner while we tinker */ 9205 rfs4_sw_enter(&sp->rs_owner->ro_sw); 9206 9207 switch (rc = rfs4_check_stateid_seqid(sp, stateid, cs)) { 9208 case NFS4_CHECK_STATEID_OLD: 9209 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9210 goto end; 9211 case NFS4_CHECK_STATEID_BAD: 9212 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9213 goto end; 9214 case NFS4_CHECK_STATEID_EXPIRED: 9215 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 9216 goto end; 9217 case NFS4_CHECK_STATEID_UNCONFIRMED: 9218 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9219 goto end; 9220 case NFS4_CHECK_STATEID_CLOSED: 9221 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9222 goto end; 9223 case NFS4_CHECK_STATEID_OKAY: 9224 if (rfs4_has_session(cs)) 9225 break; 9226 /* FALLTHROUGH */ 9227 case NFS4_CHECK_STATEID_REPLAY: 9228 ASSERT(!rfs4_has_session(cs)); 9229 9230 switch (rfs4_check_olo_seqid(olo->open_seqid, 9231 sp->rs_owner, resop)) { 9232 case NFS4_CHKSEQ_OKAY: 9233 if (rc == NFS4_CHECK_STATEID_OKAY) 9234 break; 9235 /* 9236 * This is replayed stateid; if seqid 9237 * matches next expected, then client 9238 * is using wrong seqid. 9239 */ 9240 /* FALLTHROUGH */ 9241 case NFS4_CHKSEQ_BAD: 9242 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9243 goto end; 9244 case NFS4_CHKSEQ_REPLAY: 9245 /* This is a duplicate LOCK request */ 9246 dup_lock = TRUE; 9247 9248 /* 9249 * For a duplicate we do not want to 9250 * create a new lockowner as it should 9251 * already exist. 9252 * Turn off the lockowner create flag. 
9253 */ 9254 lcreate = FALSE; 9255 } 9256 break; 9257 } 9258 9259 /* 9260 * See RFC 8881 18.10.3. MUST be ignored by the server: 9261 * The clientid field of the lock_owner field of the 9262 * open_owner field (locker.open_owner.lock_owner.clientid). 9263 */ 9264 if (rfs4_has_session(cs)) 9265 olo->lock_owner.clientid = cs->client->rc_clientid; 9266 9267 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate); 9268 if (lo == NULL) { 9269 NFS4_DEBUG(rfs4_debug, 9270 (CE_NOTE, "rfs4_op_lock: no lock owner")); 9271 *cs->statusp = resp->status = NFS4ERR_RESOURCE; 9272 goto end; 9273 } 9274 9275 lsp = rfs4_findlo_state_by_owner(lo, sp, &create); 9276 if (lsp == NULL) { 9277 rfs4_update_lease(sp->rs_owner->ro_client); 9278 /* 9279 * Only update theh open_seqid if this is not 9280 * a duplicate request 9281 */ 9282 if (dup_lock == FALSE) { 9283 rfs4_update_open_sequence(sp->rs_owner); 9284 } 9285 9286 NFS4_DEBUG(rfs4_debug, 9287 (CE_NOTE, "rfs4_op_lock: no state")); 9288 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 9289 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 9290 rfs4_lockowner_rele(lo); 9291 goto end; 9292 } 9293 9294 /* 9295 * This is the new_lock_owner branch and the client is 9296 * supposed to be associating a new lock_owner with 9297 * the open file at this point. If we find that a 9298 * lock_owner/state association already exists and a 9299 * successful LOCK request was returned to the client, 9300 * an error is returned to the client since this is 9301 * not appropriate. The client should be using the 9302 * existing lock_owner branch. 
9303 */ 9304 if (!rfs4_has_session(cs) && !dup_lock && !create) { 9305 if (lsp->rls_lock_completed == TRUE) { 9306 *cs->statusp = 9307 resp->status = NFS4ERR_BAD_SEQID; 9308 rfs4_lockowner_rele(lo); 9309 goto end; 9310 } 9311 } 9312 9313 rfs4_update_lease(sp->rs_owner->ro_client); 9314 9315 /* 9316 * Only update theh open_seqid if this is not 9317 * a duplicate request 9318 */ 9319 if (dup_lock == FALSE) { 9320 rfs4_update_open_sequence(sp->rs_owner); 9321 } 9322 9323 /* 9324 * If this is a duplicate lock request, just copy the 9325 * previously saved reply and return. 9326 */ 9327 if (dup_lock == TRUE) { 9328 /* verify that lock_seqid's match */ 9329 if (lsp->rls_seqid != olo->lock_seqid) { 9330 NFS4_DEBUG(rfs4_debug, 9331 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad" 9332 "lsp->seqid=%d old->seqid=%d", 9333 lsp->rls_seqid, olo->lock_seqid)); 9334 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9335 } else { 9336 rfs4_copy_reply(resop, &lsp->rls_reply); 9337 /* 9338 * Make sure to copy the just 9339 * retrieved reply status into the 9340 * overall compound status 9341 */ 9342 *cs->statusp = resp->status; 9343 } 9344 rfs4_lockowner_rele(lo); 9345 goto end; 9346 } 9347 9348 rfs4_dbe_lock(lsp->rls_dbe); 9349 9350 /* Make sure to update the lock sequence id */ 9351 lsp->rls_seqid = olo->lock_seqid; 9352 9353 NFS4_DEBUG(rfs4_debug, 9354 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid)); 9355 9356 /* 9357 * This is used to signify the newly created lockowner 9358 * stateid and its sequence number. The checks for 9359 * sequence number and increment don't occur on the 9360 * very first lock request for a lockowner. 
9361 */ 9362 lsp->rls_skip_seqid_check = TRUE; 9363 9364 /* hold off other access to lsp while we tinker */ 9365 rfs4_sw_enter(&lsp->rls_sw); 9366 ls_sw_held = TRUE; 9367 9368 rfs4_dbe_unlock(lsp->rls_dbe); 9369 9370 rfs4_lockowner_rele(lo); 9371 } else { 9372 stateid = &args->locker.locker4_u.lock_owner.lock_stateid; 9373 /* get lsp and hold the lock on the underlying file struct */ 9374 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) 9375 != NFS4_OK) { 9376 *cs->statusp = resp->status = status; 9377 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9378 cs, LOCK4res *, resp); 9379 return; 9380 } 9381 create = FALSE; /* We didn't create lsp */ 9382 9383 /* Ensure specified filehandle matches */ 9384 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) { 9385 rfs4_lo_state_rele(lsp, TRUE); 9386 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9387 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9388 cs, LOCK4res *, resp); 9389 return; 9390 } 9391 9392 /* hold off other access to lsp while we tinker */ 9393 rfs4_sw_enter(&lsp->rls_sw); 9394 ls_sw_held = TRUE; 9395 9396 switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) { 9397 /* 9398 * The stateid looks like it was okay (expected to be 9399 * the next one) 9400 */ 9401 case NFS4_CHECK_STATEID_OKAY: 9402 if (rfs4_has_session(cs)) 9403 break; 9404 9405 /* 9406 * The sequence id is now checked. Determine 9407 * if this is a replay or if it is in the 9408 * expected (next) sequence. In the case of a 9409 * replay, there are two replay conditions 9410 * that may occur. The first is the normal 9411 * condition where a LOCK is done with a 9412 * NFS4_OK response and the stateid is 9413 * updated. That case is handled below when 9414 * the stateid is identified as a REPLAY. The 9415 * second is the case where an error is 9416 * returned, like NFS4ERR_DENIED, and the 9417 * sequence number is updated but the stateid 9418 * is not updated. This second case is dealt 9419 * with here. 
So it may seem odd that the 9420 * stateid is okay but the sequence id is a 9421 * replay but it is okay. 9422 */ 9423 switch (rfs4_check_lock_seqid( 9424 args->locker.locker4_u.lock_owner.lock_seqid, 9425 lsp, resop)) { 9426 case NFS4_CHKSEQ_REPLAY: 9427 if (resp->status != NFS4_OK) { 9428 /* 9429 * Here is our replay and need 9430 * to verify that the last 9431 * response was an error. 9432 */ 9433 *cs->statusp = resp->status; 9434 goto end; 9435 } 9436 /* 9437 * This is done since the sequence id 9438 * looked like a replay but it didn't 9439 * pass our check so a BAD_SEQID is 9440 * returned as a result. 9441 */ 9442 /*FALLTHROUGH*/ 9443 case NFS4_CHKSEQ_BAD: 9444 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9445 goto end; 9446 case NFS4_CHKSEQ_OKAY: 9447 /* Everything looks okay move ahead */ 9448 break; 9449 } 9450 break; 9451 case NFS4_CHECK_STATEID_OLD: 9452 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9453 goto end; 9454 case NFS4_CHECK_STATEID_BAD: 9455 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9456 goto end; 9457 case NFS4_CHECK_STATEID_EXPIRED: 9458 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 9459 goto end; 9460 case NFS4_CHECK_STATEID_CLOSED: 9461 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9462 goto end; 9463 case NFS4_CHECK_STATEID_REPLAY: 9464 ASSERT(!rfs4_has_session(cs)); 9465 9466 switch (rfs4_check_lock_seqid( 9467 args->locker.locker4_u.lock_owner.lock_seqid, 9468 lsp, resop)) { 9469 case NFS4_CHKSEQ_OKAY: 9470 /* 9471 * This is a replayed stateid; if 9472 * seqid matches the next expected, 9473 * then client is using wrong seqid. 
9474 */ 9475 case NFS4_CHKSEQ_BAD: 9476 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9477 goto end; 9478 case NFS4_CHKSEQ_REPLAY: 9479 rfs4_update_lease(lsp->rls_locker->rl_client); 9480 *cs->statusp = status = resp->status; 9481 goto end; 9482 } 9483 break; 9484 default: 9485 ASSERT(FALSE); 9486 break; 9487 } 9488 9489 rfs4_update_lock_sequence(lsp); 9490 rfs4_update_lease(lsp->rls_locker->rl_client); 9491 } 9492 9493 /* 9494 * NFS4 only allows locking on regular files, so 9495 * verify type of object. 9496 */ 9497 if (cs->vp->v_type != VREG) { 9498 if (cs->vp->v_type == VDIR) 9499 status = NFS4ERR_ISDIR; 9500 else 9501 status = NFS4ERR_INVAL; 9502 goto out; 9503 } 9504 9505 cp = lsp->rls_state->rs_owner->ro_client; 9506 9507 if (rfs4_clnt_in_grace(cp) && !args->reclaim) { 9508 status = NFS4ERR_GRACE; 9509 goto out; 9510 } 9511 9512 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) { 9513 status = NFS4ERR_NO_GRACE; 9514 goto out; 9515 } 9516 9517 if (!rfs4_clnt_in_grace(cp) && args->reclaim) { 9518 status = NFS4ERR_NO_GRACE; 9519 goto out; 9520 } 9521 9522 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) 9523 cs->deleg = TRUE; 9524 9525 status = rfs4_do_lock(lsp, args->locktype, 9526 args->offset, args->length, cs->cr, resop); 9527 9528 out: 9529 lsp->rls_skip_seqid_check = FALSE; 9530 9531 *cs->statusp = resp->status = status; 9532 9533 if (status == NFS4_OK) { 9534 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid; 9535 lsp->rls_lock_completed = TRUE; 9536 9537 put_stateid4(cs, &resp->LOCK4res_u.lock_stateid); 9538 } 9539 /* 9540 * Only update the "OPEN" response here if this was a new 9541 * lock_owner 9542 */ 9543 if (sp) 9544 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 9545 9546 rfs4_update_lock_resp(lsp, resop); 9547 9548 end: 9549 if (lsp) { 9550 if (ls_sw_held) 9551 rfs4_sw_exit(&lsp->rls_sw); 9552 /* 9553 * If an sp obtained, then the lsp does not represent 9554 * a lock on the file struct. 
9555 */ 9556 if (sp != NULL) 9557 rfs4_lo_state_rele(lsp, FALSE); 9558 else 9559 rfs4_lo_state_rele(lsp, TRUE); 9560 } 9561 if (sp) { 9562 rfs4_sw_exit(&sp->rs_owner->ro_sw); 9563 rfs4_state_rele(sp); 9564 } 9565 9566 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs, 9567 LOCK4res *, resp); 9568 } 9569 9570 /* free function for LOCK/LOCKT */ 9571 static void 9572 lock_denied_free(nfs_resop4 *resop) 9573 { 9574 LOCK4denied *dp = NULL; 9575 9576 switch (resop->resop) { 9577 case OP_LOCK: 9578 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED) 9579 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied; 9580 break; 9581 case OP_LOCKT: 9582 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED) 9583 dp = &resop->nfs_resop4_u.oplockt.denied; 9584 break; 9585 default: 9586 break; 9587 } 9588 9589 if (dp) 9590 kmem_free(dp->owner.owner_val, dp->owner.owner_len); 9591 } 9592 9593 /*ARGSUSED*/ 9594 void 9595 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop, 9596 struct svc_req *req, struct compound_state *cs) 9597 { 9598 LOCKU4args *args = &argop->nfs_argop4_u.oplocku; 9599 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku; 9600 nfsstat4 status; 9601 stateid4 *stateid = &args->lock_stateid; 9602 rfs4_lo_state_t *lsp; 9603 9604 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs, 9605 LOCKU4args *, args); 9606 9607 if (cs->vp == NULL) { 9608 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 9609 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9610 LOCKU4res *, resp); 9611 return; 9612 } 9613 9614 get_stateid4(cs, stateid); 9615 9616 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) { 9617 *cs->statusp = resp->status = status; 9618 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9619 LOCKU4res *, resp); 9620 return; 9621 } 9622 9623 /* Ensure specified filehandle matches */ 9624 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) { 9625 rfs4_lo_state_rele(lsp, TRUE); 9626 *cs->statusp = resp->status = 
NFS4ERR_BAD_STATEID; 9627 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9628 LOCKU4res *, resp); 9629 return; 9630 } 9631 9632 /* hold off other access to lsp while we tinker */ 9633 rfs4_sw_enter(&lsp->rls_sw); 9634 9635 switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) { 9636 case NFS4_CHECK_STATEID_OKAY: 9637 if (rfs4_has_session(cs)) 9638 break; 9639 9640 if (rfs4_check_lock_seqid(args->seqid, lsp, resop) 9641 != NFS4_CHKSEQ_OKAY) { 9642 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9643 goto end; 9644 } 9645 break; 9646 case NFS4_CHECK_STATEID_OLD: 9647 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9648 goto end; 9649 case NFS4_CHECK_STATEID_BAD: 9650 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9651 goto end; 9652 case NFS4_CHECK_STATEID_EXPIRED: 9653 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 9654 goto end; 9655 case NFS4_CHECK_STATEID_CLOSED: 9656 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9657 goto end; 9658 case NFS4_CHECK_STATEID_REPLAY: 9659 ASSERT(!rfs4_has_session(cs)); 9660 9661 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) { 9662 case NFS4_CHKSEQ_OKAY: 9663 /* 9664 * This is a replayed stateid; if 9665 * seqid matches the next expected, 9666 * then client is using wrong seqid. 9667 */ 9668 case NFS4_CHKSEQ_BAD: 9669 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9670 goto end; 9671 case NFS4_CHKSEQ_REPLAY: 9672 rfs4_update_lease(lsp->rls_locker->rl_client); 9673 *cs->statusp = status = resp->status; 9674 goto end; 9675 } 9676 break; 9677 default: 9678 ASSERT(FALSE); 9679 break; 9680 } 9681 9682 rfs4_update_lock_sequence(lsp); 9683 rfs4_update_lease(lsp->rls_locker->rl_client); 9684 9685 /* 9686 * NFS4 only allows locking on regular files, so 9687 * verify type of object. 
9688 */ 9689 if (cs->vp->v_type != VREG) { 9690 if (cs->vp->v_type == VDIR) 9691 status = NFS4ERR_ISDIR; 9692 else 9693 status = NFS4ERR_INVAL; 9694 goto out; 9695 } 9696 9697 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) { 9698 status = NFS4ERR_GRACE; 9699 goto out; 9700 } 9701 9702 status = rfs4_do_lock(lsp, args->locktype, 9703 args->offset, args->length, cs->cr, resop); 9704 9705 out: 9706 *cs->statusp = resp->status = status; 9707 9708 if (status == NFS4_OK) 9709 resp->lock_stateid = lsp->rls_lockid.stateid; 9710 9711 rfs4_update_lock_resp(lsp, resop); 9712 9713 end: 9714 rfs4_sw_exit(&lsp->rls_sw); 9715 rfs4_lo_state_rele(lsp, TRUE); 9716 9717 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9718 LOCKU4res *, resp); 9719 } 9720 9721 /* 9722 * LOCKT is a best effort routine, the client can not be guaranteed that 9723 * the status return is still in effect by the time the reply is received. 9724 * They are numerous race conditions in this routine, but we are not required 9725 * and can not be accurate. 9726 */ 9727 /*ARGSUSED*/ 9728 void 9729 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop, 9730 struct svc_req *req, struct compound_state *cs) 9731 { 9732 LOCKT4args *args = &argop->nfs_argop4_u.oplockt; 9733 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt; 9734 rfs4_lockowner_t *lo; 9735 rfs4_client_t *cp; 9736 bool_t create = FALSE; 9737 struct flock64 flk; 9738 int error; 9739 int flag = FREAD | FWRITE; 9740 int ltype; 9741 length4 posix_length; 9742 sysid_t sysid; 9743 pid_t pid; 9744 9745 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs, 9746 LOCKT4args *, args); 9747 9748 if (cs->vp == NULL) { 9749 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 9750 goto out; 9751 } 9752 9753 /* 9754 * NFS4 only allows locking on regular files, so 9755 * verify type of object. 
9756 */ 9757 if (cs->vp->v_type != VREG) { 9758 if (cs->vp->v_type == VDIR) 9759 *cs->statusp = resp->status = NFS4ERR_ISDIR; 9760 else 9761 *cs->statusp = resp->status = NFS4ERR_INVAL; 9762 goto out; 9763 } 9764 9765 /* 9766 * Check out the clientid to ensure the server knows about it 9767 * so that we correctly inform the client of a server reboot. 9768 */ 9769 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE)) 9770 == NULL) { 9771 *cs->statusp = resp->status = 9772 rfs4_check_clientid(&args->owner.clientid, 0); 9773 goto out; 9774 } 9775 if (rfs4_lease_expired(cp)) { 9776 rfs4_client_close(cp); 9777 /* 9778 * Protocol doesn't allow returning NFS4ERR_STALE as 9779 * other operations do on this check so STALE_CLIENTID 9780 * is returned instead 9781 */ 9782 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID; 9783 goto out; 9784 } 9785 9786 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) { 9787 *cs->statusp = resp->status = NFS4ERR_GRACE; 9788 rfs4_client_rele(cp); 9789 goto out; 9790 } 9791 rfs4_client_rele(cp); 9792 9793 resp->status = NFS4_OK; 9794 9795 switch (args->locktype) { 9796 case READ_LT: 9797 case READW_LT: 9798 ltype = F_RDLCK; 9799 break; 9800 case WRITE_LT: 9801 case WRITEW_LT: 9802 ltype = F_WRLCK; 9803 break; 9804 } 9805 9806 posix_length = args->length; 9807 /* Check for zero length. To lock to end of file use all ones for V4 */ 9808 if (posix_length == 0) { 9809 *cs->statusp = resp->status = NFS4ERR_INVAL; 9810 goto out; 9811 } else if (posix_length == (length4)(~0)) { 9812 posix_length = 0; /* Posix to end of file */ 9813 } 9814 9815 /* 9816 * See RFC 8881 18.11.3: 9817 * The clientid field of the owner MAY be set to any value 9818 * by the client and MUST be ignored by the server. 
9819 */ 9820 if (rfs4_has_session(cs)) 9821 args->owner.clientid = cs->client->rc_clientid; 9822 9823 /* Find or create a lockowner */ 9824 lo = rfs4_findlockowner(&args->owner, &create); 9825 9826 if (lo) { 9827 pid = lo->rl_pid; 9828 if ((resp->status = 9829 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK) 9830 goto err; 9831 } else { 9832 pid = 0; 9833 sysid = lockt_sysid; 9834 } 9835 retry: 9836 flk.l_type = ltype; 9837 flk.l_whence = 0; /* SEEK_SET */ 9838 flk.l_start = args->offset; 9839 flk.l_len = posix_length; 9840 flk.l_sysid = sysid; 9841 flk.l_pid = pid; 9842 flag |= F_REMOTELOCK; 9843 9844 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk); 9845 9846 /* Note that length4 is uint64_t but l_len and l_start are off64_t */ 9847 if (flk.l_len < 0 || flk.l_start < 0) { 9848 resp->status = NFS4ERR_INVAL; 9849 goto err; 9850 } 9851 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0, 9852 NULL, cs->cr, NULL); 9853 9854 /* 9855 * N.B. We map error values to nfsv4 errors. This is differrent 9856 * than puterrno4 routine. 
9857 */ 9858 switch (error) { 9859 case 0: 9860 if (flk.l_type == F_UNLCK) 9861 resp->status = NFS4_OK; 9862 else { 9863 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED) 9864 goto retry; 9865 resp->status = NFS4ERR_DENIED; 9866 } 9867 break; 9868 case EOVERFLOW: 9869 resp->status = NFS4ERR_INVAL; 9870 break; 9871 case EINVAL: 9872 resp->status = NFS4ERR_NOTSUPP; 9873 break; 9874 default: 9875 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)", 9876 error); 9877 resp->status = NFS4ERR_SERVERFAULT; 9878 break; 9879 } 9880 9881 err: 9882 if (lo) 9883 rfs4_lockowner_rele(lo); 9884 *cs->statusp = resp->status; 9885 out: 9886 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs, 9887 LOCKT4res *, resp); 9888 } 9889 9890 int 9891 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny) 9892 { 9893 int err; 9894 int cmd; 9895 vnode_t *vp; 9896 struct shrlock shr; 9897 struct shr_locowner shr_loco; 9898 int fflags = 0; 9899 9900 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 9901 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID); 9902 9903 if (sp->rs_closed) 9904 return (NFS4ERR_OLD_STATEID); 9905 9906 vp = sp->rs_finfo->rf_vp; 9907 ASSERT(vp); 9908 9909 shr.s_access = shr.s_deny = 0; 9910 9911 if (access & OPEN4_SHARE_ACCESS_READ) { 9912 fflags |= FREAD; 9913 shr.s_access |= F_RDACC; 9914 } 9915 if (access & OPEN4_SHARE_ACCESS_WRITE) { 9916 fflags |= FWRITE; 9917 shr.s_access |= F_WRACC; 9918 } 9919 ASSERT(shr.s_access); 9920 9921 if (deny & OPEN4_SHARE_DENY_READ) 9922 shr.s_deny |= F_RDDNY; 9923 if (deny & OPEN4_SHARE_DENY_WRITE) 9924 shr.s_deny |= F_WRDNY; 9925 9926 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe); 9927 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt; 9928 shr_loco.sl_pid = shr.s_pid; 9929 shr_loco.sl_id = shr.s_sysid; 9930 shr.s_owner = (caddr_t)&shr_loco; 9931 shr.s_own_len = sizeof (shr_loco); 9932 9933 cmd = nbl_need_check(vp) ? 
F_SHARE_NBMAND : F_SHARE; 9934 9935 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL); 9936 if (err != 0) { 9937 if (err == EAGAIN) 9938 err = NFS4ERR_SHARE_DENIED; 9939 else 9940 err = puterrno4(err); 9941 return (err); 9942 } 9943 9944 sp->rs_share_access |= access; 9945 sp->rs_share_deny |= deny; 9946 9947 return (0); 9948 } 9949 9950 int 9951 rfs4_unshare(rfs4_state_t *sp) 9952 { 9953 int err; 9954 struct shrlock shr; 9955 struct shr_locowner shr_loco; 9956 9957 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 9958 9959 if (sp->rs_closed || sp->rs_share_access == 0) 9960 return (0); 9961 9962 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID); 9963 ASSERT(sp->rs_finfo->rf_vp); 9964 9965 shr.s_access = shr.s_deny = 0; 9966 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe); 9967 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt; 9968 shr_loco.sl_pid = shr.s_pid; 9969 shr_loco.sl_id = shr.s_sysid; 9970 shr.s_owner = (caddr_t)&shr_loco; 9971 shr.s_own_len = sizeof (shr_loco); 9972 9973 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(), 9974 NULL); 9975 if (err != 0) { 9976 err = puterrno4(err); 9977 return (err); 9978 } 9979 9980 sp->rs_share_access = 0; 9981 sp->rs_share_deny = 0; 9982 9983 return (0); 9984 9985 } 9986 9987 static int 9988 rdma_setup_read_data4(READ4args *args, READ4res *rok) 9989 { 9990 struct clist *wcl; 9991 count4 count = rok->data_len; 9992 int wlist_len; 9993 9994 wcl = args->wlist; 9995 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { 9996 return (FALSE); 9997 } 9998 wcl = args->wlist; 9999 rok->wlist_len = wlist_len; 10000 rok->wlist = wcl; 10001 return (TRUE); 10002 } 10003 10004 /* tunable to disable server referrals */ 10005 int rfs4_no_referrals = 0; 10006 10007 /* 10008 * Find an NFS record in reparse point data. 10009 * Returns 0 for success and <0 or an errno value on failure. 
10010 */ 10011 int 10012 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap) 10013 { 10014 int err; 10015 char *stype, *val; 10016 nvlist_t *nvl; 10017 nvpair_t *curr; 10018 10019 if ((nvl = reparse_init()) == NULL) 10020 return (-1); 10021 10022 if ((err = reparse_vnode_parse(vp, nvl)) != 0) { 10023 reparse_free(nvl); 10024 return (err); 10025 } 10026 10027 curr = NULL; 10028 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) { 10029 if ((stype = nvpair_name(curr)) == NULL) { 10030 reparse_free(nvl); 10031 return (-2); 10032 } 10033 if (strncasecmp(stype, "NFS", 3) == 0) 10034 break; 10035 } 10036 10037 if ((curr == NULL) || 10038 (nvpair_value_string(curr, &val))) { 10039 reparse_free(nvl); 10040 return (-3); 10041 } 10042 *nvlp = nvl; 10043 *svcp = stype; 10044 *datap = val; 10045 return (0); 10046 } 10047 10048 int 10049 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr) 10050 { 10051 nvlist_t *nvl; 10052 char *s, *d; 10053 10054 if (rfs4_no_referrals != 0) 10055 return (B_FALSE); 10056 10057 if (vn_is_reparse(vp, cr, NULL) == B_FALSE) 10058 return (B_FALSE); 10059 10060 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0) 10061 return (B_FALSE); 10062 10063 reparse_free(nvl); 10064 10065 return (B_TRUE); 10066 } 10067 10068 /* 10069 * There is a user-level copy of this routine in ref_subr.c. 10070 * Changes should be kept in sync. 
10071 */ 10072 static int 10073 nfs4_create_components(char *path, component4 *comp4) 10074 { 10075 int slen, plen, ncomp; 10076 char *ori_path, *nxtc, buf[MAXNAMELEN]; 10077 10078 if (path == NULL) 10079 return (0); 10080 10081 plen = strlen(path) + 1; /* include the terminator */ 10082 ori_path = path; 10083 ncomp = 0; 10084 10085 /* count number of components in the path */ 10086 for (nxtc = path; nxtc < ori_path + plen; nxtc++) { 10087 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') { 10088 if ((slen = nxtc - path) == 0) { 10089 path = nxtc + 1; 10090 continue; 10091 } 10092 10093 if (comp4 != NULL) { 10094 bcopy(path, buf, slen); 10095 buf[slen] = '\0'; 10096 (void) str_to_utf8(buf, &comp4[ncomp]); 10097 } 10098 10099 ncomp++; /* 1 valid component */ 10100 path = nxtc + 1; 10101 } 10102 if (*nxtc == '\0' || *nxtc == '\n') 10103 break; 10104 } 10105 10106 return (ncomp); 10107 } 10108 10109 /* 10110 * There is a user-level copy of this routine in ref_subr.c. 10111 * Changes should be kept in sync. 
10112 */ 10113 static int 10114 make_pathname4(char *path, pathname4 *pathname) 10115 { 10116 int ncomp; 10117 component4 *comp4; 10118 10119 if (pathname == NULL) 10120 return (0); 10121 10122 if (path == NULL) { 10123 pathname->pathname4_val = NULL; 10124 pathname->pathname4_len = 0; 10125 return (0); 10126 } 10127 10128 /* count number of components to alloc buffer */ 10129 if ((ncomp = nfs4_create_components(path, NULL)) == 0) { 10130 pathname->pathname4_val = NULL; 10131 pathname->pathname4_len = 0; 10132 return (0); 10133 } 10134 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP); 10135 10136 /* copy components into allocated buffer */ 10137 ncomp = nfs4_create_components(path, comp4); 10138 10139 pathname->pathname4_val = comp4; 10140 pathname->pathname4_len = ncomp; 10141 10142 return (ncomp); 10143 } 10144 10145 #define xdr_fs_locations4 xdr_fattr4_fs_locations 10146 10147 fs_locations4 * 10148 fetch_referral(vnode_t *vp, cred_t *cr) 10149 { 10150 nvlist_t *nvl; 10151 char *stype, *sdata; 10152 fs_locations4 *result; 10153 char buf[1024]; 10154 size_t bufsize; 10155 XDR xdr; 10156 int err; 10157 10158 /* 10159 * Check attrs to ensure it's a reparse point 10160 */ 10161 if (vn_is_reparse(vp, cr, NULL) == B_FALSE) 10162 return (NULL); 10163 10164 /* 10165 * Look for an NFS record and get the type and data 10166 */ 10167 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0) 10168 return (NULL); 10169 10170 /* 10171 * With the type and data, upcall to get the referral 10172 */ 10173 bufsize = sizeof (buf); 10174 bzero(buf, sizeof (buf)); 10175 err = reparse_kderef((const char *)stype, (const char *)sdata, 10176 buf, &bufsize); 10177 reparse_free(nvl); 10178 10179 DTRACE_PROBE4(nfs4serv__func__referral__upcall, 10180 char *, stype, char *, sdata, char *, buf, int, err); 10181 if (err) { 10182 cmn_err(CE_NOTE, 10183 "reparsed daemon not running: unable to get referral (%d)", 10184 err); 10185 return (NULL); 10186 } 10187 10188 /* 10189 * We get an 
XDR'ed record back from the kderef call 10190 */ 10191 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE); 10192 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP); 10193 err = xdr_fs_locations4(&xdr, result); 10194 XDR_DESTROY(&xdr); 10195 if (err != TRUE) { 10196 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail, 10197 int, err); 10198 return (NULL); 10199 } 10200 10201 /* 10202 * Look at path to recover fs_root, ignoring the leading '/' 10203 */ 10204 (void) make_pathname4(vp->v_path, &result->fs_root); 10205 10206 return (result); 10207 } 10208 10209 char * 10210 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz) 10211 { 10212 fs_locations4 *fsl; 10213 fs_location4 *fs; 10214 char *server, *path, *symbuf; 10215 static char *prefix = "/net/"; 10216 int i, size, npaths; 10217 uint_t len; 10218 10219 /* Get the referral */ 10220 if ((fsl = fetch_referral(vp, cr)) == NULL) 10221 return (NULL); 10222 10223 /* Deal with only the first location and first server */ 10224 fs = &fsl->locations_val[0]; 10225 server = utf8_to_str(&fs->server_val[0], &len, NULL); 10226 if (server == NULL) { 10227 rfs4_free_fs_locations4(fsl); 10228 kmem_free(fsl, sizeof (fs_locations4)); 10229 return (NULL); 10230 } 10231 10232 /* Figure out size for "/net/" + host + /path/path/path + NULL */ 10233 size = strlen(prefix) + len; 10234 for (i = 0; i < fs->rootpath.pathname4_len; i++) 10235 size += fs->rootpath.pathname4_val[i].utf8string_len + 1; 10236 10237 /* Allocate the symlink buffer and fill it */ 10238 symbuf = kmem_zalloc(size, KM_SLEEP); 10239 (void) strcat(symbuf, prefix); 10240 (void) strcat(symbuf, server); 10241 kmem_free(server, len); 10242 10243 npaths = 0; 10244 for (i = 0; i < fs->rootpath.pathname4_len; i++) { 10245 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL); 10246 if (path == NULL) 10247 continue; 10248 (void) strcat(symbuf, "/"); 10249 (void) strcat(symbuf, path); 10250 npaths++; 10251 kmem_free(path, len); 10252 } 10253 10254 
rfs4_free_fs_locations4(fsl); 10255 kmem_free(fsl, sizeof (fs_locations4)); 10256 10257 if (strsz != NULL) 10258 *strsz = size; 10259 return (symbuf); 10260 } 10261 10262 /* 10263 * Check to see if we have a downrev Solaris client, so that we 10264 * can send it a symlink instead of a referral. 10265 */ 10266 int 10267 client_is_downrev(struct svc_req *req) 10268 { 10269 struct sockaddr *ca; 10270 rfs4_clntip_t *ci; 10271 bool_t create = FALSE; 10272 int is_downrev; 10273 10274 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 10275 ASSERT(ca); 10276 ci = rfs4_find_clntip(ca, &create); 10277 if (ci == NULL) 10278 return (0); 10279 is_downrev = ci->ri_no_referrals; 10280 rfs4_dbe_rele(ci->ri_dbe); 10281 return (is_downrev); 10282 } 10283 10284 /* 10285 * Do the main work of handling HA-NFSv4 Resource Group failover on 10286 * Sun Cluster. 10287 * We need to detect whether any RG admin paths have been added or removed, 10288 * and adjust resources accordingly. 10289 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In 10290 * order to scale, the list and array of paths need to be held in more 10291 * suitable data structures. 10292 */ 10293 static void 10294 hanfsv4_failover(nfs4_srv_t *nsrv4) 10295 { 10296 int i, start_grace, numadded_paths = 0; 10297 char **added_paths = NULL; 10298 rfs4_dss_path_t *dss_path; 10299 10300 /* 10301 * Note: currently, dss_pathlist cannot be NULL, since 10302 * it will always include an entry for NFS4_DSS_VAR_DIR. If we 10303 * make the latter dynamically specified too, the following will 10304 * need to be adjusted. 10305 */ 10306 10307 /* 10308 * First, look for removed paths: RGs that have been failed-over 10309 * away from this node. 10310 * Walk the "currently-serving" dss_pathlist and, for each 10311 * path, check if it is on the "passed-in" rfs4_dss_newpaths array 10312 * from nfsd. If not, that RG path has been removed. 
10313 * 10314 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed 10315 * any duplicates. 10316 */ 10317 dss_path = nsrv4->dss_pathlist; 10318 do { 10319 int found = 0; 10320 char *path = dss_path->path; 10321 10322 /* used only for non-HA so may not be removed */ 10323 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) { 10324 dss_path = dss_path->next; 10325 continue; 10326 } 10327 10328 for (i = 0; i < rfs4_dss_numnewpaths; i++) { 10329 int cmpret; 10330 char *newpath = rfs4_dss_newpaths[i]; 10331 10332 /* 10333 * Since nfsd has sorted rfs4_dss_newpaths for us, 10334 * once the return from strcmp is negative we know 10335 * we've passed the point where "path" should be, 10336 * and can stop searching: "path" has been removed. 10337 */ 10338 cmpret = strcmp(path, newpath); 10339 if (cmpret < 0) 10340 break; 10341 if (cmpret == 0) { 10342 found = 1; 10343 break; 10344 } 10345 } 10346 10347 if (found == 0) { 10348 unsigned index = dss_path->index; 10349 rfs4_servinst_t *sip = dss_path->sip; 10350 rfs4_dss_path_t *path_next = dss_path->next; 10351 10352 /* 10353 * This path has been removed. 10354 * We must clear out the servinst reference to 10355 * it, since it's now owned by another 10356 * node: we should not attempt to touch it. 10357 */ 10358 ASSERT(dss_path == sip->dss_paths[index]); 10359 sip->dss_paths[index] = NULL; 10360 10361 /* remove from "currently-serving" list, and destroy */ 10362 remque(dss_path); 10363 /* allow for NUL */ 10364 kmem_free(dss_path->path, strlen(dss_path->path) + 1); 10365 kmem_free(dss_path, sizeof (rfs4_dss_path_t)); 10366 10367 dss_path = path_next; 10368 } else { 10369 /* path was found; not removed */ 10370 dss_path = dss_path->next; 10371 } 10372 } while (dss_path != nsrv4->dss_pathlist); 10373 10374 /* 10375 * Now, look for added paths: RGs that have been failed-over 10376 * to this node. 
10377 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and, 10378 * for each path, check if it is on the "currently-serving" 10379 * dss_pathlist. If not, that RG path has been added. 10380 * 10381 * Note: we don't do duplicate detection here; nfsd does that for us. 10382 * 10383 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us 10384 * an upper bound for the size needed for added_paths[numadded_paths]. 10385 */ 10386 10387 /* probably more space than we need, but guaranteed to be enough */ 10388 if (rfs4_dss_numnewpaths > 0) { 10389 size_t sz = rfs4_dss_numnewpaths * sizeof (char *); 10390 added_paths = kmem_zalloc(sz, KM_SLEEP); 10391 } 10392 10393 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */ 10394 for (i = 0; i < rfs4_dss_numnewpaths; i++) { 10395 int found = 0; 10396 char *newpath = rfs4_dss_newpaths[i]; 10397 10398 dss_path = nsrv4->dss_pathlist; 10399 do { 10400 char *path = dss_path->path; 10401 10402 /* used only for non-HA */ 10403 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) { 10404 dss_path = dss_path->next; 10405 continue; 10406 } 10407 10408 if (strncmp(path, newpath, strlen(path)) == 0) { 10409 found = 1; 10410 break; 10411 } 10412 10413 dss_path = dss_path->next; 10414 } while (dss_path != nsrv4->dss_pathlist); 10415 10416 if (found == 0) { 10417 added_paths[numadded_paths] = newpath; 10418 numadded_paths++; 10419 } 10420 } 10421 10422 /* did we find any added paths? 
*/ 10423 if (numadded_paths > 0) { 10424 10425 /* create a new server instance, and start its grace period */ 10426 start_grace = 1; 10427 /* CSTYLED */ 10428 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths); 10429 10430 /* read in the stable storage state from these paths */ 10431 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths); 10432 10433 /* 10434 * Multiple failovers during a grace period will cause 10435 * clients of the same resource group to be partitioned 10436 * into different server instances, with different 10437 * grace periods. Since clients of the same resource 10438 * group must be subject to the same grace period, 10439 * we need to reset all currently active grace periods. 10440 */ 10441 rfs4_grace_reset_all(nsrv4); 10442 } 10443 10444 if (rfs4_dss_numnewpaths > 0) 10445 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *)); 10446 } 10447