1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
28 * All Rights Reserved 29 */ 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/buf.h> 36 #include <sys/vfs.h> 37 #include <sys/vfs_opreg.h> 38 #include <sys/vnode.h> 39 #include <sys/uio.h> 40 #include <sys/errno.h> 41 #include <sys/sysmacros.h> 42 #include <sys/statvfs.h> 43 #include <sys/kmem.h> 44 #include <sys/dirent.h> 45 #include <sys/cmn_err.h> 46 #include <sys/debug.h> 47 #include <sys/systeminfo.h> 48 #include <sys/flock.h> 49 #include <sys/pathname.h> 50 #include <sys/nbmlock.h> 51 #include <sys/share.h> 52 #include <sys/atomic.h> 53 #include <sys/policy.h> 54 #include <sys/fem.h> 55 #include <sys/sdt.h> 56 #include <sys/ddi.h> 57 #include <sys/zone.h> 58 59 #include <rpc/types.h> 60 #include <rpc/auth.h> 61 #include <rpc/rpcsec_gss.h> 62 #include <rpc/svc.h> 63 64 #include <nfs/nfs.h> 65 #include <nfs/export.h> 66 #include <nfs/nfs_cmd.h> 67 #include <nfs/lm.h> 68 #include <nfs/nfs4.h> 69 70 #include <sys/strsubr.h> 71 #include <sys/strsun.h> 72 73 #include <inet/common.h> 74 #include <inet/ip.h> 75 #include <inet/ip6.h> 76 77 #include <sys/tsol/label.h> 78 #include <sys/tsol/tndb.h> 79 80 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */ 81 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES; 82 #define RFS4_LOCK_DELAY 10 /* Milliseconds */ 83 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY; 84 extern struct svc_ops rdma_svc_ops; 85 /* End of Tunables */ 86 87 static int rdma_setup_read_data4(READ4args *, READ4res *); 88 89 /* 90 * Used to bump the stateid4.seqid value and show changes in the stateid 91 */ 92 #define next_stateid(sp) (++(sp)->bits.chgseq) 93 94 /* 95 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent. 96 * This is used to return NFS4ERR_TOOSMALL when clients specify 97 * maxcount that isn't large enough to hold the smallest possible 98 * XDR encoded dirent. 
99 * 100 * sizeof cookie (8 bytes) + 101 * sizeof name_len (4 bytes) + 102 * sizeof smallest (padded) name (4 bytes) + 103 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4 104 * sizeof attrlist4_len (4 bytes) + 105 * sizeof next boolean (4 bytes) 106 * 107 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing 108 * the smallest possible entry4 (assumes no attrs requested). 109 * sizeof nfsstat4 (4 bytes) + 110 * sizeof verifier4 (8 bytes) + 111 * sizeof entry4list bool (4 bytes) + 112 * sizeof entry4 (36 bytes) + 113 * sizeof eof bool (4 bytes) 114 * 115 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to 116 * VOP_READDIR. Its value is the size of the maximum possible dirent 117 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent 118 * required for a given name length. MAXNAMELEN is the maximum 119 * filename length allowed in Solaris. The first two DIRENT64_RECLEN() 120 * macros are to allow for . and .. entries -- just a minor tweak to try 121 * and guarantee that buffer we give to VOP_READDIR will be large enough 122 * to hold ., .., and the largest possible solaris dirent64. 123 */ 124 #define RFS4_MINLEN_ENTRY4 36 125 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4) 126 #define RFS4_MINLEN_RDDIR_BUF \ 127 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN)) 128 129 /* 130 * It would be better to pad to 4 bytes since that's what XDR would do, 131 * but the dirents UFS gives us are already padded to 8, so just take 132 * what we're given. Dircount is only a hint anyway. Currently the 133 * solaris kernel is ASCII only, so there's no point in calling the 134 * UTF8 functions. 
135 * 136 * dirent64: named padded to provide 8 byte struct alignment 137 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad) 138 * 139 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes 140 * 141 */ 142 #define DIRENT64_TO_DIRCOUNT(dp) \ 143 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen)) 144 145 time_t rfs4_start_time; /* Initialized in rfs4_srvrinit */ 146 147 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */ 148 149 u_longlong_t nfs4_srv_caller_id; 150 uint_t nfs4_srv_vkey = 0; 151 152 verifier4 Write4verf; 153 verifier4 Readdir4verf; 154 155 void rfs4_init_compound_state(struct compound_state *); 156 157 static void nullfree(caddr_t); 158 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 159 struct compound_state *); 160 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 161 struct compound_state *); 162 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 163 struct compound_state *); 164 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 165 struct compound_state *); 166 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 167 struct compound_state *); 168 static void rfs4_op_create_free(nfs_resop4 *resop); 169 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *, 170 struct svc_req *, struct compound_state *); 171 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *, 172 struct svc_req *, struct compound_state *); 173 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 174 struct compound_state *); 175 static void rfs4_op_getattr_free(nfs_resop4 *); 176 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 177 struct compound_state *); 178 static void rfs4_op_getfh_free(nfs_resop4 *); 179 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 180 struct compound_state *); 181 static void rfs4_op_link(nfs_argop4 *, 
nfs_resop4 *, struct svc_req *, 182 struct compound_state *); 183 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 184 struct compound_state *); 185 static void lock_denied_free(nfs_resop4 *); 186 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 187 struct compound_state *); 188 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 189 struct compound_state *); 190 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 191 struct compound_state *); 192 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 193 struct compound_state *); 194 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, 195 struct svc_req *req, struct compound_state *cs); 196 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 197 struct compound_state *); 198 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 199 struct compound_state *); 200 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *, 201 struct svc_req *, struct compound_state *); 202 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *, 203 struct svc_req *, struct compound_state *); 204 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 205 struct compound_state *); 206 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 207 struct compound_state *); 208 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 209 struct compound_state *); 210 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 211 struct compound_state *); 212 static void rfs4_op_read_free(nfs_resop4 *); 213 static void rfs4_op_readdir_free(nfs_resop4 *resop); 214 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 215 struct compound_state *); 216 static void rfs4_op_readlink_free(nfs_resop4 *); 217 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *, 218 struct svc_req *, 
struct compound_state *); 219 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 220 struct compound_state *); 221 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 222 struct compound_state *); 223 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 224 struct compound_state *); 225 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 226 struct compound_state *); 227 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 228 struct compound_state *); 229 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 230 struct compound_state *); 231 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 232 struct compound_state *); 233 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 234 struct compound_state *); 235 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *, 236 struct svc_req *, struct compound_state *); 237 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *, 238 struct svc_req *req, struct compound_state *); 239 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 240 struct compound_state *); 241 static void rfs4_op_secinfo_free(nfs_resop4 *); 242 243 static nfsstat4 check_open_access(uint32_t, 244 struct compound_state *, struct svc_req *); 245 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *); 246 static int vop_shrlock(vnode_t *, int, struct shrlock *, int); 247 static int rfs4_shrlock(rfs4_state_t *, int); 248 static int rfs4_share(rfs4_state_t *); 249 void rfs4_ss_clid(rfs4_client_t *, struct svc_req *); 250 251 /* 252 * translation table for attrs 253 */ 254 struct nfs4_ntov_table { 255 union nfs4_attr_u *na; 256 uint8_t amap[NFS4_MAXNUM_ATTRS]; 257 int attrcnt; 258 bool_t vfsstat; 259 }; 260 261 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp); 262 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp, 263 struct 
nfs4_svgetit_arg *sargp); 264 265 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, 266 struct compound_state *cs, struct nfs4_svgetit_arg *sargp, 267 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd); 268 269 fem_t *deleg_rdops; 270 fem_t *deleg_wrops; 271 272 rfs4_servinst_t *rfs4_cur_servinst = NULL; /* current server instance */ 273 kmutex_t rfs4_servinst_lock; /* protects linked list */ 274 int rfs4_seen_first_compound; /* set first time we see one */ 275 276 /* 277 * NFS4 op dispatch table 278 */ 279 280 struct rfsv4disp { 281 void (*dis_proc)(); /* proc to call */ 282 void (*dis_resfree)(); /* frees space allocated by proc */ 283 int dis_flags; /* RPC_IDEMPOTENT, etc... */ 284 }; 285 286 static struct rfsv4disp rfsv4disptab[] = { 287 /* 288 * NFS VERSION 4 289 */ 290 291 /* RFS_NULL = 0 */ 292 {rfs4_op_illegal, nullfree, 0}, 293 294 /* UNUSED = 1 */ 295 {rfs4_op_illegal, nullfree, 0}, 296 297 /* UNUSED = 2 */ 298 {rfs4_op_illegal, nullfree, 0}, 299 300 /* OP_ACCESS = 3 */ 301 {rfs4_op_access, nullfree, RPC_IDEMPOTENT}, 302 303 /* OP_CLOSE = 4 */ 304 {rfs4_op_close, nullfree, 0}, 305 306 /* OP_COMMIT = 5 */ 307 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT}, 308 309 /* OP_CREATE = 6 */ 310 {rfs4_op_create, nullfree, 0}, 311 312 /* OP_DELEGPURGE = 7 */ 313 {rfs4_op_delegpurge, nullfree, 0}, 314 315 /* OP_DELEGRETURN = 8 */ 316 {rfs4_op_delegreturn, nullfree, 0}, 317 318 /* OP_GETATTR = 9 */ 319 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT}, 320 321 /* OP_GETFH = 10 */ 322 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL}, 323 324 /* OP_LINK = 11 */ 325 {rfs4_op_link, nullfree, 0}, 326 327 /* OP_LOCK = 12 */ 328 {rfs4_op_lock, lock_denied_free, 0}, 329 330 /* OP_LOCKT = 13 */ 331 {rfs4_op_lockt, lock_denied_free, 0}, 332 333 /* OP_LOCKU = 14 */ 334 {rfs4_op_locku, nullfree, 0}, 335 336 /* OP_LOOKUP = 15 */ 337 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)}, 338 339 /* OP_LOOKUPP = 16 */ 340 {rfs4_op_lookupp, nullfree, 
(RPC_IDEMPOTENT | RPC_PUBLICFH_OK)}, 341 342 /* OP_NVERIFY = 17 */ 343 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT}, 344 345 /* OP_OPEN = 18 */ 346 {rfs4_op_open, rfs4_free_reply, 0}, 347 348 /* OP_OPENATTR = 19 */ 349 {rfs4_op_openattr, nullfree, 0}, 350 351 /* OP_OPEN_CONFIRM = 20 */ 352 {rfs4_op_open_confirm, nullfree, 0}, 353 354 /* OP_OPEN_DOWNGRADE = 21 */ 355 {rfs4_op_open_downgrade, nullfree, 0}, 356 357 /* OP_OPEN_PUTFH = 22 */ 358 {rfs4_op_putfh, nullfree, RPC_ALL}, 359 360 /* OP_PUTPUBFH = 23 */ 361 {rfs4_op_putpubfh, nullfree, RPC_ALL}, 362 363 /* OP_PUTROOTFH = 24 */ 364 {rfs4_op_putrootfh, nullfree, RPC_ALL}, 365 366 /* OP_READ = 25 */ 367 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT}, 368 369 /* OP_READDIR = 26 */ 370 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT}, 371 372 /* OP_READLINK = 27 */ 373 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT}, 374 375 /* OP_REMOVE = 28 */ 376 {rfs4_op_remove, nullfree, 0}, 377 378 /* OP_RENAME = 29 */ 379 {rfs4_op_rename, nullfree, 0}, 380 381 /* OP_RENEW = 30 */ 382 {rfs4_op_renew, nullfree, 0}, 383 384 /* OP_RESTOREFH = 31 */ 385 {rfs4_op_restorefh, nullfree, RPC_ALL}, 386 387 /* OP_SAVEFH = 32 */ 388 {rfs4_op_savefh, nullfree, RPC_ALL}, 389 390 /* OP_SECINFO = 33 */ 391 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0}, 392 393 /* OP_SETATTR = 34 */ 394 {rfs4_op_setattr, nullfree, 0}, 395 396 /* OP_SETCLIENTID = 35 */ 397 {rfs4_op_setclientid, nullfree, 0}, 398 399 /* OP_SETCLIENTID_CONFIRM = 36 */ 400 {rfs4_op_setclientid_confirm, nullfree, 0}, 401 402 /* OP_VERIFY = 37 */ 403 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT}, 404 405 /* OP_WRITE = 38 */ 406 {rfs4_op_write, nullfree, 0}, 407 408 /* OP_RELEASE_LOCKOWNER = 39 */ 409 {rfs4_op_release_lockowner, nullfree, 0}, 410 }; 411 412 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]); 413 414 #define OP_ILLEGAL_IDX (rfsv4disp_cnt) 415 416 #ifdef DEBUG 417 418 int rfs4_fillone_debug = 0; 419 int rfs4_shrlock_debug = 0; 
420 int rfs4_no_stub_access = 1; 421 int rfs4_rddir_debug = 0; 422 423 static char *rfs4_op_string[] = { 424 "rfs4_op_null", 425 "rfs4_op_1 unused", 426 "rfs4_op_2 unused", 427 "rfs4_op_access", 428 "rfs4_op_close", 429 "rfs4_op_commit", 430 "rfs4_op_create", 431 "rfs4_op_delegpurge", 432 "rfs4_op_delegreturn", 433 "rfs4_op_getattr", 434 "rfs4_op_getfh", 435 "rfs4_op_link", 436 "rfs4_op_lock", 437 "rfs4_op_lockt", 438 "rfs4_op_locku", 439 "rfs4_op_lookup", 440 "rfs4_op_lookupp", 441 "rfs4_op_nverify", 442 "rfs4_op_open", 443 "rfs4_op_openattr", 444 "rfs4_op_open_confirm", 445 "rfs4_op_open_downgrade", 446 "rfs4_op_putfh", 447 "rfs4_op_putpubfh", 448 "rfs4_op_putrootfh", 449 "rfs4_op_read", 450 "rfs4_op_readdir", 451 "rfs4_op_readlink", 452 "rfs4_op_remove", 453 "rfs4_op_rename", 454 "rfs4_op_renew", 455 "rfs4_op_restorefh", 456 "rfs4_op_savefh", 457 "rfs4_op_secinfo", 458 "rfs4_op_setattr", 459 "rfs4_op_setclientid", 460 "rfs4_op_setclient_confirm", 461 "rfs4_op_verify", 462 "rfs4_op_write", 463 "rfs4_op_release_lockowner", 464 "rfs4_op_illegal" 465 }; 466 #endif 467 468 void rfs4_ss_chkclid(rfs4_client_t *); 469 470 extern size_t strlcpy(char *dst, const char *src, size_t dstsize); 471 472 #ifdef nextdp 473 #undef nextdp 474 #endif 475 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) 476 477 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = { 478 VOPNAME_OPEN, { .femop_open = deleg_rd_open }, 479 VOPNAME_WRITE, { .femop_write = deleg_rd_write }, 480 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr }, 481 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock }, 482 VOPNAME_SPACE, { .femop_space = deleg_rd_space }, 483 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr }, 484 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent }, 485 NULL, NULL 486 }; 487 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = { 488 VOPNAME_OPEN, { .femop_open = deleg_wr_open }, 489 VOPNAME_READ, { .femop_read = deleg_wr_read }, 490 
VOPNAME_WRITE, { .femop_write = deleg_wr_write }, 491 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr }, 492 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock }, 493 VOPNAME_SPACE, { .femop_space = deleg_wr_space }, 494 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr }, 495 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent }, 496 NULL, NULL 497 }; 498 499 int 500 rfs4_srvrinit(void) 501 { 502 timespec32_t verf; 503 int error; 504 extern void rfs4_attr_init(); 505 extern krwlock_t rfs4_deleg_policy_lock; 506 507 /* 508 * The following algorithm attempts to find a unique verifier 509 * to be used as the write verifier returned from the server 510 * to the client. It is important that this verifier change 511 * whenever the server reboots. Of secondary importance, it 512 * is important for the verifier to be unique between two 513 * different servers. 514 * 515 * Thus, an attempt is made to use the system hostid and the 516 * current time in seconds when the nfssrv kernel module is 517 * loaded. It is assumed that an NFS server will not be able 518 * to boot and then to reboot in less than a second. If the 519 * hostid has not been set, then the current high resolution 520 * time is used. This will ensure different verifiers each 521 * time the server reboots and minimize the chances that two 522 * different servers will have the same verifier. 523 * XXX - this is broken on LP64 kernels. 
524 */ 525 verf.tv_sec = (time_t)zone_get_hostid(NULL); 526 if (verf.tv_sec != 0) { 527 verf.tv_nsec = gethrestime_sec(); 528 } else { 529 timespec_t tverf; 530 531 gethrestime(&tverf); 532 verf.tv_sec = (time_t)tverf.tv_sec; 533 verf.tv_nsec = tverf.tv_nsec; 534 } 535 536 Write4verf = *(uint64_t *)&verf; 537 538 rfs4_attr_init(); 539 mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL); 540 541 /* Used to manage create/destroy of server state */ 542 mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL); 543 544 /* Used to manage access to server instance linked list */ 545 mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL); 546 547 /* Used to manage access to rfs4_deleg_policy */ 548 rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL); 549 550 error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops); 551 if (error != 0) { 552 rfs4_disable_delegation(); 553 } else { 554 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl, 555 &deleg_wrops); 556 if (error != 0) { 557 rfs4_disable_delegation(); 558 fem_free(deleg_rdops); 559 } 560 } 561 562 nfs4_srv_caller_id = fs_new_caller_id(); 563 564 lockt_sysid = lm_alloc_sysidt(); 565 566 vsd_create(&nfs4_srv_vkey, NULL); 567 568 return (0); 569 } 570 571 void 572 rfs4_srvrfini(void) 573 { 574 extern krwlock_t rfs4_deleg_policy_lock; 575 576 if (lockt_sysid != LM_NOSYSID) { 577 lm_free_sysidt(lockt_sysid); 578 lockt_sysid = LM_NOSYSID; 579 } 580 581 mutex_destroy(&rfs4_deleg_lock); 582 mutex_destroy(&rfs4_state_lock); 583 rw_destroy(&rfs4_deleg_policy_lock); 584 585 fem_free(deleg_rdops); 586 fem_free(deleg_wrops); 587 } 588 589 void 590 rfs4_init_compound_state(struct compound_state *cs) 591 { 592 bzero(cs, sizeof (*cs)); 593 cs->cont = TRUE; 594 cs->access = CS_ACCESS_DENIED; 595 cs->deleg = FALSE; 596 cs->mandlock = FALSE; 597 cs->fh.nfs_fh4_val = cs->fhbuf; 598 } 599 600 void 601 rfs4_grace_start(rfs4_servinst_t *sip) 602 { 603 rw_enter(&sip->rwlock, RW_WRITER); 604 sip->start_time = 
(time_t)TICK_TO_SEC(lbolt); 605 sip->grace_period = rfs4_grace_period; 606 rw_exit(&sip->rwlock); 607 } 608 609 /* 610 * returns true if the instance's grace period has never been started 611 */ 612 int 613 rfs4_servinst_grace_new(rfs4_servinst_t *sip) 614 { 615 time_t start_time; 616 617 rw_enter(&sip->rwlock, RW_READER); 618 start_time = sip->start_time; 619 rw_exit(&sip->rwlock); 620 621 return (start_time == 0); 622 } 623 624 /* 625 * Indicates if server instance is within the 626 * grace period. 627 */ 628 int 629 rfs4_servinst_in_grace(rfs4_servinst_t *sip) 630 { 631 time_t grace_expiry; 632 633 rw_enter(&sip->rwlock, RW_READER); 634 grace_expiry = sip->start_time + sip->grace_period; 635 rw_exit(&sip->rwlock); 636 637 return (((time_t)TICK_TO_SEC(lbolt)) < grace_expiry); 638 } 639 640 int 641 rfs4_clnt_in_grace(rfs4_client_t *cp) 642 { 643 ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0); 644 645 return (rfs4_servinst_in_grace(cp->server_instance)); 646 } 647 648 /* 649 * reset all currently active grace periods 650 */ 651 void 652 rfs4_grace_reset_all(void) 653 { 654 rfs4_servinst_t *sip; 655 656 mutex_enter(&rfs4_servinst_lock); 657 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) 658 if (rfs4_servinst_in_grace(sip)) 659 rfs4_grace_start(sip); 660 mutex_exit(&rfs4_servinst_lock); 661 } 662 663 /* 664 * start any new instances' grace periods 665 */ 666 void 667 rfs4_grace_start_new(void) 668 { 669 rfs4_servinst_t *sip; 670 671 mutex_enter(&rfs4_servinst_lock); 672 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) 673 if (rfs4_servinst_grace_new(sip)) 674 rfs4_grace_start(sip); 675 mutex_exit(&rfs4_servinst_lock); 676 } 677 678 static rfs4_dss_path_t * 679 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index) 680 { 681 size_t len; 682 rfs4_dss_path_t *dss_path; 683 684 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP); 685 686 /* 687 * Take a copy of the string, since the original may be overwritten. 
688 * Sadly, no strdup() in the kernel. 689 */ 690 /* allow for NUL */ 691 len = strlen(path) + 1; 692 dss_path->path = kmem_alloc(len, KM_SLEEP); 693 (void) strlcpy(dss_path->path, path, len); 694 695 /* associate with servinst */ 696 dss_path->sip = sip; 697 dss_path->index = index; 698 699 /* 700 * Add to list of served paths. 701 * No locking required, as we're only ever called at startup. 702 */ 703 if (rfs4_dss_pathlist == NULL) { 704 /* this is the first dss_path_t */ 705 706 /* needed for insque/remque */ 707 dss_path->next = dss_path->prev = dss_path; 708 709 rfs4_dss_pathlist = dss_path; 710 } else { 711 insque(dss_path, rfs4_dss_pathlist); 712 } 713 714 return (dss_path); 715 } 716 717 /* 718 * Create a new server instance, and make it the currently active instance. 719 * Note that starting the grace period too early will reduce the clients' 720 * recovery window. 721 */ 722 void 723 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths) 724 { 725 unsigned i; 726 rfs4_servinst_t *sip; 727 rfs4_oldstate_t *oldstate; 728 729 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP); 730 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL); 731 732 sip->start_time = (time_t)0; 733 sip->grace_period = (time_t)0; 734 sip->next = NULL; 735 sip->prev = NULL; 736 737 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL); 738 /* 739 * This initial dummy entry is required to setup for insque/remque. 740 * It must be skipped over whenever the list is traversed. 
741 */ 742 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP); 743 /* insque/remque require initial list entry to be self-terminated */ 744 oldstate->next = oldstate; 745 oldstate->prev = oldstate; 746 sip->oldstate = oldstate; 747 748 749 sip->dss_npaths = dss_npaths; 750 sip->dss_paths = kmem_alloc(dss_npaths * 751 sizeof (rfs4_dss_path_t *), KM_SLEEP); 752 753 for (i = 0; i < dss_npaths; i++) { 754 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i); 755 } 756 757 mutex_enter(&rfs4_servinst_lock); 758 if (rfs4_cur_servinst != NULL) { 759 /* add to linked list */ 760 sip->prev = rfs4_cur_servinst; 761 rfs4_cur_servinst->next = sip; 762 } 763 if (start_grace) 764 rfs4_grace_start(sip); 765 /* make the new instance "current" */ 766 rfs4_cur_servinst = sip; 767 768 mutex_exit(&rfs4_servinst_lock); 769 } 770 771 /* 772 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy 773 * all instances directly. 774 */ 775 void 776 rfs4_servinst_destroy_all(void) 777 { 778 rfs4_servinst_t *sip, *prev, *current; 779 #ifdef DEBUG 780 int n = 0; 781 #endif 782 783 mutex_enter(&rfs4_servinst_lock); 784 ASSERT(rfs4_cur_servinst != NULL); 785 current = rfs4_cur_servinst; 786 rfs4_cur_servinst = NULL; 787 for (sip = current; sip != NULL; sip = prev) { 788 prev = sip->prev; 789 rw_destroy(&sip->rwlock); 790 if (sip->oldstate) 791 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t)); 792 if (sip->dss_paths) 793 kmem_free(sip->dss_paths, 794 sip->dss_npaths * sizeof (rfs4_dss_path_t *)); 795 kmem_free(sip, sizeof (rfs4_servinst_t)); 796 #ifdef DEBUG 797 n++; 798 #endif 799 } 800 mutex_exit(&rfs4_servinst_lock); 801 } 802 803 /* 804 * Assign the current server instance to a client_t. 805 * Should be called with cp->dbe held. 
806 */ 807 void 808 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip) 809 { 810 ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0); 811 812 /* 813 * The lock ensures that if the current instance is in the process 814 * of changing, we will see the new one. 815 */ 816 mutex_enter(&rfs4_servinst_lock); 817 cp->server_instance = sip; 818 mutex_exit(&rfs4_servinst_lock); 819 } 820 821 rfs4_servinst_t * 822 rfs4_servinst(rfs4_client_t *cp) 823 { 824 ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0); 825 826 return (cp->server_instance); 827 } 828 829 /* ARGSUSED */ 830 static void 831 nullfree(caddr_t resop) 832 { 833 } 834 835 /* 836 * This is a fall-through for invalid or not implemented (yet) ops 837 */ 838 /* ARGSUSED */ 839 static void 840 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 841 struct compound_state *cs) 842 { 843 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL; 844 } 845 846 /* 847 * Check if the security flavor, nfsnum, is in the flavor_list. 848 */ 849 bool_t 850 in_flavor_list(int nfsnum, int *flavor_list, int count) 851 { 852 int i; 853 854 for (i = 0; i < count; i++) { 855 if (nfsnum == flavor_list[i]) 856 return (TRUE); 857 } 858 return (FALSE); 859 } 860 861 /* 862 * Used by rfs4_op_secinfo to get the security information from the 863 * export structure associated with the component. 864 */ 865 /* ARGSUSED */ 866 static nfsstat4 867 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) 868 { 869 int error, different_export = 0; 870 vnode_t *dvp, *vp, *tvp; 871 struct exportinfo *exi = NULL; 872 fid_t fid; 873 uint_t count, i; 874 secinfo4 *resok_val; 875 struct secinfo *secp; 876 seconfig_t *si; 877 bool_t did_traverse; 878 int dotdot, walk; 879 880 dvp = cs->vp; 881 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0'); 882 883 /* 884 * If dotdotting, then need to check whether it's above the 885 * root of a filesystem, or above an export point. 
886 */ 887 if (dotdot) { 888 889 /* 890 * If dotdotting at the root of a filesystem, then 891 * need to traverse back to the mounted-on filesystem 892 * and do the dotdot lookup there. 893 */ 894 if (cs->vp->v_flag & VROOT) { 895 896 /* 897 * If at the system root, then can 898 * go up no further. 899 */ 900 if (VN_CMP(dvp, rootdir)) 901 return (puterrno4(ENOENT)); 902 903 /* 904 * Traverse back to the mounted-on filesystem 905 */ 906 dvp = untraverse(cs->vp); 907 908 /* 909 * Set the different_export flag so we remember 910 * to pick up a new exportinfo entry for 911 * this new filesystem. 912 */ 913 different_export = 1; 914 } else { 915 916 /* 917 * If dotdotting above an export point then set 918 * the different_export to get new export info. 919 */ 920 different_export = nfs_exported(cs->exi, cs->vp); 921 } 922 } 923 924 /* 925 * Get the vnode for the component "nm". 926 */ 927 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr, 928 NULL, NULL, NULL); 929 if (error) 930 return (puterrno4(error)); 931 932 /* 933 * If the vnode is in a pseudo filesystem, or if the security flavor 934 * used in the request is valid but not an explicitly shared flavor, 935 * or the access bit indicates that this is a limited access, 936 * check whether this vnode is visible. 937 */ 938 if (!different_export && 939 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) || 940 cs->access & CS_ACCESS_LIMITED)) { 941 if (! nfs_visible(cs->exi, vp, &different_export)) { 942 VN_RELE(vp); 943 return (puterrno4(ENOENT)); 944 } 945 } 946 947 /* 948 * If it's a mountpoint, then traverse it. 
949 */ 950 if (vn_ismntpt(vp)) { 951 tvp = vp; 952 if ((error = traverse(&tvp)) != 0) { 953 VN_RELE(vp); 954 return (puterrno4(error)); 955 } 956 /* remember that we had to traverse mountpoint */ 957 did_traverse = TRUE; 958 vp = tvp; 959 different_export = 1; 960 } else if (vp->v_vfsp != dvp->v_vfsp) { 961 /* 962 * If vp isn't a mountpoint and the vfs ptrs aren't the same, 963 * then vp is probably an LOFS object. We don't need the 964 * realvp, we just need to know that we might have crossed 965 * a server fs boundary and need to call checkexport4. 966 * (LOFS lookup hides server fs mountpoints, and actually calls 967 * traverse) 968 */ 969 different_export = 1; 970 did_traverse = FALSE; 971 } 972 973 /* 974 * Get the export information for it. 975 */ 976 if (different_export) { 977 978 bzero(&fid, sizeof (fid)); 979 fid.fid_len = MAXFIDSZ; 980 error = vop_fid_pseudo(vp, &fid); 981 if (error) { 982 VN_RELE(vp); 983 return (puterrno4(error)); 984 } 985 986 if (dotdot) 987 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE); 988 else 989 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp); 990 991 if (exi == NULL) { 992 if (did_traverse == TRUE) { 993 /* 994 * If this vnode is a mounted-on vnode, 995 * but the mounted-on file system is not 996 * exported, send back the secinfo for 997 * the exported node that the mounted-on 998 * vnode lives in. 999 */ 1000 exi = cs->exi; 1001 } else { 1002 VN_RELE(vp); 1003 return (puterrno4(EACCES)); 1004 } 1005 } 1006 } else { 1007 exi = cs->exi; 1008 } 1009 ASSERT(exi != NULL); 1010 1011 1012 /* 1013 * Create the secinfo result based on the security information 1014 * from the exportinfo structure (exi). 1015 * 1016 * Return all flavors for a pseudo node. 1017 * For a real export node, return the flavor that the client 1018 * has access with. 
1019 */ 1020 ASSERT(RW_LOCK_HELD(&exported_lock)); 1021 if (PSEUDO(exi)) { 1022 count = exi->exi_export.ex_seccnt; /* total sec count */ 1023 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP); 1024 secp = exi->exi_export.ex_secinfo; 1025 1026 for (i = 0; i < count; i++) { 1027 si = &secp[i].s_secinfo; 1028 resok_val[i].flavor = si->sc_rpcnum; 1029 if (resok_val[i].flavor == RPCSEC_GSS) { 1030 rpcsec_gss_info *info; 1031 1032 info = &resok_val[i].flavor_info; 1033 info->qop = si->sc_qop; 1034 info->service = (rpc_gss_svc_t)si->sc_service; 1035 1036 /* get oid opaque data */ 1037 info->oid.sec_oid4_len = 1038 si->sc_gss_mech_type->length; 1039 info->oid.sec_oid4_val = kmem_alloc( 1040 si->sc_gss_mech_type->length, KM_SLEEP); 1041 bcopy( 1042 si->sc_gss_mech_type->elements, 1043 info->oid.sec_oid4_val, 1044 info->oid.sec_oid4_len); 1045 } 1046 } 1047 resp->SECINFO4resok_len = count; 1048 resp->SECINFO4resok_val = resok_val; 1049 } else { 1050 int ret_cnt = 0, k = 0; 1051 int *flavor_list; 1052 1053 count = exi->exi_export.ex_seccnt; /* total sec count */ 1054 secp = exi->exi_export.ex_secinfo; 1055 1056 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP); 1057 /* find out which flavors to return */ 1058 for (i = 0; i < count; i ++) { 1059 int access, flavor, perm; 1060 1061 flavor = secp[i].s_secinfo.sc_nfsnum; 1062 perm = secp[i].s_flags; 1063 1064 access = nfsauth4_secinfo_access(exi, cs->req, 1065 flavor, perm); 1066 1067 if (! (access & NFSAUTH_DENIED) && 1068 ! (access & NFSAUTH_WRONGSEC)) { 1069 flavor_list[ret_cnt] = flavor; 1070 ret_cnt++; 1071 } 1072 } 1073 1074 /* Create the returning SECINFO value */ 1075 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP); 1076 1077 for (i = 0; i < count; i++) { 1078 /* 1079 * If the flavor is in the flavor list, 1080 * fill in resok_val. 
1081 */ 1082 si = &secp[i].s_secinfo; 1083 if (in_flavor_list(si->sc_nfsnum, 1084 flavor_list, ret_cnt)) { 1085 resok_val[k].flavor = si->sc_rpcnum; 1086 if (resok_val[k].flavor == RPCSEC_GSS) { 1087 rpcsec_gss_info *info; 1088 1089 info = &resok_val[k].flavor_info; 1090 info->qop = si->sc_qop; 1091 info->service = (rpc_gss_svc_t) 1092 si->sc_service; 1093 1094 /* get oid opaque data */ 1095 info->oid.sec_oid4_len = 1096 si->sc_gss_mech_type->length; 1097 info->oid.sec_oid4_val = kmem_alloc( 1098 si->sc_gss_mech_type->length, 1099 KM_SLEEP); 1100 bcopy(si->sc_gss_mech_type->elements, 1101 info->oid.sec_oid4_val, 1102 info->oid.sec_oid4_len); 1103 } 1104 k++; 1105 } 1106 if (k >= ret_cnt) 1107 break; 1108 } 1109 resp->SECINFO4resok_len = ret_cnt; 1110 resp->SECINFO4resok_val = resok_val; 1111 kmem_free(flavor_list, count * sizeof (int)); 1112 } 1113 1114 VN_RELE(vp); 1115 return (NFS4_OK); 1116 } 1117 1118 /* 1119 * SECINFO (Operation 33): Obtain required security information on 1120 * the component name in the format of (security-mechanism-oid, qop, service) 1121 * triplets. 1122 */ 1123 /* ARGSUSED */ 1124 static void 1125 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1126 struct compound_state *cs) 1127 { 1128 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo; 1129 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo; 1130 utf8string *utfnm = &args->name; 1131 uint_t len; 1132 char *nm; 1133 struct sockaddr *ca; 1134 char *name = NULL; 1135 1136 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs, 1137 SECINFO4args *, args); 1138 1139 /* 1140 * Current file handle (cfh) should have been set before getting 1141 * into this function. If not, return error. 1142 */ 1143 if (cs->vp == NULL) { 1144 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1145 goto out; 1146 } 1147 1148 if (cs->vp->v_type != VDIR) { 1149 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 1150 goto out; 1151 } 1152 1153 /* 1154 * Verify the component name. 
If failed, error out, but 1155 * do not error out if the component name is a "..". 1156 * SECINFO will return its parents secinfo data for SECINFO "..". 1157 */ 1158 if (!utf8_dir_verify(utfnm)) { 1159 if (utfnm->utf8string_len != 2 || 1160 utfnm->utf8string_val[0] != '.' || 1161 utfnm->utf8string_val[1] != '.') { 1162 *cs->statusp = resp->status = NFS4ERR_INVAL; 1163 goto out; 1164 } 1165 } 1166 1167 nm = utf8_to_str(utfnm, &len, NULL); 1168 if (nm == NULL) { 1169 *cs->statusp = resp->status = NFS4ERR_INVAL; 1170 goto out; 1171 } 1172 1173 if (len > MAXNAMELEN) { 1174 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1175 kmem_free(nm, len); 1176 goto out; 1177 } 1178 /* If necessary, convert to UTF-8 for illbehaved clients */ 1179 1180 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1181 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 1182 MAXPATHLEN + 1); 1183 1184 if (name == NULL) { 1185 *cs->statusp = resp->status = NFS4ERR_INVAL; 1186 kmem_free(nm, len); 1187 goto out; 1188 } 1189 1190 1191 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp); 1192 1193 if (name != nm) 1194 kmem_free(name, MAXPATHLEN + 1); 1195 kmem_free(nm, len); 1196 1197 out: 1198 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs, 1199 SECINFO4res *, resp); 1200 } 1201 1202 /* 1203 * Free SECINFO result. 1204 */ 1205 /* ARGSUSED */ 1206 static void 1207 rfs4_op_secinfo_free(nfs_resop4 *resop) 1208 { 1209 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo; 1210 int count, i; 1211 secinfo4 *resok_val; 1212 1213 /* If this is not an Ok result, nothing to free. 
*/ 1214 if (resp->status != NFS4_OK) { 1215 return; 1216 } 1217 1218 count = resp->SECINFO4resok_len; 1219 resok_val = resp->SECINFO4resok_val; 1220 1221 for (i = 0; i < count; i++) { 1222 if (resok_val[i].flavor == RPCSEC_GSS) { 1223 rpcsec_gss_info *info; 1224 1225 info = &resok_val[i].flavor_info; 1226 kmem_free(info->oid.sec_oid4_val, 1227 info->oid.sec_oid4_len); 1228 } 1229 } 1230 kmem_free(resok_val, count * sizeof (secinfo4)); 1231 resp->SECINFO4resok_len = 0; 1232 resp->SECINFO4resok_val = NULL; 1233 } 1234 1235 /* ARGSUSED */ 1236 static void 1237 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1238 struct compound_state *cs) 1239 { 1240 ACCESS4args *args = &argop->nfs_argop4_u.opaccess; 1241 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess; 1242 int error; 1243 vnode_t *vp; 1244 struct vattr va; 1245 int checkwriteperm; 1246 cred_t *cr = cs->cr; 1247 bslabel_t *clabel, *slabel; 1248 ts_label_t *tslabel; 1249 boolean_t admin_low_client; 1250 1251 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs, 1252 ACCESS4args *, args); 1253 1254 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */ 1255 if (cs->access == CS_ACCESS_DENIED) { 1256 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1257 goto out; 1258 } 1259 #endif 1260 if (cs->vp == NULL) { 1261 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1262 goto out; 1263 } 1264 1265 ASSERT(cr != NULL); 1266 1267 vp = cs->vp; 1268 1269 /* 1270 * If the file system is exported read only, it is not appropriate 1271 * to check write permissions for regular files and directories. 1272 * Special files are interpreted by the client, so the underlying 1273 * permissions are sent back to the client for interpretation. 
1274 */ 1275 if (rdonly4(cs->exi, cs->vp, req) && 1276 (vp->v_type == VREG || vp->v_type == VDIR)) 1277 checkwriteperm = 0; 1278 else 1279 checkwriteperm = 1; 1280 1281 /* 1282 * XXX 1283 * We need the mode so that we can correctly determine access 1284 * permissions relative to a mandatory lock file. Access to 1285 * mandatory lock files is denied on the server, so it might 1286 * as well be reflected to the server during the open. 1287 */ 1288 va.va_mask = AT_MODE; 1289 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1290 if (error) { 1291 *cs->statusp = resp->status = puterrno4(error); 1292 goto out; 1293 } 1294 resp->access = 0; 1295 resp->supported = 0; 1296 1297 if (is_system_labeled()) { 1298 ASSERT(req->rq_label != NULL); 1299 clabel = req->rq_label; 1300 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *, 1301 "got client label from request(1)", 1302 struct svc_req *, req); 1303 if (!blequal(&l_admin_low->tsl_label, clabel)) { 1304 if ((tslabel = nfs_getflabel(vp)) == NULL) { 1305 *cs->statusp = resp->status = puterrno4(EACCES); 1306 goto out; 1307 } 1308 slabel = label2bslabel(tslabel); 1309 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel, 1310 char *, "got server label(1) for vp(2)", 1311 bslabel_t *, slabel, vnode_t *, vp); 1312 1313 admin_low_client = B_FALSE; 1314 } else 1315 admin_low_client = B_TRUE; 1316 } 1317 1318 if (args->access & ACCESS4_READ) { 1319 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 1320 if (!error && !MANDLOCK(vp, va.va_mode) && 1321 (!is_system_labeled() || admin_low_client || 1322 bldominates(clabel, slabel))) 1323 resp->access |= ACCESS4_READ; 1324 resp->supported |= ACCESS4_READ; 1325 } 1326 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) { 1327 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 1328 if (!error && (!is_system_labeled() || admin_low_client || 1329 bldominates(clabel, slabel))) 1330 resp->access |= ACCESS4_LOOKUP; 1331 resp->supported |= ACCESS4_LOOKUP; 1332 } 1333 if (checkwriteperm && 1334 
(args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) { 1335 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 1336 if (!error && !MANDLOCK(vp, va.va_mode) && 1337 (!is_system_labeled() || admin_low_client || 1338 blequal(clabel, slabel))) 1339 resp->access |= 1340 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND)); 1341 resp->supported |= (ACCESS4_MODIFY | ACCESS4_EXTEND); 1342 } 1343 1344 if (checkwriteperm && 1345 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) { 1346 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 1347 if (!error && (!is_system_labeled() || admin_low_client || 1348 blequal(clabel, slabel))) 1349 resp->access |= ACCESS4_DELETE; 1350 resp->supported |= ACCESS4_DELETE; 1351 } 1352 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) { 1353 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 1354 if (!error && !MANDLOCK(vp, va.va_mode) && 1355 (!is_system_labeled() || admin_low_client || 1356 bldominates(clabel, slabel))) 1357 resp->access |= ACCESS4_EXECUTE; 1358 resp->supported |= ACCESS4_EXECUTE; 1359 } 1360 1361 if (is_system_labeled() && !admin_low_client) 1362 label_rele(tslabel); 1363 1364 *cs->statusp = resp->status = NFS4_OK; 1365 out: 1366 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs, 1367 ACCESS4res *, resp); 1368 } 1369 1370 /* ARGSUSED */ 1371 static void 1372 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1373 struct compound_state *cs) 1374 { 1375 COMMIT4args *args = &argop->nfs_argop4_u.opcommit; 1376 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit; 1377 int error; 1378 vnode_t *vp = cs->vp; 1379 cred_t *cr = cs->cr; 1380 vattr_t va; 1381 1382 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs, 1383 COMMIT4args *, args); 1384 1385 if (vp == NULL) { 1386 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1387 goto out; 1388 } 1389 if (cs->access == CS_ACCESS_DENIED) { 1390 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1391 goto out; 1392 } 1393 1394 if (args->offset + args->count < 
args->offset) { 1395 *cs->statusp = resp->status = NFS4ERR_INVAL; 1396 goto out; 1397 } 1398 1399 va.va_mask = AT_UID; 1400 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1401 1402 /* 1403 * If we can't get the attributes, then we can't do the 1404 * right access checking. So, we'll fail the request. 1405 */ 1406 if (error) { 1407 *cs->statusp = resp->status = puterrno4(error); 1408 goto out; 1409 } 1410 if (rdonly4(cs->exi, cs->vp, req)) { 1411 *cs->statusp = resp->status = NFS4ERR_ROFS; 1412 goto out; 1413 } 1414 1415 if (vp->v_type != VREG) { 1416 if (vp->v_type == VDIR) 1417 resp->status = NFS4ERR_ISDIR; 1418 else 1419 resp->status = NFS4ERR_INVAL; 1420 *cs->statusp = resp->status; 1421 goto out; 1422 } 1423 1424 if (crgetuid(cr) != va.va_uid && 1425 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) { 1426 *cs->statusp = resp->status = puterrno4(error); 1427 goto out; 1428 } 1429 1430 error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr, NULL); 1431 if (!error) 1432 error = VOP_FSYNC(vp, FNODSYNC, cr, NULL); 1433 1434 if (error) { 1435 *cs->statusp = resp->status = puterrno4(error); 1436 goto out; 1437 } 1438 1439 *cs->statusp = resp->status = NFS4_OK; 1440 resp->writeverf = Write4verf; 1441 out: 1442 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs, 1443 COMMIT4res *, resp); 1444 } 1445 1446 /* 1447 * rfs4_op_mknod is called from rfs4_op_create after all initial verification 1448 * was completed. It does the nfsv4 create for special files. 
1449 */ 1450 /* ARGSUSED */ 1451 static vnode_t * 1452 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req, 1453 struct compound_state *cs, vattr_t *vap, char *nm) 1454 { 1455 int error; 1456 cred_t *cr = cs->cr; 1457 vnode_t *dvp = cs->vp; 1458 vnode_t *vp = NULL; 1459 int mode; 1460 enum vcexcl excl; 1461 1462 switch (args->type) { 1463 case NF4CHR: 1464 case NF4BLK: 1465 if (secpolicy_sys_devices(cr) != 0) { 1466 *cs->statusp = resp->status = NFS4ERR_PERM; 1467 return (NULL); 1468 } 1469 if (args->type == NF4CHR) 1470 vap->va_type = VCHR; 1471 else 1472 vap->va_type = VBLK; 1473 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1, 1474 args->ftype4_u.devdata.specdata2); 1475 vap->va_mask |= AT_RDEV; 1476 break; 1477 case NF4SOCK: 1478 vap->va_type = VSOCK; 1479 break; 1480 case NF4FIFO: 1481 vap->va_type = VFIFO; 1482 break; 1483 default: 1484 *cs->statusp = resp->status = NFS4ERR_BADTYPE; 1485 return (NULL); 1486 } 1487 1488 /* 1489 * Must specify the mode. 1490 */ 1491 if (!(vap->va_mask & AT_MODE)) { 1492 *cs->statusp = resp->status = NFS4ERR_INVAL; 1493 return (NULL); 1494 } 1495 1496 excl = EXCL; 1497 1498 mode = 0; 1499 1500 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL); 1501 if (error) { 1502 *cs->statusp = resp->status = puterrno4(error); 1503 return (NULL); 1504 } 1505 return (vp); 1506 } 1507 1508 /* 1509 * nfsv4 create is used to create non-regular files. For regular files, 1510 * use nfsv4 open. 
1511 */ 1512 /* ARGSUSED */ 1513 static void 1514 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1515 struct compound_state *cs) 1516 { 1517 CREATE4args *args = &argop->nfs_argop4_u.opcreate; 1518 CREATE4res *resp = &resop->nfs_resop4_u.opcreate; 1519 int error; 1520 struct vattr bva, iva, iva2, ava, *vap; 1521 cred_t *cr = cs->cr; 1522 vnode_t *dvp = cs->vp; 1523 vnode_t *vp = NULL; 1524 vnode_t *realvp; 1525 char *nm, *lnm; 1526 uint_t len, llen; 1527 int syncval = 0; 1528 struct nfs4_svgetit_arg sarg; 1529 struct nfs4_ntov_table ntov; 1530 struct statvfs64 sb; 1531 nfsstat4 status; 1532 struct sockaddr *ca; 1533 char *name = NULL; 1534 char *lname = NULL; 1535 1536 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs, 1537 CREATE4args *, args); 1538 1539 resp->attrset = 0; 1540 1541 if (dvp == NULL) { 1542 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1543 goto out; 1544 } 1545 1546 /* 1547 * If there is an unshared filesystem mounted on this vnode, 1548 * do not allow to create an object in this directory. 
1549 */ 1550 if (vn_ismntpt(dvp)) { 1551 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1552 goto out; 1553 } 1554 1555 /* Verify that type is correct */ 1556 switch (args->type) { 1557 case NF4LNK: 1558 case NF4BLK: 1559 case NF4CHR: 1560 case NF4SOCK: 1561 case NF4FIFO: 1562 case NF4DIR: 1563 break; 1564 default: 1565 *cs->statusp = resp->status = NFS4ERR_BADTYPE; 1566 goto out; 1567 }; 1568 1569 if (cs->access == CS_ACCESS_DENIED) { 1570 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1571 goto out; 1572 } 1573 if (dvp->v_type != VDIR) { 1574 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 1575 goto out; 1576 } 1577 if (!utf8_dir_verify(&args->objname)) { 1578 *cs->statusp = resp->status = NFS4ERR_INVAL; 1579 goto out; 1580 } 1581 1582 if (rdonly4(cs->exi, cs->vp, req)) { 1583 *cs->statusp = resp->status = NFS4ERR_ROFS; 1584 goto out; 1585 } 1586 1587 /* 1588 * Name of newly created object 1589 */ 1590 nm = utf8_to_fn(&args->objname, &len, NULL); 1591 if (nm == NULL) { 1592 *cs->statusp = resp->status = NFS4ERR_INVAL; 1593 goto out; 1594 } 1595 1596 if (len > MAXNAMELEN) { 1597 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1598 kmem_free(nm, len); 1599 goto out; 1600 } 1601 1602 /* If necessary, convert to UTF-8 for poorly behaved clients */ 1603 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1604 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 1605 MAXPATHLEN + 1); 1606 1607 if (name == NULL) { 1608 *cs->statusp = resp->status = NFS4ERR_INVAL; 1609 kmem_free(nm, len); 1610 goto out; 1611 } 1612 1613 resp->attrset = 0; 1614 1615 sarg.sbp = &sb; 1616 nfs4_ntov_table_init(&ntov); 1617 1618 status = do_rfs4_set_attrs(&resp->attrset, 1619 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT); 1620 1621 if (sarg.vap->va_mask == 0 && status == NFS4_OK) 1622 status = NFS4ERR_INVAL; 1623 1624 if (status != NFS4_OK) { 1625 *cs->statusp = resp->status = status; 1626 kmem_free(nm, len); 1627 nfs4_ntov_table_free(&ntov, &sarg); 1628 resp->attrset 
= 0; 1629 goto out; 1630 } 1631 1632 /* Get "before" change value */ 1633 bva.va_mask = AT_CTIME|AT_SEQ; 1634 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL); 1635 if (error) { 1636 *cs->statusp = resp->status = puterrno4(error); 1637 kmem_free(nm, len); 1638 nfs4_ntov_table_free(&ntov, &sarg); 1639 resp->attrset = 0; 1640 goto out; 1641 } 1642 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime) 1643 1644 vap = sarg.vap; 1645 1646 /* 1647 * Set default initial values for attributes when not specified 1648 * in createattrs. 1649 */ 1650 if ((vap->va_mask & AT_UID) == 0) { 1651 vap->va_uid = crgetuid(cr); 1652 vap->va_mask |= AT_UID; 1653 } 1654 if ((vap->va_mask & AT_GID) == 0) { 1655 vap->va_gid = crgetgid(cr); 1656 vap->va_mask |= AT_GID; 1657 } 1658 1659 vap->va_mask |= AT_TYPE; 1660 switch (args->type) { 1661 case NF4DIR: 1662 vap->va_type = VDIR; 1663 if ((vap->va_mask & AT_MODE) == 0) { 1664 vap->va_mode = 0700; /* default: owner rwx only */ 1665 vap->va_mask |= AT_MODE; 1666 } 1667 error = VOP_MKDIR(dvp, nm, vap, &vp, cr, NULL, 0, NULL); 1668 if (error) 1669 break; 1670 1671 /* 1672 * Get the initial "after" sequence number, if it fails, 1673 * set to zero 1674 */ 1675 iva.va_mask = AT_SEQ; 1676 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) 1677 iva.va_seq = 0; 1678 break; 1679 case NF4LNK: 1680 vap->va_type = VLNK; 1681 if ((vap->va_mask & AT_MODE) == 0) { 1682 vap->va_mode = 0700; /* default: owner rwx only */ 1683 vap->va_mask |= AT_MODE; 1684 } 1685 1686 /* 1687 * symlink names must be treated as data 1688 */ 1689 lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL); 1690 1691 if (lnm == NULL) { 1692 *cs->statusp = resp->status = NFS4ERR_INVAL; 1693 if (name != nm) 1694 kmem_free(name, MAXPATHLEN + 1); 1695 kmem_free(nm, len); 1696 nfs4_ntov_table_free(&ntov, &sarg); 1697 resp->attrset = 0; 1698 goto out; 1699 } 1700 1701 if (llen > MAXPATHLEN) { 1702 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1703 if (name != nm) 1704 kmem_free(name, 
MAXPATHLEN + 1); 1705 kmem_free(nm, len); 1706 kmem_free(lnm, llen); 1707 nfs4_ntov_table_free(&ntov, &sarg); 1708 resp->attrset = 0; 1709 goto out; 1710 } 1711 1712 lname = nfscmd_convname(ca, cs->exi, lnm, 1713 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 1714 1715 if (lname == NULL) { 1716 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 1717 if (name != nm) 1718 kmem_free(name, MAXPATHLEN + 1); 1719 kmem_free(nm, len); 1720 kmem_free(lnm, llen); 1721 nfs4_ntov_table_free(&ntov, &sarg); 1722 resp->attrset = 0; 1723 goto out; 1724 } 1725 1726 error = VOP_SYMLINK(dvp, nm, vap, lnm, cr, NULL, 0); 1727 if (lname != lnm) 1728 kmem_free(lname, MAXPATHLEN + 1); 1729 if (lnm != NULL) 1730 kmem_free(lnm, llen); 1731 if (error) 1732 break; 1733 1734 /* 1735 * Get the initial "after" sequence number, if it fails, 1736 * set to zero 1737 */ 1738 iva.va_mask = AT_SEQ; 1739 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) 1740 iva.va_seq = 0; 1741 1742 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, 1743 NULL, NULL, NULL); 1744 if (error) 1745 break; 1746 1747 /* 1748 * va_seq is not safe over VOP calls, check it again 1749 * if it has changed zero out iva to force atomic = FALSE. 1750 */ 1751 iva2.va_mask = AT_SEQ; 1752 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) || 1753 iva2.va_seq != iva.va_seq) 1754 iva.va_seq = 0; 1755 break; 1756 default: 1757 /* 1758 * probably a special file. 
1759 */ 1760 if ((vap->va_mask & AT_MODE) == 0) { 1761 vap->va_mode = 0600; /* default: owner rw only */ 1762 vap->va_mask |= AT_MODE; 1763 } 1764 syncval = FNODSYNC; 1765 /* 1766 * We know this will only generate one VOP call 1767 */ 1768 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm); 1769 1770 if (vp == NULL) { 1771 if (name != nm) 1772 kmem_free(name, MAXPATHLEN + 1); 1773 kmem_free(nm, len); 1774 nfs4_ntov_table_free(&ntov, &sarg); 1775 resp->attrset = 0; 1776 goto out; 1777 } 1778 1779 /* 1780 * Get the initial "after" sequence number, if it fails, 1781 * set to zero 1782 */ 1783 iva.va_mask = AT_SEQ; 1784 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) 1785 iva.va_seq = 0; 1786 1787 break; 1788 } 1789 if (name != nm) 1790 kmem_free(name, MAXPATHLEN + 1); 1791 kmem_free(nm, len); 1792 1793 if (error) { 1794 *cs->statusp = resp->status = puterrno4(error); 1795 } 1796 1797 /* 1798 * Force modified data and metadata out to stable storage. 1799 */ 1800 (void) VOP_FSYNC(dvp, 0, cr, NULL); 1801 1802 if (resp->status != NFS4_OK) { 1803 if (vp != NULL) 1804 VN_RELE(vp); 1805 nfs4_ntov_table_free(&ntov, &sarg); 1806 resp->attrset = 0; 1807 goto out; 1808 } 1809 1810 /* 1811 * Finish setup of cinfo response, "before" value already set. 1812 * Get "after" change value, if it fails, simply return the 1813 * before value. 1814 */ 1815 ava.va_mask = AT_CTIME|AT_SEQ; 1816 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) { 1817 ava.va_ctime = bva.va_ctime; 1818 ava.va_seq = 0; 1819 } 1820 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime); 1821 1822 /* 1823 * True verification that object was created with correct 1824 * attrs is impossible. The attrs could have been changed 1825 * immediately after object creation. If attributes did 1826 * not verify, the only recourse for the server is to 1827 * destroy the object. Maybe if some attrs (like gid) 1828 * are set incorrectly, the object should be destroyed; 1829 * however, seems bad as a default policy. 
Do we really 1830 * want to destroy an object over one of the times not 1831 * verifying correctly? For these reasons, the server 1832 * currently sets bits in attrset for createattrs 1833 * that were set; however, no verification is done. 1834 * 1835 * vmask_to_nmask accounts for vattr bits set on create 1836 * [do_rfs4_set_attrs() only sets resp bits for 1837 * non-vattr/vfs bits.] 1838 * Mask off any bits set by default so as not to return 1839 * more attrset bits than were requested in createattrs 1840 */ 1841 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset); 1842 resp->attrset &= args->createattrs.attrmask; 1843 nfs4_ntov_table_free(&ntov, &sarg); 1844 1845 error = makefh4(&cs->fh, vp, cs->exi); 1846 if (error) { 1847 *cs->statusp = resp->status = puterrno4(error); 1848 } 1849 1850 /* 1851 * The cinfo.atomic = TRUE only if we got no errors, we have 1852 * non-zero va_seq's, and it has incremented by exactly one 1853 * during the creation and it didn't change during the VOP_LOOKUP 1854 * or VOP_FSYNC. 1855 */ 1856 if (!error && bva.va_seq && iva.va_seq && ava.va_seq && 1857 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq) 1858 resp->cinfo.atomic = TRUE; 1859 else 1860 resp->cinfo.atomic = FALSE; 1861 1862 /* 1863 * Force modified metadata out to stable storage. 
1864 * 1865 * if a underlying vp exists, pass it to VOP_FSYNC 1866 */ 1867 if (VOP_REALVP(vp, &realvp, NULL) == 0) 1868 (void) VOP_FSYNC(realvp, syncval, cr, NULL); 1869 else 1870 (void) VOP_FSYNC(vp, syncval, cr, NULL); 1871 1872 if (resp->status != NFS4_OK) { 1873 VN_RELE(vp); 1874 goto out; 1875 } 1876 if (cs->vp) 1877 VN_RELE(cs->vp); 1878 1879 cs->vp = vp; 1880 *cs->statusp = resp->status = NFS4_OK; 1881 out: 1882 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs, 1883 CREATE4res *, resp); 1884 } 1885 1886 /*ARGSUSED*/ 1887 static void 1888 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1889 struct compound_state *cs) 1890 { 1891 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs, 1892 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge); 1893 1894 rfs4_op_inval(argop, resop, req, cs); 1895 1896 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs, 1897 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge); 1898 } 1899 1900 /*ARGSUSED*/ 1901 static void 1902 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1903 struct compound_state *cs) 1904 { 1905 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn; 1906 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn; 1907 rfs4_deleg_state_t *dsp; 1908 nfsstat4 status; 1909 1910 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs, 1911 DELEGRETURN4args *, args); 1912 1913 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp); 1914 resp->status = *cs->statusp = status; 1915 if (status != NFS4_OK) 1916 goto out; 1917 1918 /* Ensure specified filehandle matches */ 1919 if (cs->vp != dsp->finfo->vp) { 1920 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID; 1921 } else 1922 rfs4_return_deleg(dsp, FALSE); 1923 1924 rfs4_update_lease(dsp->client); 1925 1926 rfs4_deleg_state_rele(dsp); 1927 out: 1928 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs, 1929 DELEGRETURN4res *, resp); 
1930 } 1931 1932 /* 1933 * Check to see if a given "flavor" is an explicitly shared flavor. 1934 * The assumption of this routine is the "flavor" is already a valid 1935 * flavor in the secinfo list of "exi". 1936 * 1937 * e.g. 1938 * # share -o sec=flavor1 /export 1939 * # share -o sec=flavor2 /export/home 1940 * 1941 * flavor2 is not an explicitly shared flavor for /export, 1942 * however it is in the secinfo list for /export thru the 1943 * server namespace setup. 1944 */ 1945 int 1946 is_exported_sec(int flavor, struct exportinfo *exi) 1947 { 1948 int i; 1949 struct secinfo *sp; 1950 1951 sp = exi->exi_export.ex_secinfo; 1952 for (i = 0; i < exi->exi_export.ex_seccnt; i++) { 1953 if (flavor == sp[i].s_secinfo.sc_nfsnum || 1954 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) { 1955 return (SEC_REF_EXPORTED(&sp[i])); 1956 } 1957 } 1958 1959 /* Should not reach this point based on the assumption */ 1960 return (0); 1961 } 1962 1963 /* 1964 * Check if the security flavor used in the request matches what is 1965 * required at the export point or at the root pseudo node (exi_root). 1966 * 1967 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise. 1968 * 1969 */ 1970 static int 1971 secinfo_match_or_authnone(struct compound_state *cs) 1972 { 1973 int i; 1974 struct secinfo *sp; 1975 1976 /* 1977 * Check cs->nfsflavor (from the request) against 1978 * the current export data in cs->exi. 1979 */ 1980 sp = cs->exi->exi_export.ex_secinfo; 1981 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) { 1982 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum || 1983 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) 1984 return (1); 1985 } 1986 1987 return (0); 1988 } 1989 1990 /* 1991 * Check the access authority for the client and return the correct error. 
1992 */ 1993 nfsstat4 1994 call_checkauth4(struct compound_state *cs, struct svc_req *req) 1995 { 1996 int authres; 1997 1998 /* 1999 * First, check if the security flavor used in the request 2000 * are among the flavors set in the server namespace. 2001 */ 2002 if (!secinfo_match_or_authnone(cs)) { 2003 *cs->statusp = NFS4ERR_WRONGSEC; 2004 return (*cs->statusp); 2005 } 2006 2007 authres = checkauth4(cs, req); 2008 2009 if (authres > 0) { 2010 *cs->statusp = NFS4_OK; 2011 if (! (cs->access & CS_ACCESS_LIMITED)) 2012 cs->access = CS_ACCESS_OK; 2013 } else if (authres == 0) { 2014 *cs->statusp = NFS4ERR_ACCESS; 2015 } else if (authres == -2) { 2016 *cs->statusp = NFS4ERR_WRONGSEC; 2017 } else { 2018 *cs->statusp = NFS4ERR_DELAY; 2019 } 2020 return (*cs->statusp); 2021 } 2022 2023 /* 2024 * bitmap4_to_attrmask is called by getattr and readdir. 2025 * It sets up the vattr mask and determines whether vfsstat call is needed 2026 * based on the input bitmap. 2027 * Returns nfsv4 status. 2028 */ 2029 static nfsstat4 2030 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp) 2031 { 2032 int i; 2033 uint_t va_mask; 2034 struct statvfs64 *sbp = sargp->sbp; 2035 2036 sargp->sbp = NULL; 2037 sargp->flag = 0; 2038 sargp->rdattr_error = NFS4_OK; 2039 sargp->mntdfid_set = FALSE; 2040 if (sargp->cs->vp) 2041 sargp->xattr = get_fh4_flag(&sargp->cs->fh, 2042 FH4_ATTRDIR | FH4_NAMEDATTR); 2043 else 2044 sargp->xattr = 0; 2045 2046 /* 2047 * Set rdattr_error_req to true if return error per 2048 * failed entry rather than fail the readdir. 
2049 */ 2050 if (breq & FATTR4_RDATTR_ERROR_MASK) 2051 sargp->rdattr_error_req = 1; 2052 else 2053 sargp->rdattr_error_req = 0; 2054 2055 /* 2056 * generate the va_mask 2057 * Handle the easy cases first 2058 */ 2059 switch (breq) { 2060 case NFS4_NTOV_ATTR_MASK: 2061 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK; 2062 return (NFS4_OK); 2063 2064 case NFS4_FS_ATTR_MASK: 2065 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK; 2066 sargp->sbp = sbp; 2067 return (NFS4_OK); 2068 2069 case NFS4_NTOV_ATTR_CACHE_MASK: 2070 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK; 2071 return (NFS4_OK); 2072 2073 case FATTR4_LEASE_TIME_MASK: 2074 sargp->vap->va_mask = 0; 2075 return (NFS4_OK); 2076 2077 default: 2078 va_mask = 0; 2079 for (i = 0; i < nfs4_ntov_map_size; i++) { 2080 if ((breq & nfs4_ntov_map[i].fbit) && 2081 nfs4_ntov_map[i].vbit) 2082 va_mask |= nfs4_ntov_map[i].vbit; 2083 } 2084 2085 /* 2086 * Check is vfsstat is needed 2087 */ 2088 if (breq & NFS4_FS_ATTR_MASK) 2089 sargp->sbp = sbp; 2090 2091 sargp->vap->va_mask = va_mask; 2092 return (NFS4_OK); 2093 } 2094 /* NOTREACHED */ 2095 } 2096 2097 /* 2098 * bitmap4_get_sysattrs is called by getattr and readdir. 2099 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs. 2100 * Returns nfsv4 status. 
2101 */ 2102 static nfsstat4 2103 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp) 2104 { 2105 int error; 2106 struct compound_state *cs = sargp->cs; 2107 vnode_t *vp = cs->vp; 2108 2109 if (sargp->sbp != NULL) { 2110 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) { 2111 sargp->sbp = NULL; /* to identify error */ 2112 return (puterrno4(error)); 2113 } 2114 } 2115 2116 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr)); 2117 } 2118 2119 static void 2120 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp) 2121 { 2122 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size, 2123 KM_SLEEP); 2124 ntovp->attrcnt = 0; 2125 ntovp->vfsstat = FALSE; 2126 } 2127 2128 static void 2129 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp, 2130 struct nfs4_svgetit_arg *sargp) 2131 { 2132 int i; 2133 union nfs4_attr_u *na; 2134 uint8_t *amap; 2135 2136 /* 2137 * XXX Should do the same checks for whether the bit is set 2138 */ 2139 for (i = 0, na = ntovp->na, amap = ntovp->amap; 2140 i < ntovp->attrcnt; i++, na++, amap++) { 2141 (void) (*nfs4_ntov_map[*amap].sv_getit)( 2142 NFS4ATTR_FREEIT, sargp, na); 2143 } 2144 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) { 2145 /* 2146 * xdr_free for getattr will be done later 2147 */ 2148 for (i = 0, na = ntovp->na, amap = ntovp->amap; 2149 i < ntovp->attrcnt; i++, na++, amap++) { 2150 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na); 2151 } 2152 } 2153 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size); 2154 } 2155 2156 /* 2157 * do_rfs4_op_getattr gets the system attrs and converts into fattr4. 
2158 */ 2159 static nfsstat4 2160 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp, 2161 struct nfs4_svgetit_arg *sargp) 2162 { 2163 int error = 0; 2164 int i, k; 2165 struct nfs4_ntov_table ntov; 2166 XDR xdr; 2167 ulong_t xdr_size; 2168 char *xdr_attrs; 2169 nfsstat4 status = NFS4_OK; 2170 nfsstat4 prev_rdattr_error = sargp->rdattr_error; 2171 union nfs4_attr_u *na; 2172 uint8_t *amap; 2173 2174 sargp->op = NFS4ATTR_GETIT; 2175 sargp->flag = 0; 2176 2177 fattrp->attrmask = 0; 2178 /* if no bits requested, then return empty fattr4 */ 2179 if (breq == 0) { 2180 fattrp->attrlist4_len = 0; 2181 fattrp->attrlist4 = NULL; 2182 return (NFS4_OK); 2183 } 2184 2185 /* 2186 * return NFS4ERR_INVAL when client requests write-only attrs 2187 */ 2188 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK)) 2189 return (NFS4ERR_INVAL); 2190 2191 nfs4_ntov_table_init(&ntov); 2192 na = ntov.na; 2193 amap = ntov.amap; 2194 2195 /* 2196 * Now loop to get or verify the attrs 2197 */ 2198 for (i = 0; i < nfs4_ntov_map_size; i++) { 2199 if (breq & nfs4_ntov_map[i].fbit) { 2200 if ((*nfs4_ntov_map[i].sv_getit)( 2201 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) { 2202 2203 error = (*nfs4_ntov_map[i].sv_getit)( 2204 NFS4ATTR_GETIT, sargp, na); 2205 2206 /* 2207 * Possible error values: 2208 * >0 if sv_getit failed to 2209 * get the attr; 0 if succeeded; 2210 * <0 if rdattr_error and the 2211 * attribute cannot be returned. 2212 */ 2213 if (error && !(sargp->rdattr_error_req)) 2214 goto done; 2215 /* 2216 * If error then just for entry 2217 */ 2218 if (error == 0) { 2219 fattrp->attrmask |= 2220 nfs4_ntov_map[i].fbit; 2221 *amap++ = 2222 (uint8_t)nfs4_ntov_map[i].nval; 2223 na++; 2224 (ntov.attrcnt)++; 2225 } else if ((error > 0) && 2226 (sargp->rdattr_error == NFS4_OK)) { 2227 sargp->rdattr_error = puterrno4(error); 2228 } 2229 error = 0; 2230 } 2231 } 2232 } 2233 2234 /* 2235 * If rdattr_error was set after the return value for it was assigned, 2236 * update it. 
2237 */ 2238 if (prev_rdattr_error != sargp->rdattr_error) { 2239 na = ntov.na; 2240 amap = ntov.amap; 2241 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) { 2242 k = *amap; 2243 if (k < FATTR4_RDATTR_ERROR) { 2244 continue; 2245 } 2246 if ((k == FATTR4_RDATTR_ERROR) && 2247 ((*nfs4_ntov_map[k].sv_getit)( 2248 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) { 2249 2250 (void) (*nfs4_ntov_map[k].sv_getit)( 2251 NFS4ATTR_GETIT, sargp, na); 2252 } 2253 break; 2254 } 2255 } 2256 2257 xdr_size = 0; 2258 na = ntov.na; 2259 amap = ntov.amap; 2260 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) { 2261 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na); 2262 } 2263 2264 fattrp->attrlist4_len = xdr_size; 2265 if (xdr_size) { 2266 /* freed by rfs4_op_getattr_free() */ 2267 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP); 2268 2269 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE); 2270 2271 na = ntov.na; 2272 amap = ntov.amap; 2273 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) { 2274 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) { 2275 DTRACE_PROBE1(nfss__e__getattr4_encfail, 2276 int, *amap); 2277 status = NFS4ERR_SERVERFAULT; 2278 break; 2279 } 2280 } 2281 /* xdrmem_destroy(&xdrs); */ /* NO-OP */ 2282 } else { 2283 fattrp->attrlist4 = NULL; 2284 } 2285 done: 2286 2287 nfs4_ntov_table_free(&ntov, sargp); 2288 2289 if (error != 0) 2290 status = puterrno4(error); 2291 2292 return (status); 2293 } 2294 2295 /* ARGSUSED */ 2296 static void 2297 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2298 struct compound_state *cs) 2299 { 2300 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr; 2301 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr; 2302 struct nfs4_svgetit_arg sarg; 2303 struct statvfs64 sb; 2304 nfsstat4 status; 2305 2306 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs, 2307 GETATTR4args *, args); 2308 2309 if (cs->vp == NULL) { 2310 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2311 goto out; 2312 } 
2313 2314 if (cs->access == CS_ACCESS_DENIED) { 2315 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2316 goto out; 2317 } 2318 2319 sarg.sbp = &sb; 2320 sarg.cs = cs; 2321 2322 status = bitmap4_to_attrmask(args->attr_request, &sarg); 2323 if (status == NFS4_OK) { 2324 status = bitmap4_get_sysattrs(&sarg); 2325 if (status == NFS4_OK) 2326 status = do_rfs4_op_getattr(args->attr_request, 2327 &resp->obj_attributes, &sarg); 2328 } 2329 *cs->statusp = resp->status = status; 2330 out: 2331 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs, 2332 GETATTR4res *, resp); 2333 } 2334 2335 static void 2336 rfs4_op_getattr_free(nfs_resop4 *resop) 2337 { 2338 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr; 2339 2340 nfs4_fattr4_free(&resp->obj_attributes); 2341 } 2342 2343 /* ARGSUSED */ 2344 static void 2345 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2346 struct compound_state *cs) 2347 { 2348 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh; 2349 2350 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs); 2351 2352 if (cs->vp == NULL) { 2353 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2354 goto out; 2355 } 2356 if (cs->access == CS_ACCESS_DENIED) { 2357 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2358 goto out; 2359 } 2360 2361 resp->object.nfs_fh4_val = 2362 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP); 2363 nfs_fh4_copy(&cs->fh, &resp->object); 2364 *cs->statusp = resp->status = NFS4_OK; 2365 out: 2366 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs, 2367 GETFH4res *, resp); 2368 } 2369 2370 static void 2371 rfs4_op_getfh_free(nfs_resop4 *resop) 2372 { 2373 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh; 2374 2375 if (resp->status == NFS4_OK && 2376 resp->object.nfs_fh4_val != NULL) { 2377 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len); 2378 resp->object.nfs_fh4_val = NULL; 2379 resp->object.nfs_fh4_len = 0; 2380 } 2381 } 2382 2383 /* 2384 * illegal: args: void 2385 * res : status 
(NFS4ERR_OP_ILLEGAL) 2386 */ 2387 /* ARGSUSED */ 2388 static void 2389 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop, 2390 struct svc_req *req, struct compound_state *cs) 2391 { 2392 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal; 2393 2394 resop->resop = OP_ILLEGAL; 2395 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL; 2396 } 2397 2398 /* 2399 * link: args: SAVED_FH: file, CURRENT_FH: target directory 2400 * res: status. If success - CURRENT_FH unchanged, return change_info 2401 */ 2402 /* ARGSUSED */ 2403 static void 2404 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2405 struct compound_state *cs) 2406 { 2407 LINK4args *args = &argop->nfs_argop4_u.oplink; 2408 LINK4res *resp = &resop->nfs_resop4_u.oplink; 2409 int error; 2410 vnode_t *vp; 2411 vnode_t *dvp; 2412 struct vattr bdva, idva, adva; 2413 char *nm; 2414 uint_t len; 2415 struct sockaddr *ca; 2416 char *name = NULL; 2417 2418 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs, 2419 LINK4args *, args); 2420 2421 /* SAVED_FH: source object */ 2422 vp = cs->saved_vp; 2423 if (vp == NULL) { 2424 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2425 goto out; 2426 } 2427 2428 /* CURRENT_FH: target directory */ 2429 dvp = cs->vp; 2430 if (dvp == NULL) { 2431 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2432 goto out; 2433 } 2434 2435 /* 2436 * If there is a non-shared filesystem mounted on this vnode, 2437 * do not allow to link any file in this directory. 
2438 */ 2439 if (vn_ismntpt(dvp)) { 2440 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2441 goto out; 2442 } 2443 2444 if (cs->access == CS_ACCESS_DENIED) { 2445 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2446 goto out; 2447 } 2448 2449 /* Check source object's type validity */ 2450 if (vp->v_type == VDIR) { 2451 *cs->statusp = resp->status = NFS4ERR_ISDIR; 2452 goto out; 2453 } 2454 2455 /* Check target directory's type */ 2456 if (dvp->v_type != VDIR) { 2457 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 2458 goto out; 2459 } 2460 2461 if (cs->saved_exi != cs->exi) { 2462 *cs->statusp = resp->status = NFS4ERR_XDEV; 2463 goto out; 2464 } 2465 2466 if (!utf8_dir_verify(&args->newname)) { 2467 *cs->statusp = resp->status = NFS4ERR_INVAL; 2468 goto out; 2469 } 2470 2471 nm = utf8_to_fn(&args->newname, &len, NULL); 2472 if (nm == NULL) { 2473 *cs->statusp = resp->status = NFS4ERR_INVAL; 2474 goto out; 2475 } 2476 2477 if (len > MAXNAMELEN) { 2478 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 2479 kmem_free(nm, len); 2480 goto out; 2481 } 2482 2483 if (rdonly4(cs->exi, cs->vp, req)) { 2484 *cs->statusp = resp->status = NFS4ERR_ROFS; 2485 kmem_free(nm, len); 2486 goto out; 2487 } 2488 2489 /* Get "before" change value */ 2490 bdva.va_mask = AT_CTIME|AT_SEQ; 2491 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL); 2492 if (error) { 2493 *cs->statusp = resp->status = puterrno4(error); 2494 kmem_free(nm, len); 2495 goto out; 2496 } 2497 2498 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2499 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 2500 MAXPATHLEN + 1); 2501 2502 if (name == NULL) { 2503 *cs->statusp = resp->status = NFS4ERR_INVAL; 2504 kmem_free(nm, len); 2505 goto out; 2506 } 2507 2508 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime) 2509 2510 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0); 2511 2512 if (nm != name) 2513 kmem_free(name, MAXPATHLEN + 1); 2514 kmem_free(nm, len); 2515 2516 /* 2517 * Get the initial "after" 
sequence number, if it fails, set to zero 2518 */ 2519 idva.va_mask = AT_SEQ; 2520 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL)) 2521 idva.va_seq = 0; 2522 2523 /* 2524 * Force modified data and metadata out to stable storage. 2525 */ 2526 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL); 2527 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 2528 2529 if (error) { 2530 *cs->statusp = resp->status = puterrno4(error); 2531 goto out; 2532 } 2533 2534 /* 2535 * Get "after" change value, if it fails, simply return the 2536 * before value. 2537 */ 2538 adva.va_mask = AT_CTIME|AT_SEQ; 2539 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) { 2540 adva.va_ctime = bdva.va_ctime; 2541 adva.va_seq = 0; 2542 } 2543 2544 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime) 2545 2546 /* 2547 * The cinfo.atomic = TRUE only if we have 2548 * non-zero va_seq's, and it has incremented by exactly one 2549 * during the VOP_LINK and it didn't change during the VOP_FSYNC. 2550 */ 2551 if (bdva.va_seq && idva.va_seq && adva.va_seq && 2552 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq) 2553 resp->cinfo.atomic = TRUE; 2554 else 2555 resp->cinfo.atomic = FALSE; 2556 2557 *cs->statusp = resp->status = NFS4_OK; 2558 out: 2559 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs, 2560 LINK4res *, resp); 2561 } 2562 2563 /* 2564 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work. 2565 */ 2566 2567 /* ARGSUSED */ 2568 static nfsstat4 2569 do_rfs4_op_lookup(char *nm, uint_t buflen, struct svc_req *req, 2570 struct compound_state *cs) 2571 { 2572 int error; 2573 int different_export = 0; 2574 vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL; 2575 struct exportinfo *exi = NULL, *pre_exi = NULL; 2576 nfsstat4 stat; 2577 fid_t fid; 2578 int attrdir, dotdot, walk; 2579 bool_t is_newvp = FALSE; 2580 2581 if (cs->vp->v_flag & V_XATTRDIR) { 2582 attrdir = 1; 2583 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR)); 2584 } else { 2585 attrdir = 0; 2586 ASSERT(! 
get_fh4_flag(&cs->fh, FH4_ATTRDIR)); 2587 } 2588 2589 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0'); 2590 2591 /* 2592 * If dotdotting, then need to check whether it's 2593 * above the root of a filesystem, or above an 2594 * export point. 2595 */ 2596 if (dotdot) { 2597 2598 /* 2599 * If dotdotting at the root of a filesystem, then 2600 * need to traverse back to the mounted-on filesystem 2601 * and do the dotdot lookup there. 2602 */ 2603 if (cs->vp->v_flag & VROOT) { 2604 2605 /* 2606 * If at the system root, then can 2607 * go up no further. 2608 */ 2609 if (VN_CMP(cs->vp, rootdir)) 2610 return (puterrno4(ENOENT)); 2611 2612 /* 2613 * Traverse back to the mounted-on filesystem 2614 */ 2615 cs->vp = untraverse(cs->vp); 2616 2617 /* 2618 * Set the different_export flag so we remember 2619 * to pick up a new exportinfo entry for 2620 * this new filesystem. 2621 */ 2622 different_export = 1; 2623 } else { 2624 2625 /* 2626 * If dotdotting above an export point then set 2627 * the different_export to get new export info. 2628 */ 2629 different_export = nfs_exported(cs->exi, cs->vp); 2630 } 2631 } 2632 2633 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr, 2634 NULL, NULL, NULL); 2635 if (error) 2636 return (puterrno4(error)); 2637 2638 /* 2639 * If the vnode is in a pseudo filesystem, check whether it is visible. 2640 * 2641 * XXX if the vnode is a symlink and it is not visible in 2642 * a pseudo filesystem, return ENOENT (not following symlink). 2643 * V4 client can not mount such symlink. This is a regression 2644 * from V2/V3. 2645 * 2646 * In the same exported filesystem, if the security flavor used 2647 * is not an explicitly shared flavor, limit the view to the visible 2648 * list entries only. This is not a WRONGSEC case because it's already 2649 * checked via PUTROOTFH/PUTPUBFH or PUTFH. 2650 */ 2651 if (!different_export && 2652 (PSEUDO(cs->exi) || ! 
is_exported_sec(cs->nfsflavor, cs->exi) || 2653 cs->access & CS_ACCESS_LIMITED)) { 2654 if (! nfs_visible(cs->exi, vp, &different_export)) { 2655 VN_RELE(vp); 2656 return (puterrno4(ENOENT)); 2657 } 2658 } 2659 2660 /* 2661 * If it's a mountpoint, then traverse it. 2662 */ 2663 if (vn_ismntpt(vp)) { 2664 pre_exi = cs->exi; /* save pre-traversed exportinfo */ 2665 pre_tvp = vp; /* save pre-traversed vnode */ 2666 2667 /* 2668 * hold pre_tvp to counteract rele by traverse. We will 2669 * need pre_tvp below if checkexport4 fails 2670 */ 2671 VN_HOLD(pre_tvp); 2672 tvp = vp; 2673 if ((error = traverse(&tvp)) != 0) { 2674 VN_RELE(vp); 2675 VN_RELE(pre_tvp); 2676 return (puterrno4(error)); 2677 } 2678 vp = tvp; 2679 different_export = 1; 2680 } else if (vp->v_vfsp != cs->vp->v_vfsp) { 2681 /* 2682 * The vfsp comparison is to handle the case where 2683 * a LOFS mount is shared. lo_lookup traverses mount points, 2684 * and NFS is unaware of local fs transistions because 2685 * v_vfsmountedhere isn't set. For this special LOFS case, 2686 * the dir and the obj returned by lookup will have different 2687 * vfs ptrs. 2688 */ 2689 different_export = 1; 2690 } 2691 2692 if (different_export) { 2693 2694 bzero(&fid, sizeof (fid)); 2695 fid.fid_len = MAXFIDSZ; 2696 error = vop_fid_pseudo(vp, &fid); 2697 if (error) { 2698 VN_RELE(vp); 2699 if (pre_tvp) 2700 VN_RELE(pre_tvp); 2701 return (puterrno4(error)); 2702 } 2703 2704 if (dotdot) 2705 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE); 2706 else 2707 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp); 2708 2709 if (exi == NULL) { 2710 if (pre_tvp) { 2711 /* 2712 * If this vnode is a mounted-on vnode, 2713 * but the mounted-on file system is not 2714 * exported, send back the filehandle for 2715 * the mounted-on vnode, not the root of 2716 * the mounted-on file system. 
2717 */ 2718 VN_RELE(vp); 2719 vp = pre_tvp; 2720 exi = pre_exi; 2721 } else { 2722 VN_RELE(vp); 2723 return (puterrno4(EACCES)); 2724 } 2725 } else if (pre_tvp) { 2726 /* we're done with pre_tvp now. release extra hold */ 2727 VN_RELE(pre_tvp); 2728 } 2729 2730 cs->exi = exi; 2731 2732 /* 2733 * Now we do a checkauth4. The reason is that 2734 * this client/user may not have access to the new 2735 * exported file system, and if he does, 2736 * the client/user may be mapped to a different uid. 2737 * 2738 * We start with a new cr, because the checkauth4 done 2739 * in the PUT*FH operation over wrote the cred's uid, 2740 * gid, etc, and we want the real thing before calling 2741 * checkauth4() 2742 */ 2743 crfree(cs->cr); 2744 cs->cr = crdup(cs->basecr); 2745 2746 if (cs->vp) 2747 oldvp = cs->vp; 2748 cs->vp = vp; 2749 is_newvp = TRUE; 2750 2751 stat = call_checkauth4(cs, req); 2752 if (stat != NFS4_OK) { 2753 VN_RELE(cs->vp); 2754 cs->vp = oldvp; 2755 return (stat); 2756 } 2757 } 2758 2759 /* 2760 * After various NFS checks, do a label check on the path 2761 * component. The label on this path should either be the 2762 * global zone's label or a zone's label. We are only 2763 * interested in the zone's label because exported files 2764 * in global zone is accessible (though read-only) to 2765 * clients. The exportability/visibility check is already 2766 * done before reaching this code. 2767 */ 2768 if (is_system_labeled()) { 2769 bslabel_t *clabel; 2770 2771 ASSERT(req->rq_label != NULL); 2772 clabel = req->rq_label; 2773 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *, 2774 "got client label from request(1)", struct svc_req *, req); 2775 2776 if (!blequal(&l_admin_low->tsl_label, clabel)) { 2777 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) { 2778 error = EACCES; 2779 goto err_out; 2780 } 2781 } else { 2782 /* 2783 * We grant access to admin_low label clients 2784 * only if the client is trusted, i.e. 
also 2785 * running Solaris Trusted Extension. 2786 */ 2787 struct sockaddr *ca; 2788 int addr_type; 2789 void *ipaddr; 2790 tsol_tpc_t *tp; 2791 2792 ca = (struct sockaddr *)svc_getrpccaller( 2793 req->rq_xprt)->buf; 2794 if (ca->sa_family == AF_INET) { 2795 addr_type = IPV4_VERSION; 2796 ipaddr = &((struct sockaddr_in *)ca)->sin_addr; 2797 } else if (ca->sa_family == AF_INET6) { 2798 addr_type = IPV6_VERSION; 2799 ipaddr = &((struct sockaddr_in6 *) 2800 ca)->sin6_addr; 2801 } 2802 tp = find_tpc(ipaddr, addr_type, B_FALSE); 2803 if (tp == NULL || tp->tpc_tp.tp_doi != 2804 l_admin_low->tsl_doi || tp->tpc_tp.host_type != 2805 SUN_CIPSO) { 2806 if (tp != NULL) 2807 TPC_RELE(tp); 2808 error = EACCES; 2809 goto err_out; 2810 } 2811 TPC_RELE(tp); 2812 } 2813 } 2814 2815 error = makefh4(&cs->fh, vp, cs->exi); 2816 2817 err_out: 2818 if (error) { 2819 if (is_newvp) { 2820 VN_RELE(cs->vp); 2821 cs->vp = oldvp; 2822 } else 2823 VN_RELE(vp); 2824 return (puterrno4(error)); 2825 } 2826 2827 if (!is_newvp) { 2828 if (cs->vp) 2829 VN_RELE(cs->vp); 2830 cs->vp = vp; 2831 } else if (oldvp) 2832 VN_RELE(oldvp); 2833 2834 /* 2835 * if did lookup on attrdir and didn't lookup .., set named 2836 * attr fh flag 2837 */ 2838 if (attrdir && ! 
dotdot) 2839 set_fh4_flag(&cs->fh, FH4_NAMEDATTR); 2840 2841 /* Assume false for now, open proc will set this */ 2842 cs->mandlock = FALSE; 2843 2844 return (NFS4_OK); 2845 } 2846 2847 /* ARGSUSED */ 2848 static void 2849 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2850 struct compound_state *cs) 2851 { 2852 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup; 2853 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup; 2854 char *nm; 2855 uint_t len; 2856 struct sockaddr *ca; 2857 char *name = NULL; 2858 2859 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs, 2860 LOOKUP4args *, args); 2861 2862 if (cs->vp == NULL) { 2863 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2864 goto out; 2865 } 2866 2867 if (cs->vp->v_type == VLNK) { 2868 *cs->statusp = resp->status = NFS4ERR_SYMLINK; 2869 goto out; 2870 } 2871 2872 if (cs->vp->v_type != VDIR) { 2873 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 2874 goto out; 2875 } 2876 2877 if (!utf8_dir_verify(&args->objname)) { 2878 *cs->statusp = resp->status = NFS4ERR_INVAL; 2879 goto out; 2880 } 2881 2882 nm = utf8_to_str(&args->objname, &len, NULL); 2883 if (nm == NULL) { 2884 *cs->statusp = resp->status = NFS4ERR_INVAL; 2885 goto out; 2886 } 2887 2888 if (len > MAXNAMELEN) { 2889 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 2890 kmem_free(nm, len); 2891 goto out; 2892 } 2893 2894 /* If necessary, convert to UTF-8 for illbehaved clients */ 2895 2896 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2897 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 2898 MAXPATHLEN + 1); 2899 2900 if (name == NULL) { 2901 *cs->statusp = resp->status = NFS4ERR_INVAL; 2902 kmem_free(nm, len); 2903 goto out; 2904 } 2905 2906 *cs->statusp = resp->status = do_rfs4_op_lookup(name, len, req, cs); 2907 2908 if (name != nm) 2909 kmem_free(name, MAXPATHLEN + 1); 2910 kmem_free(nm, len); 2911 2912 out: 2913 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs, 2914 LOOKUP4res 
*, resp); 2915 } 2916 2917 /* ARGSUSED */ 2918 static void 2919 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 2920 struct compound_state *cs) 2921 { 2922 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp; 2923 2924 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs); 2925 2926 if (cs->vp == NULL) { 2927 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2928 goto out; 2929 } 2930 2931 if (cs->vp->v_type != VDIR) { 2932 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 2933 goto out; 2934 } 2935 2936 *cs->statusp = resp->status = do_rfs4_op_lookup("..", 3, req, cs); 2937 2938 /* 2939 * From NFSV4 Specification, LOOKUPP should not check for 2940 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead. 2941 */ 2942 if (resp->status == NFS4ERR_WRONGSEC) { 2943 *cs->statusp = resp->status = NFS4_OK; 2944 } 2945 2946 out: 2947 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs, 2948 LOOKUPP4res *, resp); 2949 } 2950 2951 2952 /*ARGSUSED2*/ 2953 static void 2954 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2955 struct compound_state *cs) 2956 { 2957 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr; 2958 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr; 2959 vnode_t *avp = NULL; 2960 int lookup_flags = LOOKUP_XATTR, error; 2961 int exp_ro = 0; 2962 2963 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs, 2964 OPENATTR4args *, args); 2965 2966 if (cs->vp == NULL) { 2967 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2968 goto out; 2969 } 2970 2971 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 && 2972 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) { 2973 *cs->statusp = resp->status = puterrno4(ENOTSUP); 2974 goto out; 2975 } 2976 2977 /* 2978 * If file system supports passing ACE mask to VOP_ACCESS then 2979 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks 2980 */ 2981 2982 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS)) 2983 error = 
VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS, 2984 V_ACE_MASK, cs->cr, NULL); 2985 else 2986 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) && 2987 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) && 2988 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0)); 2989 2990 if (error) { 2991 *cs->statusp = resp->status = puterrno4(EACCES); 2992 goto out; 2993 } 2994 2995 /* 2996 * The CREATE_XATTR_DIR VOP flag cannot be specified if 2997 * the file system is exported read-only -- regardless of 2998 * createdir flag. Otherwise the attrdir would be created 2999 * (assuming server fs isn't mounted readonly locally). If 3000 * VOP_LOOKUP returns ENOENT in this case, the error will 3001 * be translated into EROFS. ENOSYS is mapped to ENOTSUP 3002 * because specfs has no VOP_LOOKUP op, so the macro would 3003 * return ENOSYS. EINVAL is returned by all (current) 3004 * Solaris file system implementations when any of their 3005 * restrictions are violated (xattr(dir) can't have xattrdir). 3006 * Returning NOTSUPP is more appropriate in this case 3007 * because the object will never be able to have an attrdir. 3008 */ 3009 if (args->createdir && ! 
(exp_ro = rdonly4(cs->exi, cs->vp, req))) 3010 lookup_flags |= CREATE_XATTR_DIR; 3011 3012 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr, 3013 NULL, NULL, NULL); 3014 3015 if (error) { 3016 if (error == ENOENT && args->createdir && exp_ro) 3017 *cs->statusp = resp->status = puterrno4(EROFS); 3018 else if (error == EINVAL || error == ENOSYS) 3019 *cs->statusp = resp->status = puterrno4(ENOTSUP); 3020 else 3021 *cs->statusp = resp->status = puterrno4(error); 3022 goto out; 3023 } 3024 3025 ASSERT(avp->v_flag & V_XATTRDIR); 3026 3027 error = makefh4(&cs->fh, avp, cs->exi); 3028 3029 if (error) { 3030 VN_RELE(avp); 3031 *cs->statusp = resp->status = puterrno4(error); 3032 goto out; 3033 } 3034 3035 VN_RELE(cs->vp); 3036 cs->vp = avp; 3037 3038 /* 3039 * There is no requirement for an attrdir fh flag 3040 * because the attrdir has a vnode flag to distinguish 3041 * it from regular (non-xattr) directories. The 3042 * FH4_ATTRDIR flag is set for future sanity checks. 3043 */ 3044 set_fh4_flag(&cs->fh, FH4_ATTRDIR); 3045 *cs->statusp = resp->status = NFS4_OK; 3046 3047 out: 3048 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs, 3049 OPENATTR4res *, resp); 3050 } 3051 3052 static int 3053 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred, 3054 caller_context_t *ct) 3055 { 3056 int error; 3057 int i; 3058 clock_t delaytime; 3059 3060 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay); 3061 3062 /* 3063 * Don't block on mandatory locks. If this routine returns 3064 * EAGAIN, the caller should return NFS4ERR_LOCKED. 
3065 */ 3066 uio->uio_fmode = FNONBLOCK; 3067 3068 for (i = 0; i < rfs4_maxlock_tries; i++) { 3069 3070 3071 if (direction == FREAD) { 3072 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct); 3073 error = VOP_READ(vp, uio, ioflag, cred, ct); 3074 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct); 3075 } else { 3076 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct); 3077 error = VOP_WRITE(vp, uio, ioflag, cred, ct); 3078 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct); 3079 } 3080 3081 if (error != EAGAIN) 3082 break; 3083 3084 if (i < rfs4_maxlock_tries - 1) { 3085 delay(delaytime); 3086 delaytime *= 2; 3087 } 3088 } 3089 3090 return (error); 3091 } 3092 3093 /* ARGSUSED */ 3094 static void 3095 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3096 struct compound_state *cs) 3097 { 3098 READ4args *args = &argop->nfs_argop4_u.opread; 3099 READ4res *resp = &resop->nfs_resop4_u.opread; 3100 int error; 3101 int verror; 3102 vnode_t *vp; 3103 struct vattr va; 3104 struct iovec iov; 3105 struct uio uio; 3106 u_offset_t offset; 3107 bool_t *deleg = &cs->deleg; 3108 nfsstat4 stat; 3109 int in_crit = 0; 3110 mblk_t *mp; 3111 int alloc_err = 0; 3112 caller_context_t ct; 3113 3114 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs, 3115 READ4args, args); 3116 3117 vp = cs->vp; 3118 if (vp == NULL) { 3119 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3120 goto out; 3121 } 3122 if (cs->access == CS_ACCESS_DENIED) { 3123 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3124 goto out; 3125 } 3126 3127 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE, 3128 deleg, TRUE, &ct)) != NFS4_OK) { 3129 *cs->statusp = resp->status = stat; 3130 goto out; 3131 } 3132 3133 /* 3134 * Enter the critical region before calling VOP_RWLOCK 3135 * to avoid a deadlock with write requests. 
3136 */ 3137 if (nbl_need_check(vp)) { 3138 nbl_start_crit(vp, RW_READER); 3139 in_crit = 1; 3140 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0, 3141 &ct)) { 3142 *cs->statusp = resp->status = NFS4ERR_LOCKED; 3143 goto out; 3144 } 3145 } 3146 3147 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE, 3148 deleg, TRUE, &ct)) != NFS4_OK) { 3149 *cs->statusp = resp->status = stat; 3150 goto out; 3151 } 3152 3153 va.va_mask = AT_MODE|AT_SIZE|AT_UID; 3154 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct); 3155 3156 /* 3157 * If we can't get the attributes, then we can't do the 3158 * right access checking. So, we'll fail the request. 3159 */ 3160 if (verror) { 3161 *cs->statusp = resp->status = puterrno4(verror); 3162 goto out; 3163 } 3164 3165 if (vp->v_type != VREG) { 3166 *cs->statusp = resp->status = 3167 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL); 3168 goto out; 3169 } 3170 3171 if (crgetuid(cs->cr) != va.va_uid && 3172 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) && 3173 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) { 3174 *cs->statusp = resp->status = puterrno4(error); 3175 goto out; 3176 } 3177 3178 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */ 3179 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3180 goto out; 3181 } 3182 3183 offset = args->offset; 3184 if (offset >= va.va_size) { 3185 *cs->statusp = resp->status = NFS4_OK; 3186 resp->eof = TRUE; 3187 resp->data_len = 0; 3188 resp->data_val = NULL; 3189 resp->mblk = NULL; 3190 /* RDMA */ 3191 resp->wlist = args->wlist; 3192 resp->wlist_len = resp->data_len; 3193 *cs->statusp = resp->status = NFS4_OK; 3194 goto out; 3195 } 3196 3197 if (args->count == 0) { 3198 *cs->statusp = resp->status = NFS4_OK; 3199 resp->eof = FALSE; 3200 resp->data_len = 0; 3201 resp->data_val = NULL; 3202 resp->mblk = NULL; 3203 /* RDMA */ 3204 resp->wlist = args->wlist; 3205 resp->wlist_len = resp->data_len; 3206 goto out; 3207 } 3208 3209 /* 3210 * Do not allocate memory more 
than maximum allowed 3211 * transfer size 3212 */ 3213 if (args->count > rfs4_tsize(req)) 3214 args->count = rfs4_tsize(req); 3215 3216 /* 3217 * If returning data via RDMA Write, then grab the chunk list. If we 3218 * aren't returning READ data w/RDMA_WRITE, then grab a mblk. 3219 */ 3220 if (args->wlist) { 3221 mp = NULL; 3222 (void) rdma_get_wchunk(req, &iov, args->wlist); 3223 } else { 3224 /* 3225 * mp will contain the data to be sent out in the read reply. 3226 * It will be freed after the reply has been sent. Let's 3227 * roundup the data to a BYTES_PER_XDR_UNIT multiple, so that 3228 * the call to xdrmblk_putmblk() never fails. If the first 3229 * alloc of the requested size fails, then decrease the size to 3230 * something more reasonable and wait for the allocation to 3231 * occur. 3232 */ 3233 mp = allocb(RNDUP(args->count), BPRI_MED); 3234 if (mp == NULL) { 3235 if (args->count > MAXBSIZE) 3236 args->count = MAXBSIZE; 3237 mp = allocb_wait(RNDUP(args->count), BPRI_MED, 3238 STR_NOSIG, &alloc_err); 3239 } 3240 ASSERT(mp != NULL); 3241 ASSERT(alloc_err == 0); 3242 3243 iov.iov_base = (caddr_t)mp->b_datap->db_base; 3244 iov.iov_len = args->count; 3245 } 3246 3247 uio.uio_iov = &iov; 3248 uio.uio_iovcnt = 1; 3249 uio.uio_segflg = UIO_SYSSPACE; 3250 uio.uio_extflg = UIO_COPY_CACHED; 3251 uio.uio_loffset = args->offset; 3252 uio.uio_resid = args->count; 3253 3254 error = do_io(FREAD, vp, &uio, 0, cs->cr, &ct); 3255 3256 va.va_mask = AT_SIZE; 3257 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct); 3258 3259 if (error) { 3260 freeb(mp); 3261 *cs->statusp = resp->status = puterrno4(error); 3262 goto out; 3263 } 3264 3265 *cs->statusp = resp->status = NFS4_OK; 3266 3267 ASSERT(uio.uio_resid >= 0); 3268 resp->data_len = args->count - uio.uio_resid; 3269 if (mp) { 3270 resp->data_val = (char *)mp->b_datap->db_base; 3271 } else { 3272 resp->data_val = (caddr_t)iov.iov_base; 3273 } 3274 resp->mblk = mp; 3275 3276 if (!verror && offset + resp->data_len == va.va_size) 3277 
resp->eof = TRUE; 3278 else 3279 resp->eof = FALSE; 3280 3281 if (args->wlist) { 3282 if (!rdma_setup_read_data4(args, resp)) { 3283 *cs->statusp = resp->status = NFS4ERR_INVAL; 3284 } 3285 } else { 3286 resp->wlist = NULL; 3287 } 3288 3289 out: 3290 if (in_crit) 3291 nbl_end_crit(vp); 3292 3293 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs, 3294 READ4res *, resp); 3295 } 3296 3297 static void 3298 rfs4_op_read_free(nfs_resop4 *resop) 3299 { 3300 READ4res *resp = &resop->nfs_resop4_u.opread; 3301 3302 if (resp->status == NFS4_OK && resp->mblk != NULL) { 3303 freeb(resp->mblk); 3304 resp->mblk = NULL; 3305 resp->data_val = NULL; 3306 resp->data_len = 0; 3307 } 3308 } 3309 3310 static void 3311 rfs4_op_readdir_free(nfs_resop4 * resop) 3312 { 3313 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir; 3314 3315 if (resp->status == NFS4_OK && resp->mblk != NULL) { 3316 freeb(resp->mblk); 3317 resp->mblk = NULL; 3318 resp->data_len = 0; 3319 } 3320 } 3321 3322 3323 /* ARGSUSED */ 3324 static void 3325 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 3326 struct compound_state *cs) 3327 { 3328 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh; 3329 int error; 3330 vnode_t *vp; 3331 struct exportinfo *exi, *sav_exi; 3332 nfs_fh4_fmt_t *fh_fmtp; 3333 3334 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs); 3335 3336 if (cs->vp) { 3337 VN_RELE(cs->vp); 3338 cs->vp = NULL; 3339 } 3340 3341 if (cs->cr) 3342 crfree(cs->cr); 3343 3344 cs->cr = crdup(cs->basecr); 3345 3346 vp = exi_public->exi_vp; 3347 if (vp == NULL) { 3348 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 3349 goto out; 3350 } 3351 3352 if (is_system_labeled()) { 3353 bslabel_t *clabel; 3354 3355 ASSERT(req->rq_label != NULL); 3356 clabel = req->rq_label; 3357 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *, 3358 "got client label from request(1)", 3359 struct svc_req *, req); 3360 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3361 if 
(!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) { 3362 *cs->statusp = resp->status = 3363 NFS4ERR_SERVERFAULT; 3364 return; 3365 } 3366 } 3367 } 3368 3369 error = makefh4(&cs->fh, vp, exi_public); 3370 if (error != 0) { 3371 *cs->statusp = resp->status = puterrno4(error); 3372 goto out; 3373 } 3374 sav_exi = cs->exi; 3375 if (exi_public == exi_root) { 3376 /* 3377 * No filesystem is actually shared public, so we default 3378 * to exi_root. In this case, we must check whether root 3379 * is exported. 3380 */ 3381 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val; 3382 3383 /* 3384 * if root filesystem is exported, the exportinfo struct that we 3385 * should use is what checkexport4 returns, because root_exi is 3386 * actually a mostly empty struct. 3387 */ 3388 exi = checkexport4(&fh_fmtp->fh4_fsid, 3389 (fid_t *)&fh_fmtp->fh4_xlen, NULL); 3390 cs->exi = ((exi != NULL) ? exi : exi_public); 3391 } else { 3392 /* 3393 * it's a properly shared filesystem 3394 */ 3395 cs->exi = exi_public; 3396 } 3397 3398 VN_HOLD(vp); 3399 cs->vp = vp; 3400 3401 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 3402 VN_RELE(cs->vp); 3403 cs->vp = NULL; 3404 cs->exi = sav_exi; 3405 goto out; 3406 } 3407 3408 *cs->statusp = resp->status = NFS4_OK; 3409 out: 3410 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs, 3411 PUTPUBFH4res *, resp); 3412 } 3413 3414 /* 3415 * XXX - issue with put*fh operations. Suppose /export/home is exported. 3416 * Suppose an NFS client goes to mount /export/home/joe. If /export, home, 3417 * or joe have restrictive search permissions, then we shouldn't let 3418 * the client get a file handle. This is easy to enforce. However, we 3419 * don't know what security flavor should be used until we resolve the 3420 * path name. Another complication is uid mapping. If root is 3421 * the user, then it will be mapped to the anonymous user by default, 3422 * but we won't know that till we've resolved the path name. 
And we won't 3423 * know what the anonymous user is. 3424 * Luckily, SECINFO is specified to take a full filename. 3425 * So what we will have to in rfs4_op_lookup is check that flavor of 3426 * the target object matches that of the request, and if root was the 3427 * caller, check for the root= and anon= options, and if necessary, 3428 * repeat the lookup using the right cred_t. But that's not done yet. 3429 */ 3430 /* ARGSUSED */ 3431 static void 3432 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3433 struct compound_state *cs) 3434 { 3435 PUTFH4args *args = &argop->nfs_argop4_u.opputfh; 3436 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh; 3437 nfs_fh4_fmt_t *fh_fmtp; 3438 3439 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs, 3440 PUTFH4args *, args); 3441 3442 if (cs->vp) { 3443 VN_RELE(cs->vp); 3444 cs->vp = NULL; 3445 } 3446 3447 if (cs->cr) { 3448 crfree(cs->cr); 3449 cs->cr = NULL; 3450 } 3451 3452 3453 if (args->object.nfs_fh4_len < NFS_FH4_LEN) { 3454 *cs->statusp = resp->status = NFS4ERR_BADHANDLE; 3455 goto out; 3456 } 3457 3458 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val; 3459 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen, 3460 NULL); 3461 3462 if (cs->exi == NULL) { 3463 *cs->statusp = resp->status = NFS4ERR_STALE; 3464 goto out; 3465 } 3466 3467 cs->cr = crdup(cs->basecr); 3468 3469 ASSERT(cs->cr != NULL); 3470 3471 if (! 
(cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) { 3472 *cs->statusp = resp->status; 3473 goto out; 3474 } 3475 3476 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 3477 VN_RELE(cs->vp); 3478 cs->vp = NULL; 3479 goto out; 3480 } 3481 3482 nfs_fh4_copy(&args->object, &cs->fh); 3483 *cs->statusp = resp->status = NFS4_OK; 3484 cs->deleg = FALSE; 3485 3486 out: 3487 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs, 3488 PUTFH4res *, resp); 3489 } 3490 3491 /* ARGSUSED */ 3492 static void 3493 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3494 struct compound_state *cs) 3495 3496 { 3497 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh; 3498 int error; 3499 fid_t fid; 3500 struct exportinfo *exi, *sav_exi; 3501 3502 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs); 3503 3504 if (cs->vp) { 3505 VN_RELE(cs->vp); 3506 cs->vp = NULL; 3507 } 3508 3509 if (cs->cr) 3510 crfree(cs->cr); 3511 3512 cs->cr = crdup(cs->basecr); 3513 3514 /* 3515 * Using rootdir, the system root vnode, 3516 * get its fid. 3517 */ 3518 bzero(&fid, sizeof (fid)); 3519 fid.fid_len = MAXFIDSZ; 3520 error = vop_fid_pseudo(rootdir, &fid); 3521 if (error != 0) { 3522 *cs->statusp = resp->status = puterrno4(error); 3523 goto out; 3524 } 3525 3526 /* 3527 * Then use the root fsid & fid it to find out if it's exported 3528 * 3529 * If the server root isn't exported directly, then 3530 * it should at least be a pseudo export based on 3531 * one or more exports further down in the server's 3532 * file tree. 3533 */ 3534 exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL); 3535 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) { 3536 NFS4_DEBUG(rfs4_debug, 3537 (CE_WARN, "rfs4_op_putrootfh: export check failure")); 3538 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 3539 goto out; 3540 } 3541 3542 /* 3543 * Now make a filehandle based on the root 3544 * export and root vnode. 
3545 */ 3546 error = makefh4(&cs->fh, rootdir, exi); 3547 if (error != 0) { 3548 *cs->statusp = resp->status = puterrno4(error); 3549 goto out; 3550 } 3551 3552 sav_exi = cs->exi; 3553 cs->exi = exi; 3554 3555 VN_HOLD(rootdir); 3556 cs->vp = rootdir; 3557 3558 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 3559 VN_RELE(rootdir); 3560 cs->vp = NULL; 3561 cs->exi = sav_exi; 3562 goto out; 3563 } 3564 3565 *cs->statusp = resp->status = NFS4_OK; 3566 cs->deleg = FALSE; 3567 out: 3568 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs, 3569 PUTROOTFH4res *, resp); 3570 } 3571 3572 /* 3573 * A directory entry is a valid nfsv4 entry if 3574 * - it has a non-zero ino 3575 * - it is not a dot or dotdot name 3576 * - it is visible in a pseudo export or in a real export that can 3577 * only have a limited view. 3578 */ 3579 static bool_t 3580 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp, 3581 int *expseudo, int check_visible) 3582 { 3583 if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) { 3584 *expseudo = 0; 3585 return (FALSE); 3586 } 3587 3588 if (! check_visible) { 3589 *expseudo = 0; 3590 return (TRUE); 3591 } 3592 3593 return (nfs_visible_inode(exi, dp->d_ino, expseudo)); 3594 } 3595 3596 /* 3597 * set_rdattr_params sets up the variables used to manage what information 3598 * to get for each directory entry. 3599 */ 3600 static nfsstat4 3601 set_rdattr_params(struct nfs4_svgetit_arg *sargp, 3602 bitmap4 attrs, bool_t *need_to_lookup) 3603 { 3604 uint_t va_mask; 3605 nfsstat4 status; 3606 bitmap4 objbits; 3607 3608 status = bitmap4_to_attrmask(attrs, sargp); 3609 if (status != NFS4_OK) { 3610 /* 3611 * could not even figure attr mask 3612 */ 3613 return (status); 3614 } 3615 va_mask = sargp->vap->va_mask; 3616 3617 /* 3618 * dirent's d_ino is always correct value for mounted_on_fileid. 3619 * mntdfid_set is set once here, but mounted_on_fileid is 3620 * set in main dirent processing loop for each dirent. 
3621 * The mntdfid_set is a simple optimization that lets the 3622 * server attr code avoid work when caller is readdir. 3623 */ 3624 sargp->mntdfid_set = TRUE; 3625 3626 /* 3627 * Lookup entry only if client asked for any of the following: 3628 * a) vattr attrs 3629 * b) vfs attrs 3630 * c) attrs w/per-object scope requested (change, filehandle, etc) 3631 * other than mounted_on_fileid (which we can take from dirent) 3632 */ 3633 objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0; 3634 3635 if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK)) 3636 *need_to_lookup = TRUE; 3637 else 3638 *need_to_lookup = FALSE; 3639 3640 if (sargp->sbp == NULL) 3641 return (NFS4_OK); 3642 3643 /* 3644 * If filesystem attrs are requested, get them now from the 3645 * directory vp, as most entries will have same filesystem. The only 3646 * exception are mounted over entries but we handle 3647 * those as we go (XXX mounted over detection not yet implemented). 3648 */ 3649 sargp->vap->va_mask = 0; /* to avoid VOP_GETATTR */ 3650 status = bitmap4_get_sysattrs(sargp); 3651 sargp->vap->va_mask = va_mask; 3652 3653 if ((status != NFS4_OK) && sargp->rdattr_error_req) { 3654 /* 3655 * Failed to get filesystem attributes. 3656 * Return a rdattr_error for each entry, but don't fail. 3657 * However, don't get any obj-dependent attrs. 3658 */ 3659 sargp->rdattr_error = status; /* for rdattr_error */ 3660 *need_to_lookup = FALSE; 3661 /* 3662 * At least get fileid for regular readdir output 3663 */ 3664 sargp->vap->va_mask &= AT_NODEID; 3665 status = NFS4_OK; 3666 } 3667 3668 return (status); 3669 } 3670 3671 /* 3672 * readlink: args: CURRENT_FH. 3673 * res: status. If success - CURRENT_FH unchanged, return linktext. 
3674 */ 3675 3676 /* ARGSUSED */ 3677 static void 3678 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3679 struct compound_state *cs) 3680 { 3681 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink; 3682 int error; 3683 vnode_t *vp; 3684 struct iovec iov; 3685 struct vattr va; 3686 struct uio uio; 3687 char *data; 3688 struct sockaddr *ca; 3689 char *name = NULL; 3690 3691 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs); 3692 3693 /* CURRENT_FH: directory */ 3694 vp = cs->vp; 3695 if (vp == NULL) { 3696 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3697 goto out; 3698 } 3699 3700 if (cs->access == CS_ACCESS_DENIED) { 3701 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3702 goto out; 3703 } 3704 3705 if (vp->v_type == VDIR) { 3706 *cs->statusp = resp->status = NFS4ERR_ISDIR; 3707 goto out; 3708 } 3709 3710 if (vp->v_type != VLNK) { 3711 *cs->statusp = resp->status = NFS4ERR_INVAL; 3712 goto out; 3713 } 3714 3715 va.va_mask = AT_MODE; 3716 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL); 3717 if (error) { 3718 *cs->statusp = resp->status = puterrno4(error); 3719 goto out; 3720 } 3721 3722 if (MANDLOCK(vp, va.va_mode)) { 3723 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3724 goto out; 3725 } 3726 3727 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP); 3728 3729 iov.iov_base = data; 3730 iov.iov_len = MAXPATHLEN; 3731 uio.uio_iov = &iov; 3732 uio.uio_iovcnt = 1; 3733 uio.uio_segflg = UIO_SYSSPACE; 3734 uio.uio_extflg = UIO_COPY_CACHED; 3735 uio.uio_loffset = 0; 3736 uio.uio_resid = MAXPATHLEN; 3737 3738 error = VOP_READLINK(vp, &uio, cs->cr, NULL); 3739 3740 if (error) { 3741 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1); 3742 *cs->statusp = resp->status = puterrno4(error); 3743 goto out; 3744 } 3745 3746 *(data + MAXPATHLEN - uio.uio_resid) = '\0'; 3747 3748 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 3749 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND, 3750 MAXPATHLEN + 1); 3751 3752 if 
(name == NULL) { 3753 /* 3754 * Even though the conversion failed, we return 3755 * something. We just don't translate it. 3756 */ 3757 name = data; 3758 } 3759 3760 /* 3761 * treat link name as data 3762 */ 3763 (void) str_to_utf8(name, &resp->link); 3764 3765 if (name != data) 3766 kmem_free(name, MAXPATHLEN + 1); 3767 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1); 3768 *cs->statusp = resp->status = NFS4_OK; 3769 3770 out: 3771 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs, 3772 READLINK4res *, resp); 3773 } 3774 3775 static void 3776 rfs4_op_readlink_free(nfs_resop4 *resop) 3777 { 3778 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink; 3779 utf8string *symlink = &resp->link; 3780 3781 if (symlink->utf8string_val) { 3782 UTF8STRING_FREE(*symlink) 3783 } 3784 } 3785 3786 /* 3787 * release_lockowner: 3788 * Release any state associated with the supplied 3789 * lockowner. Note if any lo_state is holding locks we will not 3790 * rele that lo_state and thus the lockowner will not be destroyed. 3791 * A client using lock after the lock owner stateid has been released 3792 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have 3793 * to reissue the lock with new_lock_owner set to TRUE. 
3794 * args: lock_owner 3795 * res: status 3796 */ 3797 /* ARGSUSED */ 3798 static void 3799 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop, 3800 struct svc_req *req, struct compound_state *cs) 3801 { 3802 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner; 3803 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner; 3804 rfs4_lockowner_t *lo; 3805 rfs4_openowner_t *oop; 3806 rfs4_state_t *sp; 3807 rfs4_lo_state_t *lsp; 3808 rfs4_client_t *cp; 3809 bool_t create = FALSE; 3810 locklist_t *llist; 3811 sysid_t sysid; 3812 3813 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *, 3814 cs, RELEASE_LOCKOWNER4args *, ap); 3815 3816 /* Make sure there is a clientid around for this request */ 3817 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE); 3818 3819 if (cp == NULL) { 3820 *cs->statusp = resp->status = 3821 rfs4_check_clientid(&ap->lock_owner.clientid, 0); 3822 goto out; 3823 } 3824 rfs4_client_rele(cp); 3825 3826 lo = rfs4_findlockowner(&ap->lock_owner, &create); 3827 if (lo == NULL) { 3828 *cs->statusp = resp->status = NFS4_OK; 3829 goto out; 3830 } 3831 ASSERT(lo->client != NULL); 3832 3833 /* 3834 * Check for EXPIRED client. If so will reap state with in a lease 3835 * period or on next set_clientid_confirm step 3836 */ 3837 if (rfs4_lease_expired(lo->client)) { 3838 rfs4_lockowner_rele(lo); 3839 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 3840 goto out; 3841 } 3842 3843 /* 3844 * If no sysid has been assigned, then no locks exist; just return. 3845 */ 3846 rfs4_dbe_lock(lo->client->dbe); 3847 if (lo->client->sysidt == LM_NOSYSID) { 3848 rfs4_lockowner_rele(lo); 3849 rfs4_dbe_unlock(lo->client->dbe); 3850 goto out; 3851 } 3852 3853 sysid = lo->client->sysidt; 3854 rfs4_dbe_unlock(lo->client->dbe); 3855 3856 /* 3857 * Mark the lockowner invalid. 3858 */ 3859 rfs4_dbe_hide(lo->dbe); 3860 3861 /* 3862 * sysid-pid pair should now not be used since the lockowner is 3863 * invalid. 
If the client were to instantiate the lockowner again 3864 * it would be assigned a new pid. Thus we can get the list of 3865 * current locks. 3866 */ 3867 3868 llist = flk_get_active_locks(sysid, lo->pid); 3869 /* If we are still holding locks fail */ 3870 if (llist != NULL) { 3871 3872 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD; 3873 3874 flk_free_locklist(llist); 3875 /* 3876 * We need to unhide the lockowner so the client can 3877 * try it again. The bad thing here is if the client 3878 * has a logic error that took it here in the first place 3879 * he probably has lost accounting of the locks that it 3880 * is holding. So we may have dangling state until the 3881 * open owner state is reaped via close. One scenario 3882 * that could possibly occur is that the client has 3883 * sent the unlock request(s) in separate threads 3884 * and has not waited for the replies before sending the 3885 * RELEASE_LOCKOWNER request. Presumably, it would expect 3886 * and deal appropriately with NFS4ERR_LOCKS_HELD, by 3887 * reissuing the request. 3888 */ 3889 rfs4_dbe_unhide(lo->dbe); 3890 rfs4_lockowner_rele(lo); 3891 goto out; 3892 } 3893 3894 /* 3895 * For the corresponding client we need to check each open 3896 * owner for any opens that have lockowner state associated 3897 * with this lockowner. 
3898 */ 3899 3900 rfs4_dbe_lock(lo->client->dbe); 3901 for (oop = lo->client->openownerlist.next->oop; oop != NULL; 3902 oop = oop->openownerlist.next->oop) { 3903 3904 rfs4_dbe_lock(oop->dbe); 3905 for (sp = oop->ownerstateids.next->sp; sp != NULL; 3906 sp = sp->ownerstateids.next->sp) { 3907 3908 rfs4_dbe_lock(sp->dbe); 3909 for (lsp = sp->lockownerlist.next->lsp; 3910 lsp != NULL; lsp = lsp->lockownerlist.next->lsp) { 3911 if (lsp->locker == lo) { 3912 rfs4_dbe_lock(lsp->dbe); 3913 rfs4_dbe_invalidate(lsp->dbe); 3914 rfs4_dbe_unlock(lsp->dbe); 3915 } 3916 } 3917 rfs4_dbe_unlock(sp->dbe); 3918 } 3919 rfs4_dbe_unlock(oop->dbe); 3920 } 3921 rfs4_dbe_unlock(lo->client->dbe); 3922 3923 rfs4_lockowner_rele(lo); 3924 3925 *cs->statusp = resp->status = NFS4_OK; 3926 3927 out: 3928 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *, 3929 cs, RELEASE_LOCKOWNER4res *, resp); 3930 } 3931 3932 /* 3933 * short utility function to lookup a file and recall the delegation 3934 */ 3935 static rfs4_file_t * 3936 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp, 3937 int *lkup_error, cred_t *cr) 3938 { 3939 vnode_t *vp; 3940 rfs4_file_t *fp = NULL; 3941 bool_t fcreate = FALSE; 3942 int error; 3943 3944 if (vpp) 3945 *vpp = NULL; 3946 3947 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL, 3948 NULL)) == 0) { 3949 if (vp->v_type == VREG) 3950 fp = rfs4_findfile(vp, NULL, &fcreate); 3951 if (vpp) 3952 *vpp = vp; 3953 else 3954 VN_RELE(vp); 3955 } 3956 3957 if (lkup_error) 3958 *lkup_error = error; 3959 3960 return (fp); 3961 } 3962 3963 /* 3964 * remove: args: CURRENT_FH: directory; name. 3965 * res: status. If success - CURRENT_FH unchanged, return change_info 3966 * for directory. 
3967 */ 3968 /* ARGSUSED */ 3969 static void 3970 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3971 struct compound_state *cs) 3972 { 3973 REMOVE4args *args = &argop->nfs_argop4_u.opremove; 3974 REMOVE4res *resp = &resop->nfs_resop4_u.opremove; 3975 int error; 3976 vnode_t *dvp, *vp; 3977 struct vattr bdva, idva, adva; 3978 char *nm; 3979 uint_t len; 3980 rfs4_file_t *fp; 3981 int in_crit = 0; 3982 bslabel_t *clabel; 3983 struct sockaddr *ca; 3984 char *name = NULL; 3985 3986 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs, 3987 REMOVE4args *, args); 3988 3989 /* CURRENT_FH: directory */ 3990 dvp = cs->vp; 3991 if (dvp == NULL) { 3992 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3993 goto out; 3994 } 3995 3996 if (cs->access == CS_ACCESS_DENIED) { 3997 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3998 goto out; 3999 } 4000 4001 /* 4002 * If there is an unshared filesystem mounted on this vnode, 4003 * Do not allow to remove anything in this directory. 
4004 */ 4005 if (vn_ismntpt(dvp)) { 4006 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4007 goto out; 4008 } 4009 4010 if (dvp->v_type != VDIR) { 4011 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 4012 goto out; 4013 } 4014 4015 if (!utf8_dir_verify(&args->target)) { 4016 *cs->statusp = resp->status = NFS4ERR_INVAL; 4017 goto out; 4018 } 4019 4020 /* 4021 * Lookup the file so that we can check if it's a directory 4022 */ 4023 nm = utf8_to_fn(&args->target, &len, NULL); 4024 if (nm == NULL) { 4025 *cs->statusp = resp->status = NFS4ERR_INVAL; 4026 goto out; 4027 } 4028 4029 if (len > MAXNAMELEN) { 4030 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 4031 kmem_free(nm, len); 4032 goto out; 4033 } 4034 4035 if (rdonly4(cs->exi, cs->vp, req)) { 4036 *cs->statusp = resp->status = NFS4ERR_ROFS; 4037 kmem_free(nm, len); 4038 goto out; 4039 } 4040 4041 /* If necessary, convert to UTF-8 for illbehaved clients */ 4042 4043 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 4044 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 4045 MAXPATHLEN + 1); 4046 4047 if (name == NULL) { 4048 *cs->statusp = resp->status = NFS4ERR_INVAL; 4049 kmem_free(nm, len); 4050 goto out; 4051 } 4052 4053 /* 4054 * Lookup the file to determine type and while we are see if 4055 * there is a file struct around and check for delegation. 4056 * We don't need to acquire va_seq before this lookup, if 4057 * it causes an update, cinfo.before will not match, which will 4058 * trigger a cache flush even if atomic is TRUE. 
4059 */ 4060 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) { 4061 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE, 4062 NULL)) { 4063 VN_RELE(vp); 4064 rfs4_file_rele(fp); 4065 *cs->statusp = resp->status = NFS4ERR_DELAY; 4066 if (nm != name) 4067 kmem_free(name, MAXPATHLEN + 1); 4068 kmem_free(nm, len); 4069 goto out; 4070 } 4071 } 4072 4073 /* Didn't find anything to remove */ 4074 if (vp == NULL) { 4075 *cs->statusp = resp->status = error; 4076 if (nm != name) 4077 kmem_free(name, MAXPATHLEN + 1); 4078 kmem_free(nm, len); 4079 goto out; 4080 } 4081 4082 if (nbl_need_check(vp)) { 4083 nbl_start_crit(vp, RW_READER); 4084 in_crit = 1; 4085 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) { 4086 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 4087 if (nm != name) 4088 kmem_free(name, MAXPATHLEN + 1); 4089 kmem_free(nm, len); 4090 nbl_end_crit(vp); 4091 VN_RELE(vp); 4092 if (fp) { 4093 rfs4_clear_dont_grant(fp); 4094 rfs4_file_rele(fp); 4095 } 4096 goto out; 4097 } 4098 } 4099 4100 /* check label before allowing removal */ 4101 if (is_system_labeled()) { 4102 ASSERT(req->rq_label != NULL); 4103 clabel = req->rq_label; 4104 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *, 4105 "got client label from request(1)", 4106 struct svc_req *, req); 4107 if (!blequal(&l_admin_low->tsl_label, clabel)) { 4108 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) { 4109 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4110 if (name != nm) 4111 kmem_free(name, MAXPATHLEN + 1); 4112 kmem_free(nm, len); 4113 if (in_crit) 4114 nbl_end_crit(vp); 4115 VN_RELE(vp); 4116 if (fp) { 4117 rfs4_clear_dont_grant(fp); 4118 rfs4_file_rele(fp); 4119 } 4120 goto out; 4121 } 4122 } 4123 } 4124 4125 /* Get dir "before" change value */ 4126 bdva.va_mask = AT_CTIME|AT_SEQ; 4127 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL); 4128 if (error) { 4129 *cs->statusp = resp->status = puterrno4(error); 4130 if (nm != name) 4131 kmem_free(name, MAXPATHLEN + 1); 4132 
kmem_free(nm, len); 4133 if (in_crit) 4134 nbl_end_crit(vp); 4135 VN_RELE(vp); 4136 if (fp) { 4137 rfs4_clear_dont_grant(fp); 4138 rfs4_file_rele(fp); 4139 } 4140 goto out; 4141 } 4142 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime) 4143 4144 /* Actually do the REMOVE operation */ 4145 if (vp->v_type == VDIR) { 4146 /* 4147 * Can't remove a directory that has a mounted-on filesystem. 4148 */ 4149 if (vn_ismntpt(vp)) { 4150 error = EACCES; 4151 } else { 4152 /* 4153 * System V defines rmdir to return EEXIST, 4154 * not * ENOTEMPTY, if the directory is not 4155 * empty. A System V NFS server needs to map 4156 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to 4157 * transmit over the wire. 4158 */ 4159 if ((error = VOP_RMDIR(dvp, nm, rootdir, cs->cr, 4160 NULL, 0)) == EEXIST) 4161 error = ENOTEMPTY; 4162 } 4163 } else { 4164 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 && 4165 fp != NULL) { 4166 struct vattr va; 4167 vnode_t *tvp; 4168 4169 rfs4_dbe_lock(fp->dbe); 4170 tvp = fp->vp; 4171 if (tvp) 4172 VN_HOLD(tvp); 4173 rfs4_dbe_unlock(fp->dbe); 4174 4175 if (tvp) { 4176 /* 4177 * This is va_seq safe because we are not 4178 * manipulating dvp. 
4179 */ 4180 va.va_mask = AT_NLINK; 4181 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) && 4182 va.va_nlink == 0) { 4183 /* Remove state on file remove */ 4184 if (in_crit) { 4185 nbl_end_crit(vp); 4186 in_crit = 0; 4187 } 4188 rfs4_close_all_state(fp); 4189 } 4190 VN_RELE(tvp); 4191 } 4192 } 4193 } 4194 4195 if (in_crit) 4196 nbl_end_crit(vp); 4197 VN_RELE(vp); 4198 4199 if (fp) { 4200 rfs4_clear_dont_grant(fp); 4201 rfs4_file_rele(fp); 4202 } 4203 if (nm != name) 4204 kmem_free(name, MAXPATHLEN + 1); 4205 kmem_free(nm, len); 4206 4207 if (error) { 4208 *cs->statusp = resp->status = puterrno4(error); 4209 goto out; 4210 } 4211 4212 /* 4213 * Get the initial "after" sequence number, if it fails, set to zero 4214 */ 4215 idva.va_mask = AT_SEQ; 4216 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL)) 4217 idva.va_seq = 0; 4218 4219 /* 4220 * Force modified data and metadata out to stable storage. 4221 */ 4222 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 4223 4224 /* 4225 * Get "after" change value, if it fails, simply return the 4226 * before value. 4227 */ 4228 adva.va_mask = AT_CTIME|AT_SEQ; 4229 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) { 4230 adva.va_ctime = bdva.va_ctime; 4231 adva.va_seq = 0; 4232 } 4233 4234 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime) 4235 4236 /* 4237 * The cinfo.atomic = TRUE only if we have 4238 * non-zero va_seq's, and it has incremented by exactly one 4239 * during the VOP_REMOVE/RMDIR and it didn't change during 4240 * the VOP_FSYNC. 4241 */ 4242 if (bdva.va_seq && idva.va_seq && adva.va_seq && 4243 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq) 4244 resp->cinfo.atomic = TRUE; 4245 else 4246 resp->cinfo.atomic = FALSE; 4247 4248 *cs->statusp = resp->status = NFS4_OK; 4249 4250 out: 4251 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs, 4252 REMOVE4res *, resp); 4253 } 4254 4255 /* 4256 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory, 4257 * oldname and newname. 4258 * res: status. 
If success - CURRENT_FH unchanged, return change_info 4259 * for both from and target directories. 4260 */ 4261 /* ARGSUSED */ 4262 static void 4263 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 4264 struct compound_state *cs) 4265 { 4266 RENAME4args *args = &argop->nfs_argop4_u.oprename; 4267 RENAME4res *resp = &resop->nfs_resop4_u.oprename; 4268 int error; 4269 vnode_t *odvp; 4270 vnode_t *ndvp; 4271 vnode_t *srcvp, *targvp; 4272 struct vattr obdva, oidva, oadva; 4273 struct vattr nbdva, nidva, nadva; 4274 char *onm, *nnm; 4275 uint_t olen, nlen; 4276 rfs4_file_t *fp, *sfp; 4277 int in_crit_src, in_crit_targ; 4278 int fp_rele_grant_hold, sfp_rele_grant_hold; 4279 bslabel_t *clabel; 4280 struct sockaddr *ca; 4281 char *converted_onm = NULL; 4282 char *converted_nnm = NULL; 4283 4284 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs, 4285 RENAME4args *, args); 4286 4287 fp = sfp = NULL; 4288 srcvp = targvp = NULL; 4289 in_crit_src = in_crit_targ = 0; 4290 fp_rele_grant_hold = sfp_rele_grant_hold = 0; 4291 4292 /* CURRENT_FH: target directory */ 4293 ndvp = cs->vp; 4294 if (ndvp == NULL) { 4295 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4296 goto out; 4297 } 4298 4299 /* SAVED_FH: from directory */ 4300 odvp = cs->saved_vp; 4301 if (odvp == NULL) { 4302 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4303 goto out; 4304 } 4305 4306 if (cs->access == CS_ACCESS_DENIED) { 4307 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4308 goto out; 4309 } 4310 4311 /* 4312 * If there is an unshared filesystem mounted on this vnode, 4313 * do not allow to rename objects in this directory. 4314 */ 4315 if (vn_ismntpt(odvp)) { 4316 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4317 goto out; 4318 } 4319 4320 /* 4321 * If there is an unshared filesystem mounted on this vnode, 4322 * do not allow to rename to this directory. 
4323 */ 4324 if (vn_ismntpt(ndvp)) { 4325 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4326 goto out; 4327 } 4328 4329 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) { 4330 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 4331 goto out; 4332 } 4333 4334 if (cs->saved_exi != cs->exi) { 4335 *cs->statusp = resp->status = NFS4ERR_XDEV; 4336 goto out; 4337 } 4338 4339 if (!utf8_dir_verify(&args->oldname)) { 4340 *cs->statusp = resp->status = NFS4ERR_INVAL; 4341 goto out; 4342 } 4343 4344 if (!utf8_dir_verify(&args->newname)) { 4345 *cs->statusp = resp->status = NFS4ERR_INVAL; 4346 goto out; 4347 } 4348 4349 onm = utf8_to_fn(&args->oldname, &olen, NULL); 4350 if (onm == NULL) { 4351 *cs->statusp = resp->status = NFS4ERR_INVAL; 4352 goto out; 4353 } 4354 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 4355 nlen = MAXPATHLEN + 1; 4356 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND, 4357 nlen); 4358 4359 if (converted_onm == NULL) { 4360 *cs->statusp = resp->status = NFS4ERR_INVAL; 4361 kmem_free(onm, olen); 4362 goto out; 4363 } 4364 4365 nnm = utf8_to_fn(&args->newname, &nlen, NULL); 4366 if (nnm == NULL) { 4367 *cs->statusp = resp->status = NFS4ERR_INVAL; 4368 if (onm != converted_onm) 4369 kmem_free(converted_onm, MAXPATHLEN + 1); 4370 kmem_free(onm, olen); 4371 goto out; 4372 } 4373 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND, 4374 MAXPATHLEN + 1); 4375 4376 if (converted_nnm == NULL) { 4377 *cs->statusp = resp->status = NFS4ERR_INVAL; 4378 kmem_free(nnm, nlen); 4379 nnm = NULL; 4380 if (onm != converted_onm) 4381 kmem_free(converted_onm, MAXPATHLEN + 1); 4382 kmem_free(onm, olen); 4383 goto out; 4384 } 4385 4386 4387 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) { 4388 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 4389 kmem_free(onm, olen); 4390 kmem_free(nnm, nlen); 4391 goto out; 4392 } 4393 4394 4395 if (rdonly4(cs->exi, cs->vp, req)) { 4396 *cs->statusp = resp->status = NFS4ERR_ROFS; 4397 if (onm 
!= converted_onm) 4398 kmem_free(converted_onm, MAXPATHLEN + 1); 4399 kmem_free(onm, olen); 4400 if (nnm != converted_nnm) 4401 kmem_free(converted_nnm, MAXPATHLEN + 1); 4402 kmem_free(nnm, nlen); 4403 goto out; 4404 } 4405 4406 /* check label of the target dir */ 4407 if (is_system_labeled()) { 4408 ASSERT(req->rq_label != NULL); 4409 clabel = req->rq_label; 4410 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *, 4411 "got client label from request(1)", 4412 struct svc_req *, req); 4413 if (!blequal(&l_admin_low->tsl_label, clabel)) { 4414 if (!do_rfs_label_check(clabel, ndvp, 4415 EQUALITY_CHECK)) { 4416 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4417 goto err_out; 4418 } 4419 } 4420 } 4421 4422 /* 4423 * Is the source a file and have a delegation? 4424 * We don't need to acquire va_seq before these lookups, if 4425 * it causes an update, cinfo.before will not match, which will 4426 * trigger a cache flush even if atomic is TRUE. 4427 */ 4428 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp, 4429 &error, cs->cr)) { 4430 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE, 4431 NULL)) { 4432 *cs->statusp = resp->status = NFS4ERR_DELAY; 4433 goto err_out; 4434 } 4435 } 4436 4437 if (srcvp == NULL) { 4438 *cs->statusp = resp->status = puterrno4(error); 4439 if (onm != converted_onm) 4440 kmem_free(converted_onm, MAXPATHLEN + 1); 4441 kmem_free(onm, olen); 4442 if (nnm != converted_nnm) 4443 kmem_free(converted_onm, MAXPATHLEN + 1); 4444 kmem_free(nnm, nlen); 4445 goto out; 4446 } 4447 4448 sfp_rele_grant_hold = 1; 4449 4450 /* Does the destination exist and a file and have a delegation? 
*/ 4451 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp, 4452 NULL, cs->cr)) { 4453 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE, 4454 NULL)) { 4455 *cs->statusp = resp->status = NFS4ERR_DELAY; 4456 goto err_out; 4457 } 4458 } 4459 fp_rele_grant_hold = 1; 4460 4461 4462 /* Check for NBMAND lock on both source and target */ 4463 if (nbl_need_check(srcvp)) { 4464 nbl_start_crit(srcvp, RW_READER); 4465 in_crit_src = 1; 4466 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) { 4467 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 4468 goto err_out; 4469 } 4470 } 4471 4472 if (targvp && nbl_need_check(targvp)) { 4473 nbl_start_crit(targvp, RW_READER); 4474 in_crit_targ = 1; 4475 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) { 4476 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 4477 goto err_out; 4478 } 4479 } 4480 4481 /* Get source "before" change value */ 4482 obdva.va_mask = AT_CTIME|AT_SEQ; 4483 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL); 4484 if (!error) { 4485 nbdva.va_mask = AT_CTIME|AT_SEQ; 4486 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL); 4487 } 4488 if (error) { 4489 *cs->statusp = resp->status = puterrno4(error); 4490 goto err_out; 4491 } 4492 4493 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime) 4494 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime) 4495 4496 if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, 4497 cs->cr, NULL, 0)) == 0 && fp != NULL) { 4498 struct vattr va; 4499 vnode_t *tvp; 4500 4501 rfs4_dbe_lock(fp->dbe); 4502 tvp = fp->vp; 4503 if (tvp) 4504 VN_HOLD(tvp); 4505 rfs4_dbe_unlock(fp->dbe); 4506 4507 if (tvp) { 4508 va.va_mask = AT_NLINK; 4509 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) && 4510 va.va_nlink == 0) { 4511 /* The file is gone and so should the state */ 4512 if (in_crit_targ) { 4513 nbl_end_crit(targvp); 4514 in_crit_targ = 0; 4515 } 4516 rfs4_close_all_state(fp); 4517 } 4518 VN_RELE(tvp); 4519 } 4520 } 4521 if (error == 0) 4522 
vn_renamepath(ndvp, srcvp, nnm, nlen - 1); 4523 4524 if (in_crit_src) 4525 nbl_end_crit(srcvp); 4526 if (srcvp) 4527 VN_RELE(srcvp); 4528 if (in_crit_targ) 4529 nbl_end_crit(targvp); 4530 if (targvp) 4531 VN_RELE(targvp); 4532 4533 if (sfp) { 4534 rfs4_clear_dont_grant(sfp); 4535 rfs4_file_rele(sfp); 4536 } 4537 if (fp) { 4538 rfs4_clear_dont_grant(fp); 4539 rfs4_file_rele(fp); 4540 } 4541 4542 if (converted_onm != onm) 4543 kmem_free(converted_onm, MAXPATHLEN + 1); 4544 kmem_free(onm, olen); 4545 if (converted_nnm != nnm) 4546 kmem_free(converted_nnm, MAXPATHLEN + 1); 4547 kmem_free(nnm, nlen); 4548 4549 /* 4550 * Get the initial "after" sequence number, if it fails, set to zero 4551 */ 4552 oidva.va_mask = AT_SEQ; 4553 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL)) 4554 oidva.va_seq = 0; 4555 4556 nidva.va_mask = AT_SEQ; 4557 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL)) 4558 nidva.va_seq = 0; 4559 4560 /* 4561 * Force modified data and metadata out to stable storage. 4562 */ 4563 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL); 4564 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL); 4565 4566 if (error) { 4567 *cs->statusp = resp->status = puterrno4(error); 4568 goto out; 4569 } 4570 4571 /* 4572 * Get "after" change values, if it fails, simply return the 4573 * before value. 4574 */ 4575 oadva.va_mask = AT_CTIME|AT_SEQ; 4576 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) { 4577 oadva.va_ctime = obdva.va_ctime; 4578 oadva.va_seq = 0; 4579 } 4580 4581 nadva.va_mask = AT_CTIME|AT_SEQ; 4582 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) { 4583 nadva.va_ctime = nbdva.va_ctime; 4584 nadva.va_seq = 0; 4585 } 4586 4587 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime) 4588 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime) 4589 4590 /* 4591 * The cinfo.atomic = TRUE only if we have 4592 * non-zero va_seq's, and it has incremented by exactly one 4593 * during the VOP_RENAME and it didn't change during the VOP_FSYNC. 
4594 */ 4595 if (obdva.va_seq && oidva.va_seq && oadva.va_seq && 4596 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq) 4597 resp->source_cinfo.atomic = TRUE; 4598 else 4599 resp->source_cinfo.atomic = FALSE; 4600 4601 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq && 4602 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq) 4603 resp->target_cinfo.atomic = TRUE; 4604 else 4605 resp->target_cinfo.atomic = FALSE; 4606 4607 #ifdef VOLATILE_FH_TEST 4608 { 4609 extern void add_volrnm_fh(struct exportinfo *, vnode_t *); 4610 4611 /* 4612 * Add the renamed file handle to the volatile rename list 4613 */ 4614 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) { 4615 /* file handles may expire on rename */ 4616 vnode_t *vp; 4617 4618 nnm = utf8_to_fn(&args->newname, &nlen, NULL); 4619 /* 4620 * Already know that nnm will be a valid string 4621 */ 4622 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr, 4623 NULL, NULL, NULL); 4624 kmem_free(nnm, nlen); 4625 if (!error) { 4626 add_volrnm_fh(cs->exi, vp); 4627 VN_RELE(vp); 4628 } 4629 } 4630 } 4631 #endif /* VOLATILE_FH_TEST */ 4632 4633 *cs->statusp = resp->status = NFS4_OK; 4634 out: 4635 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs, 4636 RENAME4res *, resp); 4637 return; 4638 4639 err_out: 4640 if (onm != converted_onm) 4641 kmem_free(converted_onm, MAXPATHLEN + 1); 4642 if (onm != NULL) 4643 kmem_free(onm, olen); 4644 if (nnm != converted_nnm) 4645 kmem_free(converted_nnm, MAXPATHLEN + 1); 4646 if (nnm != NULL) 4647 kmem_free(nnm, nlen); 4648 4649 if (in_crit_src) nbl_end_crit(srcvp); 4650 if (in_crit_targ) nbl_end_crit(targvp); 4651 if (targvp) VN_RELE(targvp); 4652 if (srcvp) VN_RELE(srcvp); 4653 if (sfp) { 4654 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp); 4655 rfs4_file_rele(sfp); 4656 } 4657 if (fp) { 4658 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp); 4659 rfs4_file_rele(fp); 4660 } 4661 4662 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs, 
4663 RENAME4res *, resp); 4664 } 4665 4666 /* ARGSUSED */ 4667 static void 4668 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 4669 struct compound_state *cs) 4670 { 4671 RENEW4args *args = &argop->nfs_argop4_u.oprenew; 4672 RENEW4res *resp = &resop->nfs_resop4_u.oprenew; 4673 rfs4_client_t *cp; 4674 4675 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs, 4676 RENEW4args *, args); 4677 4678 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) { 4679 *cs->statusp = resp->status = 4680 rfs4_check_clientid(&args->clientid, 0); 4681 goto out; 4682 } 4683 4684 if (rfs4_lease_expired(cp)) { 4685 rfs4_client_rele(cp); 4686 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 4687 goto out; 4688 } 4689 4690 rfs4_update_lease(cp); 4691 4692 mutex_enter(cp->cbinfo.cb_lock); 4693 if (cp->cbinfo.cb_notified_of_cb_path_down == FALSE) { 4694 cp->cbinfo.cb_notified_of_cb_path_down = TRUE; 4695 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN; 4696 } else { 4697 *cs->statusp = resp->status = NFS4_OK; 4698 } 4699 mutex_exit(cp->cbinfo.cb_lock); 4700 4701 rfs4_client_rele(cp); 4702 4703 out: 4704 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs, 4705 RENEW4res *, resp); 4706 } 4707 4708 /* ARGSUSED */ 4709 static void 4710 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 4711 struct compound_state *cs) 4712 { 4713 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh; 4714 4715 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs); 4716 4717 /* No need to check cs->access - we are not accessing any object */ 4718 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) { 4719 *cs->statusp = resp->status = NFS4ERR_RESTOREFH; 4720 goto out; 4721 } 4722 if (cs->vp != NULL) { 4723 VN_RELE(cs->vp); 4724 } 4725 cs->vp = cs->saved_vp; 4726 cs->saved_vp = NULL; 4727 cs->exi = cs->saved_exi; 4728 nfs_fh4_copy(&cs->saved_fh, &cs->fh); 4729 *cs->statusp = resp->status = NFS4_OK; 4730 
cs->deleg = FALSE; 4731 4732 out: 4733 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs, 4734 RESTOREFH4res *, resp); 4735 } 4736 4737 /* ARGSUSED */ 4738 static void 4739 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 4740 struct compound_state *cs) 4741 { 4742 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh; 4743 4744 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs); 4745 4746 /* No need to check cs->access - we are not accessing any object */ 4747 if (cs->vp == NULL) { 4748 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4749 goto out; 4750 } 4751 if (cs->saved_vp != NULL) { 4752 VN_RELE(cs->saved_vp); 4753 } 4754 cs->saved_vp = cs->vp; 4755 VN_HOLD(cs->saved_vp); 4756 cs->saved_exi = cs->exi; 4757 /* 4758 * since SAVEFH is fairly rare, don't alloc space for its fh 4759 * unless necessary. 4760 */ 4761 if (cs->saved_fh.nfs_fh4_val == NULL) { 4762 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP); 4763 } 4764 nfs_fh4_copy(&cs->fh, &cs->saved_fh); 4765 *cs->statusp = resp->status = NFS4_OK; 4766 4767 out: 4768 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs, 4769 SAVEFH4res *, resp); 4770 } 4771 4772 /* 4773 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to 4774 * return the bitmap of attrs that were set successfully. It is also 4775 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should 4776 * always be called only after rfs4_do_set_attrs(). 4777 * 4778 * Verify that the attributes are same as the expected ones. sargp->vap 4779 * and sargp->sbp contain the input attributes as translated from fattr4. 4780 * 4781 * This function verifies only the attrs that correspond to a vattr or 4782 * vfsstat struct. That is because of the extra step needed to get the 4783 * corresponding system structs. Other attributes have already been set or 4784 * verified by do_rfs4_set_attrs. 
*
 * When resp is NULL (Verify/Nverify use) the function stops at the first
 * system-call failure or first mismatching attribute.  When resp is
 * non-NULL (failed Setattr use) it keeps going, skips attributes whose
 * backing VOP_GETATTR/VFS_STATVFS call failed, and ORs the fattr4 bit of
 * every attribute that matches into *resp.
 *
 * Return 0 if all attrs match, -1 if some don't, error if error processing.
 */
static int
rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
	bitmap4 *resp, struct nfs4_ntov_table *ntovp)
{
	int error, ret_error = 0;
	int i, k;
	uint_t sva_mask = sargp->vap->va_mask;
	uint_t vbit;
	union nfs4_attr_u *na;
	uint8_t *amap;
	bool_t getsb = ntovp->vfsstat;

	if (sva_mask != 0) {
		/*
		 * Okay to overwrite sargp->vap because we verify based
		 * on the incoming values.
		 */
		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
		    sargp->cs->cr, NULL);
		if (ret_error) {
			if (resp == NULL)
				return (ret_error);
			/*
			 * Must return bitmap of successful attrs
			 */
			sva_mask = 0;	/* to prevent checking vap later */
		} else {
			/*
			 * Some file systems clobber va_mask. it is probably
			 * wrong of them to do so, nonetheless we practice
			 * defensive coding.
			 * See bug id 4276830.
			 */
			sargp->vap->va_mask = sva_mask;
		}
	}

	if (getsb) {
		/*
		 * Now get the superblock and loop on the bitmap, as there is
		 * no simple way of translating from superblock to bitmap4.
		 */
		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
		if (ret_error) {
			if (resp == NULL)
				goto errout;
			/* keep going, but skip the vfsstat-backed attrs */
			getsb = FALSE;
		}
	}

	/*
	 * Now loop and verify each attribute which getattr returned
	 * whether it's the same as the input.
	 */
	if (resp == NULL && !getsb && (sva_mask == 0))
		goto errout;

	/* na[] and amap[] are parallel arrays with attrcnt valid entries */
	na = ntovp->na;
	amap = ntovp->amap;
	k = 0;
	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
		k = *amap;
		ASSERT(nfs4_ntov_map[k].nval == k);
		vbit = nfs4_ntov_map[k].vbit;

		/*
		 * If vattr attribute but VOP_GETATTR failed, or it's
		 * superblock attribute but VFS_STATVFS failed, skip
		 */
		if (vbit) {
			if ((vbit & sva_mask) == 0)
				continue;
		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
			continue;
		}
		error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
		if (resp != NULL) {
			if (error)
				ret_error = -1;	/* not all match */
			else	/* update response bitmap */
				*resp |= nfs4_ntov_map[k].fbit;
			continue;
		}
		if (error) {
			ret_error = -1;	/* not all match */
			break;
		}
	}
errout:
	return (ret_error);
}

/*
 * Decode the attribute to be set/verified. If the attr requires a sys op
 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
 * call the sv_getit function for it, because the sys op hasn't yet been done.
 * Return 0 for success, error code if failed.
 *
 * cmd		operation being performed (compared against NFS4ATTR_VERIT)
 * sargp	set/verify argument; its vap->va_mask gains the attr's vbit
 * k		index of the attribute in nfs4_ntov_map
 * xdrp		XDR stream positioned at the encoded attribute value
 * resp_bval	if non-NULL, receives the attr's fbit on success when the
 *		attr is neither vattr- nor vfsstat-backed
 * nap		where the decoded attribute value is stored
 *
 * EINVAL is returned when the XDR decode fails.
 *
 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
 */
static int
decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
	int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
{
	int error = 0;
	bool_t set_later;

	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;

	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
		/*
		 * don't verify yet if a vattr or sb dependent attr,
		 * because we don't have their sys values yet.
		 * Will be done later.
		 */
		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
			/*
			 * ACLs are a special case, since setting the MODE
			 * conflicts with setting the ACL.  We delay setting
			 * the ACL until all other attributes have been set.
			 * The ACL gets set in do_rfs4_op_setattr().
			 */
			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
				    sargp, nap);
				if (error) {
					xdr_free(nfs4_ntov_map[k].xfunc,
					    (caddr_t)nap);
				}
			}
		}
	} else {
#ifdef  DEBUG
		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
		    "decoding attribute %d\n", k);
#endif
		error = EINVAL;
	}
	/*
	 * set_later is only assigned on the successful-decode path; on the
	 * failure path error is EINVAL, so the test below short-circuits
	 * before set_later is read.
	 */
	if (!error && resp_bval && !set_later) {
		*resp_bval |= nfs4_ntov_map[k].fbit;
	}

	return (error);
}

/*
 * Set vattr based on incoming fattr4 attrs - used by setattr.
 * Set response mask. Ignore any values that are not writable vattr attrs.
 *
 * Also used by Verify/Nverify with cmd == NFS4ATTR_VERIT and resp == NULL.
 *
 * resp		if non-NULL, bitmap updated as attributes are processed;
 *		zeroed when the request is rejected up front
 * fattrp	incoming fattr4 (attrmask plus XDR-encoded attrlist4)
 * cs		compound state; cs->vp must be set and access not denied
 * sargp	initialized here (op, cs, flag, va_mask, rdattr_error*);
 *		sargp->sbp must already be set by the caller
 * ntovp	receives the decoded values (na), their map indices (amap),
 *		the count (attrcnt) and whether any attr needs vfsstat
 * cmd		NFS4ATTR_SETIT or NFS4ATTR_VERIT
 *
 * Returns NFS4_OK, NFS4ERR_NOFILEHANDLE, NFS4ERR_ACCESS,
 * NFS4ERR_ATTRNOTSUPP (for ENOTSUP) or puterrno4() of the failure.
 */
static nfsstat4
do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
	struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
	nfs4_attr_cmd_t cmd)
{
	int error = 0;
	int i;
	char *attrs = fattrp->attrlist4;
	uint32_t attrslen = fattrp->attrlist4_len;
	XDR xdr;
	nfsstat4 status = NFS4_OK;
	vnode_t *vp = cs->vp;
	union nfs4_attr_u *na;
	uint8_t *amap;

#ifndef lint
	/*
	 * Make sure that maximum attribute number can be expressed as an
	 * 8 bit quantity.
	 */
	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
#endif

	if (vp == NULL) {
		if (resp)
			*resp = 0;
		return (NFS4ERR_NOFILEHANDLE);
	}
	if (cs->access == CS_ACCESS_DENIED) {
		if (resp)
			*resp = 0;
		return (NFS4ERR_ACCESS);
	}

	sargp->op = cmd;
	sargp->cs = cs;
	sargp->flag = 0;	/* may be set later */
	sargp->vap->va_mask = 0;
	sargp->rdattr_error = NFS4_OK;
	sargp->rdattr_error_req = FALSE;
	/* sargp->sbp is set by the caller */

	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);

	na = ntovp->na;
	amap = ntovp->amap;

	/*
	 * The following loop iterates on the nfs4_ntov_map checking
	 * if the fbit is set in the requested bitmap.
	 * If set then we process the arguments using the
	 * rfs4_fattr4 conversion functions to populate the setattr
	 * vattr and va_mask. Any settable attrs that are not using vattr
	 * will be set in this loop.
	 */
	for (i = 0; i < nfs4_ntov_map_size; i++) {
		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
			continue;
		}
		/*
		 * If setattr, must be a writable attr.
		 * If verify/nverify, must be a readable attr.
		 */
		if ((error = (*nfs4_ntov_map[i].sv_getit)(
		    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
			/*
			 * Client tries to set/verify an
			 * unsupported attribute, tries to set
			 * a read only attr or verify a write
			 * only one - error!
			 */
			break;
		}
		/*
		 * Decode the attribute to set/verify
		 */
		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
		    &xdr, resp ? resp : NULL, na);
		if (error)
			break;
		/* record which map entry the decoded value in *na belongs to */
		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
		na++;
		(ntovp->attrcnt)++;
		if (nfs4_ntov_map[i].vfsstat)
			ntovp->vfsstat = TRUE;
	}

	if (error != 0)
		status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
		    puterrno4(error));
	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
	return (status);
}

/*
 * Worker for SETATTR.
 *
 * Decodes the requested attributes with do_rfs4_set_attrs(NFS4ATTR_SETIT),
 * then applies them to cs->vp: a size change on a regular file is handled
 * first (see the long comment below), the remaining vattr attributes go
 * through VOP_SETATTR, and an ACL, if requested, is set last.
 *
 * resp		out: bitmap of attributes that were set; always masked with
 *		the requested attrmask before returning
 * fattrp	attributes requested by the client
 * cs		compound state supplying the vnode and credentials
 * stateid	stateid from the request; only checked when the size is
 *		being set
 *
 * Returns the NFSv4 status for the operation.  The ntov table set up here
 * is always released through nfs4_ntov_table_free() at the done: label.
 */
static nfsstat4
do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
	stateid4 *stateid)
{
	int error = 0;
	struct nfs4_svgetit_arg sarg;
	bool_t trunc;

	nfsstat4 status = NFS4_OK;
	cred_t *cr = cs->cr;
	vnode_t *vp = cs->vp;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	struct vattr bva;
	struct flock64 bf;
	int in_crit = 0;
	uint_t saved_mask = 0;
	caller_context_t ct;

	*resp = 0;
	sarg.sbp = &sb;
	nfs4_ntov_table_init(&ntov);
	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
	    NFS4ATTR_SETIT);
	if (status != NFS4_OK) {
		/*
		 * failed set attrs
		 */
		goto done;
	}
	if ((sarg.vap->va_mask == 0) &&
	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
		/*
		 * no further work to be done
		 */
		goto done;
	}

	/*
	 * If we got a request to set the ACL and the MODE, only
	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
	 * to change any other bits, along with setting an ACL,
	 * gives NFS4ERR_INVAL.
	 */
	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
		vattr_t va;

		va.va_mask = AT_MODE;
		error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
		if (error) {
			status = puterrno4(error);
			goto done;
		}
		/* any differing bit outside VSUID/VSGID/VSVTX is rejected */
		if ((sarg.vap->va_mode ^ va.va_mode) &
		    ~(VSUID | VSGID | VSVTX)) {
			status = NFS4ERR_INVAL;
			goto done;
		}
	}

	/* Check stateid only if size has been set */
	if (sarg.vap->va_mask & AT_SIZE) {
		trunc = (sarg.vap->va_size == 0);
		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
		    trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
		if (status != NFS4_OK)
			goto done;
	} else {
		/* no stateid check: fill in the caller context by hand */
		ct.cc_sysid = 0;
		ct.cc_pid = 0;
		ct.cc_caller_id = nfs4_srv_caller_id;
		ct.cc_flags = CC_DONTBLOCK;
	}

	/* XXX start of possible race with delegations */

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with read-only
	 * modes, but with the file opened for writing. If the client
	 * then tries to set the file size, e.g. ftruncate(3C),
	 * fcntl(F_FREESP), the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing it even though
	 * it should be allowed to do so.  To get around this, we do the
	 * access checking for ourselves and use VOP_SPACE which doesn't
	 * do the access checking.
	 * Also the client should not be allowed to change the file
	 * size if there is a conflicting non-blocking mandatory lock in
	 * the region of the change.
	 */
	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
		u_offset_t offset;
		ssize_t length;

		/*
		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
		 * before returning, sarg.vap->va_mask is used to
		 * generate the setattr reply bitmap.  We also clear
		 * AT_SIZE below before calling VOP_SPACE.  For both
		 * of these cases, the va_mask needs to be saved here
		 * and restored after calling VOP_SETATTR.
		 */
		saved_mask = sarg.vap->va_mask;

		/*
		 * Check any possible conflict due to NBMAND locks.
		 * Get into critical region before VOP_GETATTR, so the
		 * size attribute is valid when checking conflicts.
		 */
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID|AT_SIZE;
		if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
			status = puterrno4(error);
			goto done;
		}

		if (in_crit) {
			/*
			 * The region to check runs between the old and new
			 * end of file, whichever direction the size moves.
			 */
			if (sarg.vap->va_size < bva.va_size) {
				offset = sarg.vap->va_size;
				length = bva.va_size - sarg.vap->va_size;
			} else {
				offset = bva.va_size;
				length = sarg.vap->va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    &ct)) {
				status = NFS4ERR_LOCKED;
				goto done;
			}
		}

		/*
		 * Caller owns the file: change the size with VOP_SPACE and
		 * drop AT_SIZE so VOP_SETATTR below does not handle it too.
		 */
		if (crgetuid(cr) == bva.va_uid) {
			sarg.vap->va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)sarg.vap->va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;
			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)sarg.vap->va_size, cr, &ct);
		}
	}

	if (!error && sarg.vap->va_mask != 0)
		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);

	/* restore va_mask -- ufs_setattr clears AT_SIZE */
	if (saved_mask & AT_SIZE)
		sarg.vap->va_mask |= AT_SIZE;

	/*
	 * If an ACL was being set, it has been delayed until now,
	 * in order to set the mode (via the VOP_SETATTR() above) first.
	 */
	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
		int i;

		/* find the decoded ACL value recorded in the ntov table */
		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
			if (ntov.amap[i] == FATTR4_ACL)
				break;
		if (i < NFS4_MAXNUM_ATTRS) {
			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
			if (error == 0) {
				*resp |= FATTR4_ACL_MASK;
			} else if (error == ENOTSUP) {
				(void) rfs4_verify_attr(&sarg, resp, &ntov);
				status = NFS4ERR_ATTRNOTSUPP;
				goto done;
			}
		} else {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "do_rfs4_op_setattr: "
			    "unable to find ACL in fattr4"));
			error = EINVAL;
		}
	}

	if (error) {
		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			status = NFS4ERR_DELAY;
		else
			status = puterrno4(error);

		/*
		 * Set the response bitmap when setattr failed.
		 * If VOP_SETATTR partially succeeded, test by doing a
		 * VOP_GETATTR on the object and comparing the data
		 * to the setattr arguments.
		 */
		(void) rfs4_verify_attr(&sarg, resp, &ntov);
	} else {
		/*
		 * Force modified metadata out to stable storage.
		 */
		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		/*
		 * Set response bitmap
		 */
		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
	}

/* Return early and already have a NFSv4 error */
done:
	/*
	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
	 * conversion sets both readable and writeable NFS4 attrs
	 * for AT_MTIME and AT_ATIME.  The line below masks out
	 * unrequested attrs from the setattr result bitmap.  This
	 * is placed after the done: label to catch the ATTRNOTSUP
	 * case.
	 */
	*resp &= fattrp->attrmask;

	if (in_crit)
		nbl_end_crit(vp);

	nfs4_ntov_table_free(&ntov, &sarg);

	return (status);
}

/*
 * SETATTR operation.
 *
 * Performs the request-level checks (current filehandle present, vnode is
 * not a mount point, export not read-only, and on labeled systems the
 * client label check) and then hands the work to do_rfs4_op_setattr(),
 * whose status becomes the result of the operation.
 */
/* ARGSUSED */
static void
rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
	bslabel_t *clabel;

	DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
	    SETATTR4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to setattr on this vnode.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	resp->attrsset = 0;

	if (rdonly4(cs->exi, cs->vp, req)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	/* check label before setting attributes */
	if (is_system_labeled()) {
		ASSERT(req->rq_label != NULL);
		clabel = req->rq_label;
		DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
		    "got client label from request(1)",
		    struct svc_req *, req);
		/* clients labeled admin_low skip the equality check */
		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, cs->vp,
			    EQUALITY_CHECK)) {
				*cs->statusp = resp->status = NFS4ERR_ACCESS;
				goto out;
			}
		}
	}

	*cs->statusp = resp->status =
	    do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
	    &args->stateid);

out:
	DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
	    SETATTR4res *, resp);
}

/*
 * VERIFY operation.
 *
 * Decodes the supplied attributes with do_rfs4_set_attrs(NFS4ATTR_VERIT)
 * and compares the vattr/vfsstat-backed ones with rfs4_verify_attr().
 * NFS4_OK when everything matches, NFS4ERR_NOT_SAME on a mismatch,
 * otherwise the translated error.
 */
/* ARGSUSED */
static void
rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	/*
	 * verify and nverify are exactly the same, except that nverify
	 * succeeds when some argument changed, and verify succeeds when
	 * none changed.
	 */

	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;

	int error;
	struct nfs4_svgetit_arg sarg;
	struct statvfs64 sb;
	struct nfs4_ntov_table ntov;

	DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
	    VERIFY4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	sarg.sbp = &sb;
	nfs4_ntov_table_init(&ntov);
	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
	    &sarg, &ntov, NFS4ATTR_VERIT);
	if (resp->status != NFS4_OK) {
		/*
		 * do_rfs4_set_attrs will try to verify systemwide attrs,
		 * so could return -1 for "no match".
		 */
		if (resp->status == -1)
			resp->status = NFS4ERR_NOT_SAME;
		goto done;
	}
	error = rfs4_verify_attr(&sarg, NULL, &ntov);
	switch (error) {
	case 0:
		resp->status = NFS4_OK;
		break;
	case -1:
		resp->status = NFS4ERR_NOT_SAME;
		break;
	default:
		resp->status = puterrno4(error);
		break;
	}
done:
	*cs->statusp = resp->status;
	nfs4_ntov_table_free(&ntov, &sarg);
out:
	DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
	    VERIFY4res *, resp);
}

/*
 * NVERIFY operation.
 *
 * Same processing as VERIFY with the sense of the result inverted:
 * NFS4ERR_SAME when everything matches, NFS4_OK on a mismatch,
 * otherwise the translated error.
 */
/* ARGSUSED */
static void
rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	/*
	 * verify and nverify are exactly the same, except that nverify
	 * succeeds when some argument changed, and verify succeeds when
	 * none changed.
	 */

	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;

	int error;
	struct nfs4_svgetit_arg sarg;
	struct statvfs64 sb;
	struct nfs4_ntov_table ntov;

	DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
	    NVERIFY4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
		    NVERIFY4res *, resp);
		return;
	}
	sarg.sbp = &sb;
	nfs4_ntov_table_init(&ntov);
	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
	    &sarg, &ntov, NFS4ATTR_VERIT);
	if (resp->status != NFS4_OK) {
		/*
		 * do_rfs4_set_attrs will try to verify systemwide attrs,
		 * so could return -1 for "no match".
		 */
		if (resp->status == -1)
			resp->status = NFS4_OK;
		goto done;
	}
	error = rfs4_verify_attr(&sarg, NULL, &ntov);
	switch (error) {
	case 0:
		resp->status = NFS4ERR_SAME;
		break;
	case -1:
		resp->status = NFS4_OK;
		break;
	default:
		resp->status = puterrno4(error);
		break;
	}
done:
	*cs->statusp = resp->status;
	nfs4_ntov_table_free(&ntov, &sarg);

	DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
	    NVERIFY4res *, resp);
}

/*
 * XXX - This should live in an NFS header file.
*/
/* Size of the on-stack iovec array used by rfs4_op_write() */
#define	MAX_IOVECS	12

/*
 * WRITE operation.
 *
 * Writes args->data_len bytes at args->offset to the current filehandle.
 * Before any I/O the following are checked, in this order: a current
 * vnode exists, access was not denied, the stateid is valid for FWRITE,
 * no NBMAND lock conflicts with the range, the file's mode/uid can be
 * fetched, the export is not read-only, the vnode is a regular file,
 * the caller either owns the file or passes VOP_ACCESS(VWRITE), and
 * MANDLOCK() is not in effect for the file's mode.
 *
 * A zero-length write succeeds immediately.  Otherwise the data is
 * described by an iovec array built from args->mblk, args->rlist or
 * args->data_val (whichever is present, tested in that order) and
 * written with do_io().  An iovec array larger than MAX_IOVECS is
 * allocated with kmem_alloc() and freed before returning.
 */
/* ARGSUSED */
static void
rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	WRITE4args *args = &argop->nfs_argop4_u.opwrite;
	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
	int error;
	vnode_t *vp;
	struct vattr bva;
	u_offset_t rlimit;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];
	struct iovec *iovp;
	int iovcnt;
	int ioflag;
	cred_t *savecred, *cr;
	bool_t *deleg = &cs->deleg;
	nfsstat4 stat;
	int in_crit = 0;
	caller_context_t ct;

	DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
	    WRITE4args *, args);

	vp = cs->vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}
	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	cr = cs->cr;

	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
	    deleg, TRUE, &ct)) != NFS4_OK) {
		*cs->statusp = resp->status = stat;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE,
		    args->offset, args->data_len, 0, &ct)) {
			*cs->statusp = resp->status = NFS4ERR_LOCKED;
			goto out;
		}
	}

	bva.va_mask = AT_MODE | AT_UID;
	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (rdonly4(cs->exi, cs->vp, req)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	if (vp->v_type != VREG) {
		*cs->statusp = resp->status =
		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
		goto out;
	}

	/* the file's owner is not subjected to the VWRITE access check */
	if (crgetuid(cr) != bva.va_uid &&
	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (MANDLOCK(vp, bva.va_mode)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	if (args->data_len == 0) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->count = 0;
		resp->committed = args->stable;
		resp->writeverf = Write4verf;
		goto out;
	}

	if (args->mblk != NULL) {
		mblk_t *m;
		uint_t bytes, round_len;

		/*
		 * Count the mblks needed to cover the XDR-rounded data
		 * length; each one becomes an iovec.
		 */
		iovcnt = 0;
		bytes = 0;
		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
		for (m = args->mblk;
		    m != NULL && bytes < round_len;
		    m = m->b_cont) {
			iovcnt++;
			bytes += MBLKL(m);
		}
#ifdef DEBUG
		/* should have ended on an mblk boundary */
		if (bytes != round_len) {
			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
			    bytes, round_len, args->data_len);
			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
			    (void *)args->mblk, (void *)m);
			ASSERT(bytes == round_len);
		}
#endif
		if (iovcnt <= MAX_IOVECS) {
			iovp = iov;
		} else {
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(args->mblk, iovcnt, iovp);
	} else if (args->rlist != NULL) {
		/* NOTE(review): rlist is presumably the RDMA chunk list */
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
		iovp->iov_len = args->data_len;
	} else {
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = args->data_val;
		iovp->iov_len = args->data_len;
	}

	uio.uio_iov = iovp;
	uio.uio_iovcnt = iovcnt;

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_DEFAULT;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->data_len;
	uio.uio_llimit = curproc->p_fsz_ctl;
	/* clip the request so it does not run past the file size limit */
	rlimit = uio.uio_llimit - args->offset;
	if (rlimit < (u_offset_t)uio.uio_resid)
		uio.uio_resid = (int)rlimit;

	if (args->stable == UNSTABLE4)
		ioflag = 0;
	else if (args->stable == FILE_SYNC4)
		ioflag = FSYNC;
	else if (args->stable == DATA_SYNC4)
		ioflag = FDSYNC;
	else {
		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	/*
	 * We're changing creds because VM may fault and we need
	 * the cred of the current thread to be used if quota
	 * checking is enabled.
	 */
	savecred = curthread->t_cred;
	curthread->t_cred = cr;
	error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
	curthread->t_cred = savecred;

	if (iovp != iov)
		kmem_free(iovp, sizeof (*iovp) * iovcnt);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	*cs->statusp = resp->status = NFS4_OK;
	resp->count = args->data_len - uio.uio_resid;

	/* any synchronous request (FSYNC or FDSYNC) reports FILE_SYNC4 */
	if (ioflag == 0)
		resp->committed = UNSTABLE4;
	else
		resp->committed = FILE_SYNC4;

	resp->writeverf = Write4verf;

out:
	if (in_crit)
		nbl_end_crit(vp);

	DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
	    WRITE4res *, resp);
}


/* XXX put in a header file */
extern int	sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);

/*
 * Execute an NFSv4 COMPOUND request.
 *
 * args		decoded COMPOUND arguments (tag, minorversion, op array)
 * resp		reply to fill in; the tag is copied from the request and
 *		resp->array is allocated here
 * exi, cr	asserted to be NULL on entry once the minor version has
 *		been accepted; cr is replaced by a cred from crget()
 * req		the RPC request
 * rv		if non-NULL, set to 0 on entry and to 1 when the
 *		credentials cannot be extracted (svcerr_badcred() is sent)
 *
 * A minor version other than NFS4_MINORVERSION yields an empty result
 * array and NFS4ERR_MINOR_VERS_MISMATCH.  Otherwise the ops are dispatched
 * in order through rfsv4disptab[] while holding exported_lock as reader.
 * Processing stops after the first op that leaves a status other than
 * NFS4_OK, and the result array is then shrunk to the ops actually
 * executed.  The vnode holds, saved filehandle buffer, creds and request
 * label accumulated in the compound state are released before returning.
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr, int *rv)
{
	uint_t i;
	struct compound_state cs;

	if (rv != NULL)
		*rv = 0;
	rfs4_init_compound_state(&cs);
	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_val =
	    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
	resp->tag.utf8string_len = args->tag.utf8string_len;
	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
	    resp->tag.utf8string_len);

	cs.statusp = &resp->status;
	cs.req = req;

	/*
	 * XXX for now, minorversion should be zero
	 */
	if (args->minorversion != NFS4_MINORVERSION) {
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    &cs, COMPOUND4args *, args);
		resp->array_len = 0;
		resp->array = NULL;
		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    &cs, COMPOUND4res *, resp);
		return;
	}

	ASSERT(exi == NULL);
	ASSERT(cr == NULL);

	cr = crget();
	ASSERT(cr != NULL);

	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    &cs, COMPOUND4args *, args);
		crfree(cr);
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    &cs, COMPOUND4res *, resp);
		svcerr_badcred(req->rq_xprt);
		if (rv != NULL)
			*rv = 1;
		return;
	}
	resp->array_len = args->array_len;
	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
	    KM_SLEEP);

	cs.basecr = cr;

	DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
	    COMPOUND4args *, args);

	/*
	 * For now, NFS4 compound processing must be protected by
	 * exported_lock because it can access more than one exportinfo
	 * per compound and share/unshare can now change multiple
	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
	 * per proc (excluding public exinfo), and exi_count design
	 * is sufficient to protect concurrent execution of NFS2/3
	 * ops along with unexport.  This lock will be removed as
	 * part of the NFSv4 phase 2 namespace redesign work.
	 */
	rw_enter(&exported_lock, RW_READER);

	/*
	 * If this is the first compound we've seen, we need to start all
	 * new instances' grace periods.
	 */
	if (rfs4_seen_first_compound == 0) {
		rfs4_grace_start_new();
		/*
		 * This must be set after rfs4_grace_start_new(), otherwise
		 * another thread could proceed past here before the former
		 * is finished.
		 */
		rfs4_seen_first_compound = 1;
	}

	for (i = 0; i < args->array_len && cs.cont; i++) {
		nfs_argop4 *argop;
		nfs_resop4 *resop;
		uint_t op;

		argop = &args->array[i];
		resop = &resp->array[i];
		resop->resop = argop->argop;
		op = (uint_t)resop->resop;

		if (op < rfsv4disp_cnt) {
			/*
			 * Count the individual ops here; NULL and COMPOUND
			 * are counted in common_dispatch()
			 */
			rfsproccnt_v4_ptr[op].value.ui64++;

			NFS4_DEBUG(rfs4_debug > 1,
			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
			    rfs4_op_string[op], *cs.statusp));
			if (*cs.statusp != NFS4_OK)
				cs.cont = FALSE;
		} else {
			/*
			 * This is effectively dead code since XDR code
			 * will have already returned BADXDR if op doesn't
			 * decode to legal value.  This only done for a
			 * day when XDR code doesn't verify v4 opcodes.
			 */
			op = OP_ILLEGAL;
			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;

			rfs4_op_illegal(argop, resop, req, &cs);
			cs.cont = FALSE;
		}

		/*
		 * If not at last op, and if we are to stop, then
		 * compact the results array.
		 */
		if ((i + 1) < args->array_len && !cs.cont) {
			nfs_resop4 *new_res = kmem_alloc(
			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
			bcopy(resp->array,
			    new_res, (i+1) * sizeof (nfs_resop4));
			/* the old array was sized for every requested op */
			kmem_free(resp->array,
			    args->array_len * sizeof (nfs_resop4));

			resp->array_len =  i + 1;
			resp->array = new_res;
		}
	}

	rw_exit(&exported_lock);

	DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
	    COMPOUND4res *, resp);

	if (cs.vp)
		VN_RELE(cs.vp);
	if (cs.saved_vp)
		VN_RELE(cs.saved_vp);
	if (cs.saved_fh.nfs_fh4_val)
		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);

	if (cs.basecr)
		crfree(cs.basecr);
	if (cs.cr)
		crfree(cs.cr);
	/*
	 * done with this compound request, free the label
	 */

	if (req->rq_label != NULL) {
		kmem_free(req->rq_label, sizeof (bslabel_t));
		req->rq_label = NULL;
	}
}

/*
 * XXX because of what appears to be duplicate calls to rfs4_compound_free
 * XXX zero out the tag and array values. Need to investigate why the
 * XXX calls occur, but at least prevent the panic for now.
5823 */ 5824 void 5825 rfs4_compound_free(COMPOUND4res *resp) 5826 { 5827 uint_t i; 5828 5829 if (resp->tag.utf8string_val) { 5830 UTF8STRING_FREE(resp->tag) 5831 } 5832 5833 for (i = 0; i < resp->array_len; i++) { 5834 nfs_resop4 *resop; 5835 uint_t op; 5836 5837 resop = &resp->array[i]; 5838 op = (uint_t)resop->resop; 5839 if (op < rfsv4disp_cnt) { 5840 (*rfsv4disptab[op].dis_resfree)(resop); 5841 } 5842 } 5843 if (resp->array != NULL) { 5844 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4)); 5845 } 5846 } 5847 5848 /* 5849 * Process the value of the compound request rpc flags, as a bit-AND 5850 * of the individual per-op flags (idempotent, allowork, publicfh_ok) 5851 */ 5852 void 5853 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp) 5854 { 5855 int i; 5856 int flag = RPC_ALL; 5857 5858 for (i = 0; flag && i < args->array_len; i++) { 5859 uint_t op; 5860 5861 op = (uint_t)args->array[i].argop; 5862 5863 if (op < rfsv4disp_cnt) 5864 flag &= rfsv4disptab[op].dis_flags; 5865 else 5866 flag = 0; 5867 } 5868 *flagp = flag; 5869 } 5870 5871 nfsstat4 5872 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp) 5873 { 5874 nfsstat4 e; 5875 5876 rfs4_dbe_lock(cp->dbe); 5877 5878 if (cp->sysidt != LM_NOSYSID) { 5879 *sp = cp->sysidt; 5880 e = NFS4_OK; 5881 5882 } else if ((cp->sysidt = lm_alloc_sysidt()) != LM_NOSYSID) { 5883 *sp = cp->sysidt; 5884 e = NFS4_OK; 5885 5886 NFS4_DEBUG(rfs4_debug, (CE_NOTE, 5887 "rfs4_client_sysid: allocated 0x%x\n", *sp)); 5888 } else 5889 e = NFS4ERR_DELAY; 5890 5891 rfs4_dbe_unlock(cp->dbe); 5892 return (e); 5893 } 5894 5895 #if defined(DEBUG) && ! 
defined(lint) 5896 static void lock_print(char *str, int operation, struct flock64 *flk) 5897 { 5898 char *op, *type; 5899 5900 switch (operation) { 5901 case F_GETLK: op = "F_GETLK"; 5902 break; 5903 case F_SETLK: op = "F_SETLK"; 5904 break; 5905 case F_SETLK_NBMAND: op = "F_SETLK_NBMAND"; 5906 break; 5907 default: op = "F_UNKNOWN"; 5908 break; 5909 } 5910 switch (flk->l_type) { 5911 case F_UNLCK: type = "F_UNLCK"; 5912 break; 5913 case F_RDLCK: type = "F_RDLCK"; 5914 break; 5915 case F_WRLCK: type = "F_WRLCK"; 5916 break; 5917 default: type = "F_UNKNOWN"; 5918 break; 5919 } 5920 5921 ASSERT(flk->l_whence == 0); 5922 cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d", 5923 str, op, type, (longlong_t)flk->l_start, 5924 flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid); 5925 } 5926 5927 #define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f) 5928 #else 5929 #define LOCK_PRINT(d, s, t, f) 5930 #endif 5931 5932 /*ARGSUSED*/ 5933 static bool_t 5934 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs) 5935 { 5936 return (TRUE); 5937 } 5938 5939 /* 5940 * Look up the pathname using the vp in cs as the directory vnode. 
5941 * cs->vp will be the vnode for the file on success 5942 */ 5943 5944 static nfsstat4 5945 rfs4_lookup(component4 *component, struct svc_req *req, 5946 struct compound_state *cs) 5947 { 5948 char *nm; 5949 uint32_t len; 5950 nfsstat4 status; 5951 5952 if (cs->vp == NULL) { 5953 return (NFS4ERR_NOFILEHANDLE); 5954 } 5955 if (cs->vp->v_type != VDIR) { 5956 return (NFS4ERR_NOTDIR); 5957 } 5958 5959 if (!utf8_dir_verify(component)) 5960 return (NFS4ERR_INVAL); 5961 5962 nm = utf8_to_fn(component, &len, NULL); 5963 if (nm == NULL) { 5964 return (NFS4ERR_INVAL); 5965 } 5966 5967 if (len > MAXNAMELEN) { 5968 kmem_free(nm, len); 5969 return (NFS4ERR_NAMETOOLONG); 5970 } 5971 5972 status = do_rfs4_op_lookup(nm, len, req, cs); 5973 5974 kmem_free(nm, len); 5975 5976 return (status); 5977 } 5978 5979 static nfsstat4 5980 rfs4_lookupfile(component4 *component, struct svc_req *req, 5981 struct compound_state *cs, uint32_t access, 5982 change_info4 *cinfo) 5983 { 5984 nfsstat4 status; 5985 vnode_t *dvp = cs->vp; 5986 vattr_t bva, ava, fva; 5987 int error; 5988 5989 /* Get "before" change value */ 5990 bva.va_mask = AT_CTIME|AT_SEQ; 5991 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL); 5992 if (error) 5993 return (puterrno4(error)); 5994 5995 /* rfs4_lookup may VN_RELE directory */ 5996 VN_HOLD(dvp); 5997 5998 status = rfs4_lookup(component, req, cs); 5999 if (status != NFS4_OK) { 6000 VN_RELE(dvp); 6001 return (status); 6002 } 6003 6004 /* 6005 * Get "after" change value, if it fails, simply return the 6006 * before value. 
6007 */ 6008 ava.va_mask = AT_CTIME|AT_SEQ; 6009 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) { 6010 ava.va_ctime = bva.va_ctime; 6011 ava.va_seq = 0; 6012 } 6013 VN_RELE(dvp); 6014 6015 /* 6016 * Validate the file is a file 6017 */ 6018 fva.va_mask = AT_TYPE|AT_MODE; 6019 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL); 6020 if (error) 6021 return (puterrno4(error)); 6022 6023 if (fva.va_type != VREG) { 6024 if (fva.va_type == VDIR) 6025 return (NFS4ERR_ISDIR); 6026 if (fva.va_type == VLNK) 6027 return (NFS4ERR_SYMLINK); 6028 return (NFS4ERR_INVAL); 6029 } 6030 6031 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime); 6032 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime); 6033 6034 /* 6035 * It is undefined if VOP_LOOKUP will change va_seq, so 6036 * cinfo.atomic = TRUE only if we have 6037 * non-zero va_seq's, and they have not changed. 6038 */ 6039 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq) 6040 cinfo->atomic = TRUE; 6041 else 6042 cinfo->atomic = FALSE; 6043 6044 /* Check for mandatory locking */ 6045 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode); 6046 return (check_open_access(access, cs, req)); 6047 } 6048 6049 static nfsstat4 6050 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode, 6051 timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created) 6052 { 6053 int error; 6054 nfsstat4 status = NFS4_OK; 6055 vattr_t va; 6056 6057 tryagain: 6058 6059 /* 6060 * The file open mode used is VWRITE. If the client needs 6061 * some other semantic, then it should do the access checking 6062 * itself. It would have been nice to have the file open mode 6063 * passed as part of the arguments. 6064 */ 6065 6066 *created = TRUE; 6067 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL); 6068 6069 if (error) { 6070 *created = FALSE; 6071 6072 /* 6073 * If we got something other than file already exists 6074 * then just return this error. Otherwise, we got 6075 * EEXIST. 
If we were doing a GUARDED create, then 6076 * just return this error. Otherwise, we need to 6077 * make sure that this wasn't a duplicate of an 6078 * exclusive create request. 6079 * 6080 * The assumption is made that a non-exclusive create 6081 * request will never return EEXIST. 6082 */ 6083 6084 if (error != EEXIST || mode == GUARDED4) { 6085 status = puterrno4(error); 6086 return (status); 6087 } 6088 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr, 6089 NULL, NULL, NULL); 6090 6091 if (error) { 6092 /* 6093 * We couldn't find the file that we thought that 6094 * we just created. So, we'll just try creating 6095 * it again. 6096 */ 6097 if (error == ENOENT) 6098 goto tryagain; 6099 6100 status = puterrno4(error); 6101 return (status); 6102 } 6103 6104 if (mode == UNCHECKED4) { 6105 /* existing object must be regular file */ 6106 if ((*vpp)->v_type != VREG) { 6107 if ((*vpp)->v_type == VDIR) 6108 status = NFS4ERR_ISDIR; 6109 else if ((*vpp)->v_type == VLNK) 6110 status = NFS4ERR_SYMLINK; 6111 else 6112 status = NFS4ERR_INVAL; 6113 VN_RELE(*vpp); 6114 return (status); 6115 } 6116 6117 return (NFS4_OK); 6118 } 6119 6120 /* Check for duplicate request */ 6121 ASSERT(mtime != 0); 6122 va.va_mask = AT_MTIME; 6123 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL); 6124 if (!error) { 6125 /* We found the file */ 6126 if (va.va_mtime.tv_sec != mtime->tv_sec || 6127 va.va_mtime.tv_nsec != mtime->tv_nsec) { 6128 /* but its not our creation */ 6129 VN_RELE(*vpp); 6130 return (NFS4ERR_EXIST); 6131 } 6132 *created = TRUE; /* retrans of create == created */ 6133 return (NFS4_OK); 6134 } 6135 VN_RELE(*vpp); 6136 return (NFS4ERR_EXIST); 6137 } 6138 6139 return (NFS4_OK); 6140 } 6141 6142 static nfsstat4 6143 check_open_access(uint32_t access, 6144 struct compound_state *cs, struct svc_req *req) 6145 { 6146 int error; 6147 vnode_t *vp; 6148 bool_t readonly; 6149 cred_t *cr = cs->cr; 6150 6151 /* For now we don't allow mandatory locking as per V2/V3 */ 6152 if (cs->access == 
CS_ACCESS_DENIED || cs->mandlock) { 6153 return (NFS4ERR_ACCESS); 6154 } 6155 6156 vp = cs->vp; 6157 ASSERT(cr != NULL && vp->v_type == VREG); 6158 6159 /* 6160 * If the file system is exported read only and we are trying 6161 * to open for write, then return NFS4ERR_ROFS 6162 */ 6163 6164 readonly = rdonly4(cs->exi, cs->vp, req); 6165 6166 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly) 6167 return (NFS4ERR_ROFS); 6168 6169 if (access & OPEN4_SHARE_ACCESS_READ) { 6170 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) && 6171 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) { 6172 return (NFS4ERR_ACCESS); 6173 } 6174 } 6175 6176 if (access & OPEN4_SHARE_ACCESS_WRITE) { 6177 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 6178 if (error) 6179 return (NFS4ERR_ACCESS); 6180 } 6181 6182 return (NFS4_OK); 6183 } 6184 6185 static nfsstat4 6186 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs, 6187 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid) 6188 { 6189 struct nfs4_svgetit_arg sarg; 6190 struct nfs4_ntov_table ntov; 6191 6192 bool_t ntov_table_init = FALSE; 6193 struct statvfs64 sb; 6194 nfsstat4 status; 6195 vnode_t *vp; 6196 vattr_t bva, ava, iva, cva, *vap; 6197 vnode_t *dvp; 6198 timespec32_t *mtime; 6199 char *nm = NULL; 6200 uint_t buflen; 6201 bool_t created; 6202 bool_t setsize = FALSE; 6203 len_t reqsize; 6204 int error; 6205 bool_t trunc; 6206 caller_context_t ct; 6207 component4 *component; 6208 bslabel_t *clabel; 6209 struct sockaddr *ca; 6210 char *name = NULL; 6211 6212 sarg.sbp = &sb; 6213 6214 dvp = cs->vp; 6215 6216 /* Check if the file system is read only */ 6217 if (rdonly4(cs->exi, dvp, req)) 6218 return (NFS4ERR_ROFS); 6219 6220 /* check the label of including directory */ 6221 if (is_system_labeled()) { 6222 ASSERT(req->rq_label != NULL); 6223 clabel = req->rq_label; 6224 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *, 6225 "got client label from request(1)", 6226 struct svc_req *, req); 6227 if 
(!blequal(&l_admin_low->tsl_label, clabel)) { 6228 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) { 6229 return (NFS4ERR_ACCESS); 6230 } 6231 } 6232 } 6233 6234 /* 6235 * Get the last component of path name in nm. cs will reference 6236 * the including directory on success. 6237 */ 6238 component = &args->open_claim4_u.file; 6239 if (!utf8_dir_verify(component)) 6240 return (NFS4ERR_INVAL); 6241 6242 nm = utf8_to_fn(component, &buflen, NULL); 6243 6244 if (nm == NULL) 6245 return (NFS4ERR_RESOURCE); 6246 6247 if (buflen > MAXNAMELEN) { 6248 kmem_free(nm, buflen); 6249 return (NFS4ERR_NAMETOOLONG); 6250 } 6251 6252 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ; 6253 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL); 6254 if (error) { 6255 kmem_free(nm, buflen); 6256 return (puterrno4(error)); 6257 } 6258 6259 if (bva.va_type != VDIR) { 6260 kmem_free(nm, buflen); 6261 return (NFS4ERR_NOTDIR); 6262 } 6263 6264 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime) 6265 6266 switch (args->mode) { 6267 case GUARDED4: 6268 /*FALLTHROUGH*/ 6269 case UNCHECKED4: 6270 nfs4_ntov_table_init(&ntov); 6271 ntov_table_init = TRUE; 6272 6273 *attrset = 0; 6274 status = do_rfs4_set_attrs(attrset, 6275 &args->createhow4_u.createattrs, 6276 cs, &sarg, &ntov, NFS4ATTR_SETIT); 6277 6278 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) && 6279 sarg.vap->va_type != VREG) { 6280 if (sarg.vap->va_type == VDIR) 6281 status = NFS4ERR_ISDIR; 6282 else if (sarg.vap->va_type == VLNK) 6283 status = NFS4ERR_SYMLINK; 6284 else 6285 status = NFS4ERR_INVAL; 6286 } 6287 6288 if (status != NFS4_OK) { 6289 kmem_free(nm, buflen); 6290 nfs4_ntov_table_free(&ntov, &sarg); 6291 *attrset = 0; 6292 return (status); 6293 } 6294 6295 vap = sarg.vap; 6296 vap->va_type = VREG; 6297 vap->va_mask |= AT_TYPE; 6298 6299 if ((vap->va_mask & AT_MODE) == 0) { 6300 vap->va_mask |= AT_MODE; 6301 vap->va_mode = (mode_t)0600; 6302 } 6303 6304 if (vap->va_mask & AT_SIZE) { 6305 6306 /* Disallow create with a non-zero size 
*/ 6307 6308 if ((reqsize = sarg.vap->va_size) != 0) { 6309 kmem_free(nm, buflen); 6310 nfs4_ntov_table_free(&ntov, &sarg); 6311 *attrset = 0; 6312 return (NFS4ERR_INVAL); 6313 } 6314 setsize = TRUE; 6315 } 6316 break; 6317 6318 case EXCLUSIVE4: 6319 /* prohibit EXCL create of named attributes */ 6320 if (dvp->v_flag & V_XATTRDIR) { 6321 kmem_free(nm, buflen); 6322 *attrset = 0; 6323 return (NFS4ERR_INVAL); 6324 } 6325 6326 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE; 6327 cva.va_type = VREG; 6328 /* 6329 * Ensure no time overflows. Assumes underlying 6330 * filesystem supports at least 32 bits. 6331 * Truncate nsec to usec resolution to allow valid 6332 * compares even if the underlying filesystem truncates. 6333 */ 6334 mtime = (timespec32_t *)&args->createhow4_u.createverf; 6335 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX; 6336 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000; 6337 cva.va_mode = (mode_t)0; 6338 vap = &cva; 6339 break; 6340 } 6341 6342 /* If necessary, convert to UTF-8 for illbehaved clients */ 6343 6344 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 6345 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 6346 MAXPATHLEN + 1); 6347 6348 if (name == NULL) { 6349 kmem_free(nm, buflen); 6350 return (NFS4ERR_SERVERFAULT); 6351 } 6352 6353 status = create_vnode(dvp, name, vap, args->mode, mtime, 6354 cs->cr, &vp, &created); 6355 if (nm != name) 6356 kmem_free(name, MAXPATHLEN + 1); 6357 kmem_free(nm, buflen); 6358 6359 if (status != NFS4_OK) { 6360 if (ntov_table_init) 6361 nfs4_ntov_table_free(&ntov, &sarg); 6362 *attrset = 0; 6363 return (status); 6364 } 6365 6366 trunc = (setsize && !created); 6367 6368 if (args->mode != EXCLUSIVE4) { 6369 bitmap4 createmask = args->createhow4_u.createattrs.attrmask; 6370 6371 /* 6372 * True verification that object was created with correct 6373 * attrs is impossible. The attrs could have been changed 6374 * immediately after object creation. 
If attributes did 6375 * not verify, the only recourse for the server is to 6376 * destroy the object. Maybe if some attrs (like gid) 6377 * are set incorrectly, the object should be destroyed; 6378 * however, seems bad as a default policy. Do we really 6379 * want to destroy an object over one of the times not 6380 * verifying correctly? For these reasons, the server 6381 * currently sets bits in attrset for createattrs 6382 * that were set; however, no verification is done. 6383 * 6384 * vmask_to_nmask accounts for vattr bits set on create 6385 * [do_rfs4_set_attrs() only sets resp bits for 6386 * non-vattr/vfs bits.] 6387 * Mask off any bits we set by default so as not to return 6388 * more attrset bits than were requested in createattrs 6389 */ 6390 if (created) { 6391 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset); 6392 *attrset &= createmask; 6393 } else { 6394 /* 6395 * We did not create the vnode (we tried but it 6396 * already existed). In this case, the only createattr 6397 * that the spec allows the server to set is size, 6398 * and even then, it can only be set if it is 0. 6399 */ 6400 *attrset = 0; 6401 if (trunc) 6402 *attrset = FATTR4_SIZE_MASK; 6403 } 6404 } 6405 if (ntov_table_init) 6406 nfs4_ntov_table_free(&ntov, &sarg); 6407 6408 /* 6409 * Get the initial "after" sequence number, if it fails, 6410 * set to zero, time to before. 6411 */ 6412 iva.va_mask = AT_CTIME|AT_SEQ; 6413 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) { 6414 iva.va_seq = 0; 6415 iva.va_ctime = bva.va_ctime; 6416 } 6417 6418 /* 6419 * create_vnode attempts to create the file exclusive, 6420 * if it already exists the VOP_CREATE will fail and 6421 * may not increase va_seq. It is atomic if 6422 * we haven't changed the directory, but if it has changed 6423 * we don't know what changed it. 
6424 */ 6425 if (!created) { 6426 if (bva.va_seq && iva.va_seq && 6427 bva.va_seq == iva.va_seq) 6428 cinfo->atomic = TRUE; 6429 else 6430 cinfo->atomic = FALSE; 6431 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime); 6432 } else { 6433 /* 6434 * The entry was created, we need to sync the 6435 * directory metadata. 6436 */ 6437 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 6438 6439 /* 6440 * Get "after" change value, if it fails, simply return the 6441 * before value. 6442 */ 6443 ava.va_mask = AT_CTIME|AT_SEQ; 6444 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) { 6445 ava.va_ctime = bva.va_ctime; 6446 ava.va_seq = 0; 6447 } 6448 6449 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime); 6450 6451 /* 6452 * The cinfo->atomic = TRUE only if we have 6453 * non-zero va_seq's, and it has incremented by exactly one 6454 * during the create_vnode and it didn't 6455 * change during the VOP_FSYNC. 6456 */ 6457 if (bva.va_seq && iva.va_seq && ava.va_seq && 6458 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq) 6459 cinfo->atomic = TRUE; 6460 else 6461 cinfo->atomic = FALSE; 6462 } 6463 6464 /* Check for mandatory locking and that the size gets set. */ 6465 cva.va_mask = AT_MODE; 6466 if (setsize) 6467 cva.va_mask |= AT_SIZE; 6468 6469 /* Assume the worst */ 6470 cs->mandlock = TRUE; 6471 6472 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) { 6473 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode); 6474 6475 /* 6476 * Truncate the file if necessary; this would be 6477 * the case for create over an existing file. 6478 */ 6479 6480 if (trunc) { 6481 int in_crit = 0; 6482 rfs4_file_t *fp; 6483 bool_t create = FALSE; 6484 6485 /* 6486 * We are writing over an existing file. 6487 * Check to see if we need to recall a delegation. 
6488 */ 6489 rfs4_hold_deleg_policy(); 6490 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) { 6491 if (rfs4_check_delegated_byfp(FWRITE, fp, 6492 (reqsize == 0), FALSE, FALSE, &clientid)) { 6493 rfs4_file_rele(fp); 6494 rfs4_rele_deleg_policy(); 6495 VN_RELE(vp); 6496 *attrset = 0; 6497 return (NFS4ERR_DELAY); 6498 } 6499 rfs4_file_rele(fp); 6500 } 6501 rfs4_rele_deleg_policy(); 6502 6503 if (nbl_need_check(vp)) { 6504 in_crit = 1; 6505 6506 ASSERT(reqsize == 0); 6507 6508 nbl_start_crit(vp, RW_READER); 6509 if (nbl_conflict(vp, NBL_WRITE, 0, 6510 cva.va_size, 0, NULL)) { 6511 in_crit = 0; 6512 nbl_end_crit(vp); 6513 VN_RELE(vp); 6514 *attrset = 0; 6515 return (NFS4ERR_ACCESS); 6516 } 6517 } 6518 ct.cc_sysid = 0; 6519 ct.cc_pid = 0; 6520 ct.cc_caller_id = nfs4_srv_caller_id; 6521 ct.cc_flags = CC_DONTBLOCK; 6522 6523 cva.va_mask = AT_SIZE; 6524 cva.va_size = reqsize; 6525 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct); 6526 if (in_crit) 6527 nbl_end_crit(vp); 6528 } 6529 } 6530 6531 error = makefh4(&cs->fh, vp, cs->exi); 6532 6533 /* 6534 * Force modified data and metadata out to stable storage. 
6535 */ 6536 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL); 6537 6538 if (error) { 6539 VN_RELE(vp); 6540 *attrset = 0; 6541 return (puterrno4(error)); 6542 } 6543 6544 /* if parent dir is attrdir, set namedattr fh flag */ 6545 if (dvp->v_flag & V_XATTRDIR) 6546 set_fh4_flag(&cs->fh, FH4_NAMEDATTR); 6547 6548 if (cs->vp) 6549 VN_RELE(cs->vp); 6550 6551 cs->vp = vp; 6552 6553 /* 6554 * if we did not create the file, we will need to check 6555 * the access bits on the file 6556 */ 6557 6558 if (!created) { 6559 if (setsize) 6560 args->share_access |= OPEN4_SHARE_ACCESS_WRITE; 6561 status = check_open_access(args->share_access, cs, req); 6562 if (status != NFS4_OK) 6563 *attrset = 0; 6564 } 6565 return (status); 6566 } 6567 6568 /*ARGSUSED*/ 6569 static void 6570 rfs4_do_open(struct compound_state *cs, struct svc_req *req, 6571 rfs4_openowner_t *oo, delegreq_t deleg, 6572 uint32_t access, uint32_t deny, 6573 OPEN4res *resp, int deleg_cur) 6574 { 6575 /* XXX Currently not using req */ 6576 rfs4_state_t *state; 6577 rfs4_file_t *file; 6578 bool_t screate = TRUE; 6579 bool_t fcreate = TRUE; 6580 uint32_t amodes; 6581 uint32_t dmodes; 6582 rfs4_deleg_state_t *dsp; 6583 struct shrlock shr; 6584 struct shr_locowner shr_loco; 6585 sysid_t sysid; 6586 nfsstat4 status; 6587 caller_context_t ct; 6588 int fflags = 0; 6589 int recall = 0; 6590 int err; 6591 int cmd; 6592 6593 /* get the file struct and hold a lock on it during initial open */ 6594 file = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate); 6595 if (file == NULL) { 6596 NFS4_DEBUG(rfs4_debug, 6597 (CE_NOTE, "rfs4_do_open: can't find file")); 6598 resp->status = NFS4ERR_SERVERFAULT; 6599 return; 6600 } 6601 6602 state = rfs4_findstate_by_owner_file(oo, file, &screate); 6603 if (state == NULL) { 6604 NFS4_DEBUG(rfs4_debug, 6605 (CE_NOTE, "rfs4_do_open: can't find state")); 6606 resp->status = NFS4ERR_RESOURCE; 6607 /* No need to keep any reference */ 6608 rfs4_file_rele_withunlock(file); 6609 return; 6610 } 6611 6612 
/* try to get the sysid before continuing */ 6613 if ((status = rfs4_client_sysid(oo->client, &sysid)) != NFS4_OK) { 6614 resp->status = status; 6615 rfs4_file_rele(file); 6616 /* Not a fully formed open; "close" it */ 6617 if (screate == TRUE) 6618 rfs4_state_close(state, FALSE, FALSE, cs->cr); 6619 rfs4_state_rele(state); 6620 return; 6621 } 6622 6623 /* Calculate the fflags for this OPEN. */ 6624 if (access & OPEN4_SHARE_ACCESS_READ) 6625 fflags |= FREAD; 6626 if (access & OPEN4_SHARE_ACCESS_WRITE) 6627 fflags |= FWRITE; 6628 6629 /* 6630 * Calculate the new deny and access mode that this open is adding to 6631 * the file for this open owner; 6632 */ 6633 dmodes = (deny & ~state->share_deny); 6634 amodes = (access & ~state->share_access); 6635 6636 /* 6637 * Check to see the client has already sent an open for this 6638 * open owner on this file with the same share/deny modes. 6639 * If so, we don't need to check for a conflict and we don't 6640 * need to add another shrlock. If not, then we need to 6641 * check for conflicts in deny and access before checking for 6642 * conflicts in delegation. We don't want to recall a 6643 * delegation based on an open that will eventually fail based 6644 * on shares modes. 6645 */ 6646 6647 if (dmodes || amodes) { 6648 shr.s_access = (short)access; 6649 shr.s_deny = (short)deny; 6650 shr.s_pid = rfs4_dbe_getid(oo->dbe); 6651 shr.s_sysid = sysid; 6652 shr_loco.sl_pid = shr.s_pid; 6653 shr_loco.sl_id = shr.s_sysid; 6654 shr.s_owner = (caddr_t)&shr_loco; 6655 shr.s_own_len = sizeof (shr_loco); 6656 6657 cmd = nbl_need_check(cs->vp) ? F_SHARE_NBMAND : F_SHARE; 6658 if ((err = vop_shrlock(cs->vp, cmd, &shr, fflags)) != 0) { 6659 6660 resp->status = err == EAGAIN ? 
6661 NFS4ERR_SHARE_DENIED : puterrno4(err); 6662 6663 rfs4_file_rele(file); 6664 /* Not a fully formed open; "close" it */ 6665 if (screate == TRUE) 6666 rfs4_state_close(state, FALSE, FALSE, cs->cr); 6667 rfs4_state_rele(state); 6668 return; 6669 } 6670 } 6671 6672 rfs4_dbe_lock(state->dbe); 6673 rfs4_dbe_lock(file->dbe); 6674 6675 /* 6676 * Check to see if this file is delegated and if so, if a 6677 * recall needs to be done. 6678 */ 6679 if (rfs4_check_recall(state, access)) { 6680 rfs4_dbe_unlock(file->dbe); 6681 rfs4_dbe_unlock(state->dbe); 6682 rfs4_recall_deleg(file, FALSE, state->owner->client); 6683 delay(NFS4_DELEGATION_CONFLICT_DELAY); 6684 rfs4_dbe_lock(state->dbe); 6685 rfs4_dbe_lock(file->dbe); 6686 /* Let's see if the delegation was returned */ 6687 if (rfs4_check_recall(state, access)) { 6688 rfs4_dbe_unlock(file->dbe); 6689 rfs4_dbe_unlock(state->dbe); 6690 rfs4_file_rele(file); 6691 rfs4_update_lease(state->owner->client); 6692 (void) vop_shrlock(cs->vp, F_UNSHARE, &shr, fflags); 6693 /* Not a fully formed open; "close" it */ 6694 if (screate == TRUE) 6695 rfs4_state_close(state, FALSE, FALSE, cs->cr); 6696 rfs4_state_rele(state); 6697 resp->status = NFS4ERR_DELAY; 6698 return; 6699 } 6700 } 6701 /* 6702 * the share check passed and any delegation conflict has been 6703 * taken care of, now call vop_open. 6704 * if this is the first open then call vop_open with fflags. 6705 * if not, call vn_open_upgrade with just the upgrade flags. 6706 * 6707 * if the file has been opened already, it will have the current 6708 * access mode in the state struct. if it has no share access, then 6709 * this is a new open. 6710 * 6711 * However, if this is open with CLAIM_DLEGATE_CUR, then don't 6712 * call VOP_OPEN(), just do the open upgrade. 
6713 */ 6714 if (((state->share_access & OPEN4_SHARE_ACCESS_BOTH) == 0) && 6715 !deleg_cur) { 6716 ct.cc_sysid = sysid; 6717 ct.cc_pid = shr.s_pid; 6718 ct.cc_caller_id = nfs4_srv_caller_id; 6719 ct.cc_flags = CC_DONTBLOCK; 6720 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct); 6721 if (err) { 6722 rfs4_dbe_unlock(file->dbe); 6723 rfs4_dbe_unlock(state->dbe); 6724 rfs4_file_rele(file); 6725 (void) vop_shrlock(cs->vp, F_UNSHARE, &shr, fflags); 6726 /* Not a fully formed open; "close" it */ 6727 if (screate == TRUE) 6728 rfs4_state_close(state, FALSE, FALSE, cs->cr); 6729 rfs4_state_rele(state); 6730 /* check if a monitor detected a delegation conflict */ 6731 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 6732 resp->status = NFS4ERR_DELAY; 6733 else 6734 resp->status = NFS4ERR_SERVERFAULT; 6735 return; 6736 } 6737 } else { /* open upgrade */ 6738 /* 6739 * calculate the fflags for the new mode that is being added 6740 * by this upgrade. 6741 */ 6742 fflags = 0; 6743 if (amodes & OPEN4_SHARE_ACCESS_READ) 6744 fflags |= FREAD; 6745 if (amodes & OPEN4_SHARE_ACCESS_WRITE) 6746 fflags |= FWRITE; 6747 vn_open_upgrade(cs->vp, fflags); 6748 } 6749 6750 if (dmodes & OPEN4_SHARE_DENY_READ) 6751 file->deny_read++; 6752 if (dmodes & OPEN4_SHARE_DENY_WRITE) 6753 file->deny_write++; 6754 file->share_deny |= deny; 6755 state->share_deny |= deny; 6756 6757 if (amodes & OPEN4_SHARE_ACCESS_READ) 6758 file->access_read++; 6759 if (amodes & OPEN4_SHARE_ACCESS_WRITE) 6760 file->access_write++; 6761 file->share_access |= access; 6762 state->share_access |= access; 6763 6764 /* 6765 * Check for delegation here. if the deleg argument is not 6766 * DELEG_ANY, then this is a reclaim from a client and 6767 * we must honor the delegation requested. If necessary we can 6768 * set the recall flag. 
6769 */ 6770 6771 dsp = rfs4_grant_delegation(deleg, state, &recall); 6772 6773 cs->deleg = (file->dinfo->dtype == OPEN_DELEGATE_WRITE); 6774 6775 next_stateid(&state->stateid); 6776 6777 resp->stateid = state->stateid.stateid; 6778 6779 rfs4_dbe_unlock(file->dbe); 6780 rfs4_dbe_unlock(state->dbe); 6781 6782 if (dsp) { 6783 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall); 6784 rfs4_deleg_state_rele(dsp); 6785 } 6786 6787 rfs4_file_rele(file); 6788 rfs4_state_rele(state); 6789 6790 resp->status = NFS4_OK; 6791 } 6792 6793 /*ARGSUSED*/ 6794 static void 6795 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req, 6796 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 6797 { 6798 change_info4 *cinfo = &resp->cinfo; 6799 bitmap4 *attrset = &resp->attrset; 6800 6801 if (args->opentype == OPEN4_NOCREATE) 6802 resp->status = rfs4_lookupfile(&args->open_claim4_u.file, 6803 req, cs, args->share_access, cinfo); 6804 else { 6805 /* inhibit delegation grants during exclusive create */ 6806 6807 if (args->mode == EXCLUSIVE4) 6808 rfs4_disable_delegation(); 6809 6810 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset, 6811 oo->client->clientid); 6812 } 6813 6814 if (resp->status == NFS4_OK) { 6815 6816 /* cs->vp cs->fh now reference the desired file */ 6817 6818 rfs4_do_open(cs, req, oo, DELEG_ANY, args->share_access, 6819 args->share_deny, resp, 0); 6820 6821 /* 6822 * If rfs4_createfile set attrset, we must 6823 * clear this attrset before the response is copied. 
6824 */ 6825 if (resp->status != NFS4_OK && resp->attrset) { 6826 resp->attrset = 0; 6827 } 6828 } 6829 else 6830 *cs->statusp = resp->status; 6831 6832 if (args->mode == EXCLUSIVE4) 6833 rfs4_enable_delegation(); 6834 } 6835 6836 /*ARGSUSED*/ 6837 static void 6838 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req, 6839 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 6840 { 6841 change_info4 *cinfo = &resp->cinfo; 6842 vattr_t va; 6843 vtype_t v_type = cs->vp->v_type; 6844 int error = 0; 6845 6846 /* Verify that we have a regular file */ 6847 if (v_type != VREG) { 6848 if (v_type == VDIR) 6849 resp->status = NFS4ERR_ISDIR; 6850 else if (v_type == VLNK) 6851 resp->status = NFS4ERR_SYMLINK; 6852 else 6853 resp->status = NFS4ERR_INVAL; 6854 return; 6855 } 6856 6857 va.va_mask = AT_MODE|AT_UID; 6858 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL); 6859 if (error) { 6860 resp->status = puterrno4(error); 6861 return; 6862 } 6863 6864 cs->mandlock = MANDLOCK(cs->vp, va.va_mode); 6865 6866 /* 6867 * Check if we have access to the file, Note the the file 6868 * could have originally been open UNCHECKED or GUARDED 6869 * with mode bits that will now fail, but there is nothing 6870 * we can really do about that except in the case that the 6871 * owner of the file is the one requesting the open. 
6872 */ 6873 if (crgetuid(cs->cr) != va.va_uid) { 6874 resp->status = check_open_access(args->share_access, cs, req); 6875 if (resp->status != NFS4_OK) { 6876 return; 6877 } 6878 } 6879 6880 /* 6881 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero 6882 */ 6883 cinfo->before = 0; 6884 cinfo->after = 0; 6885 cinfo->atomic = FALSE; 6886 6887 rfs4_do_open(cs, req, oo, 6888 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type), 6889 args->share_access, args->share_deny, resp, 0); 6890 } 6891 6892 static void 6893 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req, 6894 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 6895 { 6896 int error; 6897 nfsstat4 status; 6898 stateid4 stateid = 6899 args->open_claim4_u.delegate_cur_info.delegate_stateid; 6900 rfs4_deleg_state_t *dsp; 6901 6902 /* 6903 * Find the state info from the stateid and confirm that the 6904 * file is delegated. If the state openowner is the same as 6905 * the supplied openowner we're done. If not, get the file 6906 * info from the found state info. Use that file info to 6907 * create the state for this lock owner. Note solaris doen't 6908 * really need the pathname to find the file. We may want to 6909 * lookup the pathname and make sure that the vp exist and 6910 * matches the vp in the file structure. However it is 6911 * possible that the pathname nolonger exists (local process 6912 * unlinks the file), so this may not be that useful. 6913 */ 6914 6915 status = rfs4_get_deleg_state(&stateid, &dsp); 6916 if (status != NFS4_OK) { 6917 resp->status = status; 6918 return; 6919 } 6920 6921 ASSERT(dsp->finfo->dinfo->dtype != OPEN_DELEGATE_NONE); 6922 6923 /* 6924 * New lock owner, create state. 
Since this was probably called 6925 * in response to a CB_RECALL we set deleg to DELEG_NONE 6926 */ 6927 6928 ASSERT(cs->vp != NULL); 6929 VN_RELE(cs->vp); 6930 VN_HOLD(dsp->finfo->vp); 6931 cs->vp = dsp->finfo->vp; 6932 6933 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) { 6934 rfs4_deleg_state_rele(dsp); 6935 *cs->statusp = resp->status = puterrno4(error); 6936 return; 6937 } 6938 6939 /* Mark progress for delegation returns */ 6940 dsp->finfo->dinfo->time_lastwrite = gethrestime_sec(); 6941 rfs4_deleg_state_rele(dsp); 6942 rfs4_do_open(cs, req, oo, DELEG_NONE, 6943 args->share_access, args->share_deny, resp, 1); 6944 } 6945 6946 /*ARGSUSED*/ 6947 static void 6948 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req, 6949 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 6950 { 6951 /* 6952 * Lookup the pathname, it must already exist since this file 6953 * was delegated. 6954 * 6955 * Find the file and state info for this vp and open owner pair. 6956 * check that they are in fact delegated. 6957 * check that the state access and deny modes are the same. 6958 * 6959 * Return the delgation possibly seting the recall flag. 
6960 */ 6961 rfs4_file_t *file; 6962 rfs4_state_t *state; 6963 bool_t create = FALSE; 6964 bool_t dcreate = FALSE; 6965 rfs4_deleg_state_t *dsp; 6966 nfsace4 *ace; 6967 6968 6969 /* Note we ignore oflags */ 6970 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev, 6971 req, cs, args->share_access, &resp->cinfo); 6972 6973 if (resp->status != NFS4_OK) { 6974 return; 6975 } 6976 6977 /* get the file struct and hold a lock on it during initial open */ 6978 file = rfs4_findfile_withlock(cs->vp, NULL, &create); 6979 if (file == NULL) { 6980 NFS4_DEBUG(rfs4_debug, 6981 (CE_NOTE, "rfs4_do_opendelprev: can't find file")); 6982 resp->status = NFS4ERR_SERVERFAULT; 6983 return; 6984 } 6985 6986 state = rfs4_findstate_by_owner_file(oo, file, &create); 6987 if (state == NULL) { 6988 NFS4_DEBUG(rfs4_debug, 6989 (CE_NOTE, "rfs4_do_opendelprev: can't find state")); 6990 resp->status = NFS4ERR_SERVERFAULT; 6991 rfs4_file_rele_withunlock(file); 6992 return; 6993 } 6994 6995 rfs4_dbe_lock(state->dbe); 6996 rfs4_dbe_lock(file->dbe); 6997 if (args->share_access != state->share_access || 6998 args->share_deny != state->share_deny || 6999 state->finfo->dinfo->dtype == OPEN_DELEGATE_NONE) { 7000 NFS4_DEBUG(rfs4_debug, 7001 (CE_NOTE, "rfs4_do_opendelprev: state mixup")); 7002 rfs4_dbe_unlock(file->dbe); 7003 rfs4_dbe_unlock(state->dbe); 7004 rfs4_file_rele(file); 7005 rfs4_state_rele(state); 7006 resp->status = NFS4ERR_SERVERFAULT; 7007 return; 7008 } 7009 rfs4_dbe_unlock(file->dbe); 7010 rfs4_dbe_unlock(state->dbe); 7011 7012 dsp = rfs4_finddeleg(state, &dcreate); 7013 if (dsp == NULL) { 7014 rfs4_state_rele(state); 7015 rfs4_file_rele(file); 7016 resp->status = NFS4ERR_SERVERFAULT; 7017 return; 7018 } 7019 7020 next_stateid(&state->stateid); 7021 7022 resp->stateid = state->stateid.stateid; 7023 7024 resp->delegation.delegation_type = dsp->dtype; 7025 7026 if (dsp->dtype == OPEN_DELEGATE_READ) { 7027 open_read_delegation4 *rv = 7028 
&resp->delegation.open_delegation4_u.read; 7029 7030 rv->stateid = dsp->delegid.stateid; 7031 rv->recall = FALSE; /* no policy in place to set to TRUE */ 7032 ace = &rv->permissions; 7033 } else { 7034 open_write_delegation4 *rv = 7035 &resp->delegation.open_delegation4_u.write; 7036 7037 rv->stateid = dsp->delegid.stateid; 7038 rv->recall = FALSE; /* no policy in place to set to TRUE */ 7039 ace = &rv->permissions; 7040 rv->space_limit.limitby = NFS_LIMIT_SIZE; 7041 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX; 7042 } 7043 7044 /* XXX For now */ 7045 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE; 7046 ace->flag = 0; 7047 ace->access_mask = 0; 7048 ace->who.utf8string_len = 0; 7049 ace->who.utf8string_val = 0; 7050 7051 rfs4_deleg_state_rele(dsp); 7052 rfs4_state_rele(state); 7053 rfs4_file_rele(file); 7054 } 7055 7056 typedef enum { 7057 NFS4_CHKSEQ_OKAY = 0, 7058 NFS4_CHKSEQ_REPLAY = 1, 7059 NFS4_CHKSEQ_BAD = 2 7060 } rfs4_chkseq_t; 7061 7062 /* 7063 * Generic function for sequence number checks. 7064 */ 7065 static rfs4_chkseq_t 7066 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop, 7067 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres) 7068 { 7069 /* Same sequence ids and matching operations? 
*/ 7070 if (seqid == rqst_seq && resop->resop == lastop->resop) { 7071 if (copyres == TRUE) { 7072 rfs4_free_reply(resop); 7073 rfs4_copy_reply(resop, lastop); 7074 } 7075 NFS4_DEBUG(rfs4_debug, (CE_NOTE, 7076 "Replayed SEQID %d\n", seqid)); 7077 return (NFS4_CHKSEQ_REPLAY); 7078 } 7079 7080 /* If the incoming sequence is not the next expected then it is bad */ 7081 if (rqst_seq != seqid + 1) { 7082 if (rqst_seq == seqid) { 7083 NFS4_DEBUG(rfs4_debug, 7084 (CE_NOTE, "BAD SEQID: Replayed sequence id " 7085 "but last op was %d current op is %d\n", 7086 lastop->resop, resop->resop)); 7087 return (NFS4_CHKSEQ_BAD); 7088 } 7089 NFS4_DEBUG(rfs4_debug, 7090 (CE_NOTE, "BAD SEQID: got %u expecting %u\n", 7091 rqst_seq, seqid)); 7092 return (NFS4_CHKSEQ_BAD); 7093 } 7094 7095 /* Everything okay -- next expected */ 7096 return (NFS4_CHKSEQ_OKAY); 7097 } 7098 7099 7100 static rfs4_chkseq_t 7101 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop) 7102 { 7103 rfs4_chkseq_t rc; 7104 7105 rfs4_dbe_lock(op->dbe); 7106 rc = rfs4_check_seqid(op->open_seqid, op->reply, seqid, resop, TRUE); 7107 rfs4_dbe_unlock(op->dbe); 7108 7109 if (rc == NFS4_CHKSEQ_OKAY) 7110 rfs4_update_lease(op->client); 7111 7112 return (rc); 7113 } 7114 7115 static rfs4_chkseq_t 7116 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, 7117 nfs_resop4 *resop) 7118 { 7119 rfs4_chkseq_t rc; 7120 7121 rfs4_dbe_lock(op->dbe); 7122 rc = rfs4_check_seqid(op->open_seqid, op->reply, 7123 olo_seqid, resop, FALSE); 7124 rfs4_dbe_unlock(op->dbe); 7125 7126 return (rc); 7127 } 7128 7129 static rfs4_chkseq_t 7130 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lp, nfs_resop4 *resop) 7131 { 7132 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY; 7133 7134 rfs4_dbe_lock(lp->dbe); 7135 if (!lp->skip_seqid_check) 7136 rc = rfs4_check_seqid(lp->seqid, lp->reply, seqid, resop, TRUE); 7137 rfs4_dbe_unlock(lp->dbe); 7138 7139 return (rc); 7140 } 7141 7142 static void 7143 rfs4_op_open(nfs_argop4 *argop, 
/*
 * rfs4_op_open: NFSv4 OPEN operation.
 *
 * Processing order:
 *   1. Require a current filehandle (cs->vp).
 *   2. Find the client by owner->clientid and check its lease.
 *   3. Find or create the open owner and serialize on its oo_sw.
 *   4. For a pre-existing open owner (that is not in the
 *      postpone_confirm state) check the request's seqid; an
 *      unconfirmed owner has its opens freed and the lookup is retried.
 *   5. Apply the grace-period rules for the claim type.
 *   6. Validate the share access/deny bits.
 *   7. Dispatch to the helper for the claim type.
 *   8. At "out", do the seqid / lease / saved-reply bookkeeping, and on
 *      a successful replay restore the filehandle returned originally.
 *
 * The result is left in resp->status and mirrored in *cs->statusp.
 *
 * Exit labels:
 *   out    - client and open owner are held; bookkeeping is performed
 *   finish - open owner is held; skips the remaining bookkeeping
 *   end    - nothing is held; only the done probe fires
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;
	bool_t replay = FALSE;
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		/* No separate rele of cp is done on this path */
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	/* Snapshot for the CLAIM_PREVIOUS grace check below */
	can_reclaim = cp->can_reclaim;

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	/* create is in/out: reset it on every pass through retry */
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
		rfs4_client_rele(cp);
		goto end;
	}

	/* Hold off access to the sequence space while the open is done */
	rfs4_sw_enter(&oo->oo_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
		case NFS4_CHKSEQ_BAD:
			/*
			 * A seqid beyond the recorded one on an owner
			 * still awaiting confirmation: free its opens
			 * and look the owner up again.
			 */
			if ((args->seqid > oo->open_seqid) &&
			    oo->need_confirm) {
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->oo_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			rfs4_sw_exit(&oo->oo_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set. If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must be READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Each helper reports its result in resp->status */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	/* The client is reached through oo->client from here on */
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->reply_fh.nfs_fh4_val;

			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			VN_RELE(cs->vp);

			/* On failure nfs4_fhtovp is given resp->status to set */
			cs->vp = nfs4_fhtovp(&oo->reply_fh, cs->exi,
			    &resp->status);

			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id. If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->need_confirm)
			oo->postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->postpone_confirm) {
			oo->postpone_confirm = FALSE;
			oo->open_seqid = args->seqid;
		}
		break;
	}

finish:
	/* Paths that set only resp->status are mirrored here */
	*cs->statusp = resp->status;

	rfs4_sw_exit(&oo->oo_sw);
	rfs4_openowner_rele(oo);

end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
7399 */ 7400 if (resp->status != NFS4_OK && oo->need_confirm) 7401 oo->postpone_confirm = TRUE; 7402 /* 7403 * If OK response then clear the postpone flag and 7404 * reset the sequence id to keep in sync with the 7405 * client. 7406 */ 7407 if (resp->status == NFS4_OK && oo->postpone_confirm) { 7408 oo->postpone_confirm = FALSE; 7409 oo->open_seqid = args->seqid; 7410 } 7411 break; 7412 } 7413 7414 finish: 7415 *cs->statusp = resp->status; 7416 7417 rfs4_sw_exit(&oo->oo_sw); 7418 rfs4_openowner_rele(oo); 7419 7420 end: 7421 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs, 7422 OPEN4res *, resp); 7423 } 7424 7425 /*ARGSUSED*/ 7426 void 7427 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop, 7428 struct svc_req *req, struct compound_state *cs) 7429 { 7430 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm; 7431 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm; 7432 rfs4_state_t *sp; 7433 nfsstat4 status; 7434 7435 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs, 7436 OPEN_CONFIRM4args *, args); 7437 7438 if (cs->vp == NULL) { 7439 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 7440 goto out; 7441 } 7442 7443 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID); 7444 if (status != NFS4_OK) { 7445 *cs->statusp = resp->status = status; 7446 goto out; 7447 } 7448 7449 /* Ensure specified filehandle matches */ 7450 if (cs->vp != sp->finfo->vp) { 7451 rfs4_state_rele(sp); 7452 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7453 goto out; 7454 } 7455 7456 /* hold off other access to open_owner while we tinker */ 7457 rfs4_sw_enter(&sp->owner->oo_sw); 7458 7459 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) { 7460 case NFS4_CHECK_STATEID_OKAY: 7461 if (rfs4_check_open_seqid(args->seqid, sp->owner, 7462 resop) != 0) { 7463 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 7464 break; 7465 } 7466 /* 7467 * If it is the appropriate stateid and determined to 7468 * be "OKAY" then this 
means that the stateid does not 7469 * need to be confirmed and the client is in error for 7470 * sending an OPEN_CONFIRM. 7471 */ 7472 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7473 break; 7474 case NFS4_CHECK_STATEID_OLD: 7475 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 7476 break; 7477 case NFS4_CHECK_STATEID_BAD: 7478 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7479 break; 7480 case NFS4_CHECK_STATEID_EXPIRED: 7481 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 7482 break; 7483 case NFS4_CHECK_STATEID_CLOSED: 7484 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 7485 break; 7486 case NFS4_CHECK_STATEID_REPLAY: 7487 switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) { 7488 case NFS4_CHKSEQ_OKAY: 7489 /* 7490 * This is replayed stateid; if seqid matches 7491 * next expected, then client is using wrong seqid. 7492 */ 7493 /* fall through */ 7494 case NFS4_CHKSEQ_BAD: 7495 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 7496 break; 7497 case NFS4_CHKSEQ_REPLAY: 7498 /* 7499 * Note this case is the duplicate case so 7500 * resp->status is already set. 
7501 */ 7502 *cs->statusp = resp->status; 7503 rfs4_update_lease(sp->owner->client); 7504 break; 7505 } 7506 break; 7507 case NFS4_CHECK_STATEID_UNCONFIRMED: 7508 if (rfs4_check_open_seqid(args->seqid, sp->owner, 7509 resop) != NFS4_CHKSEQ_OKAY) { 7510 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 7511 break; 7512 } 7513 *cs->statusp = resp->status = NFS4_OK; 7514 7515 next_stateid(&sp->stateid); 7516 resp->open_stateid = sp->stateid.stateid; 7517 sp->owner->need_confirm = FALSE; 7518 rfs4_update_lease(sp->owner->client); 7519 rfs4_update_open_sequence(sp->owner); 7520 rfs4_update_open_resp(sp->owner, resop, NULL); 7521 break; 7522 default: 7523 ASSERT(FALSE); 7524 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 7525 break; 7526 } 7527 rfs4_sw_exit(&sp->owner->oo_sw); 7528 rfs4_state_rele(sp); 7529 7530 out: 7531 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs, 7532 OPEN_CONFIRM4res *, resp); 7533 } 7534 7535 /*ARGSUSED*/ 7536 void 7537 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop, 7538 struct svc_req *req, struct compound_state *cs) 7539 { 7540 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade; 7541 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade; 7542 uint32_t access = args->share_access; 7543 uint32_t deny = args->share_deny; 7544 nfsstat4 status; 7545 rfs4_state_t *sp; 7546 rfs4_file_t *fp; 7547 int fflags = 0; 7548 7549 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs, 7550 OPEN_DOWNGRADE4args *, args); 7551 7552 if (cs->vp == NULL) { 7553 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 7554 goto out; 7555 } 7556 7557 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID); 7558 if (status != NFS4_OK) { 7559 *cs->statusp = resp->status = status; 7560 goto out; 7561 } 7562 7563 /* Ensure specified filehandle matches */ 7564 if (cs->vp != sp->finfo->vp) { 7565 rfs4_state_rele(sp); 7566 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7567 goto out; 
7568 } 7569 7570 /* hold off other access to open_owner while we tinker */ 7571 rfs4_sw_enter(&sp->owner->oo_sw); 7572 7573 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) { 7574 case NFS4_CHECK_STATEID_OKAY: 7575 if (rfs4_check_open_seqid(args->seqid, sp->owner, 7576 resop) != NFS4_CHKSEQ_OKAY) { 7577 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 7578 goto end; 7579 } 7580 break; 7581 case NFS4_CHECK_STATEID_OLD: 7582 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 7583 goto end; 7584 case NFS4_CHECK_STATEID_BAD: 7585 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7586 goto end; 7587 case NFS4_CHECK_STATEID_EXPIRED: 7588 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 7589 goto end; 7590 case NFS4_CHECK_STATEID_CLOSED: 7591 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 7592 goto end; 7593 case NFS4_CHECK_STATEID_UNCONFIRMED: 7594 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7595 goto end; 7596 case NFS4_CHECK_STATEID_REPLAY: 7597 /* Check the sequence id for the open owner */ 7598 switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) { 7599 case NFS4_CHKSEQ_OKAY: 7600 /* 7601 * This is replayed stateid; if seqid matches 7602 * next expected, then client is using wrong seqid. 7603 */ 7604 /* fall through */ 7605 case NFS4_CHKSEQ_BAD: 7606 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 7607 goto end; 7608 case NFS4_CHKSEQ_REPLAY: 7609 /* 7610 * Note this case is the duplicate case so 7611 * resp->status is already set. 7612 */ 7613 *cs->statusp = resp->status; 7614 rfs4_update_lease(sp->owner->client); 7615 goto end; 7616 } 7617 break; 7618 default: 7619 ASSERT(FALSE); 7620 break; 7621 } 7622 7623 rfs4_dbe_lock(sp->dbe); 7624 /* 7625 * Check that the new access modes and deny modes are valid. 7626 * Check that no invalid bits are set. 
7627 */ 7628 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) || 7629 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) { 7630 *cs->statusp = resp->status = NFS4ERR_INVAL; 7631 rfs4_update_open_sequence(sp->owner); 7632 rfs4_dbe_unlock(sp->dbe); 7633 goto end; 7634 } 7635 7636 /* 7637 * The new modes must be a subset of the current modes and 7638 * the access must specify at least one mode. To test that 7639 * the new mode is a subset of the current modes we bitwise 7640 * AND them together and check that the result equals the new 7641 * mode. For example: 7642 * New mode, access == R and current mode, sp->share_access == RW 7643 * access & sp->share_access == R == access, so the new access mode 7644 * is valid. Consider access == RW, sp->share_access = R 7645 * access & sp->share_access == R != access, so the new access mode 7646 * is invalid. 7647 */ 7648 if ((access & sp->share_access) != access || 7649 (deny & sp->share_deny) != deny || 7650 (access & 7651 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) { 7652 *cs->statusp = resp->status = NFS4ERR_INVAL; 7653 rfs4_update_open_sequence(sp->owner); 7654 rfs4_dbe_unlock(sp->dbe); 7655 goto end; 7656 } 7657 7658 /* 7659 * Release any share locks associated with this stateID. 7660 * Strictly speaking, this violates the spec because the 7661 * spec effectively requires that open downgrade be atomic. 7662 * At present, fs_shrlock does not have this capability. 7663 */ 7664 rfs4_dbe_unlock(sp->dbe); 7665 rfs4_unshare(sp); 7666 rfs4_dbe_lock(sp->dbe); 7667 7668 fp = sp->finfo; 7669 rfs4_dbe_lock(fp->dbe); 7670 7671 /* 7672 * If the current mode has deny read and the new mode 7673 * does not, decrement the number of deny read mode bits 7674 * and if it goes to zero turn off the deny read bit 7675 * on the file. 
7676 */ 7677 if ((sp->share_deny & OPEN4_SHARE_DENY_READ) && 7678 (deny & OPEN4_SHARE_DENY_READ) == 0) { 7679 fp->deny_read--; 7680 if (fp->deny_read == 0) 7681 fp->share_deny &= ~OPEN4_SHARE_DENY_READ; 7682 } 7683 7684 /* 7685 * If the current mode has deny write and the new mode 7686 * does not, decrement the number of deny write mode bits 7687 * and if it goes to zero turn off the deny write bit 7688 * on the file. 7689 */ 7690 if ((sp->share_deny & OPEN4_SHARE_DENY_WRITE) && 7691 (deny & OPEN4_SHARE_DENY_WRITE) == 0) { 7692 fp->deny_write--; 7693 if (fp->deny_write == 0) 7694 fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE; 7695 } 7696 7697 /* 7698 * If the current mode has access read and the new mode 7699 * does not, decrement the number of access read mode bits 7700 * and if it goes to zero turn off the access read bit 7701 * on the file. set fflags to FREAD for the call to 7702 * vn_open_downgrade(). 7703 */ 7704 if ((sp->share_access & OPEN4_SHARE_ACCESS_READ) && 7705 (access & OPEN4_SHARE_ACCESS_READ) == 0) { 7706 fp->access_read--; 7707 if (fp->access_read == 0) 7708 fp->share_access &= ~OPEN4_SHARE_ACCESS_READ; 7709 fflags |= FREAD; 7710 } 7711 7712 /* 7713 * If the current mode has access write and the new mode 7714 * does not, decrement the number of access write mode bits 7715 * and if it goes to zero turn off the access write bit 7716 * on the file. set fflags to FWRITE for the call to 7717 * vn_open_downgrade(). 
7718 */ 7719 if ((sp->share_access & OPEN4_SHARE_ACCESS_WRITE) && 7720 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) { 7721 fp->access_write--; 7722 if (fp->access_write == 0) 7723 fp->share_deny &= ~OPEN4_SHARE_ACCESS_WRITE; 7724 fflags |= FWRITE; 7725 } 7726 7727 /* Set the new access and deny modes */ 7728 sp->share_access = access; 7729 sp->share_deny = deny; 7730 /* Check that the file is still accessible */ 7731 ASSERT(fp->share_access); 7732 7733 rfs4_dbe_unlock(fp->dbe); 7734 7735 rfs4_dbe_unlock(sp->dbe); 7736 7737 if ((status = rfs4_share(sp)) != NFS4_OK) { 7738 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 7739 rfs4_update_open_sequence(sp->owner); 7740 goto end; 7741 } 7742 7743 /* 7744 * we successfully downgraded the share lock, now we need to downgrade 7745 * the open. it is possible that the downgrade was only for a deny 7746 * mode and we have nothing else to do. 7747 */ 7748 if ((fflags & (FREAD|FWRITE)) != 0) 7749 vn_open_downgrade(cs->vp, fflags); 7750 7751 rfs4_dbe_lock(sp->dbe); 7752 7753 /* Update the stateid */ 7754 next_stateid(&sp->stateid); 7755 resp->open_stateid = sp->stateid.stateid; 7756 7757 rfs4_dbe_unlock(sp->dbe); 7758 7759 *cs->statusp = resp->status = NFS4_OK; 7760 /* Update the lease */ 7761 rfs4_update_lease(sp->owner->client); 7762 /* And the sequence */ 7763 rfs4_update_open_sequence(sp->owner); 7764 rfs4_update_open_resp(sp->owner, resop, NULL); 7765 7766 end: 7767 rfs4_sw_exit(&sp->owner->oo_sw); 7768 rfs4_state_rele(sp); 7769 out: 7770 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs, 7771 OPEN_DOWNGRADE4res *, resp); 7772 } 7773 7774 /* 7775 * The logic behind this function is detailed in the NFSv4 RFC in the 7776 * SETCLIENTID operation description under IMPLEMENTATION. Refer to 7777 * that section for explicit guidance to server behavior for 7778 * SETCLIENTID. 
7779 */ 7780 void 7781 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop, 7782 struct svc_req *req, struct compound_state *cs) 7783 { 7784 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid; 7785 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid; 7786 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed; 7787 bool_t create = TRUE; 7788 char *addr, *netid; 7789 int len; 7790 7791 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs, 7792 SETCLIENTID4args *, args); 7793 retry: 7794 newcp = cp_confirmed = cp_unconfirmed = NULL; 7795 7796 /* 7797 * In search of an EXISTING client matching the incoming 7798 * request to establish a new client identifier at the server 7799 */ 7800 create = TRUE; 7801 cp = rfs4_findclient(&args->client, &create, NULL); 7802 7803 /* Should never happen */ 7804 ASSERT(cp != NULL); 7805 7806 if (cp == NULL) { 7807 *cs->statusp = res->status = NFS4ERR_SERVERFAULT; 7808 goto out; 7809 } 7810 7811 /* 7812 * Easiest case. Client identifier is newly created and is 7813 * unconfirmed. Also note that for this case, no other 7814 * entries exist for the client identifier. Nothing else to 7815 * check. Just setup the response and respond. 7816 */ 7817 if (create) { 7818 *cs->statusp = res->status = NFS4_OK; 7819 res->SETCLIENTID4res_u.resok4.clientid = cp->clientid; 7820 res->SETCLIENTID4res_u.resok4.setclientid_confirm = 7821 cp->confirm_verf; 7822 /* Setup callback information; CB_NULL confirmation later */ 7823 rfs4_client_setcb(cp, &args->callback, args->callback_ident); 7824 7825 rfs4_client_rele(cp); 7826 goto out; 7827 } 7828 7829 /* 7830 * An existing, confirmed client may exist but it may not have 7831 * been active for at least one lease period. 
If so, then 7832 * "close" the client and create a new client identifier 7833 */ 7834 if (rfs4_lease_expired(cp)) { 7835 rfs4_client_close(cp); 7836 goto retry; 7837 } 7838 7839 if (cp->need_confirm == TRUE) 7840 cp_unconfirmed = cp; 7841 else 7842 cp_confirmed = cp; 7843 7844 cp = NULL; 7845 7846 /* 7847 * We have a confirmed client, now check for an 7848 * unconfimred entry 7849 */ 7850 if (cp_confirmed) { 7851 /* If creds don't match then client identifier is inuse */ 7852 if (!creds_ok(cp_confirmed->cr_set, req, cs)) { 7853 rfs4_cbinfo_t *cbp; 7854 /* 7855 * Some one else has established this client 7856 * id. Try and say * who they are. We will use 7857 * the call back address supplied by * the 7858 * first client. 7859 */ 7860 *cs->statusp = res->status = NFS4ERR_CLID_INUSE; 7861 7862 addr = netid = NULL; 7863 7864 cbp = &cp_confirmed->cbinfo; 7865 if (cbp->cb_callback.cb_location.r_addr && 7866 cbp->cb_callback.cb_location.r_netid) { 7867 cb_client4 *cbcp = &cbp->cb_callback; 7868 7869 len = strlen(cbcp->cb_location.r_addr)+1; 7870 addr = kmem_alloc(len, KM_SLEEP); 7871 bcopy(cbcp->cb_location.r_addr, addr, len); 7872 len = strlen(cbcp->cb_location.r_netid)+1; 7873 netid = kmem_alloc(len, KM_SLEEP); 7874 bcopy(cbcp->cb_location.r_netid, netid, len); 7875 } 7876 7877 res->SETCLIENTID4res_u.client_using.r_addr = addr; 7878 res->SETCLIENTID4res_u.client_using.r_netid = netid; 7879 7880 rfs4_client_rele(cp_confirmed); 7881 } 7882 7883 /* 7884 * Confirmed, creds match, and verifier matches; must 7885 * be an update of the callback info 7886 */ 7887 if (cp_confirmed->nfs_client.verifier == 7888 args->client.verifier) { 7889 /* Setup callback information */ 7890 rfs4_client_setcb(cp_confirmed, &args->callback, 7891 args->callback_ident); 7892 7893 /* everything okay -- move ahead */ 7894 *cs->statusp = res->status = NFS4_OK; 7895 res->SETCLIENTID4res_u.resok4.clientid = 7896 cp_confirmed->clientid; 7897 7898 /* update the confirm_verifier and return it */ 7899 
rfs4_client_scv_next(cp_confirmed); 7900 res->SETCLIENTID4res_u.resok4.setclientid_confirm = 7901 cp_confirmed->confirm_verf; 7902 7903 rfs4_client_rele(cp_confirmed); 7904 goto out; 7905 } 7906 7907 /* 7908 * Creds match but the verifier doesn't. Must search 7909 * for an unconfirmed client that would be replaced by 7910 * this request. 7911 */ 7912 create = FALSE; 7913 cp_unconfirmed = rfs4_findclient(&args->client, &create, 7914 cp_confirmed); 7915 } 7916 7917 /* 7918 * At this point, we have taken care of the brand new client 7919 * struct, INUSE case, update of an existing, and confirmed 7920 * client struct. 7921 */ 7922 7923 /* 7924 * check to see if things have changed while we originally 7925 * picked up the client struct. If they have, then return and 7926 * retry the processing of this SETCLIENTID request. 7927 */ 7928 if (cp_unconfirmed) { 7929 rfs4_dbe_lock(cp_unconfirmed->dbe); 7930 if (!cp_unconfirmed->need_confirm) { 7931 rfs4_dbe_unlock(cp_unconfirmed->dbe); 7932 rfs4_client_rele(cp_unconfirmed); 7933 if (cp_confirmed) 7934 rfs4_client_rele(cp_confirmed); 7935 goto retry; 7936 } 7937 /* do away with the old unconfirmed one */ 7938 rfs4_dbe_invalidate(cp_unconfirmed->dbe); 7939 rfs4_dbe_unlock(cp_unconfirmed->dbe); 7940 rfs4_client_rele(cp_unconfirmed); 7941 cp_unconfirmed = NULL; 7942 } 7943 7944 /* 7945 * This search will temporarily hide the confirmed client 7946 * struct while a new client struct is created as the 7947 * unconfirmed one. 
	 */
	create = TRUE;
	newcp = rfs4_findclient(&args->client, &create, cp_confirmed);

	ASSERT(newcp != NULL);

	if (newcp == NULL) {
		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
		rfs4_client_rele(cp_confirmed);
		goto out;
	}

	/*
	 * If one was not created, then a similar request must be in
	 * process so release and start over with this one
	 */
	if (create != TRUE) {
		rfs4_client_rele(newcp);
		if (cp_confirmed)
			rfs4_client_rele(cp_confirmed);
		goto retry;
	}

	*cs->statusp = res->status = NFS4_OK;
	res->SETCLIENTID4res_u.resok4.clientid = newcp->clientid;
	res->SETCLIENTID4res_u.resok4.setclientid_confirm = newcp->confirm_verf;
	/* Setup callback information; CB_NULL confirmation later */
	rfs4_client_setcb(newcp, &args->callback, args->callback_ident);

	newcp->cp_confirmed = cp_confirmed;

	rfs4_client_rele(newcp);

out:
	DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
	    SETCLIENTID4res *, res);
}

/*
 * SETCLIENTID_CONFIRM: confirm the client record named by args->clientid.
 *
 * The record is looked up by id; a missing record yields whatever
 * rfs4_check_clientid() reports, a credential mismatch yields
 * NFS4ERR_CLID_INUSE and a confirm-verifier mismatch yields
 * NFS4ERR_STALE_CLIENTID.  Otherwise, under the record's dbe lock, the
 * record is marked confirmed, any previously confirmed record linked via
 * cp_confirmed is detached (and closed after the lock is dropped), the
 * server instance is refreshed and the clientid is written to stable
 * storage.  The result is reported through both res->status and
 * *cs->statusp.
 */
/*ARGSUSED*/
void
rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	SETCLIENTID_CONFIRM4args *args =
	    &argop->nfs_argop4_u.opsetclientid_confirm;
	SETCLIENTID_CONFIRM4res *res =
	    &resop->nfs_resop4_u.opsetclientid_confirm;
	rfs4_client_t *cp, *cptoclose = NULL;

	DTRACE_NFSV4_2(op__setclientid__confirm__start,
	    struct compound_state *, cs,
	    SETCLIENTID_CONFIRM4args *, args);

	*cs->statusp = res->status = NFS4_OK;

	cp = rfs4_findclient_by_id(args->clientid, TRUE);

	if (cp == NULL) {
		*cs->statusp = res->status =
		    rfs4_check_clientid(&args->clientid, 1);
		goto out;
	}

	/*
	 * NOTE(review): SETCLIENTID passes cp_confirmed->cr_set to
	 * creds_ok() whereas cp itself is passed here -- confirm against
	 * the creds_ok() prototype.
	 */
	if (!creds_ok(cp, req, cs)) {
		*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
		rfs4_client_rele(cp);
		goto out;
	}

	/* If the verifier doesn't match, the record doesn't match */
	if (cp->confirm_verf != args->setclientid_confirm) {
		*cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
		rfs4_client_rele(cp);
		goto out;
	}

	rfs4_dbe_lock(cp->dbe);
	cp->need_confirm = FALSE;
	if (cp->cp_confirmed) {
		/*
		 * Detach the older confirmed record and flag it via
		 * ss_remove; it is closed below once the dbe lock is dropped.
		 */
		cptoclose = cp->cp_confirmed;
		cptoclose->ss_remove = 1;
		cp->cp_confirmed = NULL;
	}

	/*
	 * Update the client's associated server instance, if it's changed
	 * since the client was created.
	 */
	if (rfs4_servinst(cp) != rfs4_cur_servinst)
		rfs4_servinst_assign(cp, rfs4_cur_servinst);

	/*
	 * Record clientid in stable storage.
	 * Must be done after server instance has been assigned.
	 */
	rfs4_ss_clid(cp, req);

	rfs4_dbe_unlock(cp->dbe);

	if (cptoclose)
		/* don't need to rele, client_close does it */
		rfs4_client_close(cptoclose);

	/* If needed, initiate CB_NULL call for callback path */
	rfs4_deleg_cb_check(cp);
	rfs4_update_lease(cp);

	/*
	 * Check to see if client can perform reclaims
	 */
	rfs4_ss_chkclid(cp);

	rfs4_client_rele(cp);

out:
	/*
	 * NOTE(review): the probe argument type below is spelled
	 * "SETCLIENTID_CONFIRM4 *" while res is a SETCLIENTID_CONFIRM4res *;
	 * confirm against the DTrace provider definition before changing.
	 */
	DTRACE_NFSV4_2(op__setclientid__confirm__done,
	    struct compound_state *, cs,
	    SETCLIENTID_CONFIRM4 *, res);
}


/*
 * CLOSE: close the open state named by args->open_stateid.
 *
 * Requires a current filehandle (cs->vp) that matches the vnode of the
 * state's file.  With the open owner serialized through oo_sw, the stateid
 * and open seqid are validated; replays return the previously saved
 * response status.  On the normal path the stateid sequence is bumped and
 * returned, the lease, open sequence and saved open response are updated
 * and the state is handed to rfs4_state_close().
 */
/*ARGSUSED*/
void
rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
	rfs4_state_t *sp;
	nfsstat4 status;

	DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
	    CLOSE4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	/* Ensure specified filehandle matches */
	if (cs->vp != sp->finfo->vp) {
		rfs4_state_rele(sp);
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		goto out;
	}

	/* hold off other access to open_owner while we tinker */
	rfs4_sw_enter(&sp->owner->oo_sw);

	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
	case NFS4_CHECK_STATEID_OKAY:
		if (rfs4_check_open_seqid(args->seqid, sp->owner,
		    resop) != NFS4_CHKSEQ_OKAY) {
			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			goto end;
		}
		break;
	case NFS4_CHECK_STATEID_OLD:
		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_BAD:
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_EXPIRED:
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	case NFS4_CHECK_STATEID_CLOSED:
		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_UNCONFIRMED:
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_REPLAY:
		/* Check the sequence id for the open owner */
		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
		case NFS4_CHKSEQ_OKAY:
			/*
			 * This is replayed stateid; if seqid matches
			 * next expected, then client is using wrong seqid.
			 */
			/* FALL THROUGH */
		case NFS4_CHKSEQ_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			goto end;
		case NFS4_CHKSEQ_REPLAY:
			/*
			 * Note this case is the duplicate case so
			 * resp->status is already set.
			 */
			*cs->statusp = resp->status;
			rfs4_update_lease(sp->owner->client);
			goto end;
		}
		break;
	default:
		ASSERT(FALSE);
		break;
	}

	rfs4_dbe_lock(sp->dbe);

	/* Update the stateid. */
	next_stateid(&sp->stateid);
	resp->open_stateid = sp->stateid.stateid;

	rfs4_dbe_unlock(sp->dbe);

	rfs4_update_lease(sp->owner->client);
	rfs4_update_open_sequence(sp->owner);
	rfs4_update_open_resp(sp->owner, resop, NULL);

	rfs4_state_close(sp, FALSE, FALSE, cs->cr);

	/* status is still the NFS4_OK returned by rfs4_get_state() above */
	*cs->statusp = resp->status = status;

end:
	rfs4_sw_exit(&sp->owner->oo_sw);
	rfs4_state_rele(sp);
out:
	DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
	    CLOSE4res *, resp);
}

/*
 * Manage the counts on the file struct and close all file locks
 *
 * sp is the open state being torn down and cr the credential handed to
 * VOP_CLOSE().  When close_of_client is set, all of the client's locks are
 * dropped in one pass (at most once per client, tracked by
 * unlksys_completed) instead of per lock owner.
 */
/*ARGSUSED*/
void
rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
    bool_t close_of_client)
{
	rfs4_file_t *fp = sp->finfo;
	rfs4_lo_state_t *lsp;
	struct shrlock shr;
	struct shr_locowner shr_loco;
	int fflags, s_access, s_deny;

	fflags = s_access = s_deny = 0;
	/*
	 * Decrement the count for each access and deny bit that this
	 * state has contributed to the file. If the file counts go to zero
	 * clear the appropriate bit in the appropriate mask.
	 */

	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
		fp->access_read--;
		fflags |= FREAD;
		s_access |= F_RDACC;
		if (fp->access_read == 0)
			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
	}
	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
		fp->access_write--;
		fflags |= FWRITE;
		s_access |= F_WRACC;
		if (fp->access_write == 0)
			fp->share_access &= ~OPEN4_SHARE_ACCESS_WRITE;
	}
	if (sp->share_deny & OPEN4_SHARE_DENY_READ) {
		fp->deny_read--;
		s_deny |= F_RDDNY;
		if (fp->deny_read == 0)
			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
	}
	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE) {
		fp->deny_write--;
		s_deny |= F_WRDNY;
		if (fp->deny_write == 0)
			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
	}

	/*
	 * If this call is part of the larger closing down of client
	 * state then it is just easier to release all locks
	 * associated with this client instead of going through each
	 * individual file and cleaning locks there.
	 */
	if (close_of_client) {
		if (sp->owner->client->unlksys_completed == FALSE &&
		    sp->lockownerlist.next->lsp != NULL &&
		    sp->owner->client->sysidt != LM_NOSYSID) {
			/* Is the PxFS kernel module loaded? */
			if (lm_remove_file_locks != NULL) {
				int new_sysid;

				/* Encode the cluster nodeid in new sysid */
				new_sysid = sp->owner->client->sysidt;
				lm_set_nlmid_flk(&new_sysid);

				/*
				 * This PxFS routine removes file locks for a
				 * client over all nodes of a cluster.
				 */
				NFS4_DEBUG(rfs4_debug, (CE_NOTE,
				    "lm_remove_file_locks(sysid=0x%x)\n",
				    new_sysid));
				(*lm_remove_file_locks)(new_sysid);
			} else {
				struct flock64 flk;

				/* Release all locks for this client */
				flk.l_type = F_UNLKSYS;
				flk.l_whence = 0;
				flk.l_start = 0;
				flk.l_len = 0;
				flk.l_sysid = sp->owner->client->sysidt;
				flk.l_pid = 0;
				(void) VOP_FRLOCK(sp->finfo->vp, F_SETLK, &flk,
				    F_REMOTELOCK | FREAD | FWRITE,
				    (u_offset_t)0, NULL, CRED(), NULL);
			}

			sp->owner->client->unlksys_completed = TRUE;
		}
	}

	/*
	 * Release all locks on this file by this lock owner or at
	 * least mark the locks as having been released
	 */
	for (lsp = sp->lockownerlist.next->lsp; lsp != NULL;
	    lsp = lsp->lockownerlist.next->lsp) {

		lsp->locks_cleaned = TRUE;

		/* Was this already taken care of above? */
		if (!close_of_client &&
		    sp->owner->client->sysidt != LM_NOSYSID)
			(void) cleanlocks(sp->finfo->vp, lsp->locker->pid,
			    lsp->locker->client->sysidt);
	}

	/*
	 * Release any shrlocks associated with this open state ID.
	 * This must be done before the rfs4_state gets marked closed.
	 */
	if (sp->owner->client->sysidt != LM_NOSYSID) {
		shr.s_access = s_access;
		shr.s_deny = s_deny;
		shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
		shr.s_sysid = sp->owner->client->sysidt;
		shr_loco.sl_pid = shr.s_pid;
		shr_loco.sl_id = shr.s_sysid;
		shr.s_owner = (caddr_t)&shr_loco;
		shr.s_own_len = sizeof (shr_loco);
		(void) vop_shrlock(sp->finfo->vp, F_UNSHARE, &shr, fflags);
	}

	(void) VOP_CLOSE(fp->vp, fflags, 1, (offset_t)0, cr, NULL);
}

/*
 * lock_denied: Fill in a LOCK4denied structure given an flock64 structure.
8307 */ 8308 static nfsstat4 8309 lock_denied(LOCK4denied *dp, struct flock64 *flk) 8310 { 8311 rfs4_lockowner_t *lo; 8312 rfs4_client_t *cp; 8313 uint32_t len; 8314 8315 lo = rfs4_findlockowner_by_pid(flk->l_pid); 8316 if (lo != NULL) { 8317 cp = lo->client; 8318 if (rfs4_lease_expired(cp)) { 8319 rfs4_lockowner_rele(lo); 8320 rfs4_dbe_hold(cp->dbe); 8321 rfs4_client_close(cp); 8322 return (NFS4ERR_EXPIRED); 8323 } 8324 dp->owner.clientid = lo->owner.clientid; 8325 len = lo->owner.owner_len; 8326 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP); 8327 bcopy(lo->owner.owner_val, dp->owner.owner_val, len); 8328 dp->owner.owner_len = len; 8329 rfs4_lockowner_rele(lo); 8330 goto finish; 8331 } 8332 8333 /* 8334 * Its not a NFS4 lock. We take advantage that the upper 32 bits 8335 * of the client id contain the boot time for a NFS4 lock. So we 8336 * fabricate and identity by setting clientid to the sysid, and 8337 * the lock owner to the pid. 8338 */ 8339 dp->owner.clientid = flk->l_sysid; 8340 len = sizeof (pid_t); 8341 dp->owner.owner_len = len; 8342 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP); 8343 bcopy(&flk->l_pid, dp->owner.owner_val, len); 8344 finish: 8345 dp->offset = flk->l_start; 8346 dp->length = flk->l_len; 8347 8348 if (flk->l_type == F_RDLCK) 8349 dp->locktype = READ_LT; 8350 else if (flk->l_type == F_WRLCK) 8351 dp->locktype = WRITE_LT; 8352 else 8353 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */ 8354 8355 return (NFS4_OK); 8356 } 8357 8358 static int 8359 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred) 8360 { 8361 int error; 8362 struct flock64 flk; 8363 int i; 8364 clock_t delaytime; 8365 int cmd; 8366 8367 cmd = nbl_need_check(vp) ? 
F_SETLK_NBMAND : F_SETLK; 8368 retry: 8369 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay); 8370 8371 for (i = 0; i < rfs4_maxlock_tries; i++) { 8372 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock); 8373 error = VOP_FRLOCK(vp, cmd, 8374 flock, flag, (u_offset_t)0, NULL, cred, NULL); 8375 8376 if (error != EAGAIN && error != EACCES) 8377 break; 8378 8379 if (i < rfs4_maxlock_tries - 1) { 8380 delay(delaytime); 8381 delaytime *= 2; 8382 } 8383 } 8384 8385 if (error == EAGAIN || error == EACCES) { 8386 /* Get the owner of the lock */ 8387 flk = *flock; 8388 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk); 8389 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 8390 (u_offset_t)0, NULL, cred, NULL) == 0) { 8391 if (flk.l_type == F_UNLCK) { 8392 /* No longer locked, retry */ 8393 goto retry; 8394 } 8395 *flock = flk; 8396 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)", 8397 F_GETLK, &flk); 8398 } 8399 } 8400 8401 return (error); 8402 } 8403 8404 /*ARGSUSED*/ 8405 static nfsstat4 8406 rfs4_do_lock(rfs4_lo_state_t *lp, nfs_lock_type4 locktype, 8407 seqid4 seqid, offset4 offset, 8408 length4 length, cred_t *cred, nfs_resop4 *resop) 8409 { 8410 nfsstat4 status; 8411 rfs4_lockowner_t *lo = lp->locker; 8412 rfs4_state_t *sp = lp->state; 8413 struct flock64 flock; 8414 int16_t ltype; 8415 int flag; 8416 int error; 8417 sysid_t sysid; 8418 LOCK4res *lres; 8419 8420 if (rfs4_lease_expired(lo->client)) { 8421 return (NFS4ERR_EXPIRED); 8422 } 8423 8424 if ((status = rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK) 8425 return (status); 8426 8427 /* Check for zero length. 
To lock to end of file use all ones for V4 */ 8428 if (length == 0) 8429 return (NFS4ERR_INVAL); 8430 else if (length == (length4)(~0)) 8431 length = 0; /* Posix to end of file */ 8432 8433 retry: 8434 rfs4_dbe_lock(sp->dbe); 8435 8436 8437 if (resop->resop != OP_LOCKU) { 8438 switch (locktype) { 8439 case READ_LT: 8440 case READW_LT: 8441 if ((sp->share_access 8442 & OPEN4_SHARE_ACCESS_READ) == 0) { 8443 rfs4_dbe_unlock(sp->dbe); 8444 8445 return (NFS4ERR_OPENMODE); 8446 } 8447 ltype = F_RDLCK; 8448 break; 8449 case WRITE_LT: 8450 case WRITEW_LT: 8451 if ((sp->share_access 8452 & OPEN4_SHARE_ACCESS_WRITE) == 0) { 8453 rfs4_dbe_unlock(sp->dbe); 8454 8455 return (NFS4ERR_OPENMODE); 8456 } 8457 ltype = F_WRLCK; 8458 break; 8459 } 8460 } else 8461 ltype = F_UNLCK; 8462 8463 flock.l_type = ltype; 8464 flock.l_whence = 0; /* SEEK_SET */ 8465 flock.l_start = offset; 8466 flock.l_len = length; 8467 flock.l_sysid = sysid; 8468 flock.l_pid = lp->locker->pid; 8469 8470 /* Note that length4 is uint64_t but l_len and l_start are off64_t */ 8471 if (flock.l_len < 0 || flock.l_start < 0) { 8472 rfs4_dbe_unlock(sp->dbe); 8473 return (NFS4ERR_INVAL); 8474 } 8475 8476 /* 8477 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and 8478 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE. 8479 */ 8480 flag = (int)sp->share_access | F_REMOTELOCK; 8481 8482 error = setlock(sp->finfo->vp, &flock, flag, cred); 8483 if (error == 0) { 8484 rfs4_dbe_lock(lp->dbe); 8485 next_stateid(&lp->lockid); 8486 rfs4_dbe_unlock(lp->dbe); 8487 } 8488 8489 rfs4_dbe_unlock(sp->dbe); 8490 8491 /* 8492 * N.B. We map error values to nfsv4 errors. This is differrent 8493 * than puterrno4 routine. 
8494 */ 8495 switch (error) { 8496 case 0: 8497 status = NFS4_OK; 8498 break; 8499 case EAGAIN: 8500 case EACCES: /* Old value */ 8501 /* Can only get here if op is OP_LOCK */ 8502 ASSERT(resop->resop == OP_LOCK); 8503 lres = &resop->nfs_resop4_u.oplock; 8504 status = NFS4ERR_DENIED; 8505 if (lock_denied(&lres->LOCK4res_u.denied, &flock) 8506 == NFS4ERR_EXPIRED) 8507 goto retry; 8508 break; 8509 case ENOLCK: 8510 status = NFS4ERR_DELAY; 8511 break; 8512 case EOVERFLOW: 8513 status = NFS4ERR_INVAL; 8514 break; 8515 case EINVAL: 8516 status = NFS4ERR_NOTSUPP; 8517 break; 8518 default: 8519 status = NFS4ERR_SERVERFAULT; 8520 break; 8521 } 8522 8523 return (status); 8524 } 8525 8526 /*ARGSUSED*/ 8527 void 8528 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop, 8529 struct svc_req *req, struct compound_state *cs) 8530 { 8531 LOCK4args *args = &argop->nfs_argop4_u.oplock; 8532 LOCK4res *resp = &resop->nfs_resop4_u.oplock; 8533 nfsstat4 status; 8534 stateid4 *stateid; 8535 rfs4_lockowner_t *lo; 8536 rfs4_client_t *cp; 8537 rfs4_state_t *sp = NULL; 8538 rfs4_lo_state_t *lsp = NULL; 8539 bool_t ls_sw_held = FALSE; 8540 bool_t create = TRUE; 8541 bool_t lcreate = TRUE; 8542 bool_t dup_lock = FALSE; 8543 int rc; 8544 8545 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs, 8546 LOCK4args *, args); 8547 8548 if (cs->vp == NULL) { 8549 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 8550 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 8551 cs, LOCK4res *, resp); 8552 return; 8553 } 8554 8555 if (args->locker.new_lock_owner) { 8556 /* Create a new lockowner for this instance */ 8557 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner; 8558 8559 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner")); 8560 8561 stateid = &olo->open_stateid; 8562 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID); 8563 if (status != NFS4_OK) { 8564 NFS4_DEBUG(rfs4_debug, 8565 (CE_NOTE, "Get state failed in lock %d", status)); 8566 *cs->statusp = resp->status 
= status; 8567 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 8568 cs, LOCK4res *, resp); 8569 return; 8570 } 8571 8572 /* Ensure specified filehandle matches */ 8573 if (cs->vp != sp->finfo->vp) { 8574 rfs4_state_rele(sp); 8575 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8576 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 8577 cs, LOCK4res *, resp); 8578 return; 8579 } 8580 8581 /* hold off other access to open_owner while we tinker */ 8582 rfs4_sw_enter(&sp->owner->oo_sw); 8583 8584 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) { 8585 case NFS4_CHECK_STATEID_OLD: 8586 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8587 goto end; 8588 case NFS4_CHECK_STATEID_BAD: 8589 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8590 goto end; 8591 case NFS4_CHECK_STATEID_EXPIRED: 8592 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 8593 goto end; 8594 case NFS4_CHECK_STATEID_UNCONFIRMED: 8595 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8596 goto end; 8597 case NFS4_CHECK_STATEID_CLOSED: 8598 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8599 goto end; 8600 case NFS4_CHECK_STATEID_OKAY: 8601 case NFS4_CHECK_STATEID_REPLAY: 8602 switch (rfs4_check_olo_seqid(olo->open_seqid, 8603 sp->owner, resop)) { 8604 case NFS4_CHKSEQ_OKAY: 8605 if (rc == NFS4_CHECK_STATEID_OKAY) 8606 break; 8607 /* 8608 * This is replayed stateid; if seqid 8609 * matches next expected, then client 8610 * is using wrong seqid. 8611 */ 8612 /* FALLTHROUGH */ 8613 case NFS4_CHKSEQ_BAD: 8614 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8615 goto end; 8616 case NFS4_CHKSEQ_REPLAY: 8617 /* This is a duplicate LOCK request */ 8618 dup_lock = TRUE; 8619 8620 /* 8621 * For a duplicate we do not want to 8622 * create a new lockowner as it should 8623 * already exist. 8624 * Turn off the lockowner create flag. 
8625 */ 8626 lcreate = FALSE; 8627 } 8628 break; 8629 } 8630 8631 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate); 8632 if (lo == NULL) { 8633 NFS4_DEBUG(rfs4_debug, 8634 (CE_NOTE, "rfs4_op_lock: no lock owner")); 8635 *cs->statusp = resp->status = NFS4ERR_RESOURCE; 8636 goto end; 8637 } 8638 8639 lsp = rfs4_findlo_state_by_owner(lo, sp, &create); 8640 if (lsp == NULL) { 8641 rfs4_update_lease(sp->owner->client); 8642 /* 8643 * Only update theh open_seqid if this is not 8644 * a duplicate request 8645 */ 8646 if (dup_lock == FALSE) { 8647 rfs4_update_open_sequence(sp->owner); 8648 } 8649 8650 NFS4_DEBUG(rfs4_debug, 8651 (CE_NOTE, "rfs4_op_lock: no state")); 8652 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 8653 rfs4_update_open_resp(sp->owner, resop, NULL); 8654 rfs4_lockowner_rele(lo); 8655 goto end; 8656 } 8657 8658 /* 8659 * This is the new_lock_owner branch and the client is 8660 * supposed to be associating a new lock_owner with 8661 * the open file at this point. If we find that a 8662 * lock_owner/state association already exists and a 8663 * successful LOCK request was returned to the client, 8664 * an error is returned to the client since this is 8665 * not appropriate. The client should be using the 8666 * existing lock_owner branch. 8667 */ 8668 if (dup_lock == FALSE && create == FALSE) { 8669 if (lsp->lock_completed == TRUE) { 8670 *cs->statusp = 8671 resp->status = NFS4ERR_BAD_SEQID; 8672 rfs4_lockowner_rele(lo); 8673 goto end; 8674 } 8675 } 8676 8677 rfs4_update_lease(sp->owner->client); 8678 8679 /* 8680 * Only update theh open_seqid if this is not 8681 * a duplicate request 8682 */ 8683 if (dup_lock == FALSE) { 8684 rfs4_update_open_sequence(sp->owner); 8685 } 8686 8687 /* 8688 * If this is a duplicate lock request, just copy the 8689 * previously saved reply and return. 
8690 */ 8691 if (dup_lock == TRUE) { 8692 /* verify that lock_seqid's match */ 8693 if (lsp->seqid != olo->lock_seqid) { 8694 NFS4_DEBUG(rfs4_debug, 8695 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad" 8696 "lsp->seqid=%d old->seqid=%d", 8697 lsp->seqid, olo->lock_seqid)); 8698 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8699 } else { 8700 rfs4_copy_reply(resop, lsp->reply); 8701 /* 8702 * Make sure to copy the just 8703 * retrieved reply status into the 8704 * overall compound status 8705 */ 8706 *cs->statusp = resp->status; 8707 } 8708 rfs4_lockowner_rele(lo); 8709 goto end; 8710 } 8711 8712 rfs4_dbe_lock(lsp->dbe); 8713 8714 /* Make sure to update the lock sequence id */ 8715 lsp->seqid = olo->lock_seqid; 8716 8717 NFS4_DEBUG(rfs4_debug, 8718 (CE_NOTE, "Lock seqid established as %d", lsp->seqid)); 8719 8720 /* 8721 * This is used to signify the newly created lockowner 8722 * stateid and its sequence number. The checks for 8723 * sequence number and increment don't occur on the 8724 * very first lock request for a lockowner. 
8725 */ 8726 lsp->skip_seqid_check = TRUE; 8727 8728 /* hold off other access to lsp while we tinker */ 8729 rfs4_sw_enter(&lsp->ls_sw); 8730 ls_sw_held = TRUE; 8731 8732 rfs4_dbe_unlock(lsp->dbe); 8733 8734 rfs4_lockowner_rele(lo); 8735 } else { 8736 stateid = &args->locker.locker4_u.lock_owner.lock_stateid; 8737 /* get lsp and hold the lock on the underlying file struct */ 8738 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) 8739 != NFS4_OK) { 8740 *cs->statusp = resp->status = status; 8741 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 8742 cs, LOCK4res *, resp); 8743 return; 8744 } 8745 create = FALSE; /* We didn't create lsp */ 8746 8747 /* Ensure specified filehandle matches */ 8748 if (cs->vp != lsp->state->finfo->vp) { 8749 rfs4_lo_state_rele(lsp, TRUE); 8750 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8751 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 8752 cs, LOCK4res *, resp); 8753 return; 8754 } 8755 8756 /* hold off other access to lsp while we tinker */ 8757 rfs4_sw_enter(&lsp->ls_sw); 8758 ls_sw_held = TRUE; 8759 8760 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) { 8761 /* 8762 * The stateid looks like it was okay (expected to be 8763 * the next one) 8764 */ 8765 case NFS4_CHECK_STATEID_OKAY: 8766 /* 8767 * The sequence id is now checked. Determine 8768 * if this is a replay or if it is in the 8769 * expected (next) sequence. In the case of a 8770 * replay, there are two replay conditions 8771 * that may occur. The first is the normal 8772 * condition where a LOCK is done with a 8773 * NFS4_OK response and the stateid is 8774 * updated. That case is handled below when 8775 * the stateid is identified as a REPLAY. The 8776 * second is the case where an error is 8777 * returned, like NFS4ERR_DENIED, and the 8778 * sequence number is updated but the stateid 8779 * is not updated. This second case is dealt 8780 * with here. 
So it may seem odd that the 8781 * stateid is okay but the sequence id is a 8782 * replay but it is okay. 8783 */ 8784 switch (rfs4_check_lock_seqid( 8785 args->locker.locker4_u.lock_owner.lock_seqid, 8786 lsp, resop)) { 8787 case NFS4_CHKSEQ_REPLAY: 8788 if (resp->status != NFS4_OK) { 8789 /* 8790 * Here is our replay and need 8791 * to verify that the last 8792 * response was an error. 8793 */ 8794 *cs->statusp = resp->status; 8795 goto end; 8796 } 8797 /* 8798 * This is done since the sequence id 8799 * looked like a replay but it didn't 8800 * pass our check so a BAD_SEQID is 8801 * returned as a result. 8802 */ 8803 /*FALLTHROUGH*/ 8804 case NFS4_CHKSEQ_BAD: 8805 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8806 goto end; 8807 case NFS4_CHKSEQ_OKAY: 8808 /* Everything looks okay move ahead */ 8809 break; 8810 } 8811 break; 8812 case NFS4_CHECK_STATEID_OLD: 8813 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8814 goto end; 8815 case NFS4_CHECK_STATEID_BAD: 8816 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8817 goto end; 8818 case NFS4_CHECK_STATEID_EXPIRED: 8819 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 8820 goto end; 8821 case NFS4_CHECK_STATEID_CLOSED: 8822 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8823 goto end; 8824 case NFS4_CHECK_STATEID_REPLAY: 8825 switch (rfs4_check_lock_seqid( 8826 args->locker.locker4_u.lock_owner.lock_seqid, 8827 lsp, resop)) { 8828 case NFS4_CHKSEQ_OKAY: 8829 /* 8830 * This is a replayed stateid; if 8831 * seqid matches the next expected, 8832 * then client is using wrong seqid. 
8833 */ 8834 case NFS4_CHKSEQ_BAD: 8835 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8836 goto end; 8837 case NFS4_CHKSEQ_REPLAY: 8838 rfs4_update_lease(lsp->locker->client); 8839 *cs->statusp = status = resp->status; 8840 goto end; 8841 } 8842 break; 8843 default: 8844 ASSERT(FALSE); 8845 break; 8846 } 8847 8848 rfs4_update_lock_sequence(lsp); 8849 rfs4_update_lease(lsp->locker->client); 8850 } 8851 8852 /* 8853 * NFS4 only allows locking on regular files, so 8854 * verify type of object. 8855 */ 8856 if (cs->vp->v_type != VREG) { 8857 if (cs->vp->v_type == VDIR) 8858 status = NFS4ERR_ISDIR; 8859 else 8860 status = NFS4ERR_INVAL; 8861 goto out; 8862 } 8863 8864 cp = lsp->state->owner->client; 8865 8866 if (rfs4_clnt_in_grace(cp) && !args->reclaim) { 8867 status = NFS4ERR_GRACE; 8868 goto out; 8869 } 8870 8871 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->can_reclaim) { 8872 status = NFS4ERR_NO_GRACE; 8873 goto out; 8874 } 8875 8876 if (!rfs4_clnt_in_grace(cp) && args->reclaim) { 8877 status = NFS4ERR_NO_GRACE; 8878 goto out; 8879 } 8880 8881 if (lsp->state->finfo->dinfo->dtype == OPEN_DELEGATE_WRITE) 8882 cs->deleg = TRUE; 8883 8884 status = rfs4_do_lock(lsp, args->locktype, 8885 args->locker.locker4_u.lock_owner.lock_seqid, 8886 args->offset, args->length, cs->cr, resop); 8887 8888 out: 8889 lsp->skip_seqid_check = FALSE; 8890 8891 *cs->statusp = resp->status = status; 8892 8893 if (status == NFS4_OK) { 8894 resp->LOCK4res_u.lock_stateid = lsp->lockid.stateid; 8895 lsp->lock_completed = TRUE; 8896 } 8897 /* 8898 * Only update the "OPEN" response here if this was a new 8899 * lock_owner 8900 */ 8901 if (sp) 8902 rfs4_update_open_resp(sp->owner, resop, NULL); 8903 8904 rfs4_update_lock_resp(lsp, resop); 8905 8906 end: 8907 if (lsp) { 8908 if (ls_sw_held) 8909 rfs4_sw_exit(&lsp->ls_sw); 8910 /* 8911 * If an sp obtained, then the lsp does not represent 8912 * a lock on the file struct. 
8913 */ 8914 if (sp != NULL) 8915 rfs4_lo_state_rele(lsp, FALSE); 8916 else 8917 rfs4_lo_state_rele(lsp, TRUE); 8918 } 8919 if (sp) { 8920 rfs4_sw_exit(&sp->owner->oo_sw); 8921 rfs4_state_rele(sp); 8922 } 8923 8924 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs, 8925 LOCK4res *, resp); 8926 } 8927 8928 /* free function for LOCK/LOCKT */ 8929 static void 8930 lock_denied_free(nfs_resop4 *resop) 8931 { 8932 LOCK4denied *dp = NULL; 8933 8934 switch (resop->resop) { 8935 case OP_LOCK: 8936 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED) 8937 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied; 8938 break; 8939 case OP_LOCKT: 8940 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED) 8941 dp = &resop->nfs_resop4_u.oplockt.denied; 8942 break; 8943 default: 8944 break; 8945 } 8946 8947 if (dp) 8948 kmem_free(dp->owner.owner_val, dp->owner.owner_len); 8949 } 8950 8951 /*ARGSUSED*/ 8952 void 8953 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop, 8954 struct svc_req *req, struct compound_state *cs) 8955 { 8956 LOCKU4args *args = &argop->nfs_argop4_u.oplocku; 8957 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku; 8958 nfsstat4 status; 8959 stateid4 *stateid = &args->lock_stateid; 8960 rfs4_lo_state_t *lsp; 8961 8962 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs, 8963 LOCKU4args *, args); 8964 8965 if (cs->vp == NULL) { 8966 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 8967 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 8968 LOCKU4res *, resp); 8969 return; 8970 } 8971 8972 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) { 8973 *cs->statusp = resp->status = status; 8974 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 8975 LOCKU4res *, resp); 8976 return; 8977 } 8978 8979 /* Ensure specified filehandle matches */ 8980 if (cs->vp != lsp->state->finfo->vp) { 8981 rfs4_lo_state_rele(lsp, TRUE); 8982 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8983 DTRACE_NFSV4_2(op__locku__done, 
struct compound_state *, cs, 8984 LOCKU4res *, resp); 8985 return; 8986 } 8987 8988 /* hold off other access to lsp while we tinker */ 8989 rfs4_sw_enter(&lsp->ls_sw); 8990 8991 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) { 8992 case NFS4_CHECK_STATEID_OKAY: 8993 if (rfs4_check_lock_seqid(args->seqid, lsp, resop) 8994 != NFS4_CHKSEQ_OKAY) { 8995 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8996 goto end; 8997 } 8998 break; 8999 case NFS4_CHECK_STATEID_OLD: 9000 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9001 goto end; 9002 case NFS4_CHECK_STATEID_BAD: 9003 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9004 goto end; 9005 case NFS4_CHECK_STATEID_EXPIRED: 9006 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 9007 goto end; 9008 case NFS4_CHECK_STATEID_CLOSED: 9009 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9010 goto end; 9011 case NFS4_CHECK_STATEID_REPLAY: 9012 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) { 9013 case NFS4_CHKSEQ_OKAY: 9014 /* 9015 * This is a replayed stateid; if 9016 * seqid matches the next expected, 9017 * then client is using wrong seqid. 9018 */ 9019 case NFS4_CHKSEQ_BAD: 9020 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9021 goto end; 9022 case NFS4_CHKSEQ_REPLAY: 9023 rfs4_update_lease(lsp->locker->client); 9024 *cs->statusp = status = resp->status; 9025 goto end; 9026 } 9027 break; 9028 default: 9029 ASSERT(FALSE); 9030 break; 9031 } 9032 9033 rfs4_update_lock_sequence(lsp); 9034 rfs4_update_lease(lsp->locker->client); 9035 9036 /* 9037 * NFS4 only allows locking on regular files, so 9038 * verify type of object. 
9039 */ 9040 if (cs->vp->v_type != VREG) { 9041 if (cs->vp->v_type == VDIR) 9042 status = NFS4ERR_ISDIR; 9043 else 9044 status = NFS4ERR_INVAL; 9045 goto out; 9046 } 9047 9048 if (rfs4_clnt_in_grace(lsp->state->owner->client)) { 9049 status = NFS4ERR_GRACE; 9050 goto out; 9051 } 9052 9053 status = rfs4_do_lock(lsp, args->locktype, 9054 args->seqid, args->offset, args->length, cs->cr, resop); 9055 9056 out: 9057 *cs->statusp = resp->status = status; 9058 9059 if (status == NFS4_OK) 9060 resp->lock_stateid = lsp->lockid.stateid; 9061 9062 rfs4_update_lock_resp(lsp, resop); 9063 9064 end: 9065 rfs4_sw_exit(&lsp->ls_sw); 9066 rfs4_lo_state_rele(lsp, TRUE); 9067 9068 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9069 LOCKU4res *, resp); 9070 } 9071 9072 /* 9073 * LOCKT is a best effort routine, the client can not be guaranteed that 9074 * the status return is still in effect by the time the reply is received. 9075 * They are numerous race conditions in this routine, but we are not required 9076 * and can not be accurate. 9077 */ 9078 /*ARGSUSED*/ 9079 void 9080 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop, 9081 struct svc_req *req, struct compound_state *cs) 9082 { 9083 LOCKT4args *args = &argop->nfs_argop4_u.oplockt; 9084 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt; 9085 rfs4_lockowner_t *lo; 9086 rfs4_client_t *cp; 9087 bool_t create = FALSE; 9088 struct flock64 flk; 9089 int error; 9090 int flag = FREAD | FWRITE; 9091 int ltype; 9092 length4 posix_length; 9093 sysid_t sysid; 9094 pid_t pid; 9095 9096 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs, 9097 LOCKT4args *, args); 9098 9099 if (cs->vp == NULL) { 9100 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 9101 goto out; 9102 } 9103 9104 /* 9105 * NFS4 only allows locking on regular files, so 9106 * verify type of object. 
9107 */ 9108 if (cs->vp->v_type != VREG) { 9109 if (cs->vp->v_type == VDIR) 9110 *cs->statusp = resp->status = NFS4ERR_ISDIR; 9111 else 9112 *cs->statusp = resp->status = NFS4ERR_INVAL; 9113 goto out; 9114 } 9115 9116 /* 9117 * Check out the clientid to ensure the server knows about it 9118 * so that we correctly inform the client of a server reboot. 9119 */ 9120 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE)) 9121 == NULL) { 9122 *cs->statusp = resp->status = 9123 rfs4_check_clientid(&args->owner.clientid, 0); 9124 goto out; 9125 } 9126 if (rfs4_lease_expired(cp)) { 9127 rfs4_client_close(cp); 9128 /* 9129 * Protocol doesn't allow returning NFS4ERR_STALE as 9130 * other operations do on this check so STALE_CLIENTID 9131 * is returned instead 9132 */ 9133 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID; 9134 goto out; 9135 } 9136 9137 if (rfs4_clnt_in_grace(cp) && !(cp->can_reclaim)) { 9138 *cs->statusp = resp->status = NFS4ERR_GRACE; 9139 rfs4_client_rele(cp); 9140 goto out; 9141 } 9142 rfs4_client_rele(cp); 9143 9144 resp->status = NFS4_OK; 9145 9146 switch (args->locktype) { 9147 case READ_LT: 9148 case READW_LT: 9149 ltype = F_RDLCK; 9150 break; 9151 case WRITE_LT: 9152 case WRITEW_LT: 9153 ltype = F_WRLCK; 9154 break; 9155 } 9156 9157 posix_length = args->length; 9158 /* Check for zero length. 
To lock to end of file use all ones for V4 */ 9159 if (posix_length == 0) { 9160 *cs->statusp = resp->status = NFS4ERR_INVAL; 9161 goto out; 9162 } else if (posix_length == (length4)(~0)) { 9163 posix_length = 0; /* Posix to end of file */ 9164 } 9165 9166 /* Find or create a lockowner */ 9167 lo = rfs4_findlockowner(&args->owner, &create); 9168 9169 if (lo) { 9170 pid = lo->pid; 9171 if ((resp->status = 9172 rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK) 9173 goto err; 9174 } else { 9175 pid = 0; 9176 sysid = lockt_sysid; 9177 } 9178 retry: 9179 flk.l_type = ltype; 9180 flk.l_whence = 0; /* SEEK_SET */ 9181 flk.l_start = args->offset; 9182 flk.l_len = posix_length; 9183 flk.l_sysid = sysid; 9184 flk.l_pid = pid; 9185 flag |= F_REMOTELOCK; 9186 9187 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk); 9188 9189 /* Note that length4 is uint64_t but l_len and l_start are off64_t */ 9190 if (flk.l_len < 0 || flk.l_start < 0) { 9191 resp->status = NFS4ERR_INVAL; 9192 goto err; 9193 } 9194 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0, 9195 NULL, cs->cr, NULL); 9196 9197 /* 9198 * N.B. We map error values to nfsv4 errors. This is differrent 9199 * than puterrno4 routine. 
9200 */ 9201 switch (error) { 9202 case 0: 9203 if (flk.l_type == F_UNLCK) 9204 resp->status = NFS4_OK; 9205 else { 9206 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED) 9207 goto retry; 9208 resp->status = NFS4ERR_DENIED; 9209 } 9210 break; 9211 case EOVERFLOW: 9212 resp->status = NFS4ERR_INVAL; 9213 break; 9214 case EINVAL: 9215 resp->status = NFS4ERR_NOTSUPP; 9216 break; 9217 default: 9218 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)", 9219 error); 9220 resp->status = NFS4ERR_SERVERFAULT; 9221 break; 9222 } 9223 9224 err: 9225 if (lo) 9226 rfs4_lockowner_rele(lo); 9227 *cs->statusp = resp->status; 9228 out: 9229 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs, 9230 LOCKT4res *, resp); 9231 } 9232 9233 static int 9234 vop_shrlock(vnode_t *vp, int cmd, struct shrlock *sp, int fflags) 9235 { 9236 int err; 9237 9238 if (cmd == F_UNSHARE && sp->s_deny == 0 && sp->s_access == 0) 9239 return (0); 9240 9241 err = VOP_SHRLOCK(vp, cmd, sp, fflags, CRED(), NULL); 9242 9243 NFS4_DEBUG(rfs4_shrlock_debug, 9244 (CE_NOTE, "rfs4_shrlock %s vp=%p acc=%d dny=%d sysid=%d " 9245 "pid=%d err=%d\n", cmd == F_UNSHARE ? 
"UNSHR" : "SHARE", 9246 (void *) vp, sp->s_access, sp->s_deny, sp->s_sysid, sp->s_pid, 9247 err)); 9248 9249 return (err); 9250 } 9251 9252 static int 9253 rfs4_shrlock(rfs4_state_t *sp, int cmd) 9254 { 9255 struct shrlock shr; 9256 struct shr_locowner shr_loco; 9257 int fflags; 9258 9259 fflags = shr.s_access = shr.s_deny = 0; 9260 9261 if (sp->share_access & OPEN4_SHARE_ACCESS_READ) { 9262 fflags |= FREAD; 9263 shr.s_access |= F_RDACC; 9264 } 9265 if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) { 9266 fflags |= FWRITE; 9267 shr.s_access |= F_WRACC; 9268 } 9269 if (sp->share_deny & OPEN4_SHARE_DENY_READ) 9270 shr.s_deny |= F_RDDNY; 9271 if (sp->share_deny & OPEN4_SHARE_DENY_WRITE) 9272 shr.s_deny |= F_WRDNY; 9273 9274 shr.s_pid = rfs4_dbe_getid(sp->owner->dbe); 9275 shr.s_sysid = sp->owner->client->sysidt; 9276 shr_loco.sl_pid = shr.s_pid; 9277 shr_loco.sl_id = shr.s_sysid; 9278 shr.s_owner = (caddr_t)&shr_loco; 9279 shr.s_own_len = sizeof (shr_loco); 9280 return (vop_shrlock(sp->finfo->vp, cmd, &shr, fflags)); 9281 } 9282 9283 static int 9284 rfs4_share(rfs4_state_t *sp) 9285 { 9286 int cmd; 9287 cmd = nbl_need_check(sp->finfo->vp) ? 
F_SHARE_NBMAND : F_SHARE; 9288 return (rfs4_shrlock(sp, cmd)); 9289 } 9290 9291 void 9292 rfs4_unshare(rfs4_state_t *sp) 9293 { 9294 (void) rfs4_shrlock(sp, F_UNSHARE); 9295 } 9296 9297 static int 9298 rdma_setup_read_data4(READ4args * args, READ4res * rok) 9299 { 9300 struct clist *wcl; 9301 int data_len, avail_len, num; 9302 count4 count = rok->data_len; 9303 9304 data_len = num = avail_len = 0; 9305 9306 wcl = args->wlist; 9307 while (wcl != NULL) { 9308 if (wcl->c_dmemhandle.mrc_rmr == 0) 9309 break; 9310 9311 avail_len += wcl->c_len; 9312 if (wcl->c_len < count) { 9313 data_len += wcl->c_len; 9314 } else { 9315 /* Can make the rest chunks all 0-len */ 9316 data_len += count; 9317 wcl->c_len = count; 9318 } 9319 count -= wcl->c_len; 9320 num++; 9321 wcl = wcl->c_next; 9322 } 9323 9324 /* 9325 * MUST fail if there are still more data 9326 */ 9327 if (count > 0) { 9328 DTRACE_PROBE2(nfss__e__read4_wlist_fail, 9329 int, data_len, int, count); 9330 return (FALSE); 9331 } 9332 wcl = args->wlist; 9333 rok->data_len = data_len; 9334 rok->wlist_len = data_len; 9335 rok->wlist = wcl; 9336 9337 return (TRUE); 9338 } 9339