/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2018 Nexenta Systems, Inc.
 */

#include <sys/systm.h>
#include <sys/sdt.h>
#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/auth_unix.h>
#include <rpc/auth_des.h>
#include <rpc/svc.h>
#include <rpc/xdr.h>
#include <nfs/nfs4.h>
#include <nfs/nfs_dispatch.h>
#include <nfs/nfs4_drc.h>

#define	NFS4_MAX_MINOR_VERSION	0

/*
 * The default size of the duplicate request cache
 */
uint32_t nfs4_drc_max = 8 * 1024;

/*
 * The number of buckets we'd like to hash the replies into.
 * Do not change this on the fly.
 */
uint32_t nfs4_drc_hash = 541;

static void rfs4_resource_err(struct svc_req *req, COMPOUND4args *argsp);

/*
 * Initialize a duplicate request cache.
 */
rfs4_drc_t *
rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size)
{
	rfs4_drc_t *drc;
	uint32_t bki;

	ASSERT(drc_size);
	ASSERT(drc_hash_size);

	drc = kmem_alloc(sizeof (rfs4_drc_t), KM_SLEEP);

	drc->max_size = drc_size;
	drc->in_use = 0;

	mutex_init(&drc->lock, NULL, MUTEX_DEFAULT, NULL);

	drc->dr_hash = drc_hash_size;

	drc->dr_buckets = kmem_alloc(sizeof (list_t) * drc_hash_size,
	    KM_SLEEP);

	for (bki = 0; bki < drc_hash_size; bki++) {
		list_create(&drc->dr_buckets[bki], sizeof (rfs4_dupreq_t),
		    offsetof(rfs4_dupreq_t, dr_bkt_next));
	}

	list_create(&(drc->dr_cache), sizeof (rfs4_dupreq_t),
	    offsetof(rfs4_dupreq_t, dr_next));

	return (drc);
}

/*
 * Destroy a duplicate request cache.
 */
void
rfs4_fini_drc(void)
{
	nfs4_srv_t *nsrv4 = nfs4_get_srv();
	rfs4_drc_t *drc = nsrv4->nfs4_drc;
	rfs4_dupreq_t *drp, *drp_next;

	/* iterate over the dr_cache and free the entries */
	for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) {

		if (drp->dr_state == NFS4_DUP_REPLAY)
			rfs4_compound_free(&(drp->dr_res));

		if (drp->dr_addr.buf != NULL)
			kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);

		drp_next = list_next(&(drc->dr_cache), drp);

		kmem_free(drp, sizeof (rfs4_dupreq_t));
	}

	mutex_destroy(&drc->lock);
	kmem_free(drc->dr_buckets, sizeof (list_t) * drc->dr_hash);
	kmem_free(drc, sizeof (rfs4_drc_t));
}
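
/*
 * A summary of the dupreq entry lifecycle, as implied by the code
 * below: an entry is allocated (or reclaimed) as NFS4_DUP_NEW while
 * its compound is first being processed, marked NFS4_DUP_INUSE while
 * a cached reply is being resent, parked as NFS4_DUP_REPLAY once its
 * reply is cached and available for replay, and finally marked
 * NFS4_DUP_FREE when it may be reclaimed.  NFS4_DUP_PENDING and
 * NFS4_DUP_ERROR are status codes returned by rfs4_find_dr(); they
 * are never stored in dr_state.
 */
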
/*
 * rfs4_dr_chstate:
 *
 * Change the state of a rfs4_dupreq. If the new state is anything
 * other than NFS4_DUP_FREE, simply record it and return. If we are
 * moving to the FREE state, free the compound results and remove the
 * entry from both the bucket and the dr_cache lists.
 */
void
rfs4_dr_chstate(rfs4_dupreq_t *drp, int new_state)
{
	rfs4_drc_t *drc;

	ASSERT(drp);
	ASSERT(drp->drc);
	ASSERT(drp->dr_bkt);
	ASSERT(MUTEX_HELD(&drp->drc->lock));

	drp->dr_state = new_state;

	if (new_state != NFS4_DUP_FREE)
		return;

	drc = drp->drc;

	/*
	 * Remove entry from the bucket and
	 * dr_cache list, free compound results.
	 */
	list_remove(drp->dr_bkt, drp);
	list_remove(&(drc->dr_cache), drp);
	rfs4_compound_free(&(drp->dr_res));
}

/*
 * rfs4_alloc_dr:
 *
 * Allocate a new entry if we have not yet reached the maximum cache
 * size; otherwise scan backwards from the tail of the dr_cache list
 * and reuse the first entry that is either marked NFS4_DUP_FREE or
 * sitting in the NFS4_DUP_REPLAY state.
 */
rfs4_dupreq_t *
rfs4_alloc_dr(rfs4_drc_t *drc)
{
	rfs4_dupreq_t *drp_tail, *drp = NULL;

	ASSERT(drc);
	ASSERT(MUTEX_HELD(&drc->lock));

	/*
	 * Have we hit the cache limit yet?
	 */
	if (drc->in_use < drc->max_size) {
		/*
		 * Nope, so allocate a new one.
		 */
		drp = kmem_zalloc(sizeof (rfs4_dupreq_t), KM_SLEEP);
		drp->drc = drc;
		drc->in_use++;
		DTRACE_PROBE1(nfss__i__drc_new, rfs4_dupreq_t *, drp);
		return (drp);
	}

	/*
	 * The cache is fully allocated; traverse the list
	 * backwards to find an entry we can reuse.
	 */
	for (drp_tail = list_tail(&drc->dr_cache); drp_tail != NULL;
	    drp_tail = list_prev(&drc->dr_cache, drp_tail)) {

		switch (drp_tail->dr_state) {

		case NFS4_DUP_FREE:
			list_remove(&(drc->dr_cache), drp_tail);
			DTRACE_PROBE1(nfss__i__drc_freeclaim,
			    rfs4_dupreq_t *, drp_tail);
			return (drp_tail);
			/* NOTREACHED */

		case NFS4_DUP_REPLAY:
			/* grab it */
			rfs4_dr_chstate(drp_tail, NFS4_DUP_FREE);
			DTRACE_PROBE1(nfss__i__drc_replayclaim,
			    rfs4_dupreq_t *, drp_tail);
			return (drp_tail);
			/* NOTREACHED */
		}
	}
	DTRACE_PROBE1(nfss__i__drc_full, rfs4_drc_t *, drc);
	return (NULL);
}
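
/*
 * Note that the reuse scan above deliberately skips entries in the
 * NFS4_DUP_NEW and NFS4_DUP_INUSE states: those requests are still
 * in flight, so their cache slots must not be reclaimed out from
 * under them.
 */
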
/*
 * rfs4_find_dr:
 *
 * Search for an entry in the duplicate request cache by
 * calculating the hash index based on the XID, and examining
 * the entries in the hash bucket. If we find a match, return.
 * Once we have searched the bucket we call rfs4_alloc_dr() to
 * allocate a new entry, or reuse one that is available.
 */
int
rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup)
{
	uint32_t the_xid;
	list_t *dr_bkt;
	rfs4_dupreq_t *drp;
	int bktdex;

	/*
	 * Get the XID, calculate the bucket and search to
	 * see if we need to replay from the cache.
	 */
	the_xid = req->rq_xprt->xp_xid;
	bktdex = the_xid % drc->dr_hash;

	dr_bkt = &drc->dr_buckets[bktdex];

	DTRACE_PROBE3(nfss__i__drc_bktdex,
	    int, bktdex,
	    uint32_t, the_xid,
	    list_t *, dr_bkt);

	*dup = NULL;

	mutex_enter(&drc->lock);
	/*
	 * Search the bucket for a matching xid and address.
	 */
	for (drp = list_head(dr_bkt); drp != NULL;
	    drp = list_next(dr_bkt, drp)) {

		if (drp->dr_xid == the_xid &&
		    drp->dr_addr.len == req->rq_xprt->xp_rtaddr.len &&
		    bcmp((caddr_t)drp->dr_addr.buf,
		    (caddr_t)req->rq_xprt->xp_rtaddr.buf,
		    drp->dr_addr.len) == 0) {

			/*
			 * Found a match, so REPLAY the reply.
			 */
			if (drp->dr_state == NFS4_DUP_REPLAY) {
				rfs4_dr_chstate(drp, NFS4_DUP_INUSE);
				mutex_exit(&drc->lock);
				*dup = drp;
				DTRACE_PROBE1(nfss__i__drc_replay,
				    rfs4_dupreq_t *, drp);
				return (NFS4_DUP_REPLAY);
			}

			/*
			 * This entry must be in transition, so return
			 * the 'pending' status.
			 */
			mutex_exit(&drc->lock);
			return (NFS4_DUP_PENDING);
		}
	}

	drp = rfs4_alloc_dr(drc);
	mutex_exit(&drc->lock);

	/*
	 * The DRC is full and all entries are in use. The caller
	 * should error out this request and force the client to
	 * retransmit -- effectively this is a resource issue: either
	 * NFSD threads are tied up in the underlying file system, or
	 * the cache size is too small for the server load.
	 */
	if (drp == NULL)
		return (NFS4_DUP_ERROR);

	/*
	 * Init the state to NEW.
	 */
	drp->dr_state = NFS4_DUP_NEW;

	/*
	 * If needed, resize the address buffer.
	 */
	if (drp->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) {
		if (drp->dr_addr.buf != NULL)
			kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
		drp->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len;
		drp->dr_addr.buf = kmem_alloc(drp->dr_addr.maxlen, KM_NOSLEEP);
		if (drp->dr_addr.buf == NULL) {
			/*
			 * If the allocation fails, mark the entry
			 * as free and put it on the tail.
			 */
			drp->dr_addr.maxlen = 0;
			drp->dr_state = NFS4_DUP_FREE;
			mutex_enter(&drc->lock);
			list_insert_tail(&(drc->dr_cache), drp);
			mutex_exit(&drc->lock);
			return (NFS4_DUP_ERROR);
		}
	}

	/*
	 * Copy the address.
	 */
	drp->dr_addr.len = req->rq_xprt->xp_rtaddr.len;

	bcopy((caddr_t)req->rq_xprt->xp_rtaddr.buf,
	    (caddr_t)drp->dr_addr.buf,
	    drp->dr_addr.len);

	drp->dr_xid = the_xid;
	drp->dr_bkt = dr_bkt;

	/*
	 * Insert at the head of the bucket and
	 * the dr_cache lists.
	 */
	mutex_enter(&drc->lock);
	list_insert_head(&drc->dr_cache, drp);
	list_insert_head(dr_bkt, drp);
	mutex_exit(&drc->lock);

	*dup = drp;

	return (NFS4_DUP_NEW);
}
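
/*
 * To summarize the contract above, rfs4_find_dr() returns one of:
 *
 *	NFS4_DUP_NEW	 *dup is a fresh entry; the caller processes
 *			 the compound and caches the reply in it.
 *	NFS4_DUP_REPLAY	 *dup holds a cached reply (now INUSE); the
 *			 caller resends it instead of re-executing.
 *	NFS4_DUP_PENDING a matching request is still being processed;
 *			 the caller drops this retransmission.
 *	NFS4_DUP_ERROR	 the cache is exhausted, or the address buffer
 *			 could not be allocated; the caller sends a
 *			 resource error.
 */
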
/*
 * This function handles the duplicate request cache,
 * NULL_PROC and COMPOUND procedure calls for NFSv4.
 *
 * Passed into this function are:
 *
 *	disp	A pointer to our dispatch table entry
 *	req	The request to process
 *	xprt	The server transport handle
 *	ap	A pointer to the arguments
 *
 * When appropriate this function is responsible for inserting
 * the reply into the duplicate cache or replaying an existing
 * cached reply.
 *
 * dr_stat reflects the state of the duplicate request that
 * has been inserted into or retrieved from the cache.
 *
 * drp is the duplicate request entry.
 */
int
rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req, SVCXPRT *xprt,
    char *ap)
{
	COMPOUND4res res_buf;
	COMPOUND4res *rbp;
	COMPOUND4args *cap;
	cred_t *cr = NULL;
	int error = 0;
	int dis_flags = 0;
	int dr_stat = NFS4_NOT_DUP;
	rfs4_dupreq_t *drp = NULL;
	int rv;
	nfs4_srv_t *nsrv4 = nfs4_get_srv();
	rfs4_drc_t *nfs4_drc = nsrv4->nfs4_drc;

	ASSERT(disp);

	/*
	 * Short circuit the RPC_NULL proc.
	 */
	if (disp->dis_proc == rpc_null) {
		DTRACE_NFSV4_1(null__start, struct svc_req *, req);
		if (!svc_sendreply(xprt, xdr_void, NULL)) {
			DTRACE_NFSV4_1(null__done, struct svc_req *, req);
			svcerr_systemerr(xprt);
			return (1);
		}
		DTRACE_NFSV4_1(null__done, struct svc_req *, req);
		return (0);
	}

	/* Only NFSv4 Compounds from this point onward */

	rbp = &res_buf;
	cap = (COMPOUND4args *)ap;

	/*
	 * Figure out the disposition of the whole COMPOUND
	 * and record its idempotency.
	 */
	rfs4_compound_flagproc(cap, &dis_flags);

	/*
	 * If NON-IDEMPOTENT then we need to figure out if this
	 * request can be replayed from the duplicate cache.
	 *
	 * If this is a new request then we need to insert the
	 * reply into the duplicate cache.
	 */
	if (!(dis_flags & RPC_IDEMPOTENT)) {
		/* look for a replay from the cache or allocate */
		dr_stat = rfs4_find_dr(req, nfs4_drc, &drp);

		switch (dr_stat) {

		case NFS4_DUP_ERROR:
			rfs4_resource_err(req, cap);
			return (1);
			/* NOTREACHED */

		case NFS4_DUP_PENDING:
			/*
			 * A matching request has already been entered
			 * into the duplicate cache, but its reply has
			 * not yet been sent via svc_sendreply().
			 */
			return (1);
			/* NOTREACHED */

		case NFS4_DUP_NEW:
			curthread->t_flag |= T_DONTPEND;
			/* NON-IDEMPOTENT proc call */
			rfs4_compound(cap, rbp, NULL, req, cr, &rv);
			curthread->t_flag &= ~T_DONTPEND;

			if (rv)	/* short circuit sendreply on error */
				return (rv);

			/*
			 * dr_res must be initialized before calling
			 * rfs4_dr_chstate (it frees the reply).
			 */
			drp->dr_res = res_buf;
			if (curthread->t_flag & T_WOULDBLOCK) {
				curthread->t_flag &= ~T_WOULDBLOCK;
				/*
				 * Mark this entry as FREE and place it
				 * on the end of the cache list.
				 */
				mutex_enter(&drp->drc->lock);
				rfs4_dr_chstate(drp, NFS4_DUP_FREE);
				list_insert_tail(&(drp->drc->dr_cache), drp);
				mutex_exit(&drp->drc->lock);
				return (1);
			}
			break;

		case NFS4_DUP_REPLAY:
			/* replay from the cache */
			rbp = &(drp->dr_res);
			break;
		}
	} else {
		curthread->t_flag |= T_DONTPEND;
		/* IDEMPOTENT proc call */
		rfs4_compound(cap, rbp, NULL, req, cr, &rv);
		curthread->t_flag &= ~T_DONTPEND;

		if (rv)	/* short circuit sendreply on error */
			return (rv);

		if (curthread->t_flag & T_WOULDBLOCK) {
			curthread->t_flag &= ~T_WOULDBLOCK;
			return (1);
		}
	}
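
	/*
	 * A note on the t_flag handling above (an explanatory gloss,
	 * not original commentary): T_DONTPEND asks resource waits
	 * made while the compound runs to fail rather than block, and
	 * T_WOULDBLOCK records that such a wait was skipped. In that
	 * case the reply is dropped (and any new cache entry freed) so
	 * the client will retransmit once resources are available.
	 */
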
	/*
	 * Send out the replayed reply or the 'real' one.
	 */
	if (!svc_sendreply(xprt, xdr_COMPOUND4res_srv, (char *)rbp)) {
		DTRACE_PROBE2(nfss__e__dispatch_sendfail,
		    struct svc_req *, xprt,
		    char *, rbp);
		svcerr_systemerr(xprt);
		error++;
	}

	/*
	 * If this reply was just inserted into the duplicate cache,
	 * or it was replayed from the dup cache, (re)mark it as
	 * available for replay.
	 *
	 * At first glance, this 'if' statement seems a little strange:
	 * testing for NFS4_DUP_REPLAY, and then calling...
	 *
	 *	rfs4_dr_chstate(drp, NFS4_DUP_REPLAY)
	 *
	 * ...but notice that we are checking dr_stat, and not the
	 * state of the entry itself. The entry will be NFS4_DUP_INUSE;
	 * we do that so that we know not to prematurely reap it while
	 * we resend it to the client.
	 */
	if (dr_stat == NFS4_DUP_NEW || dr_stat == NFS4_DUP_REPLAY) {
		mutex_enter(&drp->drc->lock);
		rfs4_dr_chstate(drp, NFS4_DUP_REPLAY);
		mutex_exit(&drp->drc->lock);
	} else if (dr_stat == NFS4_NOT_DUP) {
		rfs4_compound_free(rbp);
	}

	return (error);
}

bool_t
rfs4_minorvers_mismatch(struct svc_req *req, SVCXPRT *xprt, void *args)
{
	COMPOUND4args *argsp;
	COMPOUND4res res_buf, *resp;

	if (req->rq_vers != 4)
		return (FALSE);

	argsp = (COMPOUND4args *)args;

	if (argsp->minorversion <= NFS4_MAX_MINOR_VERSION)
		return (FALSE);

	resp = &res_buf;

	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_len = argsp->tag.utf8string_len;
	if (argsp->tag.utf8string_len != 0) {
		resp->tag.utf8string_val =
		    kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP);
		bcopy(argsp->tag.utf8string_val, resp->tag.utf8string_val,
		    resp->tag.utf8string_len);
	} else {
		resp->tag.utf8string_val = NULL;
	}
	resp->array_len = 0;
	resp->array = NULL;
	resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
	if (!svc_sendreply(xprt, xdr_COMPOUND4res_srv, (char *)resp)) {
		DTRACE_PROBE2(nfss__e__minorvers_mismatch,
		    SVCXPRT *, xprt, char *, resp);
		svcerr_systemerr(xprt);
	}
	rfs4_compound_free(resp);
	return (TRUE);
}
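
/*
 * rfs4_resource_err:
 *
 * The DRC (or the address buffer for a new entry) could not be
 * allocated, so build and send a trimmed-down COMPOUND reply by
 * hand: a single result slot carrying the first op of the request
 * with a status of NFS4ERR_RESOURCE (or NFS4ERR_OP_ILLEGAL),
 * prompting the client to retransmit later.
 */
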
void
rfs4_resource_err(struct svc_req *req, COMPOUND4args *argsp)
{
	COMPOUND4res res_buf, *rbp;
	nfs_resop4 *resop;
	PUTFH4res *resp;

	rbp = &res_buf;

	/*
	 * Form a reply tag by copying over the request tag.
	 */
	rbp->tag.utf8string_len = argsp->tag.utf8string_len;
	if (argsp->tag.utf8string_len != 0) {
		rbp->tag.utf8string_val =
		    kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP);
		bcopy(argsp->tag.utf8string_val, rbp->tag.utf8string_val,
		    rbp->tag.utf8string_len);
	} else {
		rbp->tag.utf8string_val = NULL;
	}

	rbp->array_len = 1;
	rbp->array = kmem_zalloc(rbp->array_len * sizeof (nfs_resop4),
	    KM_SLEEP);
	resop = &rbp->array[0];
	resop->resop = argsp->array[0].argop;	/* copy first op over */

	/* Any op will do, just need to access status field */
	resp = &resop->nfs_resop4_u.opputfh;

	/*
	 * NFS4ERR_RESOURCE is allowed for all ops, except OP_ILLEGAL.
	 * Note that all op numbers in the compound array were already
	 * validated by the XDR decoder (xdr_COMPOUND4args_srv()).
	 */
	resp->status = (resop->resop == OP_ILLEGAL ?
	    NFS4ERR_OP_ILLEGAL : NFS4ERR_RESOURCE);

	/* compound status is same as first op status */
	rbp->status = resp->status;

	if (!svc_sendreply(req->rq_xprt, xdr_COMPOUND4res_srv, (char *)rbp)) {
		DTRACE_PROBE2(nfss__rsrc_err__sendfail,
		    struct svc_req *, req->rq_xprt, char *, rbp);
		svcerr_systemerr(req->rq_xprt);
	}

	UTF8STRING_FREE(rbp->tag);
	kmem_free(rbp->array, rbp->array_len * sizeof (nfs_resop4));
}