/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/systm.h>
#include <sys/sdt.h>
#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/auth_unix.h>
#include <rpc/auth_des.h>
#include <rpc/svc.h>
#include <rpc/xdr.h>
#include <nfs/nfs4.h>
#include <nfs/nfs_dispatch.h>
#include <nfs/nfs4_drc.h>

/* Highest NFSv4 minor version this server implements (v4.0 only). */
#define	NFS4_MAX_MINOR_VERSION	0

/*
 * This is the duplicate request cache for NFSv4.
 * Allocated at server startup; consulted by rfs4_dispatch() for
 * every non-idempotent COMPOUND.
 */
rfs4_drc_t *nfs4_drc = NULL;

/*
 * The default size (maximum number of entries) of the duplicate
 * request cache.
 */
uint32_t nfs4_drc_max = 8 * 1024;

/*
 * The number of buckets we'd like to hash the
 * replies into.. do not change this on the fly.
 */
uint32_t nfs4_drc_hash = 541;

static void rfs4_resource_err(struct svc_req *req, COMPOUND4args *argsp);

/*
 * Initialize a duplicate request cache.
65 */ 66 rfs4_drc_t * 67 rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size) 68 { 69 rfs4_drc_t *drc; 70 uint32_t bki; 71 72 ASSERT(drc_size); 73 ASSERT(drc_hash_size); 74 75 drc = kmem_alloc(sizeof (rfs4_drc_t), KM_SLEEP); 76 77 drc->max_size = drc_size; 78 drc->in_use = 0; 79 80 mutex_init(&drc->lock, NULL, MUTEX_DEFAULT, NULL); 81 82 drc->dr_hash = drc_hash_size; 83 84 drc->dr_buckets = kmem_alloc(sizeof (list_t)*drc_hash_size, KM_SLEEP); 85 86 for (bki = 0; bki < drc_hash_size; bki++) { 87 list_create(&drc->dr_buckets[bki], sizeof (rfs4_dupreq_t), 88 offsetof(rfs4_dupreq_t, dr_bkt_next)); 89 } 90 91 list_create(&(drc->dr_cache), sizeof (rfs4_dupreq_t), 92 offsetof(rfs4_dupreq_t, dr_next)); 93 94 return (drc); 95 } 96 97 /* 98 * Destroy a duplicate request cache. 99 */ 100 void 101 rfs4_fini_drc(rfs4_drc_t *drc) 102 { 103 rfs4_dupreq_t *drp, *drp_next; 104 105 ASSERT(drc); 106 107 /* iterate over the dr_cache and free the enties */ 108 for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) { 109 110 if (drp->dr_state == NFS4_DUP_REPLAY) 111 rfs4_compound_free(&(drp->dr_res)); 112 113 if (drp->dr_addr.buf != NULL) 114 kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen); 115 116 drp_next = list_next(&(drc->dr_cache), drp); 117 118 kmem_free(drp, sizeof (rfs4_dupreq_t)); 119 } 120 121 mutex_destroy(&drc->lock); 122 kmem_free(drc->dr_buckets, 123 sizeof (list_t)*drc->dr_hash); 124 kmem_free(drc, sizeof (rfs4_drc_t)); 125 } 126 127 /* 128 * rfs4_dr_chstate: 129 * 130 * Change the state of a rfs4_dupreq. If it's not in transition 131 * to the FREE state, return. If we are moving to the FREE state 132 * then we need to clean up the compound results and move the entry 133 * to the end of the list. 
 */
void
rfs4_dr_chstate(rfs4_dupreq_t *drp, int new_state)
{
	rfs4_drc_t *drc;

	ASSERT(drp);
	ASSERT(drp->drc);
	ASSERT(drp->dr_bkt);
	ASSERT(MUTEX_HELD(&drp->drc->lock));

	drp->dr_state = new_state;

	if (new_state != NFS4_DUP_FREE)
		return;

	drc = drp->drc;

	/*
	 * Remove entry from the bucket and
	 * dr_cache list, free compound results.
	 * The caller is expected to re-insert the entry where needed
	 * (e.g. at the tail of dr_cache for reuse).
	 */
	list_remove(drp->dr_bkt, drp);
	list_remove(&(drc->dr_cache), drp);
	rfs4_compound_free(&(drp->dr_res));
}

/*
 * rfs4_alloc_dr:
 *
 * Malloc a new one if we have not reached our maximum cache
 * limit, otherwise pick an entry off the tail -- Use if it
 * is marked as NFS4_DUP_FREE, or is an entry in the
 * NFS4_DUP_REPLAY state.
 *
 * Called with drc->lock held; returns NULL when the cache is full
 * and every entry is busy (NEW/INUSE).
 */
rfs4_dupreq_t *
rfs4_alloc_dr(rfs4_drc_t *drc)
{
	rfs4_dupreq_t *drp_tail, *drp = NULL;

	ASSERT(drc);
	ASSERT(MUTEX_HELD(&drc->lock));

	/*
	 * Have we hit the cache limit yet ?
	 */
	if (drc->in_use < drc->max_size) {
		/*
		 * nope, so let's malloc a new one
		 */
		drp = kmem_zalloc(sizeof (rfs4_dupreq_t), KM_SLEEP);
		drp->drc = drc;
		drc->in_use++;
		DTRACE_PROBE1(nfss__i__drc_new, rfs4_dupreq_t *, drp);
		return (drp);
	}

	/*
	 * Cache is all allocated now traverse the list
	 * backwards to find one we can reuse.
	 */
	for (drp_tail = list_tail(&drc->dr_cache); drp_tail != NULL;
	    drp_tail = list_prev(&drc->dr_cache, drp_tail)) {

		switch (drp_tail->dr_state) {

		case NFS4_DUP_FREE:
			/*
			 * FREE entries live only on dr_cache (chstate
			 * already unlinked them from their bucket).
			 */
			list_remove(&(drc->dr_cache), drp_tail);
			DTRACE_PROBE1(nfss__i__drc_freeclaim,
			    rfs4_dupreq_t *, drp_tail);
			return (drp_tail);
			/* NOTREACHED */

		case NFS4_DUP_REPLAY:
			/*
			 * grab it. chstate(FREE) unlinks it from both
			 * lists and frees the cached reply.
			 */
			rfs4_dr_chstate(drp_tail, NFS4_DUP_FREE);
			DTRACE_PROBE1(nfss__i__drc_replayclaim,
			    rfs4_dupreq_t *, drp_tail);
			return (drp_tail);
			/* NOTREACHED */
		}
	}
	DTRACE_PROBE1(nfss__i__drc_full, rfs4_drc_t *, drc);
	return (NULL);
}

/*
 * rfs4_find_dr:
 *
 * Search for an entry in the duplicate request cache by
 * calculating the hash index based on the XID, and examining
 * the entries in the hash bucket. If we find a match, return.
 * Once we have searched the bucket we call rfs4_alloc_dr() to
 * allocate a new entry, or reuse one that is available.
 *
 * Returns one of:
 *   NFS4_DUP_REPLAY  - match found with a cached reply; *dup set and the
 *                      entry marked INUSE so it is not reaped mid-replay.
 *   NFS4_DUP_PENDING - match found but its reply is still being built.
 *   NFS4_DUP_NEW     - no match; a fresh entry was inserted and *dup set.
 *   NFS4_DUP_ERROR   - cache exhausted or allocation failure.
 */
int
rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup)
{

	uint32_t	the_xid;
	list_t		*dr_bkt;
	rfs4_dupreq_t	*drp;
	int		bktdex;

	/*
	 * Get the XID, calculate the bucket and search to
	 * see if we need to replay from the cache.
	 */
	the_xid = req->rq_xprt->xp_xid;
	bktdex = the_xid % drc->dr_hash;

	dr_bkt = (list_t *)
	    &(drc->dr_buckets[(the_xid % drc->dr_hash)]);

	DTRACE_PROBE3(nfss__i__drc_bktdex,
	    int, bktdex,
	    uint32_t, the_xid,
	    list_t *, dr_bkt);

	*dup = NULL;

	mutex_enter(&drc->lock);
	/*
	 * Search the bucket for a matching xid and address.
	 */
	for (drp = list_head(dr_bkt); drp != NULL;
	    drp = list_next(dr_bkt, drp)) {

		if (drp->dr_xid == the_xid &&
		    drp->dr_addr.len == req->rq_xprt->xp_rtaddr.len &&
		    bcmp((caddr_t)drp->dr_addr.buf,
		    (caddr_t)req->rq_xprt->xp_rtaddr.buf,
		    drp->dr_addr.len) == 0) {

			/*
			 * Found a match so REPLAY the Reply
			 */
			if (drp->dr_state == NFS4_DUP_REPLAY) {
				rfs4_dr_chstate(drp, NFS4_DUP_INUSE);
				mutex_exit(&drc->lock);
				*dup = drp;
				DTRACE_PROBE1(nfss__i__drc_replay,
				    rfs4_dupreq_t *, drp);
				return (NFS4_DUP_REPLAY);
			}

			/*
			 * This entry must be in transition (NEW or
			 * INUSE), so return the 'pending' status.
			 */
			mutex_exit(&drc->lock);
			return (NFS4_DUP_PENDING);
		}
	}

	drp = rfs4_alloc_dr(drc);
	mutex_exit(&drc->lock);

	/*
	 * The DRC is full and all entries are in use. Upper function
	 * should error out this request and force the client to
	 * retransmit -- effectively this is a resource issue. NFSD
	 * threads tied up with native File System, or the cache size
	 * is too small for the server load.
	 */
	if (drp == NULL)
		return (NFS4_DUP_ERROR);

	/*
	 * Init the state to NEW.
	 */
	drp->dr_state = NFS4_DUP_NEW;

	/*
	 * If needed, resize the address buffer
	 */
	if (drp->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) {
		if (drp->dr_addr.buf != NULL)
			kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
		drp->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len;
		/* KM_NOSLEEP: drop the request rather than block here. */
		drp->dr_addr.buf = kmem_alloc(drp->dr_addr.maxlen, KM_NOSLEEP);
		if (drp->dr_addr.buf == NULL) {
			/*
			 * If the malloc fails, mark the entry
			 * as free and put on the tail.
			 */
			drp->dr_addr.maxlen = 0;
			drp->dr_state = NFS4_DUP_FREE;
			mutex_enter(&drc->lock);
			list_insert_tail(&(drc->dr_cache), drp);
			mutex_exit(&drc->lock);
			return (NFS4_DUP_ERROR);
		}
	}


	/*
	 * Copy the address.
	 */
	drp->dr_addr.len = req->rq_xprt->xp_rtaddr.len;

	bcopy((caddr_t)req->rq_xprt->xp_rtaddr.buf,
	    (caddr_t)drp->dr_addr.buf,
	    drp->dr_addr.len);

	drp->dr_xid = the_xid;
	drp->dr_bkt = dr_bkt;

	/*
	 * Insert at the head of the bucket and
	 * the drc lists..
	 */
	mutex_enter(&drc->lock);
	list_insert_head(&drc->dr_cache, drp);
	list_insert_head(dr_bkt, drp);
	mutex_exit(&drc->lock);

	*dup = drp;

	return (NFS4_DUP_NEW);
}

/*
 *
 * This function handles the duplicate request cache,
 * NULL_PROC and COMPOUND procedure calls for NFSv4;
 *
 * Passed into this function are:-
 *
 *	disp	A pointer to our dispatch table entry
 *	req	The request to process
 *	xprt	The server transport handle
 *	ap	A pointer to the arguments
 *	rlen	A pointer to the reply length (output)
 *
 *
 * When appropriate this function is responsible for inserting
 * the reply into the duplicate cache or replaying an existing
 * cached reply.
 *
 * dr_stat	reflects the state of the duplicate request that
 *		has been inserted into or retrieved from the cache
 *
 * drp		is the duplicate request entry
 *
 * Returns 0 when a reply was sent successfully; nonzero otherwise
 * (send failure, dropped request, or resource error).
 */
int
rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req,
    SVCXPRT *xprt, char *ap, size_t *rlen)
{

	COMPOUND4res	 res_buf;
	COMPOUND4res	*rbp;
	COMPOUND4args	*cap;
	cred_t		*cr = NULL;
	int		 error = 0;
	int		 dis_flags = 0;
	int		 dr_stat = NFS4_NOT_DUP;
	rfs4_dupreq_t	*drp = NULL;
	int		 rv;

	ASSERT(disp);

	/*
	 * Short circuit the RPC_NULL proc.
	 */
	if (disp->dis_proc == rpc_null) {
		DTRACE_NFSV4_1(null__start, struct svc_req *, req);
		if (!svc_sendreply(xprt, xdr_void, NULL)) {
			DTRACE_NFSV4_1(null__done, struct svc_req *, req);
			svcerr_systemerr(xprt);
			return (1);
		}
		DTRACE_NFSV4_1(null__done, struct svc_req *, req);
		*rlen = xdr_sizeof(xdr_void, NULL);
		return (0);
	}

	/* Only NFSv4 Compounds from this point onward */

	rbp = &res_buf;
	cap = (COMPOUND4args *)ap;

	/*
	 * Update kstats
	 */
	rfs4_compound_kstat_args(cap);

	/*
	 * Figure out the disposition of the whole COMPOUND
	 * and record its IDEMPOTENCY.
	 */
	rfs4_compound_flagproc(cap, &dis_flags);

	/*
	 * If NON-IDEMPOTENT then we need to figure out if this
	 * request can be replied from the duplicate cache.
	 *
	 * If this is a new request then we need to insert the
	 * reply into the duplicate cache.
	 */
	if (!(dis_flags & RPC_IDEMPOTENT)) {
		/* look for a replay from the cache or allocate */
		dr_stat = rfs4_find_dr(req, nfs4_drc, &drp);

		switch (dr_stat) {

		case NFS4_DUP_ERROR:
			rfs4_resource_err(req, cap);
			return (1);
			/* NOTREACHED */

		case NFS4_DUP_PENDING:
			/*
			 * reply has previously been inserted into the
			 * duplicate cache, however the reply has
			 * not yet been sent via svc_sendreply()
			 */
			return (1);
			/* NOTREACHED */

		case NFS4_DUP_NEW:
			curthread->t_flag |= T_DONTPEND;
			/* NON-IDEMPOTENT proc call */
			rfs4_compound(cap, rbp, NULL, req, cr, &rv);
			curthread->t_flag &= ~T_DONTPEND;

			/*
			 * NOTE(review): on this path the entry drp is
			 * already linked into the cache in state
			 * NFS4_DUP_NEW; returning here leaves it in that
			 * state, so retransmissions will keep seeing
			 * NFS4_DUP_PENDING -- confirm this drop semantic
			 * is intended.
			 */
			if (rv)	/* short ckt sendreply on error */
				return (rv);

			/*
			 * dr_res must be initialized before calling
			 * rfs4_dr_chstate (it frees the reply).
			 */
			drp->dr_res = res_buf;
			if (curthread->t_flag & T_WOULDBLOCK) {
				curthread->t_flag &= ~T_WOULDBLOCK;
				/*
				 * mark this entry as FREE and plop
				 * on the end of the cache list
				 */
				mutex_enter(&drp->drc->lock);
				rfs4_dr_chstate(drp, NFS4_DUP_FREE);
				list_insert_tail(&(drp->drc->dr_cache), drp);
				mutex_exit(&drp->drc->lock);
				return (1);
			}
			break;

		case NFS4_DUP_REPLAY:
			/* replay from the cache */
			rbp = &(drp->dr_res);
			break;
		}
	} else {
		curthread->t_flag |= T_DONTPEND;
		/* IDEMPOTENT proc call */
		rfs4_compound(cap, rbp, NULL, req, cr, &rv);
		curthread->t_flag &= ~T_DONTPEND;

		if (rv)	/* short ckt sendreply on error */
			return (rv);

		if (curthread->t_flag & T_WOULDBLOCK) {
			curthread->t_flag &= ~T_WOULDBLOCK;
			return (1);
		}
	}

	/*
	 * Send out the replayed reply or the 'real' one.
	 */
	if (!svc_sendreply(xprt, xdr_COMPOUND4res_srv, (char *)rbp)) {
		DTRACE_PROBE2(nfss__e__dispatch_sendfail,
		    struct svc_req *, xprt,
		    char *, rbp);
		svcerr_systemerr(xprt);
		error++;
	} else {
		/*
		 * Update kstats
		 */
		rfs4_compound_kstat_res(rbp);
		*rlen = xdr_sizeof(xdr_COMPOUND4res_srv, rbp);
	}

	/*
	 * If this reply was just inserted into the duplicate cache
	 * or it was replayed from the dup cache; (re)mark it as
	 * available for replay
	 *
	 * At first glance, this 'if' statement seems a little strange;
	 * testing for NFS4_DUP_REPLAY, and then calling...
	 *
	 *	rfs4_dr_chstate(NFS4_DUP_REPLAY)
	 *
	 * ... but notice that we are checking dr_stat, and not the
	 * state of the entry itself, the entry will be NFS4_DUP_INUSE,
	 * we do that so that we know not to prematurely reap it whilst
	 * we resent it to the client.
	 *
	 */
	if (dr_stat == NFS4_DUP_NEW || dr_stat == NFS4_DUP_REPLAY) {
		mutex_enter(&drp->drc->lock);
		rfs4_dr_chstate(drp, NFS4_DUP_REPLAY);
		mutex_exit(&drp->drc->lock);
	} else if (dr_stat == NFS4_NOT_DUP) {
		/* Reply was never cached; free the decoded results now. */
		rfs4_compound_free(rbp);
	}

	return (error);
}

/*
 * Reject a COMPOUND whose minorversion exceeds what we support,
 * replying with NFS4ERR_MINOR_VERS_MISMATCH as required by the
 * protocol.  Returns TRUE when the request was handled (reply sent),
 * FALSE when normal dispatch should proceed.
 */
bool_t
rfs4_minorvers_mismatch(struct svc_req *req, SVCXPRT *xprt, void *args)
{
	COMPOUND4args *argsp;
	COMPOUND4res res_buf, *resp;

	if (req->rq_vers != 4)
		return (FALSE);

	argsp = (COMPOUND4args *)args;

	if (argsp->minorversion <= NFS4_MAX_MINOR_VERSION)
		return (FALSE);

	resp = &res_buf;

	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_val =
	    kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP);
	resp->tag.utf8string_len = argsp->tag.utf8string_len;
	bcopy(argsp->tag.utf8string_val, resp->tag.utf8string_val,
	    resp->tag.utf8string_len);
	resp->array_len = 0;
	resp->array = NULL;
	resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
	if (!svc_sendreply(xprt, xdr_COMPOUND4res_srv, (char *)resp)) {
		DTRACE_PROBE2(nfss__e__minorvers_mismatch,
		    SVCXPRT *, xprt, char *, resp);
		svcerr_systemerr(xprt);
	}
	rfs4_compound_free(resp);
	return (TRUE);
}

/*
 * Reply to a COMPOUND with a single-op result carrying
 * NFS4ERR_RESOURCE (or NFS4ERR_OP_ILLEGAL), used when the DRC is
 * exhausted and the request cannot be processed.
 */
void
rfs4_resource_err(struct svc_req *req, COMPOUND4args *argsp)
{
	COMPOUND4res res_buf, *rbp;
	nfs_resop4 *resop;
	PUTFH4res *resp;

	rbp = &res_buf;

	/*
	 * Form a reply tag by copying over the request tag.
	 */
	rbp->tag.utf8string_val =
	    kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP);
	rbp->tag.utf8string_len = argsp->tag.utf8string_len;
	bcopy(argsp->tag.utf8string_val, rbp->tag.utf8string_val,
	    rbp->tag.utf8string_len);

	rbp->array_len = 1;
	rbp->array = kmem_zalloc(rbp->array_len * sizeof (nfs_resop4),
	    KM_SLEEP);
	resop = &rbp->array[0];
	resop->resop = argsp->array[0].argop;	/* copy first op over */

	/* Any op will do, just need to access status field */
	resp = &resop->nfs_resop4_u.opputfh;

	/*
	 * NFS4ERR_RESOURCE is allowed for all ops, except OP_ILLEGAL.
	 * Note that all op numbers in the compound array were already
	 * validated by the XDR decoder (xdr_COMPOUND4args_srv()).
	 */
	resp->status = (resop->resop == OP_ILLEGAL ?
	    NFS4ERR_OP_ILLEGAL : NFS4ERR_RESOURCE);

	/* compound status is same as first op status */
	rbp->status = resp->status;

	if (!svc_sendreply(req->rq_xprt, xdr_COMPOUND4res_srv, (char *)rbp)) {
		DTRACE_PROBE2(nfss__rsrc_err__sendfail,
		    struct svc_req *, req->rq_xprt, char *, rbp);
		svcerr_systemerr(req->rq_xprt);
	}

	UTF8STRING_FREE(rbp->tag);
	kmem_free(rbp->array, rbp->array_len * sizeof (nfs_resop4));
}