1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/systm.h> 31 #include <sys/sdt.h> 32 #include <rpc/types.h> 33 #include <rpc/auth.h> 34 #include <rpc/auth_unix.h> 35 #include <rpc/auth_des.h> 36 #include <rpc/svc.h> 37 #include <rpc/xdr.h> 38 #include <nfs/nfs4.h> 39 #include <nfs/nfs_dispatch.h> 40 #include <nfs/nfs4_drc.h> 41 42 /* 43 * This is the duplicate request cache for NFSv4 44 */ 45 rfs4_drc_t *nfs4_drc = NULL; 46 47 /* 48 * How long the entry can remain in the cache 49 * once it has been sent to the client and not 50 * used in a reply (in seconds) 51 */ 52 unsigned nfs4_drc_lifetime = 1; 53 54 /* 55 * The default size of the duplicate request cache 56 */ 57 uint32_t nfs4_drc_max = 8 * 1024; 58 59 /* 60 * The number of buckets we'd like to hash the 61 * replies into.. do not change this on the fly. 62 */ 63 uint32_t nfs4_drc_hash = 541; 64 65 /* 66 * Initialize a duplicate request cache. 67 */ 68 rfs4_drc_t * 69 rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size, unsigned ttl) 70 { 71 rfs4_drc_t *drc; 72 uint32_t bki; 73 74 ASSERT(drc_size); 75 ASSERT(drc_hash_size); 76 77 drc = kmem_alloc(sizeof (rfs4_drc_t), KM_SLEEP); 78 79 drc->max_size = drc_size; 80 drc->in_use = 0; 81 drc->drc_ttl = ttl; 82 83 mutex_init(&drc->lock, NULL, MUTEX_DEFAULT, NULL); 84 85 drc->dr_hash = drc_hash_size; 86 87 drc->dr_buckets = kmem_alloc(sizeof (list_t)*drc_hash_size, KM_SLEEP); 88 89 for (bki = 0; bki < drc_hash_size; bki++) { 90 list_create(&drc->dr_buckets[bki], sizeof (rfs4_dupreq_t), 91 offsetof(rfs4_dupreq_t, dr_bkt_next)); 92 } 93 94 list_create(&(drc->dr_cache), sizeof (rfs4_dupreq_t), 95 offsetof(rfs4_dupreq_t, dr_next)); 96 97 return (drc); 98 } 99 100 /* 101 * Destroy a duplicate request cache. 102 */ 103 void 104 rfs4_fini_drc(rfs4_drc_t *drc) 105 { 106 rfs4_dupreq_t *drp, *drp_next; 107 108 ASSERT(drc); 109 110 /* iterate over the dr_cache and free the enties */ 111 for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) { 112 113 if (drp->dr_state == NFS4_DUP_REPLAY) 114 rfs4_compound_free(&(drp->dr_res)); 115 116 if (drp->dr_addr.buf != NULL) 117 kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen); 118 119 drp_next = list_next(&(drc->dr_cache), drp); 120 121 kmem_free(drp, sizeof (rfs4_dupreq_t)); 122 } 123 124 mutex_destroy(&drc->lock); 125 kmem_free(drc->dr_buckets, 126 sizeof (list_t)*drc->dr_hash); 127 kmem_free(drc, sizeof (rfs4_drc_t)); 128 } 129 130 /* 131 * rfs4_dr_chstate: 132 * 133 * Change the state of a rfs4_dupreq. If it's not in transition 134 * to the FREE state, update the time used and return. If we 135 * are moving to the FREE state then we need to clean up the 136 * compound results and move the entry to the end of the list. 137 */ 138 void 139 rfs4_dr_chstate(rfs4_dupreq_t *drp, int new_state) 140 { 141 rfs4_drc_t *drc; 142 143 ASSERT(drp); 144 ASSERT(drp->drc); 145 ASSERT(drp->dr_bkt); 146 ASSERT(MUTEX_HELD(&drp->drc->lock)); 147 148 drp->dr_state = new_state; 149 150 if (new_state != NFS4_DUP_FREE) { 151 gethrestime(&drp->dr_time_used); 152 return; 153 } 154 155 drc = drp->drc; 156 157 /* 158 * Remove entry from the bucket and 159 * dr_cache list, free compound results. 160 */ 161 list_remove(drp->dr_bkt, drp); 162 list_remove(&(drc->dr_cache), drp); 163 rfs4_compound_free(&(drp->dr_res)); 164 } 165 166 /* 167 * rfs4_alloc_dr: 168 * 169 * Pick an entry off the tail -- Use if it is 170 * marked NFS4_DUP_FREE, or is an entry in the 171 * NFS4_DUP_REPLAY state that has timed-out... 172 * Otherwise malloc a new one if we have not reached 173 * our maximum cache limit. 174 * 175 * The list should be in time order, so no need 176 * to traverse backwards looking for a timed out 177 * entry, NFS4_DUP_FREE's are place on the tail. 178 */ 179 rfs4_dupreq_t * 180 rfs4_alloc_dr(rfs4_drc_t *drc) 181 { 182 rfs4_dupreq_t *drp_tail, *drp = NULL; 183 184 ASSERT(drc); 185 ASSERT(MUTEX_HELD(&drc->lock)); 186 187 if ((drp_tail = list_tail(&drc->dr_cache)) != NULL) { 188 189 switch (drp_tail->dr_state) { 190 191 case NFS4_DUP_FREE: 192 list_remove(&(drc->dr_cache), drp_tail); 193 DTRACE_PROBE1(nfss__i__drc_freeclaim, 194 rfs4_dupreq_t *, drp_tail); 195 return (drp_tail); 196 /* NOTREACHED */ 197 198 case NFS4_DUP_REPLAY: 199 if (gethrestime_sec() > 200 drp_tail->dr_time_used.tv_sec+drc->drc_ttl) { 201 /* this entry has timedout so grab it. */ 202 rfs4_dr_chstate(drp_tail, NFS4_DUP_FREE); 203 DTRACE_PROBE1(nfss__i__drc_ttlclaim, 204 rfs4_dupreq_t *, drp_tail); 205 return (drp_tail); 206 } 207 break; 208 } 209 } 210 211 /* 212 * Didn't find something to recycle have 213 * we hit the cache limit ? 214 */ 215 if (drc->in_use >= drc->max_size) { 216 DTRACE_PROBE1(nfss__i__drc_full, 217 rfs4_drc_t *, drc); 218 return (NULL); 219 } 220 221 222 /* nope, so let's malloc a new one */ 223 drp = kmem_zalloc(sizeof (rfs4_dupreq_t), KM_SLEEP); 224 drp->drc = drc; 225 drc->in_use++; 226 gethrestime(&drp->dr_time_created); 227 DTRACE_PROBE1(nfss__i__drc_new, rfs4_dupreq_t *, drp); 228 229 return (drp); 230 } 231 232 /* 233 * rfs4_find_dr: 234 * 235 * Search for an entry in the duplicate request cache by 236 * calculating the hash index based on the XID, and examining 237 * the entries in the hash bucket. If we find a match stamp the 238 * time_used and return. If the entry does not match it could be 239 * ready to be freed. Once we have searched the bucket and we 240 * have not exhausted the maximum limit for the cache we will 241 * allocate a new entry. 242 */ 243 int 244 rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup) 245 { 246 247 uint32_t the_xid; 248 list_t *dr_bkt; 249 rfs4_dupreq_t *drp; 250 int bktdex; 251 252 /* 253 * Get the XID, calculate the bucket and search to 254 * see if we need to replay from the cache. 255 */ 256 the_xid = req->rq_xprt->xp_xid; 257 bktdex = the_xid % drc->dr_hash; 258 259 dr_bkt = (list_t *) 260 &(drc->dr_buckets[(the_xid % drc->dr_hash)]); 261 262 DTRACE_PROBE3(nfss__i__drc_bktdex, 263 int, bktdex, 264 uint32_t, the_xid, 265 list_t *, dr_bkt); 266 267 *dup = NULL; 268 269 mutex_enter(&drc->lock); 270 /* 271 * Search the bucket for a matching xid and address. 272 */ 273 for (drp = list_head(dr_bkt); drp != NULL; 274 drp = list_next(dr_bkt, drp)) { 275 276 if (drp->dr_xid == the_xid && 277 drp->dr_addr.len == req->rq_xprt->xp_rtaddr.len && 278 bcmp((caddr_t)drp->dr_addr.buf, 279 (caddr_t)req->rq_xprt->xp_rtaddr.buf, 280 drp->dr_addr.len) == 0) { 281 282 /* 283 * Found a match so REPLAY the Reply 284 */ 285 if (drp->dr_state == NFS4_DUP_REPLAY) { 286 gethrestime(&drp->dr_time_used); 287 mutex_exit(&drc->lock); 288 *dup = drp; 289 DTRACE_PROBE1(nfss__i__drc_replay, 290 rfs4_dupreq_t *, drp); 291 return (NFS4_DUP_REPLAY); 292 } 293 294 /* 295 * This entry must be in transition, so return 296 * the 'pending' status. 297 */ 298 mutex_exit(&drc->lock); 299 return (NFS4_DUP_PENDING); 300 } 301 302 /* 303 * Not a match, but maybe this entry is ready 304 * to be reused. 305 */ 306 if (drp->dr_state == NFS4_DUP_REPLAY && 307 (gethrestime_sec() > 308 drp->dr_time_used.tv_sec+drc->drc_ttl)) { 309 rfs4_dr_chstate(drp, NFS4_DUP_FREE); 310 list_insert_tail(&(drp->drc->dr_cache), drp); 311 } 312 } 313 314 drp = rfs4_alloc_dr(drc); 315 mutex_exit(&drc->lock); 316 317 if (drp == NULL) { 318 return (NFS4_DUP_ERROR); 319 } 320 321 /* 322 * Place at the head of the list, init the state 323 * to NEW and clear the time used field. 324 */ 325 326 drp->dr_state = NFS4_DUP_NEW; 327 drp->dr_time_used.tv_sec = drp->dr_time_used.tv_nsec = 0; 328 329 /* 330 * If needed, resize the address buffer 331 */ 332 if (drp->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { 333 if (drp->dr_addr.buf != NULL) 334 kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen); 335 drp->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; 336 drp->dr_addr.buf = kmem_alloc(drp->dr_addr.maxlen, KM_NOSLEEP); 337 if (drp->dr_addr.buf == NULL) { 338 /* 339 * If the malloc fails, mark the entry 340 * as free and put on the tail. 341 */ 342 drp->dr_addr.maxlen = 0; 343 drp->dr_state = NFS4_DUP_FREE; 344 mutex_enter(&drc->lock); 345 list_insert_tail(&(drc->dr_cache), drp); 346 mutex_exit(&drc->lock); 347 return (NFS4_DUP_ERROR); 348 } 349 } 350 351 352 /* 353 * Copy the address. 354 */ 355 drp->dr_addr.len = req->rq_xprt->xp_rtaddr.len; 356 357 bcopy((caddr_t)req->rq_xprt->xp_rtaddr.buf, 358 (caddr_t)drp->dr_addr.buf, 359 drp->dr_addr.len); 360 361 drp->dr_xid = the_xid; 362 drp->dr_bkt = dr_bkt; 363 364 /* 365 * Insert at the head of the bucket and 366 * the drc lists.. 367 */ 368 mutex_enter(&drc->lock); 369 list_insert_head(&drc->dr_cache, drp); 370 list_insert_head(dr_bkt, drp); 371 mutex_exit(&drc->lock); 372 373 *dup = drp; 374 375 return (NFS4_DUP_NEW); 376 } 377 378 /* 379 * 380 * This function handles the duplicate request cache, 381 * NULL_PROC and COMPOUND procedure calls for NFSv4; 382 * 383 * Passed into this function are:- 384 * 385 * disp A pointer to our dispatch table entry 386 * req The request to process 387 * xprt The server transport handle 388 * ap A pointer to the arguments 389 * 390 * 391 * When appropriate this function is responsible for inserting 392 * the reply into the duplicate cache or replaying an existing 393 * cached reply. 394 * 395 * dr_stat reflects the state of the duplicate request that 396 * has been inserted into or retrieved from the cache 397 * 398 * drp is the duplicate request entry 399 * 400 */ 401 int 402 rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req, 403 SVCXPRT *xprt, char *ap) 404 { 405 406 COMPOUND4res res_buf, *rbp; 407 COMPOUND4args *cap; 408 409 cred_t *cr = NULL; 410 int error = 0; 411 int dis_flags = 0; 412 int dr_stat = NFS4_NOT_DUP; 413 rfs4_dupreq_t *drp = NULL; 414 415 ASSERT(disp); 416 417 /* 418 * Short circuit the RPC_NULL proc. 419 */ 420 if (disp->dis_proc == rpc_null) { 421 if (!svc_sendreply(xprt, xdr_void, NULL)) { 422 return (1); 423 } 424 return (0); 425 } 426 427 /* Only NFSv4 Compounds from this point onward */ 428 429 rbp = &res_buf; 430 cap = (COMPOUND4args *)ap; 431 432 /* 433 * Figure out the disposition of the whole COMPOUND 434 * and record it's IDEMPOTENTCY. 435 */ 436 rfs4_compound_flagproc(cap, &dis_flags); 437 438 /* 439 * If NON-IDEMPOTENT then we need to figure out if this 440 * request can be replied from the duplicate cache. 441 * 442 * If this is a new request then we need to insert the 443 * reply into the duplicate cache. 444 */ 445 if (!(dis_flags & RPC_IDEMPOTENT)) { 446 /* look for a replay from the cache or allocate */ 447 dr_stat = rfs4_find_dr(req, nfs4_drc, &drp); 448 449 switch (dr_stat) { 450 451 case NFS4_DUP_ERROR: 452 svcerr_systemerr(xprt); 453 return (1); 454 /* NOTREACHED */ 455 456 case NFS4_DUP_PENDING: 457 /* 458 * reply has previously been inserted into the 459 * duplicate cache, however the reply has 460 * not yet been sent via svc_sendreply() 461 */ 462 return (1); 463 /* NOTREACHED */ 464 465 case NFS4_DUP_NEW: 466 curthread->t_flag |= T_DONTPEND; 467 /* NON-IDEMPOTENT proc call */ 468 rfs4_compound(cap, rbp, NULL, req, cr); 469 470 curthread->t_flag &= ~T_DONTPEND; 471 472 /* 473 * dr_res must be initialized before calling 474 * rfs4_dr_chstate (it frees the reply). 475 */ 476 drp->dr_res = res_buf; 477 if (curthread->t_flag & T_WOULDBLOCK) { 478 curthread->t_flag &= ~T_WOULDBLOCK; 479 /* 480 * mark this entry as FREE and plop 481 * on the end of the cache list 482 */ 483 mutex_enter(&drp->drc->lock); 484 rfs4_dr_chstate(drp, NFS4_DUP_FREE); 485 list_insert_tail(&(drp->drc->dr_cache), drp); 486 mutex_exit(&drp->drc->lock); 487 return (1); 488 } 489 break; 490 491 case NFS4_DUP_REPLAY: 492 /* replay from the cache */ 493 rbp = &(drp->dr_res); 494 break; 495 } 496 } else { 497 curthread->t_flag |= T_DONTPEND; 498 /* IDEMPOTENT proc call */ 499 rfs4_compound(cap, rbp, NULL, req, cr); 500 501 curthread->t_flag &= ~T_DONTPEND; 502 if (curthread->t_flag & T_WOULDBLOCK) { 503 curthread->t_flag &= ~T_WOULDBLOCK; 504 return (1); 505 } 506 } 507 508 /* 509 * Send out the replayed reply or the 'real' one. 510 */ 511 if (!svc_sendreply(xprt, xdr_COMPOUND4res_srv, (char *)rbp)) { 512 DTRACE_PROBE2(nfss__e__dispatch_sendfail, 513 struct svc_req *, xprt, 514 char *, rbp); 515 error++; 516 } 517 518 /* 519 * If this reply was just inserted into the duplicate cache 520 * mark it as available for replay 521 */ 522 if (dr_stat == NFS4_DUP_NEW) { 523 mutex_enter(&drp->drc->lock); 524 rfs4_dr_chstate(drp, NFS4_DUP_REPLAY); 525 mutex_exit(&drp->drc->lock); 526 } else if (dr_stat == NFS4_NOT_DUP) { 527 rfs4_compound_free(rbp); 528 } 529 530 return (error); 531 } 532