1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 /* 38 * Here is the basic algorithm: 39 * First, some design criteria I used: 40 * - I think a false hit is more serious than a false miss 41 * - A false hit for an RPC that has Op(s) that order via seqid# must be 42 * avoided at all cost 43 * - A valid hit will probably happen a long time after the original reply 44 * and the TCP socket that the original request was received on will no 45 * longer be active 46 * (The long time delay implies to me that LRU is not appropriate.) 47 * - The mechanism will satisfy the requirements of ordering Ops with seqid#s 48 * in them as well as minimizing the risk of redoing retried non-idempotent 49 * Ops. 50 * Because it is biased towards avoiding false hits, multiple entries with 51 * the same xid are to be expected, especially for the case of the entry 52 * in the cache being related to a seqid# sequenced Op. 53 * 54 * The basic algorithm I'm about to code up: 55 * - Null RPCs bypass the cache and are just done 56 * For TCP 57 * - key on <xid, NFS version> (as noted above, there can be several 58 * entries with the same key) 59 * When a request arrives: 60 * For all that match key 61 * - if RPC# != OR request_size != 62 * - not a match with this one 63 * - if NFSv4 and received on same TCP socket OR 64 * received on a TCP connection created before the 65 * entry was cached 66 * - not a match with this one 67 * (V2,3 clients might retry on same TCP socket) 68 * - calculate checksum on first N bytes of NFS XDR 69 * - if checksum != 70 * - not a match for this one 71 * If any of the remaining ones that match has a 72 * seqid_refcnt > 0 73 * - not a match (go do RPC, using new cache entry) 74 * If one match left 75 * - a hit (reply from cache) 76 * else 77 * - miss (go do RPC, using new cache entry) 78 * 79 * During processing of NFSv4 request: 80 * - set a flag when a non-idempotent Op is processed 81 * - when an Op that uses a seqid# (Open,...) is processed 82 * - if same seqid# as referenced entry in cache 83 * - free new cache entry 84 * - reply from referenced cache entry 85 * else if next seqid# in order 86 * - free referenced cache entry 87 * - increment seqid_refcnt on new cache entry 88 * - set pointer from Openowner/Lockowner to 89 * new cache entry (aka reference it) 90 * else if first seqid# in sequence 91 * - increment seqid_refcnt on new cache entry 92 * - set pointer from Openowner/Lockowner to 93 * new cache entry (aka reference it) 94 * 95 * At end of RPC processing: 96 * - if seqid_refcnt > 0 OR flagged non-idempotent on new 97 * cache entry 98 * - save reply in cache entry 99 * - calculate checksum on first N bytes of NFS XDR 100 * request 101 * - note op and length of XDR request (in bytes) 102 * - timestamp it 103 * else 104 * - free new cache entry 105 * - Send reply (noting info for socket activity check, below) 106 * 107 * For cache entries saved above: 108 * - if saved since seqid_refcnt was > 0 109 * - free when seqid_refcnt decrements to 0 110 * (when next one in sequence is processed above, or 111 * when Openowner/Lockowner is discarded) 112 * else { non-idempotent Op(s) } 113 * - free when 114 * - some further activity observed on same 115 * socket 116 * (I'm not yet sure how I'm going to do 117 * this. Maybe look at the TCP connection 118 * to see if the send_tcp_sequence# is well 119 * past sent reply OR K additional RPCs 120 * replied on same socket OR?) 121 * OR 122 * - when very old (hours, days, weeks?) 123 * 124 * For UDP (v2, 3 only), pretty much the old way: 125 * - key on <xid, NFS version, RPC#, Client host ip#> 126 * (at most one entry for each key) 127 * 128 * When a Request arrives: 129 * - if a match with entry via key 130 * - if RPC marked In_progress 131 * - discard request (don't send reply) 132 * else 133 * - reply from cache 134 * - timestamp cache entry 135 * else 136 * - add entry to cache, marked In_progress 137 * - do RPC 138 * - when RPC done 139 * - if RPC# non-idempotent 140 * - mark entry Done (not In_progress) 141 * - save reply 142 * - timestamp cache entry 143 * else 144 * - free cache entry 145 * - send reply 146 * 147 * Later, entries with saved replies are free'd a short time (few minutes) 148 * after reply sent (timestamp). 149 * Reference: Chet Juszczak, "Improving the Performance and Correctness 150 * of an NFS Server", in Proc. Winter 1989 USENIX Conference, 151 * pages 53-63. San Diego, February 1989. 152 * for the UDP case. 153 * nfsrc_floodlevel is set to the allowable upper limit for saved replies 154 * for TCP. For V3, a reply won't be saved when the flood level is 155 * hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in 156 * that case. This level should be set high enough that this almost 157 * never happens. 158 */ 159 #ifndef APPLEKEXT 160 #include <fs/nfs/nfsport.h> 161 162 extern struct nfsstats newnfsstats; 163 NFSCACHEMUTEX; 164 int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0; 165 #endif /* !APPLEKEXT */ 166 167 static int nfsrc_tcpnonidempotent = 1; 168 static int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER, nfsrc_udpcachesize = 0; 169 static TAILQ_HEAD(, nfsrvcache) nfsrvudplru; 170 static struct nfsrvhashhead nfsrvhashtbl[NFSRVCACHE_HASHSIZE], 171 nfsrvudphashtbl[NFSRVCACHE_HASHSIZE]; 172 /* 173 * and the reverse mapping from generic to Version 2 procedure numbers 174 */ 175 static int newnfsv2_procid[NFS_V3NPROCS] = { 176 NFSV2PROC_NULL, 177 NFSV2PROC_GETATTR, 178 NFSV2PROC_SETATTR, 179 NFSV2PROC_LOOKUP, 180 NFSV2PROC_NOOP, 181 NFSV2PROC_READLINK, 182 NFSV2PROC_READ, 183 NFSV2PROC_WRITE, 184 NFSV2PROC_CREATE, 185 NFSV2PROC_MKDIR, 186 NFSV2PROC_SYMLINK, 187 NFSV2PROC_CREATE, 188 NFSV2PROC_REMOVE, 189 NFSV2PROC_RMDIR, 190 NFSV2PROC_RENAME, 191 NFSV2PROC_LINK, 192 NFSV2PROC_READDIR, 193 NFSV2PROC_NOOP, 194 NFSV2PROC_STATFS, 195 NFSV2PROC_NOOP, 196 NFSV2PROC_NOOP, 197 NFSV2PROC_NOOP, 198 }; 199 200 #define NFSRCUDPHASH(xid) \ 201 (&nfsrvudphashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE]) 202 #define NFSRCHASH(xid) \ 203 (&nfsrvhashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE]) 204 #define TRUE 1 205 #define FALSE 0 206 #define NFSRVCACHE_CHECKLEN 100 207 208 /* True iff the rpc reply is an nfs status ONLY! */ 209 static int nfsv2_repstat[NFS_V3NPROCS] = { 210 FALSE, 211 FALSE, 212 FALSE, 213 FALSE, 214 FALSE, 215 FALSE, 216 FALSE, 217 FALSE, 218 FALSE, 219 FALSE, 220 TRUE, 221 TRUE, 222 TRUE, 223 TRUE, 224 FALSE, 225 TRUE, 226 FALSE, 227 FALSE, 228 FALSE, 229 FALSE, 230 FALSE, 231 FALSE, 232 }; 233 234 /* 235 * Will NFS want to work over IPv6 someday? 236 */ 237 #define NETFAMILY(rp) \ 238 (((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET) 239 240 /* local functions */ 241 static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp); 242 static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp); 243 static void nfsrc_lock(struct nfsrvcache *rp); 244 static void nfsrc_unlock(struct nfsrvcache *rp); 245 static void nfsrc_wanted(struct nfsrvcache *rp); 246 static void nfsrc_freecache(struct nfsrvcache *rp); 247 static void nfsrc_trimcache(u_int64_t, struct socket *); 248 static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t, 249 struct socket *); 250 static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum); 251 static void nfsrc_marksametcpconn(u_int64_t); 252 253 /* 254 * Initialize the server request cache list 255 */ 256 APPLESTATIC void 257 nfsrvd_initcache(void) 258 { 259 int i; 260 static int inited = 0; 261 262 if (inited) 263 return; 264 inited = 1; 265 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 266 LIST_INIT(&nfsrvudphashtbl[i]); 267 LIST_INIT(&nfsrvhashtbl[i]); 268 } 269 TAILQ_INIT(&nfsrvudplru); 270 nfsrc_tcpsavedreplies = 0; 271 nfsrc_udpcachesize = 0; 272 newnfsstats.srvcache_tcppeak = 0; 273 newnfsstats.srvcache_size = 0; 274 } 275 276 /* 277 * Get a cache entry for this request. Basically just malloc a new one 278 * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest. 279 * Call nfsrc_trimcache() to clean up the cache before returning. 280 */ 281 APPLESTATIC int 282 nfsrvd_getcache(struct nfsrv_descript *nd, struct socket *so) 283 { 284 struct nfsrvcache *newrp; 285 int ret; 286 287 if (nd->nd_procnum == NFSPROC_NULL) 288 panic("nfsd cache null"); 289 MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache), 290 M_NFSRVCACHE, M_WAITOK); 291 NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache)); 292 if (nd->nd_flag & ND_NFSV4) 293 newrp->rc_flag = RC_NFSV4; 294 else if (nd->nd_flag & ND_NFSV3) 295 newrp->rc_flag = RC_NFSV3; 296 else 297 newrp->rc_flag = RC_NFSV2; 298 newrp->rc_xid = nd->nd_retxid; 299 newrp->rc_proc = nd->nd_procnum; 300 newrp->rc_sockref = nd->nd_sockref; 301 newrp->rc_cachetime = nd->nd_tcpconntime; 302 if (nd->nd_flag & ND_SAMETCPCONN) 303 newrp->rc_flag |= RC_SAMETCPCONN; 304 if (nd->nd_nam2 != NULL) { 305 newrp->rc_flag |= RC_UDP; 306 ret = nfsrc_getudp(nd, newrp); 307 } else { 308 ret = nfsrc_gettcp(nd, newrp); 309 } 310 nfsrc_trimcache(nd->nd_sockref, so); 311 NFSEXITCODE2(0, nd); 312 return (ret); 313 } 314 315 /* 316 * For UDP (v2, v3): 317 * - key on <xid, NFS version, RPC#, Client host ip#> 318 * (at most one entry for each key) 319 */ 320 static int 321 nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) 322 { 323 struct nfsrvcache *rp; 324 struct sockaddr_in *saddr; 325 struct sockaddr_in6 *saddr6; 326 struct nfsrvhashhead *hp; 327 int ret = 0; 328 329 hp = NFSRCUDPHASH(newrp->rc_xid); 330 loop: 331 NFSLOCKCACHE(); 332 LIST_FOREACH(rp, hp, rc_hash) { 333 if (newrp->rc_xid == rp->rc_xid && 334 newrp->rc_proc == rp->rc_proc && 335 (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) && 336 nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) { 337 if ((rp->rc_flag & RC_LOCKED) != 0) { 338 rp->rc_flag |= RC_WANTED; 339 (void)mtx_sleep(rp, NFSCACHEMUTEXPTR, 340 (PZERO - 1) | PDROP, "nfsrc", 10 * hz); 341 goto loop; 342 } 343 if (rp->rc_flag == 0) 344 panic("nfs udp cache0"); 345 rp->rc_flag |= RC_LOCKED; 346 TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); 347 TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); 348 if (rp->rc_flag & RC_INPROG) { 349 newnfsstats.srvcache_inproghits++; 350 NFSUNLOCKCACHE(); 351 ret = RC_DROPIT; 352 } else if (rp->rc_flag & RC_REPSTATUS) { 353 /* 354 * V2 only. 355 */ 356 newnfsstats.srvcache_nonidemdonehits++; 357 NFSUNLOCKCACHE(); 358 nfsrvd_rephead(nd); 359 *(nd->nd_errp) = rp->rc_status; 360 ret = RC_REPLY; 361 rp->rc_timestamp = NFSD_MONOSEC + 362 NFSRVCACHE_UDPTIMEOUT; 363 } else if (rp->rc_flag & RC_REPMBUF) { 364 newnfsstats.srvcache_nonidemdonehits++; 365 NFSUNLOCKCACHE(); 366 nd->nd_mreq = m_copym(rp->rc_reply, 0, 367 M_COPYALL, M_WAITOK); 368 ret = RC_REPLY; 369 rp->rc_timestamp = NFSD_MONOSEC + 370 NFSRVCACHE_UDPTIMEOUT; 371 } else { 372 panic("nfs udp cache1"); 373 } 374 nfsrc_unlock(rp); 375 free((caddr_t)newrp, M_NFSRVCACHE); 376 goto out; 377 } 378 } 379 newnfsstats.srvcache_misses++; 380 newnfsstats.srvcache_size++; 381 nfsrc_udpcachesize++; 382 383 newrp->rc_flag |= RC_INPROG; 384 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); 385 if (saddr->sin_family == AF_INET) 386 newrp->rc_inet = saddr->sin_addr.s_addr; 387 else if (saddr->sin_family == AF_INET6) { 388 saddr6 = (struct sockaddr_in6 *)saddr; 389 NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6, 390 sizeof (struct in6_addr)); 391 newrp->rc_flag |= RC_INETIPV6; 392 } 393 LIST_INSERT_HEAD(hp, newrp, rc_hash); 394 TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru); 395 NFSUNLOCKCACHE(); 396 nd->nd_rp = newrp; 397 ret = RC_DOIT; 398 399 out: 400 NFSEXITCODE2(0, nd); 401 return (ret); 402 } 403 404 /* 405 * Update a request cache entry after the rpc has been done 406 */ 407 APPLESTATIC struct nfsrvcache * 408 nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so) 409 { 410 struct nfsrvcache *rp; 411 struct nfsrvcache *retrp = NULL; 412 mbuf_t m; 413 414 rp = nd->nd_rp; 415 if (!rp) 416 panic("nfsrvd_updatecache null rp"); 417 nd->nd_rp = NULL; 418 NFSLOCKCACHE(); 419 nfsrc_lock(rp); 420 if (!(rp->rc_flag & RC_INPROG)) 421 panic("nfsrvd_updatecache not inprog"); 422 rp->rc_flag &= ~RC_INPROG; 423 if (rp->rc_flag & RC_UDP) { 424 TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); 425 TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); 426 } 427 428 /* 429 * Reply from cache is a special case returned by nfsrv_checkseqid(). 430 */ 431 if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) { 432 newnfsstats.srvcache_nonidemdonehits++; 433 NFSUNLOCKCACHE(); 434 nd->nd_repstat = 0; 435 if (nd->nd_mreq) 436 mbuf_freem(nd->nd_mreq); 437 if (!(rp->rc_flag & RC_REPMBUF)) 438 panic("reply from cache"); 439 nd->nd_mreq = m_copym(rp->rc_reply, 0, 440 M_COPYALL, M_WAITOK); 441 rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_TCPTIMEOUT; 442 nfsrc_unlock(rp); 443 goto out; 444 } 445 446 /* 447 * If rc_refcnt > 0, save it 448 * For UDP, save it if ND_SAVEREPLY is set 449 * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set 450 */ 451 if (nd->nd_repstat != NFSERR_DONTREPLY && 452 (rp->rc_refcnt > 0 || 453 ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) || 454 ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) && 455 nfsrc_tcpsavedreplies <= nfsrc_floodlevel && 456 nfsrc_tcpnonidempotent))) { 457 if (rp->rc_refcnt > 0) { 458 if (!(rp->rc_flag & RC_NFSV4)) 459 panic("update_cache refcnt"); 460 rp->rc_flag |= RC_REFCNT; 461 } 462 if ((nd->nd_flag & ND_NFSV2) && 463 nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) { 464 rp->rc_status = nd->nd_repstat; 465 rp->rc_flag |= RC_REPSTATUS; 466 NFSUNLOCKCACHE(); 467 } else { 468 if (!(rp->rc_flag & RC_UDP)) { 469 nfsrc_tcpsavedreplies++; 470 if (nfsrc_tcpsavedreplies > 471 newnfsstats.srvcache_tcppeak) 472 newnfsstats.srvcache_tcppeak = 473 nfsrc_tcpsavedreplies; 474 } 475 NFSUNLOCKCACHE(); 476 m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK); 477 NFSLOCKCACHE(); 478 rp->rc_reply = m; 479 rp->rc_flag |= RC_REPMBUF; 480 NFSUNLOCKCACHE(); 481 } 482 if (rp->rc_flag & RC_UDP) { 483 rp->rc_timestamp = NFSD_MONOSEC + 484 NFSRVCACHE_UDPTIMEOUT; 485 nfsrc_unlock(rp); 486 } else { 487 rp->rc_timestamp = NFSD_MONOSEC + 488 NFSRVCACHE_TCPTIMEOUT; 489 if (rp->rc_refcnt > 0) 490 nfsrc_unlock(rp); 491 else 492 retrp = rp; 493 } 494 } else { 495 nfsrc_freecache(rp); 496 NFSUNLOCKCACHE(); 497 } 498 499 out: 500 nfsrc_trimcache(nd->nd_sockref, so); 501 NFSEXITCODE2(0, nd); 502 return (retrp); 503 } 504 505 /* 506 * Invalidate and, if possible, free an in prog cache entry. 507 * Must not sleep. 508 */ 509 APPLESTATIC void 510 nfsrvd_delcache(struct nfsrvcache *rp) 511 { 512 513 if (!(rp->rc_flag & RC_INPROG)) 514 panic("nfsrvd_delcache not in prog"); 515 NFSLOCKCACHE(); 516 rp->rc_flag &= ~RC_INPROG; 517 if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED)) 518 nfsrc_freecache(rp); 519 NFSUNLOCKCACHE(); 520 } 521 522 /* 523 * Called after nfsrvd_updatecache() once the reply is sent, to update 524 * the entry for nfsrc_activesocket() and unlock it. The argument is 525 * the pointer returned by nfsrvd_updatecache(). 526 */ 527 APPLESTATIC void 528 nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err) 529 { 530 tcp_seq tmp_seq; 531 532 if (!(rp->rc_flag & RC_LOCKED)) 533 panic("nfsrvd_sentcache not locked"); 534 if (!err) { 535 if ((so->so_proto->pr_domain->dom_family != AF_INET && 536 so->so_proto->pr_domain->dom_family != AF_INET6) || 537 so->so_proto->pr_protocol != IPPROTO_TCP) 538 panic("nfs sent cache"); 539 if (nfsrv_getsockseqnum(so, &tmp_seq)) { 540 NFSLOCKCACHE(); 541 rp->rc_tcpseq = tmp_seq; 542 rp->rc_flag |= RC_TCPSEQ; 543 NFSUNLOCKCACHE(); 544 } 545 } 546 nfsrc_unlock(rp); 547 } 548 549 /* 550 * Get a cache entry for TCP 551 * - key on <xid, nfs version> 552 * (allow multiple entries for a given key) 553 */ 554 static int 555 nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) 556 { 557 struct nfsrvcache *rp, *nextrp; 558 int i; 559 struct nfsrvcache *hitrp; 560 struct nfsrvhashhead *hp, nfsrc_templist; 561 int hit, ret = 0; 562 563 hp = NFSRCHASH(newrp->rc_xid); 564 newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum); 565 tryagain: 566 NFSLOCKCACHE(); 567 hit = 1; 568 LIST_INIT(&nfsrc_templist); 569 /* 570 * Get all the matches and put them on the temp list. 571 */ 572 rp = LIST_FIRST(hp); 573 while (rp != LIST_END(hp)) { 574 nextrp = LIST_NEXT(rp, rc_hash); 575 if (newrp->rc_xid == rp->rc_xid && 576 (!(rp->rc_flag & RC_INPROG) || 577 ((newrp->rc_flag & RC_SAMETCPCONN) && 578 newrp->rc_sockref == rp->rc_sockref)) && 579 (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) && 580 newrp->rc_proc == rp->rc_proc && 581 ((newrp->rc_flag & RC_NFSV4) && 582 newrp->rc_sockref != rp->rc_sockref && 583 newrp->rc_cachetime >= rp->rc_cachetime) 584 && newrp->rc_reqlen == rp->rc_reqlen && 585 newrp->rc_cksum == rp->rc_cksum) { 586 LIST_REMOVE(rp, rc_hash); 587 LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash); 588 } 589 rp = nextrp; 590 } 591 592 /* 593 * Now, use nfsrc_templist to decide if there is a match. 594 */ 595 i = 0; 596 LIST_FOREACH(rp, &nfsrc_templist, rc_hash) { 597 i++; 598 if (rp->rc_refcnt > 0) { 599 hit = 0; 600 break; 601 } 602 } 603 /* 604 * Can be a hit only if one entry left. 605 * Note possible hit entry and put nfsrc_templist back on hash 606 * list. 607 */ 608 if (i != 1) 609 hit = 0; 610 hitrp = rp = LIST_FIRST(&nfsrc_templist); 611 while (rp != LIST_END(&nfsrc_templist)) { 612 nextrp = LIST_NEXT(rp, rc_hash); 613 LIST_REMOVE(rp, rc_hash); 614 LIST_INSERT_HEAD(hp, rp, rc_hash); 615 rp = nextrp; 616 } 617 if (LIST_FIRST(&nfsrc_templist) != LIST_END(&nfsrc_templist)) 618 panic("nfs gettcp cache templist"); 619 620 if (hit) { 621 rp = hitrp; 622 if ((rp->rc_flag & RC_LOCKED) != 0) { 623 rp->rc_flag |= RC_WANTED; 624 (void)mtx_sleep(rp, NFSCACHEMUTEXPTR, 625 (PZERO - 1) | PDROP, "nfsrc", 10 * hz); 626 goto tryagain; 627 } 628 if (rp->rc_flag == 0) 629 panic("nfs tcp cache0"); 630 rp->rc_flag |= RC_LOCKED; 631 if (rp->rc_flag & RC_INPROG) { 632 newnfsstats.srvcache_inproghits++; 633 NFSUNLOCKCACHE(); 634 if (newrp->rc_sockref == rp->rc_sockref) 635 nfsrc_marksametcpconn(rp->rc_sockref); 636 ret = RC_DROPIT; 637 } else if (rp->rc_flag & RC_REPSTATUS) { 638 /* 639 * V2 only. 640 */ 641 newnfsstats.srvcache_nonidemdonehits++; 642 NFSUNLOCKCACHE(); 643 if (newrp->rc_sockref == rp->rc_sockref) 644 nfsrc_marksametcpconn(rp->rc_sockref); 645 ret = RC_REPLY; 646 nfsrvd_rephead(nd); 647 *(nd->nd_errp) = rp->rc_status; 648 rp->rc_timestamp = NFSD_MONOSEC + 649 NFSRVCACHE_TCPTIMEOUT; 650 } else if (rp->rc_flag & RC_REPMBUF) { 651 newnfsstats.srvcache_nonidemdonehits++; 652 NFSUNLOCKCACHE(); 653 if (newrp->rc_sockref == rp->rc_sockref) 654 nfsrc_marksametcpconn(rp->rc_sockref); 655 ret = RC_REPLY; 656 nd->nd_mreq = m_copym(rp->rc_reply, 0, 657 M_COPYALL, M_WAITOK); 658 rp->rc_timestamp = NFSD_MONOSEC + 659 NFSRVCACHE_TCPTIMEOUT; 660 } else { 661 panic("nfs tcp cache1"); 662 } 663 nfsrc_unlock(rp); 664 free((caddr_t)newrp, M_NFSRVCACHE); 665 goto out; 666 } 667 newnfsstats.srvcache_misses++; 668 newnfsstats.srvcache_size++; 669 670 /* 671 * For TCP, multiple entries for a key are allowed, so don't 672 * chain it into the hash table until done. 673 */ 674 newrp->rc_cachetime = NFSD_MONOSEC; 675 newrp->rc_flag |= RC_INPROG; 676 LIST_INSERT_HEAD(hp, newrp, rc_hash); 677 NFSUNLOCKCACHE(); 678 nd->nd_rp = newrp; 679 ret = RC_DOIT; 680 681 out: 682 NFSEXITCODE2(0, nd); 683 return (ret); 684 } 685 686 /* 687 * Lock a cache entry. 688 * Also puts a mutex lock on the cache list. 689 */ 690 static void 691 nfsrc_lock(struct nfsrvcache *rp) 692 { 693 NFSCACHELOCKREQUIRED(); 694 while ((rp->rc_flag & RC_LOCKED) != 0) { 695 rp->rc_flag |= RC_WANTED; 696 (void)mtx_sleep(rp, NFSCACHEMUTEXPTR, PZERO - 1, 697 "nfsrc", 0); 698 } 699 rp->rc_flag |= RC_LOCKED; 700 } 701 702 /* 703 * Unlock a cache entry. 704 */ 705 static void 706 nfsrc_unlock(struct nfsrvcache *rp) 707 { 708 709 NFSLOCKCACHE(); 710 rp->rc_flag &= ~RC_LOCKED; 711 nfsrc_wanted(rp); 712 NFSUNLOCKCACHE(); 713 } 714 715 /* 716 * Wakeup anyone wanting entry. 717 */ 718 static void 719 nfsrc_wanted(struct nfsrvcache *rp) 720 { 721 if (rp->rc_flag & RC_WANTED) { 722 rp->rc_flag &= ~RC_WANTED; 723 wakeup((caddr_t)rp); 724 } 725 } 726 727 /* 728 * Free up the entry. 729 * Must not sleep. 730 */ 731 static void 732 nfsrc_freecache(struct nfsrvcache *rp) 733 { 734 735 NFSCACHELOCKREQUIRED(); 736 LIST_REMOVE(rp, rc_hash); 737 if (rp->rc_flag & RC_UDP) { 738 TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); 739 nfsrc_udpcachesize--; 740 } 741 nfsrc_wanted(rp); 742 if (rp->rc_flag & RC_REPMBUF) { 743 mbuf_freem(rp->rc_reply); 744 if (!(rp->rc_flag & RC_UDP)) 745 nfsrc_tcpsavedreplies--; 746 } 747 FREE((caddr_t)rp, M_NFSRVCACHE); 748 newnfsstats.srvcache_size--; 749 } 750 751 /* 752 * Clean out the cache. Called when nfsserver module is unloaded. 753 */ 754 APPLESTATIC void 755 nfsrvd_cleancache(void) 756 { 757 struct nfsrvcache *rp, *nextrp; 758 int i; 759 760 NFSLOCKCACHE(); 761 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 762 LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) { 763 nfsrc_freecache(rp); 764 } 765 } 766 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 767 LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) { 768 nfsrc_freecache(rp); 769 } 770 } 771 newnfsstats.srvcache_size = 0; 772 nfsrc_tcpsavedreplies = 0; 773 NFSUNLOCKCACHE(); 774 } 775 776 /* 777 * The basic rule is to get rid of entries that are expired. 778 */ 779 static void 780 nfsrc_trimcache(u_int64_t sockref, struct socket *so) 781 { 782 struct nfsrvcache *rp, *nextrp; 783 int i; 784 785 NFSLOCKCACHE(); 786 TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) { 787 if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) 788 && rp->rc_refcnt == 0 789 && ((rp->rc_flag & RC_REFCNT) || 790 NFSD_MONOSEC > rp->rc_timestamp || 791 nfsrc_udpcachesize > nfsrc_udphighwater)) 792 nfsrc_freecache(rp); 793 } 794 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 795 LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) { 796 if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) 797 && rp->rc_refcnt == 0 798 && ((rp->rc_flag & RC_REFCNT) || 799 NFSD_MONOSEC > rp->rc_timestamp || 800 nfsrc_activesocket(rp, sockref, so))) 801 nfsrc_freecache(rp); 802 } 803 } 804 NFSUNLOCKCACHE(); 805 } 806 807 /* 808 * Add a seqid# reference to the cache entry. 809 */ 810 APPLESTATIC void 811 nfsrvd_refcache(struct nfsrvcache *rp) 812 { 813 814 NFSLOCKCACHE(); 815 if (rp->rc_refcnt < 0) 816 panic("nfs cache refcnt"); 817 rp->rc_refcnt++; 818 NFSUNLOCKCACHE(); 819 } 820 821 /* 822 * Dereference a seqid# cache entry. 823 */ 824 APPLESTATIC void 825 nfsrvd_derefcache(struct nfsrvcache *rp) 826 { 827 828 NFSLOCKCACHE(); 829 if (rp->rc_refcnt <= 0) 830 panic("nfs cache derefcnt"); 831 rp->rc_refcnt--; 832 if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG))) 833 nfsrc_freecache(rp); 834 NFSUNLOCKCACHE(); 835 } 836 837 /* 838 * Check to see if the socket is active. 839 * Return 1 if the reply has been received/acknowledged by the client, 840 * 0 otherwise. 841 * XXX - Uses tcp internals. 842 */ 843 static int 844 nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t cur_sockref, 845 struct socket *cur_so) 846 { 847 int ret = 0; 848 849 if (!(rp->rc_flag & RC_TCPSEQ)) 850 return (ret); 851 /* 852 * If the sockref is the same, it is the same TCP connection. 853 */ 854 if (cur_sockref == rp->rc_sockref) 855 ret = nfsrv_checksockseqnum(cur_so, rp->rc_tcpseq); 856 return (ret); 857 } 858 859 /* 860 * Calculate the length of the mbuf list and a checksum on the first up to 861 * NFSRVCACHE_CHECKLEN bytes. 862 */ 863 static int 864 nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum) 865 { 866 int len = 0, cklen; 867 mbuf_t m; 868 869 m = m1; 870 while (m) { 871 len += mbuf_len(m); 872 m = mbuf_next(m); 873 } 874 cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len; 875 *cksum = in_cksum(m1, cklen); 876 return (len); 877 } 878 879 /* 880 * Mark a TCP connection that is seeing retries. Should never happen for 881 * NFSv4. 882 */ 883 static void 884 nfsrc_marksametcpconn(u_int64_t sockref) 885 { 886 } 887 888