/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
 * All Rights Reserved
 */

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Implements a kernel based, client side RPC.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/ddi.h>
#include <sys/tiuser.h>
#include <sys/tihdr.h>
#include <sys/t_kuser.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/kstat.h>
#include <sys/t_lock.h>
#include <sys/cmn_err.h>
#include <sys/conf.h>
#include <sys/disp.h>
#include <sys/taskq.h>
#include <sys/list.h>
#include <sys/atomic.h>
#include <sys/zone.h>
#include <netinet/in.h>
#include <rpc/types.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>

static enum clnt_stat clnt_clts_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
		caddr_t, xdrproc_t, caddr_t, struct timeval);
static void	clnt_clts_kabort(CLIENT *);
static void	clnt_clts_kerror(CLIENT *, struct rpc_err *);
static bool_t	clnt_clts_kfreeres(CLIENT *, xdrproc_t, caddr_t);
static bool_t	clnt_clts_kcontrol(CLIENT *, int, char *);
static void	clnt_clts_kdestroy(CLIENT *);
static int	clnt_clts_ksettimers(CLIENT *, struct rpc_timers *,
		struct rpc_timers *, int, void (*)(), caddr_t, uint32_t);

/*
 * Operations vector for CLTS based RPC
 */
static struct clnt_ops clts_ops = {
	clnt_clts_kcallit,	/* do rpc call */
	clnt_clts_kabort,	/* abort call */
	clnt_clts_kerror,	/* return error status */
	clnt_clts_kfreeres,	/* free results */
	clnt_clts_kdestroy,	/* destroy rpc handle */
	clnt_clts_kcontrol,	/* the ioctl() of rpc */
	clnt_clts_ksettimers	/* set retry timers */
};

/*
 * Endpoint for CLTS (INET, INET6, loopback, etc.)
 */
typedef struct endpnt_type {
	struct endpnt_type	*e_next;	/* pointer to next endpoint type */
	list_t		e_pool;		/* list of available endpoints */
	list_t		e_ilist;	/* list of idle endpoints */
	struct endpnt	*e_pcurr;	/* pointer to current endpoint */
	char		e_protofmly[KNC_STRSIZE];	/* protocol family */
	dev_t		e_rdev;		/* device */
	kmutex_t	e_plock;	/* pool lock */
	kmutex_t	e_ilock;	/* idle list lock */
	timeout_id_t	e_itimer;	/* timer to dispatch the taskq */
	uint_t		e_cnt;		/* number of endpoints in the pool */
	zoneid_t	e_zoneid;	/* zoneid of endpoint type */
	kcondvar_t	e_async_cv;	/* cv for asynchronous reap threads */
	uint_t		e_async_count;	/* count of asynchronous reap threads */
} endpnt_type_t;

typedef struct endpnt {
	list_node_t	e_node;		/* link to the pool */
	list_node_t	e_idle;		/* link to the idle list */
	endpnt_type_t	*e_type;	/* back pointer to endpoint type */
	TIUSER		*e_tiptr;	/* pointer to transport endpoint */
	queue_t		*e_wq;		/* write queue */
	uint_t		e_flags;	/* endpoint flags */
	uint_t		e_ref;		/* ref count on endpoint */
	kcondvar_t	e_cv;		/* condition variable */
	kmutex_t	e_lock;		/* protects cv and flags */
	time_t		e_itime;	/* time when rele'd */
} endpnt_t;

#define	ENDPNT_ESTABLISHED	0x1	/* endpoint is established */
#define	ENDPNT_WAITING		0x2	/* thread waiting for endpoint */
#define	ENDPNT_BOUND		0x4	/* endpoint is bound */
#define	ENDPNT_STALE		0x8	/* endpoint is dead */
#define	ENDPNT_ONIDLE		0x10	/* endpoint is on the idle list */

static krwlock_t	endpnt_type_lock;	/* protects endpnt_type_list */
static endpnt_type_t	*endpnt_type_list = NULL; /* list of CLTS endpoints */
static struct kmem_cache	*endpnt_cache;	/* cache of endpnt_t's */
static taskq_t		*endpnt_taskq;		/* endpnt_t reaper thread */
static bool_t		taskq_created;		/* flag for endpnt_taskq */
static kmutex_t		endpnt_taskq_lock;	/* taskq lock */
static zone_key_t	endpnt_destructor_key;

#define	DEFAULT_ENDPOINT_REAP_INTERVAL	60	/* 1 minute */
#define	DEFAULT_INTERVAL_SHIFT		30	/* 30 seconds */

/*
 * Endpoint tunables
 */
static int	clnt_clts_max_endpoints = -1;
static int	clnt_clts_hash_size = DEFAULT_HASH_SIZE;
static time_t	clnt_clts_endpoint_reap_interval = -1;
static clock_t	clnt_clts_taskq_dispatch_interval;

/*
 * Response completion hash queue
 */
static call_table_t *clts_call_ht;

/*
 * Routines for the endpoint manager
 */
static struct endpnt_type *endpnt_type_create(struct knetconfig *);
static void endpnt_type_free(struct endpnt_type *);
static int check_endpnt(struct endpnt *, struct endpnt **);
static struct endpnt *endpnt_get(struct knetconfig *, int);
static void endpnt_rele(struct endpnt *);
static void endpnt_reap_settimer(endpnt_type_t *);
static void endpnt_reap(endpnt_type_t *);
static void endpnt_reap_dispatch(void *);
static void endpnt_reclaim(zoneid_t);


/*
 * Request dispatching function.
 */
static int clnt_clts_dispatch_send(queue_t *q, mblk_t *, struct netbuf *addr,
				calllist_t *, uint_t);

/*
 * The size of the preserialized RPC header information.
 */
#define	CKU_HDRSIZE	20
/*
 * The initial allocation size.  It is small to reduce space requirements.
 */
#define	CKU_INITSIZE	2048
/*
 * The size of additional allocations, if required.  It is larger to
 * reduce the number of actual allocations.
 */
#define	CKU_ALLOCSIZE	8192

/*
 * Private data per rpc handle.  This structure is allocated by
 * clnt_clts_kcreate, and freed by clnt_clts_kdestroy.
 */
struct cku_private {
	CLIENT			cku_client;	/* client handle */
	int			cku_retrys;	/* request retrys */
	calllist_t		cku_call;
	struct endpnt		*cku_endpnt;	/* open end point */
	struct knetconfig	cku_config;
	struct netbuf		cku_addr;	/* remote address */
	struct rpc_err		cku_err;	/* error status */
	XDR			cku_outxdr;	/* xdr stream for output */
	XDR			cku_inxdr;	/* xdr stream for input */
	char			cku_rpchdr[CKU_HDRSIZE + 4]; /* rpc header */
	struct cred		*cku_cred;	/* credentials */
	struct rpc_timers	*cku_timers;	/* for estimating RTT */
	struct rpc_timers	*cku_timeall;	/* for estimating RTT */
	void			(*cku_feedback)(int, int, caddr_t);
					/* ptr to feedback rtn */
	caddr_t			cku_feedarg;	/* argument for feedback func */
	uint32_t		cku_xid;	/* current XID */
	bool_t			cku_bcast;	/* RPC broadcast hint */
	int			cku_useresvport; /* Use reserved port */
	struct rpc_clts_client	*cku_stats;	/* counters for the zone */
};

static const struct rpc_clts_client {
	kstat_named_t	rccalls;
	kstat_named_t	rcbadcalls;
	kstat_named_t	rcretrans;
	kstat_named_t	rcbadxids;
	kstat_named_t	rctimeouts;
	kstat_named_t	rcnewcreds;
	kstat_named_t	rcbadverfs;
	kstat_named_t	rctimers;
	kstat_named_t	rcnomem;
	kstat_named_t	rccantsend;
} clts_rcstat_tmpl = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "retrans",	KSTAT_DATA_UINT64 },
	{ "badxids",	KSTAT_DATA_UINT64 },
	{ "timeouts",	KSTAT_DATA_UINT64 },
	{ "newcreds",	KSTAT_DATA_UINT64 },
	{ "badverfs",	KSTAT_DATA_UINT64 },
	{ "timers",	KSTAT_DATA_UINT64 },
	{ "nomem",	KSTAT_DATA_UINT64 },
	{ "cantsend",	KSTAT_DATA_UINT64 },
};

static uint_t clts_rcstat_ndata =
	sizeof (clts_rcstat_tmpl) / sizeof (kstat_named_t);

#define	RCSTAT_INCR(s, x)	\
	atomic_add_64(&(s)->x.value.ui64, 1)

#define	ptoh(p)		(&((p)->cku_client))
#define	htop(h)		((struct cku_private *)((h)->cl_private))

/*
 * Times to retry
 */
#define	SNDTRIES	4
#define	REFRESHES	2	/* authentication refreshes */

static int clnt_clts_do_bindresvport = 0; /* bind to a non-reserved port */
#define	BINDRESVPORT_RETRIES 5

void
clnt_clts_stats_init(zoneid_t zoneid, struct rpc_clts_client **statsp)
{
	kstat_t *ksp;
	kstat_named_t *knp;

	knp = rpcstat_zone_init_common(zoneid, "unix", "rpc_clts_client",
	    (const kstat_named_t *)&clts_rcstat_tmpl,
	    sizeof (clts_rcstat_tmpl));
	/*
	 * Backwards compatibility for old kstat clients
	 */
	ksp = kstat_create_zone("unix", 0, "rpc_client", "rpc",
	    KSTAT_TYPE_NAMED, clts_rcstat_ndata,
	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid);
	if (ksp) {
		ksp->ks_data = knp;
		kstat_install(ksp);
	}
	*statsp = (struct rpc_clts_client *)knp;
}

void
clnt_clts_stats_fini(zoneid_t zoneid, struct rpc_clts_client **statsp)
{
	rpcstat_zone_fini_common(zoneid, "unix", "rpc_clts_client");
	kstat_delete_byname_zone("unix", 0, "rpc_client", zoneid);
	kmem_free(*statsp, sizeof (clts_rcstat_tmpl));
}

/*
 * Create an rpc handle for a clts rpc connection.
 * Allocates space for the handle structure and the private data.
 */
/* ARGSUSED */
int
clnt_clts_kcreate(struct knetconfig *config, struct netbuf *addr,
	rpcprog_t pgm, rpcvers_t vers, int retrys, struct cred *cred,
	CLIENT **cl)
{
	CLIENT *h;
	struct cku_private *p;
	struct rpc_msg call_msg;
	int error;
	int plen;

	if (cl == NULL)
		return (EINVAL);

	*cl = NULL;
	error = 0;

	p = kmem_zalloc(sizeof (*p), KM_SLEEP);

	h = ptoh(p);

	/* handle */
	h->cl_ops = &clts_ops;
	h->cl_private = (caddr_t)p;
	h->cl_auth = authkern_create();

	/* call message, just used to pre-serialize below */
	call_msg.rm_xid = 0;
	call_msg.rm_direction = CALL;
	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
	call_msg.rm_call.cb_prog = pgm;
	call_msg.rm_call.cb_vers = vers;

	/* private */
	clnt_clts_kinit(h, addr, retrys, cred);

	xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, CKU_HDRSIZE, XDR_ENCODE);

	/* pre-serialize call message header */
	if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
		error = EINVAL;		/* XXX */
		goto bad;
	}

	p->cku_config.knc_rdev = config->knc_rdev;
	p->cku_config.knc_semantics = config->knc_semantics;
	plen = strlen(config->knc_protofmly) + 1;
	p->cku_config.knc_protofmly = kmem_alloc(plen, KM_SLEEP);
	bcopy(config->knc_protofmly, p->cku_config.knc_protofmly, plen);
	p->cku_useresvport = -1; /* value has not been set */

	cv_init(&p->cku_call.call_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&p->cku_call.call_lock, NULL, MUTEX_DEFAULT, NULL);

	*cl = h;
	return (0);

bad:
	auth_destroy(h->cl_auth);
	kmem_free(p->cku_addr.buf, addr->maxlen);
	kmem_free(p, sizeof (struct cku_private));

	return (error);
}

void
clnt_clts_kinit(CLIENT *h, struct netbuf *addr, int retrys, cred_t *cred)
{
	/* LINTED pointer alignment */
	struct cku_private *p = htop(h);
	struct rpcstat *rsp;

	rsp = zone_getspecific(rpcstat_zone_key, curproc->p_zone);
	ASSERT(rsp != NULL);

	p->cku_retrys = retrys;

	if (p->cku_addr.maxlen < addr->len) {
		if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
			kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);

		p->cku_addr.buf = kmem_zalloc(addr->maxlen, KM_SLEEP);
		p->cku_addr.maxlen = addr->maxlen;
	}

	p->cku_addr.len = addr->len;
	bcopy(addr->buf, p->cku_addr.buf, addr->len);

	p->cku_cred = cred;
	p->cku_xid = 0;
	p->cku_timers = NULL;
	p->cku_timeall = NULL;
	p->cku_feedback = NULL;
	p->cku_bcast = FALSE;
	p->cku_call.call_xid = 0;
	p->cku_call.call_hash = 0;
	p->cku_call.call_notified = FALSE;
	p->cku_call.call_next = NULL;
	p->cku_call.call_prev = NULL;
	p->cku_call.call_reply = NULL;
	p->cku_call.call_wq = NULL;
	p->cku_stats = rsp->rpc_clts_client;
}

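/*
 * Illustrative (non-compiled) sketch of how a kernel consumer might obtain
 * and use a CLTS handle through the interfaces above.  The program/version
 * numbers, the knetconfig and the netbuf are placeholders supplied by the
 * caller (e.g. a file system); only clnt_clts_kcreate() above and the
 * generic CLNT_CALL()/CLNT_DESTROY() macros from <rpc/clnt.h> are real
 * interfaces here.
 */
#if 0
static enum clnt_stat
example_clts_null_call(struct knetconfig *config, struct netbuf *addr,
    rpcprog_t prog, rpcvers_t vers, cred_t *cr)
{
	CLIENT *cl;
	enum clnt_stat stat;
	struct timeval wait;
	int error;

	/* Create a CLTS handle; the retry count here is just an example. */
	error = clnt_clts_kcreate(config, addr, prog, vers, SNDTRIES, cr, &cl);
	if (error != 0)
		return (RPC_SYSTEMERROR);

	wait.tv_sec = 15;
	wait.tv_usec = 0;

	/* Procedure 0 is the conventional null procedure; no arguments. */
	stat = CLNT_CALL(cl, 0, xdr_void, NULL, xdr_void, NULL, wait);

	CLNT_DESTROY(cl);
	return (stat);
}
#endif
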
/*
 * set the timers.  Return current retransmission timeout.
 */
static int
clnt_clts_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
	int minimum, void (*feedback)(int, int, caddr_t), caddr_t arg,
	uint32_t xid)
{
	/* LINTED pointer alignment */
	struct cku_private *p = htop(h);
	int value;

	p->cku_feedback = feedback;
	p->cku_feedarg = arg;
	p->cku_timers = t;
	p->cku_timeall = all;
	if (xid)
		p->cku_xid = xid;
	value = all->rt_rtxcur;
	value += t->rt_rtxcur;
	if (value < minimum)
		return (minimum);
	RCSTAT_INCR(p->cku_stats, rctimers);
	return (value);
}

/*
 * Time out back off function. tim is in HZ
 */
#define	MAXTIMO	(20 * hz)
#define	backoff(tim)	(((tim) < MAXTIMO) ? dobackoff(tim) : (tim))
#define	dobackoff(tim)	((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1))

#define	RETRY_POLL_TIMO	30

/*
 * Call remote procedure.
 * Most of the work of rpc is done here.  We serialize what is left
 * of the header (some was pre-serialized in the handle), serialize
 * the arguments, and send it off.  We wait for a reply or a time out.
 * Timeout causes an immediate return, other packet problems may cause
 * a retry on the receive.  When a good packet is received we deserialize
 * it, and check verification.  A bad reply code will cause one retry
 * with full (longhand) credentials.
 */
enum clnt_stat
clnt_clts_kcallit_addr(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
	caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp,
	struct timeval wait, struct netbuf *sin)
{
	/* LINTED pointer alignment */
	struct cku_private *p = htop(h);
	XDR *xdrs;
	int stries = p->cku_retrys;
	int refreshes = REFRESHES;	/* number of times to refresh cred */
	int round_trip;			/* time the RPC */
	int error;
	int hdrsz;
	mblk_t *mp;
	mblk_t *mpdup;
	mblk_t *resp = NULL;
	mblk_t *tmp;
	calllist_t *call = &p->cku_call;
	clock_t timout = 0;
	bool_t interrupted;
	enum clnt_stat status;
	struct rpc_msg reply_msg;
	enum clnt_stat re_status;
	endpnt_t *endpt;

	RCSTAT_INCR(p->cku_stats, rccalls);

	RPCLOG(2, "clnt_clts_kcallit_addr: wait.tv_sec: %ld\n", wait.tv_sec);
	RPCLOG(2, "clnt_clts_kcallit_addr: wait.tv_usec: %ld\n", wait.tv_usec);

	timout = TIMEVAL_TO_TICK(&wait);

	if (p->cku_xid == 0) {
		p->cku_xid = alloc_xid();
		if (p->cku_endpnt != NULL)
			endpnt_rele(p->cku_endpnt);
		p->cku_endpnt = NULL;
	}

	mpdup = NULL;
call_again:

	if (mpdup == NULL) {

		while ((mp = allocb(CKU_INITSIZE, BPRI_LO)) == NULL) {
			if (strwaitbuf(CKU_INITSIZE, BPRI_LO)) {
				p->cku_err.re_status = RPC_SYSTEMERROR;
				p->cku_err.re_errno = ENOSR;
				goto done;
			}
		}

		xdrs = &p->cku_outxdr;
		xdrmblk_init(xdrs, mp, XDR_ENCODE, CKU_ALLOCSIZE);

		if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
			/*
			 * Copy in the preserialized RPC header
			 * information.
			 */
			bcopy(p->cku_rpchdr, mp->b_rptr, CKU_HDRSIZE);

			/*
			 * transaction id is the 1st thing in the output
			 * buffer.
			 */
			/* LINTED pointer alignment */
			(*(uint32_t *)(mp->b_rptr)) = p->cku_xid;

			/* Skip the preserialized stuff. */
			XDR_SETPOS(xdrs, CKU_HDRSIZE);

			/* Serialize dynamic stuff into the output buffer. */
			if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
			    (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
			    (!(*xdr_args)(xdrs, argsp))) {
				freemsg(mp);
				p->cku_err.re_status = RPC_CANTENCODEARGS;
				p->cku_err.re_errno = EIO;
				goto done;
			}
		} else {
			uint32_t *uproc = (uint32_t *)
			    &p->cku_rpchdr[CKU_HDRSIZE];
			IXDR_PUT_U_INT32(uproc, procnum);

			(*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;
			XDR_SETPOS(xdrs, 0);

			/* Serialize the procedure number and the arguments. */
			if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr,
			    CKU_HDRSIZE+4, xdrs, xdr_args, argsp)) {
				freemsg(mp);
				p->cku_err.re_status = RPC_CANTENCODEARGS;
				p->cku_err.re_errno = EIO;
				goto done;
			}
		}
	} else
		mp = mpdup;

	mpdup = dupmsg(mp);
	if (mpdup == NULL) {
		freemsg(mp);
		p->cku_err.re_status = RPC_SYSTEMERROR;
		p->cku_err.re_errno = ENOSR;
		goto done;
	}

	/*
	 * Grab an endpnt only if the endpoint is NULL.  We could be retrying
	 * the request and in this case we want to go through the same
	 * source port, so that the duplicate request cache may detect a
	 * retry.
	 */

	if (p->cku_endpnt == NULL)
		p->cku_endpnt = endpnt_get(&p->cku_config, p->cku_useresvport);

	if (p->cku_endpnt == NULL) {
		freemsg(mp);
		p->cku_err.re_status = RPC_SYSTEMERROR;
		p->cku_err.re_errno = ENOSR;
		goto done;
	}

	round_trip = lbolt;

	error = clnt_clts_dispatch_send(p->cku_endpnt->e_wq, mp,
	    &p->cku_addr, call, p->cku_xid);

	if (error != 0) {
		freemsg(mp);
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = error;
		RCSTAT_INCR(p->cku_stats, rccantsend);
		goto done1;
	}

	RPCLOG(64, "clnt_clts_kcallit_addr: sent call for xid 0x%x\n",
	    p->cku_xid);

	/*
	 * There are two reasons for which we go back to tryread.
	 *
	 * a) In case the status is RPC_PROCUNAVAIL and we sent out a
	 *    broadcast we should not get any invalid messages with the
	 *    RPC_PROCUNAVAIL error back. Some broken RPC implementations
	 *    send them and for this we have to ignore them ( as we would
	 *    have never received them ) and look for another message
	 *    which might contain the valid response because we don't know
	 *    how many broken implementations are in the network. So we are
	 *    going to loop until
	 *    - we received a valid response
	 *    - we have processed all invalid responses and
	 *      got a time out when we try to receive again a
	 *      message.
	 *
	 * b) We will jump back to tryread also in case we failed
	 *    within the AUTH_VALIDATE. In this case we should move
	 *    on and loop until we received a valid response or we
	 *    have processed all responses with broken authentication
	 *    and we got a time out when we try to receive a message.
	 */
tryread:
	mutex_enter(&call->call_lock);
	interrupted = FALSE;
	if (call->call_notified == FALSE) {
		klwp_t *lwp = ttolwp(curthread);
		clock_t cv_wait_ret = 1; /* init to > 0 */
		clock_t cv_timout = timout;

		if (lwp != NULL)
			lwp->lwp_nostop++;

		cv_timout += lbolt;

		if (h->cl_nosignal)
			while ((cv_wait_ret =
			    cv_timedwait(&call->call_cv,
			    &call->call_lock, cv_timout)) > 0 &&
			    call->call_notified == FALSE);
		else
			while ((cv_wait_ret =
			    cv_timedwait_sig(&call->call_cv,
			    &call->call_lock, cv_timout)) > 0 &&
			    call->call_notified == FALSE);

		if (cv_wait_ret == 0)
			interrupted = TRUE;

		if (lwp != NULL)
			lwp->lwp_nostop--;
	}
	resp = call->call_reply;
	call->call_reply = NULL;
	status = call->call_status;
	/*
	 * We have to reset the call_notified here. In case we have
	 * to do a retry ( e.g. in case we got a RPC_PROCUNAVAIL
	 * error ) we need to set this to false to ensure that
	 * we will wait for the next message. When the next message
	 * is going to arrive the function clnt_clts_dispatch_notify
	 * will set this to true again.
	 */
	call->call_notified = FALSE;
	mutex_exit(&call->call_lock);

	if (status == RPC_TIMEDOUT) {
		if (interrupted) {
			/*
			 * We got interrupted, bail out
			 */
			p->cku_err.re_status = RPC_INTR;
			p->cku_err.re_errno = EINTR;
			goto done1;
		} else {
			/*
			 * It's possible that our response arrived
			 * right after we timed out.  Check to see
			 * if it has arrived before we remove the
			 * calllist from the dispatch queue.
			 */
			mutex_enter(&call->call_lock);
			if (call->call_notified == TRUE) {
				resp = call->call_reply;
				call->call_reply = NULL;
				mutex_exit(&call->call_lock);
				RPCLOG(8, "clnt_clts_kcallit_addr: "
				    "response received for request "
				    "w/xid 0x%x after timeout\n",
				    p->cku_xid);
				goto getresponse;
			}
			mutex_exit(&call->call_lock);

			RPCLOG(8, "clnt_clts_kcallit_addr: "
			    "request w/xid 0x%x timedout "
			    "waiting for reply\n", p->cku_xid);
#if 0 /* XXX not yet */
			/*
			 * Timeout may be due to a dead gateway. Send
			 * an ioctl downstream advising deletion of
			 * route when we reach the half-way point to
			 * timing out.
			 */
			if (stries == p->cku_retrys/2) {
				t_kadvise(p->cku_endpnt->e_tiptr,
				    (uchar_t *)p->cku_addr.buf,
				    p->cku_addr.len);
			}
#endif /* not yet */
			p->cku_err.re_status = RPC_TIMEDOUT;
			p->cku_err.re_errno = ETIMEDOUT;
			RCSTAT_INCR(p->cku_stats, rctimeouts);
			goto done1;
		}
	}

getresponse:
	/*
	 * Check to see if a response arrived.  If one is present then
	 * proceed to process the response.  Otherwise fall through to
	 * retry or retransmit the request.  This is probably not the
	 * optimal thing to do, but since we are most likely dealing with
	 * an unreliable transport it is the safe thing to do.
	 */
	if (resp == NULL) {
		p->cku_err.re_status = RPC_CANTRECV;
		p->cku_err.re_errno = EIO;
		goto done1;
	}

	/*
	 * Prepare the message for further processing.  We need to remove
	 * the datagram header and copy the source address if necessary.  No
	 * need to verify the header since rpcmod took care of that.
	 */
	/*
	 * Copy the source address if the caller has supplied a netbuf.
	 */
	if (sin != NULL) {
		union T_primitives *pptr;

		pptr = (union T_primitives *)resp->b_rptr;
		bcopy(resp->b_rptr + pptr->unitdata_ind.SRC_offset, sin->buf,
		    pptr->unitdata_ind.SRC_length);
		sin->len = pptr->unitdata_ind.SRC_length;
	}

	/*
	 * Pop off the datagram header.
	 */
	hdrsz = resp->b_wptr - resp->b_rptr;
	if ((resp->b_wptr - (resp->b_rptr + hdrsz)) == 0) {
		tmp = resp;
		resp = resp->b_cont;
		tmp->b_cont = NULL;
		freeb(tmp);
	} else {
		unsigned char *ud_off = resp->b_rptr;
		resp->b_rptr += hdrsz;
		tmp = dupb(resp);
		if (tmp == NULL) {
			p->cku_err.re_status = RPC_SYSTEMERROR;
			p->cku_err.re_errno = ENOSR;
			freemsg(resp);
			goto done1;
		}
		tmp->b_cont = resp->b_cont;
		resp->b_rptr = ud_off;
		freeb(resp);
		resp = tmp;
	}

	round_trip = lbolt - round_trip;
	/*
	 * Van Jacobson timer algorithm here, only if NOT a retransmission.
	 */
	if (p->cku_timers != NULL && stries == p->cku_retrys) {
		int rt;

		rt = round_trip;
		rt -= (p->cku_timers->rt_srtt >> 3);
		p->cku_timers->rt_srtt += rt;
		if (rt < 0)
			rt = -rt;
		rt -= (p->cku_timers->rt_deviate >> 2);
		p->cku_timers->rt_deviate += rt;
		p->cku_timers->rt_rtxcur =
		    (clock_t)((p->cku_timers->rt_srtt >> 2) +
		    p->cku_timers->rt_deviate) >> 1;

		rt = round_trip;
		rt -= (p->cku_timeall->rt_srtt >> 3);
		p->cku_timeall->rt_srtt += rt;
		if (rt < 0)
			rt = -rt;
		rt -= (p->cku_timeall->rt_deviate >> 2);
		p->cku_timeall->rt_deviate += rt;
		p->cku_timeall->rt_rtxcur =
		    (clock_t)((p->cku_timeall->rt_srtt >> 2) +
		    p->cku_timeall->rt_deviate) >> 1;
		if (p->cku_feedback != NULL) {
			(*p->cku_feedback)(FEEDBACK_OK, procnum,
			    p->cku_feedarg);
		}
	}

	/*
	 * Process reply
	 */
	xdrs = &(p->cku_inxdr);
	xdrmblk_init(xdrs, resp, XDR_DECODE, 0);

	reply_msg.rm_direction = REPLY;
	reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
	reply_msg.acpted_rply.ar_stat = SUCCESS;
	reply_msg.acpted_rply.ar_verf = _null_auth;
	/*
	 * xdr_results will be done in AUTH_UNWRAP.
	 */
	reply_msg.acpted_rply.ar_results.where = NULL;
	reply_msg.acpted_rply.ar_results.proc = xdr_void;

	/*
	 * Decode and validate the response.
	 */
	if (!xdr_replymsg(xdrs, &reply_msg)) {
		p->cku_err.re_status = RPC_CANTDECODERES;
		p->cku_err.re_errno = EIO;
		(void) xdr_rpc_free_verifier(xdrs, &reply_msg);
		goto done1;
	}

	_seterr_reply(&reply_msg, &(p->cku_err));

	re_status = p->cku_err.re_status;
	if (re_status == RPC_SUCCESS) {
		/*
		 * Reply is good, check auth.
		 */
		if (!AUTH_VALIDATE(h->cl_auth,
		    &reply_msg.acpted_rply.ar_verf)) {
			p->cku_err.re_status = RPC_AUTHERROR;
			p->cku_err.re_why = AUTH_INVALIDRESP;
			RCSTAT_INCR(p->cku_stats, rcbadverfs);
			(void) xdr_rpc_free_verifier(xdrs, &reply_msg);
			goto tryread;
		}
		if (!AUTH_UNWRAP(h->cl_auth, xdrs, xdr_results, resultsp)) {
			p->cku_err.re_status = RPC_CANTDECODERES;
			p->cku_err.re_errno = EIO;
		}
		(void) xdr_rpc_free_verifier(xdrs, &reply_msg);
		goto done1;
	}
	/* set errno in case we can't recover */
	if (re_status != RPC_VERSMISMATCH &&
	    re_status != RPC_AUTHERROR &&
	    re_status != RPC_PROGVERSMISMATCH)
		p->cku_err.re_errno = EIO;
	/*
	 * Determine whether or not we're doing an RPC
	 * broadcast.  Some server implementations don't
	 * follow RFC 1050, section 7.4.2 in that they
	 * don't remain silent when they see a proc
	 * they don't support.  Therefore we keep trying
	 * to receive on RPC_PROCUNAVAIL, hoping to get
	 * a valid response from a compliant server.
	 */
	if (re_status == RPC_PROCUNAVAIL && p->cku_bcast) {
		(void) xdr_rpc_free_verifier(xdrs, &reply_msg);
		goto tryread;
	}
	if (re_status == RPC_AUTHERROR) {
		/*
		 * Maybe our credentials need to be refreshed
		 */
		if (refreshes > 0 &&
		    AUTH_REFRESH(h->cl_auth, &reply_msg, p->cku_cred)) {
			/*
			 * The credential is refreshed. Try the request again.
			 * Even if stries == 0, we still retry as long as
			 * refreshes > 0. This prevents a soft authentication
			 * error turning into a hard one at an upper level.
			 */
			refreshes--;
			RCSTAT_INCR(p->cku_stats, rcbadcalls);
			RCSTAT_INCR(p->cku_stats, rcnewcreds);

			(void) xdr_rpc_free_verifier(xdrs, &reply_msg);
			freemsg(mpdup);
			call_table_remove(call);
			mutex_enter(&call->call_lock);
			if (call->call_reply != NULL) {
				freemsg(call->call_reply);
				call->call_reply = NULL;
			}
			mutex_exit(&call->call_lock);

			freemsg(resp);
			mpdup = NULL;
			goto call_again;
		}
		/*
		 * We have used the client handle to do an AUTH_REFRESH
		 * and the RPC status may be set to RPC_SUCCESS;
		 * Let's make sure to set it to RPC_AUTHERROR.
		 */
		p->cku_err.re_status = RPC_AUTHERROR;

		/*
		 * Map recoverable and unrecoverable
		 * authentication errors to appropriate errno
		 */
		switch (p->cku_err.re_why) {
		case AUTH_TOOWEAK:
			/*
			 * Could be an nfsportmon failure, set
			 * useresvport and try again.
			 */
			if (p->cku_useresvport != 1) {
				p->cku_useresvport = 1;
				(void) xdr_rpc_free_verifier(xdrs, &reply_msg);
				freemsg(mpdup);

				call_table_remove(call);
				mutex_enter(&call->call_lock);
				if (call->call_reply != NULL) {
					freemsg(call->call_reply);
					call->call_reply = NULL;
				}
				mutex_exit(&call->call_lock);

				freemsg(resp);
				mpdup = NULL;
				endpt = p->cku_endpnt;
				if (endpt->e_tiptr != NULL) {
					mutex_enter(&endpt->e_lock);
					endpt->e_flags &= ~ENDPNT_BOUND;
					(void) t_kclose(endpt->e_tiptr, 1);
					endpt->e_tiptr = NULL;
					mutex_exit(&endpt->e_lock);

				}

				p->cku_xid = alloc_xid();
				endpnt_rele(p->cku_endpnt);
				p->cku_endpnt = NULL;
				goto call_again;
			}
			/* FALLTHRU */
		case AUTH_BADCRED:
		case AUTH_BADVERF:
		case AUTH_INVALIDRESP:
		case AUTH_FAILED:
		case RPCSEC_GSS_NOCRED:
		case RPCSEC_GSS_FAILED:
			p->cku_err.re_errno = EACCES;
			break;
		case AUTH_REJECTEDCRED:
		case AUTH_REJECTEDVERF:
		default:
			p->cku_err.re_errno = EIO;
			break;
		}
		RPCLOG(1, "clnt_clts_kcallit : authentication failed "
		    "with RPC_AUTHERROR of type %d\n",
		    p->cku_err.re_why);
	}

	(void) xdr_rpc_free_verifier(xdrs, &reply_msg);

done1:
	call_table_remove(call);
	mutex_enter(&call->call_lock);
	if (call->call_reply != NULL) {
		freemsg(call->call_reply);
		call->call_reply = NULL;
	}
	mutex_exit(&call->call_lock);
	RPCLOG(64, "clnt_clts_kcallit_addr: xid 0x%x taken off dispatch list",
	    p->cku_xid);

done:
	if (resp != NULL) {
		freemsg(resp);
		resp = NULL;
	}

	if ((p->cku_err.re_status != RPC_SUCCESS) &&
	    (p->cku_err.re_status != RPC_INTR) &&
	    (p->cku_err.re_status != RPC_UDERROR) &&
	    !IS_UNRECOVERABLE_RPC(p->cku_err.re_status)) {
		if (p->cku_feedback != NULL && stries == p->cku_retrys) {
			(*p->cku_feedback)(FEEDBACK_REXMIT1, procnum,
			    p->cku_feedarg);
		}

		timout = backoff(timout);
		if (p->cku_timeall != (struct rpc_timers *)0)
			p->cku_timeall->rt_rtxcur = timout;

		if (p->cku_err.re_status == RPC_SYSTEMERROR ||
		    p->cku_err.re_status == RPC_CANTSEND) {
			/*
			 * Errors due to lack of resources, wait a bit
			 * and try again.
			 */
			(void) delay(hz/10);
			/* (void) sleep((caddr_t)&lbolt, PZERO-4); */
		}
		if (stries-- > 0) {
			RCSTAT_INCR(p->cku_stats, rcretrans);
			goto call_again;
		}
	}

	if (mpdup != NULL)
		freemsg(mpdup);

	if (p->cku_err.re_status != RPC_SUCCESS) {
		RCSTAT_INCR(p->cku_stats, rcbadcalls);
	}

	/*
	 * Allow the endpoint to be held by the client handle in case this
	 * RPC was not successful.  A retry may occur at a higher level and
	 * in this case we may want to send the request over the same
	 * source port.
	 */
	if (p->cku_err.re_status == RPC_SUCCESS && p->cku_endpnt != NULL) {
		endpnt_rele(p->cku_endpnt);
		p->cku_endpnt = NULL;
	}

	return (p->cku_err.re_status);
}

static enum clnt_stat
clnt_clts_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
	caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp,
	struct timeval wait)
{
	return (clnt_clts_kcallit_addr(h, procnum, xdr_args, argsp,
	    xdr_results, resultsp, wait, NULL));
}

/*
 * Return error info on this handle.
 */
static void
clnt_clts_kerror(CLIENT *h, struct rpc_err *err)
{
	/* LINTED pointer alignment */
	struct cku_private *p = htop(h);

	*err = p->cku_err;
}

static bool_t
clnt_clts_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
{
	/* LINTED pointer alignment */
	struct cku_private *p = htop(h);
	XDR *xdrs;

	xdrs = &(p->cku_outxdr);
	xdrs->x_op = XDR_FREE;
	return ((*xdr_res)(xdrs, res_ptr));
}

/*ARGSUSED*/
static void
clnt_clts_kabort(CLIENT *h)
{
}

static bool_t
clnt_clts_kcontrol(CLIENT *h, int cmd, char *arg)
{
	/* LINTED pointer alignment */
	struct cku_private *p = htop(h);

	switch (cmd) {
	case CLSET_XID:
		p->cku_xid = *((uint32_t *)arg);
		return (TRUE);

	case CLGET_XID:
		*((uint32_t *)arg) = p->cku_xid;
		return (TRUE);

	case CLSET_BCAST:
		p->cku_bcast = *((uint32_t *)arg);
		return (TRUE);

	case CLGET_BCAST:
		*((uint32_t *)arg) = p->cku_bcast;
		return (TRUE);
	case CLSET_BINDRESVPORT:
		if (arg == NULL)
			return (FALSE);

		if (*(int *)arg != 1 && *(int *)arg != 0)
			return (FALSE);

		p->cku_useresvport = *(int *)arg;

		return (TRUE);

	case CLGET_BINDRESVPORT:
		if (arg == NULL)
			return (FALSE);

		*(int *)arg = p->cku_useresvport;

		return (TRUE);

	default:
		return (FALSE);
	}
}

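/*
 * Illustrative (non-compiled) sketch of the CLSET_ and CLGET_ knobs handled
 * by clnt_clts_kcontrol() above, driven through the generic CLNT_CONTROL()
 * macro from <rpc/clnt.h>.  The handle "cl" is a placeholder for one
 * returned by clnt_clts_kcreate().
 */
#if 0
static void
example_clts_control(CLIENT *cl)
{
	uint32_t xid;
	int resv = 1;

	/* Read back the transaction id currently cached in the handle. */
	(void) CLNT_CONTROL(cl, CLGET_XID, (char *)&xid);

	/* Force a specific XID, e.g. so a retry hits the same DRC entry. */
	xid = 0x12345678;
	(void) CLNT_CONTROL(cl, CLSET_XID, (char *)&xid);

	/* Request a reserved source port the next time the endpoint binds. */
	(void) CLNT_CONTROL(cl, CLSET_BINDRESVPORT, (char *)&resv);
}
#endif
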
/*
 * Destroy rpc handle.
 * Frees the space used for output buffer, private data, and handle
 * structure, and the file pointer/TLI data on last reference.
 */
static void
clnt_clts_kdestroy(CLIENT *h)
{
	/* LINTED pointer alignment */
	struct cku_private *p = htop(h);
	calllist_t *call = &p->cku_call;

	int plen;

	RPCLOG(8, "clnt_clts_kdestroy h: %p\n", (void *)h);
	RPCLOG(8, "clnt_clts_kdestroy h: xid=0x%x\n", p->cku_xid);

	if (p->cku_endpnt != NULL)
		endpnt_rele(p->cku_endpnt);

	cv_destroy(&call->call_cv);
	mutex_destroy(&call->call_lock);

	plen = strlen(p->cku_config.knc_protofmly) + 1;
	kmem_free(p->cku_config.knc_protofmly, plen);
	kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
	kmem_free(p, sizeof (*p));
}

/*
 * The connectionless (CLTS) kRPC endpoint management subsystem.
 *
 * Because endpoints are potentially shared among threads making RPC calls,
 * they are managed in a pool according to type (endpnt_type_t).  Each
 * endpnt_type_t points to a list of usable endpoints through the e_pool
 * field, which is of type list_t.  list_t is a doubly-linked list.
 * The number of endpoints in the pool is stored in the e_cnt field of
 * endpnt_type_t and the endpoints are reference counted using the e_ref field
 * in the endpnt_t structure.
 *
 * As an optimization, endpoints that have no references are also linked
 * to an idle list via e_ilist which is also of type list_t.  When a thread
 * calls endpnt_get() to obtain a transport endpoint, the idle list is first
 * consulted and if such an endpoint exists, it is removed from the idle list
 * and returned to the caller.
 *
 * If the idle list is empty, then a check is made to see if more endpoints
 * can be created.  If so, we proceed and create a new endpoint which is added
 * to the pool and returned to the caller.  If we have reached the limit and
 * cannot make a new endpoint then one is returned to the caller via round-
 * robin policy.
 *
 * When an endpoint is placed on the idle list by a thread calling
 * endpnt_rele(), it is timestamped and then a reaper taskq is scheduled to
 * be dispatched if one hasn't already been.  When the timer fires, the
 * taskq traverses the idle list and checks to see which endpoints are
 * eligible to be closed.  It determines this by checking if the timestamp
 * when the endpoint was released has exceeded the threshold for how long
 * it should stay alive.
 *
 * endpnt_t structures remain persistent until the memory reclaim callback,
 * endpnt_reclaim(), is invoked.
 *
 * Here is an example of how the data structures would be laid out by the
 * subsystem:
 *
 *      endpnt_type_t
 *
 *       loopback                                 inet
 *       _______________                          ______________
 *      | e_next        |----------------------->| e_next       |---->>
 *      | e_pool        |<---+                   | e_pool       |<----+
 *      | e_ilist       |<---+--+                | e_ilist      |<----+--+
 *   +->| e_pcurr       |----+--+--+          +->| e_pcurr      |-----+--+--+
 *   |  | ...           |    |  |  |          |  | ...          |     |  |  |
 *   |  | e_itimer (90) |    |  |  |          |  | e_itimer (0) |     |  |  |
 *   |  | e_cnt (1)     |    |  |  |          |  | e_cnt (3)    |     |  |  |
 *   |  +---------------+    |  |  |          |  +--------------+     |  |  |
 *   |                       |  |  |          |                       |  |  |
 *   |   endpnt_t            |  |  |          |                       |  |  |
 *   |   ____________        |  |  |          |   ____________        |  |  |
 *   |  | e_node     |<------+  |  |          |  | e_node     |<------+  |  |
 *   |  | e_idle     |<---------+  |          |  | e_idle     |       |  |  |
 *   +--| e_type     |<------------+          +--| e_type     |       |  |  |
 *      | e_tiptr    |                        |  | e_tiptr    |       |  |  |
 *      | ...        |                        |  | ...        |       |  |  |
 *      | e_lock     |                        |  | e_lock     |       |  |  |
 *      | ...        |                        |  | ...        |       |  |  |
 *      | e_ref (0)  |                        |  | e_ref (2)  |       |  |  |
 *      | e_itime    |                        |  | e_itime    |       |  |  |
 *      +------------+                        |  +------------+       |  |  |
 *                                            |                       |  |  |
 *                                            |                       |  |  |
 *                                            |   ____________        |  |  |
 *                                            |  | e_node     |<------+  |  |
 *                                            |  | e_idle     |<------+--+  |
 *                                            +--| e_type     |       |     |
 *                                            |  | e_tiptr    |       |     |
 *                                            |  | ...        |       |     |
 *                                            |  | e_lock     |       |     |
 *                                            |  | ...        |       |     |
 *                                            |  | e_ref (0)  |       |     |
 *                                            |  | e_itime    |       |     |
 *                                            |  +------------+       |     |
 *                                            |                       |     |
 *                                            |                       |     |
 *                                            |   ____________        |     |
 *                                            |  | e_node     |<------+     |
 *                                            |  | e_idle     |             |
 *                                            +--| e_type     |<------------+
 *                                               | e_tiptr    |
 *                                               | ...        |
 *                                               | e_lock     |
 *                                               | ...        |
 *                                               | e_ref (1)  |
 *                                               | e_itime    |
 *                                               +------------+
 *
 * Endpoint locking strategy:
 *
 * The following functions manipulate lists which hold the endpoint and the
 * endpoints themselves:
 *
 * endpnt_get()/check_endpnt()/endpnt_rele()/endpnt_reap()/do_endpnt_reclaim()
 *
 * Lock description follows:
 *
 * endpnt_type_lock: Global reader/writer lock which protects accesses to the
 *		     endpnt_type_list.
 *
 * e_plock: Lock defined in the endpnt_type_t.  It is intended to
 *	    protect accesses to the pool of endpoints (e_pool) for a given
 *	    endpnt_type_t.
 *
 * e_ilock: Lock defined in endpnt_type_t.  It is intended to protect accesses
 *	    to the idle list (e_ilist) of available endpoints for a given
 *	    endpnt_type_t.  It also protects access to the e_itimer, e_async_cv,
 *	    and e_async_count fields in endpnt_type_t.
 *
 * e_lock: Lock defined in the endpnt structure.  It is intended to protect
 *	   flags, cv, and ref count.
 *
 * The order goes as follows so as not to induce deadlock.
 *
 * endpnt_type_lock -> e_plock -> e_ilock -> e_lock
 *
 * Interaction with Zones and shutting down:
 *
 * endpnt_type_ts are uniquely identified by the (e_zoneid, e_rdev, e_protofmly)
 * tuple, which means that a zone may not reuse another zone's idle endpoints
 * without first doing a t_kclose().
 *
 * A zone's endpnt_type_ts are destroyed when a zone is shut down; e_async_cv
 * and e_async_count are used to keep track of the threads in endpnt_taskq
 * trying to reap endpnt_ts in the endpnt_type_t.
 */

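/*
 * Illustrative (non-compiled) sketch of the lock ordering documented above.
 * A path that needs more than one of these locks acquires them in this order
 * and drops them in the reverse order; "np" and "ep" are placeholders for an
 * endpnt_type_t and an endpnt_t.
 */
#if 0
static void
example_lock_order(endpnt_type_t *np, endpnt_t *ep)
{
	rw_enter(&endpnt_type_lock, RW_READER);	/* endpnt_type_list first */
	mutex_enter(&np->e_plock);		/* then the pool */
	mutex_enter(&np->e_ilock);		/* then the idle list */
	mutex_enter(&ep->e_lock);		/* then a single endpoint */

	mutex_exit(&ep->e_lock);
	mutex_exit(&np->e_ilock);
	mutex_exit(&np->e_plock);
	rw_exit(&endpnt_type_lock);
}
#endif
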
/*
 * Allocate and initialize an endpnt_type_t
 */
static struct endpnt_type *
endpnt_type_create(struct knetconfig *config)
{
	struct endpnt_type *etype;

	/*
	 * Allocate a new endpoint type to hang a list of
	 * endpoints off of it.
	 */
	etype = kmem_alloc(sizeof (struct endpnt_type), KM_SLEEP);
	etype->e_next = NULL;
	etype->e_pcurr = NULL;
	etype->e_itimer = 0;
	etype->e_cnt = 0;

	(void) strncpy(etype->e_protofmly, config->knc_protofmly, KNC_STRSIZE);
	mutex_init(&etype->e_plock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&etype->e_ilock, NULL, MUTEX_DEFAULT, NULL);
	etype->e_rdev = config->knc_rdev;
	etype->e_zoneid = getzoneid();
	etype->e_async_count = 0;
	cv_init(&etype->e_async_cv, NULL, CV_DEFAULT, NULL);

	list_create(&etype->e_pool, sizeof (endpnt_t),
	    offsetof(endpnt_t, e_node));
	list_create(&etype->e_ilist, sizeof (endpnt_t),
	    offsetof(endpnt_t, e_idle));

	/*
	 * Check to see if we need to create a taskq for endpoint
	 * reaping
	 */
	mutex_enter(&endpnt_taskq_lock);
	if (taskq_created == FALSE) {
		taskq_created = TRUE;
		mutex_exit(&endpnt_taskq_lock);
		ASSERT(endpnt_taskq == NULL);
		endpnt_taskq = taskq_create("clts_endpnt_taskq", 1,
		    minclsyspri, 200, INT_MAX, 0);
	} else
		mutex_exit(&endpnt_taskq_lock);

	return (etype);
}

/*
 * Free an endpnt_type_t
 */
static void
endpnt_type_free(struct endpnt_type *etype)
{
	mutex_destroy(&etype->e_plock);
	mutex_destroy(&etype->e_ilock);
	list_destroy(&etype->e_pool);
	list_destroy(&etype->e_ilist);
	kmem_free(etype, sizeof (endpnt_type_t));
}

/*
 * Check the endpoint to ensure that it is suitable for use.
 *
 * Possible return values:
 *
 * return (1) - Endpoint is established, but needs to be re-opened.
 * return (0) && *newp == NULL - Endpoint is established, but unusable.
 * return (0) && *newp != NULL - Endpoint is established and usable.
 */
static int
check_endpnt(struct endpnt *endp, struct endpnt **newp)
{
	*newp = endp;

	mutex_enter(&endp->e_lock);
	ASSERT(endp->e_ref >= 1);

	/*
	 * The first condition we check for is if the endpoint has been
	 * allocated, but is unusable either because it has been closed or
	 * has been marked stale.  Only *one* thread will be allowed to
	 * execute the then clause.  This is enforced because the first thread
	 * to check this condition will clear the flags, so that subsequent
	 * thread(s) checking this endpoint will move on.
	 */
	if ((endp->e_flags & ENDPNT_ESTABLISHED) &&
	    (!(endp->e_flags & ENDPNT_BOUND) ||
	    (endp->e_flags & ENDPNT_STALE))) {
		/*
		 * Clear the flags here since they will be
		 * set again by this thread.  They need to be
		 * individually cleared because we want to maintain
		 * the state for ENDPNT_ONIDLE.
		 */
		endp->e_flags &= ~(ENDPNT_ESTABLISHED |
		    ENDPNT_WAITING | ENDPNT_BOUND | ENDPNT_STALE);
		mutex_exit(&endp->e_lock);
		return (1);
	}

	/*
	 * The second condition is meant for any thread that is waiting for
	 * an endpoint to become established.  It will cv_wait() until
	 * the condition for the endpoint has been changed to ENDPNT_BOUND or
	 * ENDPNT_STALE.
	 */
	while (!(endp->e_flags & ENDPNT_BOUND) &&
	    !(endp->e_flags & ENDPNT_STALE)) {
		endp->e_flags |= ENDPNT_WAITING;
		cv_wait(&endp->e_cv, &endp->e_lock);
	}

	ASSERT(endp->e_flags & ENDPNT_ESTABLISHED);

	/*
	 * The last case we check for is if the endpoint has been marked stale.
	 * If this is the case then set *newp to NULL and return, so that the
	 * caller is notified of the error and can take appropriate action.
	 */
	if (endp->e_flags & ENDPNT_STALE) {
		endp->e_ref--;
		*newp = NULL;
	}
	mutex_exit(&endp->e_lock);
	return (0);
}

#ifdef DEBUG
/*
 * Provide a fault injection setting to test error conditions.
 */
static int endpnt_get_return_null = 0;
#endif

/*
 * Returns a handle (struct endpnt *) to an open and bound endpoint
 * specified by the knetconfig passed in.  Returns NULL if no valid endpoint
 * can be obtained.
 */
static struct endpnt *
endpnt_get(struct knetconfig *config, int useresvport)
{
	struct endpnt_type	*n_etype = NULL;
	struct endpnt_type	*np = NULL;
	struct endpnt		*new = NULL;
	struct endpnt		*endp = NULL;
	struct endpnt		*next = NULL;
	TIUSER			*tiptr = NULL;
	int			rtries = BINDRESVPORT_RETRIES;
	int			i = 0;
	int			error;
	int			retval;
	zoneid_t		zoneid = getzoneid();

	RPCLOG(1, "endpnt_get: protofmly %s, ", config->knc_protofmly);
	RPCLOG(1, "rdev %ld\n", config->knc_rdev);

#ifdef DEBUG
	/*
	 * Inject fault if desired.  Pretend we have a stale endpoint
	 * and return NULL.
	 */
	if (endpnt_get_return_null > 0) {
		endpnt_get_return_null--;
		return (NULL);
	}
#endif
	rw_enter(&endpnt_type_lock, RW_READER);

top:
	for (np = endpnt_type_list; np != NULL; np = np->e_next)
		if ((np->e_zoneid == zoneid) &&
		    (np->e_rdev == config->knc_rdev) &&
		    (strcmp(np->e_protofmly,
		    config->knc_protofmly) == 0))
			break;

	if (np == NULL && n_etype != NULL) {
		ASSERT(rw_write_held(&endpnt_type_lock));

		/*
		 * Link the endpoint type onto the list
		 */
		n_etype->e_next = endpnt_type_list;
		endpnt_type_list = n_etype;
		np = n_etype;
		n_etype = NULL;
	}

	if (np == NULL) {
		/*
		 * The logic here is that we were unable to find an
		 * endpnt_type_t that matched our criteria, so we allocate a
		 * new one.  Because kmem_alloc() needs to be called with
		 * KM_SLEEP, we drop our locks so that we don't induce
		 * deadlock.  After allocating and initializing the
		 * endpnt_type_t, we reacquire the lock and go back to check
		 * if this entry needs to be added to the list.  Since we do
		 * some operations without any locking other threads may
		 * have been looking for the same endpnt_type_t and gone
		 * through this code path.  We check for this case and allow
		 * one thread to link its endpnt_type_t to the list and the
		 * other threads will simply free theirs.
		 */
		rw_exit(&endpnt_type_lock);
		n_etype = endpnt_type_create(config);

		/*
		 * We need to reacquire the lock with RW_WRITER here so that
		 * we can safely link the new endpoint type onto the list.
		 */
		rw_enter(&endpnt_type_lock, RW_WRITER);
		goto top;
	}

	rw_exit(&endpnt_type_lock);
	/*
	 * If n_etype is not NULL, then another thread was able to
	 * insert an endpnt_type_t of this type onto the list before
	 * we did.  Go ahead and free ours.
	 */
	if (n_etype != NULL)
		endpnt_type_free(n_etype);

	mutex_enter(&np->e_ilock);
	/*
	 * The algorithm to hand out endpoints is to first
	 * give out those that are idle if such endpoints
	 * exist.  Otherwise, create a new one if we haven't
	 * reached the max threshold.  Finally, we give out
	 * endpoints in a pseudo LRU fashion (round-robin).
	 *
	 * Note: The idle list is merely a hint of those endpoints
	 * that should be idle.  There exists a window after the
	 * endpoint is released and before it is linked back onto the
	 * idle list where a thread could get a reference to it and
	 * use it.  This is okay, since the reference counts will
	 * still be consistent.
	 */
	if ((endp = (endpnt_t *)list_head(&np->e_ilist)) != NULL) {
		timeout_id_t t_id = 0;

		mutex_enter(&endp->e_lock);
		endp->e_ref++;
		endp->e_itime = 0;
		endp->e_flags &= ~ENDPNT_ONIDLE;
		mutex_exit(&endp->e_lock);

		/*
		 * Pop the endpoint off the idle list and hand it off
		 */
		list_remove(&np->e_ilist, endp);

		if (np->e_itimer != 0) {
			t_id = np->e_itimer;
			np->e_itimer = 0;
		}
		mutex_exit(&np->e_ilock);
		/*
		 * Reset the idle timer if it has been set
		 */
		if (t_id != (timeout_id_t)0)
			(void) untimeout(t_id);

		if (check_endpnt(endp, &new) == 0)
			return (new);
	} else if (np->e_cnt >= clnt_clts_max_endpoints) {
		/*
		 * There are no idle endpoints currently, so
		 * create a new one if we have not reached the maximum or
		 * hand one out in round-robin.
		 */
		mutex_exit(&np->e_ilock);
		mutex_enter(&np->e_plock);
		endp = np->e_pcurr;
		mutex_enter(&endp->e_lock);
		endp->e_ref++;
		mutex_exit(&endp->e_lock);

		ASSERT(endp != NULL);
		/*
		 * Advance the pointer to the next eligible endpoint, if
		 * necessary.
		 */
		if (np->e_cnt > 1) {
			next = (endpnt_t *)list_next(&np->e_pool, np->e_pcurr);
			if (next == NULL)
				next = (endpnt_t *)list_head(&np->e_pool);
			np->e_pcurr = next;
		}

		mutex_exit(&np->e_plock);

		/*
		 * We need to check to see if this endpoint is bound or
		 * not.  If it is in progress then just wait until
		 * the set up is complete
		 */
		if (check_endpnt(endp, &new) == 0)
			return (new);
	} else {
		mutex_exit(&np->e_ilock);
		mutex_enter(&np->e_plock);

		/*
		 * Allocate a new endpoint to use.  If we can't allocate any
		 * more memory then use one that is already established if any
		 * such endpoints exist.
		 */
		new = kmem_cache_alloc(endpnt_cache, KM_NOSLEEP);
		if (new == NULL) {
			RPCLOG0(1, "endpnt_get: kmem_cache_alloc failed\n");
			/*
			 * Try to recover by using an existing endpoint.
			 */
			if (np->e_cnt <= 0) {
				mutex_exit(&np->e_plock);
				return (NULL);
			}
			endp = np->e_pcurr;
			if ((next = list_next(&np->e_pool, np->e_pcurr)) !=
			    NULL)
				np->e_pcurr = next;
			ASSERT(endp != NULL);
			mutex_enter(&endp->e_lock);
			endp->e_ref++;
			mutex_exit(&endp->e_lock);
			mutex_exit(&np->e_plock);

			if (check_endpnt(endp, &new) == 0)
				return (new);
		} else {
			/*
			 * Partially init an endpoint structure and put
			 * it on the list, so that other interested threads
			 * know that one is being created
			 */
			bzero(new, sizeof (struct endpnt));

			cv_init(&new->e_cv, NULL, CV_DEFAULT, NULL);
			mutex_init(&new->e_lock, NULL, MUTEX_DEFAULT, NULL);
			new->e_ref = 1;
			new->e_type = np;

			/*
			 * Link the endpoint into the pool.
			 */
			list_insert_head(&np->e_pool, new);
			np->e_cnt++;
			if (np->e_pcurr == NULL)
				np->e_pcurr = new;
			mutex_exit(&np->e_plock);
		}
	}

	/*
	 * The transport should be opened with sufficient privs
	 */
	error = t_kopen(NULL, config->knc_rdev, FREAD|FWRITE|FNDELAY, &tiptr,
	    kcred);
	if (error) {
		RPCLOG(1, "endpnt_get: t_kopen: %d\n", error);
		goto bad;
	}

	new->e_tiptr = tiptr;
	rpc_poptimod(tiptr->fp->f_vnode);

	/*
	 * Allow the kernel to push the module on behalf of the user.
	 */
	error = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"rpcmod", 0,
	    K_TO_K, kcred, &retval);
	if (error) {
		RPCLOG(1, "endpnt_get: kstr_push on rpcmod failed %d\n", error);
		goto bad;
	}

	error = strioctl(tiptr->fp->f_vnode, RPC_CLIENT, 0, 0, K_TO_K,
	    kcred, &retval);
	if (error) {
		RPCLOG(1, "endpnt_get: strioctl failed %d\n", error);
		goto bad;
	}

	/*
	 * Connectionless data flow should bypass the stream head.
	 */
	new->e_wq = tiptr->fp->f_vnode->v_stream->sd_wrq->q_next;

	error = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"timod", 0,
	    K_TO_K, kcred, &retval);
	if (error) {
		RPCLOG(1, "endpnt_get: kstr_push on timod failed %d\n", error);
		goto bad;
	}

	/*
	 * Attempt to bind the endpoint.  If we fail then propagate
	 * error back to calling subsystem, so that it can be handled
	 * appropriately.
	 * If the caller has not specified reserved port usage then
	 * take the system default.
	 */
	if (useresvport == -1)
		useresvport = clnt_clts_do_bindresvport;

	if (useresvport &&
	    (strcmp(config->knc_protofmly, NC_INET) == 0 ||
	    strcmp(config->knc_protofmly, NC_INET6) == 0)) {

		while ((error =
		    bindresvport(new->e_tiptr, NULL, NULL, FALSE)) != 0) {
			RPCLOG(1,
			    "endpnt_get: bindresvport error %d\n",
			    error);
			if (error != EPROTO) {
				if (rtries-- <= 0)
					goto bad;

				delay(hz << i++);
				continue;
			}

			(void) t_kclose(new->e_tiptr, 1);
			/*
			 * reopen with all privileges
			 */
			error = t_kopen(NULL, config->knc_rdev,
			    FREAD|FWRITE|FNDELAY,
			    &new->e_tiptr, kcred);
			if (error) {
				RPCLOG(1, "endpnt_get: t_kopen: %d\n", error);
				new->e_tiptr = NULL;
				goto bad;
			}
		}
	} else if ((error = t_kbind(new->e_tiptr, NULL, NULL)) != 0) {
		RPCLOG(1, "endpnt_get: t_kbind failed: %d\n", error);
		goto bad;
	}

	/*
	 * Set the flags and notify any waiters that we have an established
	 * endpoint.
	 */
	mutex_enter(&new->e_lock);
	new->e_flags |= ENDPNT_ESTABLISHED;
	new->e_flags |= ENDPNT_BOUND;
	if (new->e_flags & ENDPNT_WAITING) {
		cv_broadcast(&new->e_cv);
		new->e_flags &= ~ENDPNT_WAITING;
	}
	mutex_exit(&new->e_lock);

	return (new);

bad:
	ASSERT(new != NULL);
	/*
	 * mark this endpoint as stale and notify any threads waiting
	 * on this endpoint that it will be going away.
	 */
	mutex_enter(&new->e_lock);
	if (new->e_ref > 0) {
		new->e_flags |= ENDPNT_ESTABLISHED;
		new->e_flags |= ENDPNT_STALE;
		if (new->e_flags & ENDPNT_WAITING) {
			cv_broadcast(&new->e_cv);
			new->e_flags &= ~ENDPNT_WAITING;
		}
	}
	new->e_ref--;
	new->e_tiptr = NULL;
	mutex_exit(&new->e_lock);

	/*
	 * If there was a transport endpoint opened, then close it.
	 */
	if (tiptr != NULL)
		(void) t_kclose(tiptr, 1);

	return (NULL);
}

/*
 * Release a reference to the endpoint
 */
static void
endpnt_rele(struct endpnt *sp)
{
	mutex_enter(&sp->e_lock);
	ASSERT(sp->e_ref > 0);
	sp->e_ref--;
	/*
	 * If the ref count is zero, then start the idle timer and link
	 * the endpoint onto the idle list.
	 */
	if (sp->e_ref == 0) {
		sp->e_itime = gethrestime_sec();

		/*
		 * Check to see if the endpoint is already linked to the idle
		 * list, so that we don't try to reinsert it.
		 */
		if (sp->e_flags & ENDPNT_ONIDLE) {
			mutex_exit(&sp->e_lock);
			mutex_enter(&sp->e_type->e_ilock);
			endpnt_reap_settimer(sp->e_type);
			mutex_exit(&sp->e_type->e_ilock);
			return;
		}

		sp->e_flags |= ENDPNT_ONIDLE;
		mutex_exit(&sp->e_lock);
		mutex_enter(&sp->e_type->e_ilock);
		list_insert_tail(&sp->e_type->e_ilist, sp);
		endpnt_reap_settimer(sp->e_type);
		mutex_exit(&sp->e_type->e_ilock);
	} else
		mutex_exit(&sp->e_lock);
}

static void
endpnt_reap_settimer(endpnt_type_t *etp)
{
	if (etp->e_itimer == (timeout_id_t)0)
		etp->e_itimer = timeout(endpnt_reap_dispatch, (void *)etp,
		    clnt_clts_taskq_dispatch_interval);
}

static void
endpnt_reap_dispatch(void *a)
{
	endpnt_type_t *etp = a;

	/*
	 * The idle timer has fired, so dispatch the taskq to close the
	 * endpoint.
	 */
	if (taskq_dispatch(endpnt_taskq, (task_func_t *)endpnt_reap, etp,
	    TQ_NOSLEEP) == NULL)
		return;
	mutex_enter(&etp->e_ilock);
	etp->e_async_count++;
	mutex_exit(&etp->e_ilock);
}

/*
 * Traverse the idle list and close those endpoints that have reached their
 * timeout interval.
 */
static void
endpnt_reap(endpnt_type_t *etp)
{
	struct endpnt *e;
	struct endpnt *next_node = NULL;

	mutex_enter(&etp->e_ilock);
	e = list_head(&etp->e_ilist);
	while (e != NULL) {
		next_node = list_next(&etp->e_ilist, e);

		mutex_enter(&e->e_lock);
		if (e->e_ref > 0) {
			mutex_exit(&e->e_lock);
			e = next_node;
			continue;
		}

		ASSERT(e->e_ref == 0);
		if (e->e_itime > 0 &&
		    (e->e_itime + clnt_clts_endpoint_reap_interval) <
		    gethrestime_sec()) {
			e->e_flags &= ~ENDPNT_BOUND;
			(void) t_kclose(e->e_tiptr, 1);
			e->e_tiptr = NULL;
			e->e_itime = 0;
		}
		mutex_exit(&e->e_lock);
		e = next_node;
	}
	etp->e_itimer = 0;
	if (--etp->e_async_count == 0)
		cv_signal(&etp->e_async_cv);
	mutex_exit(&etp->e_ilock);
}

static void
endpnt_reclaim(zoneid_t zoneid)
{
	struct endpnt_type *np;
	struct endpnt *e;
	struct endpnt *next_node = NULL;
	list_t free_list;
	int rcnt = 0;

	list_create(&free_list, sizeof (endpnt_t), offsetof(endpnt_t, e_node));

	RPCLOG0(1, "endpnt_reclaim: reclaim callback started\n");
	rw_enter(&endpnt_type_lock, RW_READER);
	for (np = endpnt_type_list; np != NULL; np = np->e_next) {
		if (zoneid != ALL_ZONES && zoneid != np->e_zoneid)
			continue;

		mutex_enter(&np->e_plock);
		RPCLOG(1, "endpnt_reclaim: protofmly %s, ",
		    np->e_protofmly);
		RPCLOG(1, "rdev %ld\n", np->e_rdev);
		RPCLOG(1, "endpnt_reclaim: found %d endpoint(s)\n",
		    np->e_cnt);

		if (np->e_cnt == 0) {
			mutex_exit(&np->e_plock);
			continue;
		}

		/*
		 * The nice thing about maintaining an idle list is that if
		 * there are any endpoints to reclaim, they are going to be
		 * on this list.  Just go through and reap the ones that
		 * have ref counts of zero.
		 */
		mutex_enter(&np->e_ilock);
		e = list_head(&np->e_ilist);
		while (e != NULL) {
			next_node = list_next(&np->e_ilist, e);
			mutex_enter(&e->e_lock);
			if (e->e_ref > 0) {
				mutex_exit(&e->e_lock);
				e = next_node;
				continue;
			}
			ASSERT(e->e_ref == 0);
			mutex_exit(&e->e_lock);

			list_remove(&np->e_ilist, e);
			list_remove(&np->e_pool, e);
			list_insert_head(&free_list, e);

			rcnt++;
			np->e_cnt--;
			e = next_node;
		}
		mutex_exit(&np->e_ilock);
		/*
		 * Reset the current pointer to be safe
		 */
		if ((e = (struct endpnt *)list_head(&np->e_pool)) != NULL)
			np->e_pcurr = e;
		else {
			ASSERT(np->e_cnt == 0);
			np->e_pcurr = NULL;
		}

		mutex_exit(&np->e_plock);
	}
	rw_exit(&endpnt_type_lock);

	while ((e = list_head(&free_list)) != NULL) {
		list_remove(&free_list, e);
		if (e->e_tiptr != NULL)
			(void) t_kclose(e->e_tiptr, 1);

		cv_destroy(&e->e_cv);
		mutex_destroy(&e->e_lock);
		kmem_cache_free(endpnt_cache, e);
	}
	list_destroy(&free_list);
	RPCLOG(1, "endpnt_reclaim: reclaimed %d endpoint(s)\n", rcnt);
}

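/*
 * Illustrative (non-compiled) sketch of how the reclaim machinery is
 * typically wired up at initialization time: a zone key whose destructor
 * (endpnt_destructor(), below) tears down per-zone endpoint state, and a
 * kmem cache whose reclaim callback (endpnt_repossess(), below) trims idle
 * endpoints under memory pressure.  The real registration lives in the CLTS
 * init path elsewhere in this file; the cache name and the interval math
 * here are assumptions, not authoritative values.
 */
#if 0
static void
example_clts_endpnt_init(void)
{
	/* Destroy a zone's endpnt_type_ts when the zone goes away. */
	zone_key_create(&endpnt_destructor_key, NULL, NULL,
	    endpnt_destructor);

	/* Cache of endpnt_t's; the reclaim callback reaps idle endpoints. */
	endpnt_cache = kmem_cache_create("example_endpnt_cache",
	    sizeof (endpnt_t), 0, NULL, NULL, endpnt_repossess, NULL,
	    NULL, 0);

	/* Give the tunables defaults if the administrator left them unset. */
	if (clnt_clts_endpoint_reap_interval == -1)
		clnt_clts_endpoint_reap_interval =
		    DEFAULT_ENDPOINT_REAP_INTERVAL;
	clnt_clts_taskq_dispatch_interval =
	    (clnt_clts_endpoint_reap_interval + DEFAULT_INTERVAL_SHIFT) * hz;
}
#endif
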
/*
 * Endpoint reclaim zone destructor callback routine.
 *
 * After reclaiming any cached entries, we basically go through the endpnt_type
 * list, canceling outstanding timeouts and freeing data structures.
 */
/* ARGSUSED */
static void
endpnt_destructor(zoneid_t zoneid, void *a)
{
	struct endpnt_type **npp;
	struct endpnt_type *np;
	struct endpnt_type *free_list = NULL;
	timeout_id_t t_id = 0;
	extern void clcleanup_zone(zoneid_t);
	extern void clcleanup4_zone(zoneid_t);

	/* Make sure NFS client handles are released. */
	clcleanup_zone(zoneid);
	clcleanup4_zone(zoneid);

	endpnt_reclaim(zoneid);
	/*
	 * We don't need to be holding on to any locks across the call to
	 * endpnt_reclaim() and the code below; we know that no-one can
	 * be holding open connections for this zone (all processes and kernel
	 * threads are gone), so nothing could be adding anything to the list.
	 */
	rw_enter(&endpnt_type_lock, RW_WRITER);
	npp = &endpnt_type_list;
	while ((np = *npp) != NULL) {
		if (np->e_zoneid != zoneid) {
			npp = &np->e_next;
			continue;
		}
		mutex_enter(&np->e_plock);
		mutex_enter(&np->e_ilock);
		if (np->e_itimer != 0) {
			t_id = np->e_itimer;
			np->e_itimer = 0;
		}
		ASSERT(np->e_cnt == 0);
		ASSERT(list_head(&np->e_pool) == NULL);
		ASSERT(list_head(&np->e_ilist) == NULL);

		mutex_exit(&np->e_ilock);
		mutex_exit(&np->e_plock);

		/*
		 * untimeout() any outstanding timers that have not yet fired.
		 */
		if (t_id != (timeout_id_t)0)
			(void) untimeout(t_id);
		*npp = np->e_next;
		np->e_next = free_list;
		free_list = np;
	}
	rw_exit(&endpnt_type_lock);

	while (free_list != NULL) {
		np = free_list;
		free_list = free_list->e_next;
		/*
		 * Wait for threads in endpnt_taskq trying to reap endpnt_t's
		 * in the endpnt_type_t.
		 */
		mutex_enter(&np->e_ilock);
		while (np->e_async_count > 0)
			cv_wait(&np->e_async_cv, &np->e_ilock);
		cv_destroy(&np->e_async_cv);
		mutex_destroy(&np->e_plock);
		mutex_destroy(&np->e_ilock);
		list_destroy(&np->e_pool);
		list_destroy(&np->e_ilist);
		kmem_free(np, sizeof (endpnt_type_t));
	}
}

/*
 * Endpoint reclaim kmem callback routine.
 */
/* ARGSUSED */
static void
endpnt_repossess(void *a)
{
	/*
	 * Reclaim idle endpnt's from all zones.
	 */
	if (endpnt_taskq != NULL)
		(void) taskq_dispatch(endpnt_taskq,
		    (task_func_t *)endpnt_reclaim, (void *)ALL_ZONES,
		    TQ_NOSLEEP);
}
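
/*
 * How a request is matched with its response (a sketch; the blocking caller
 * is typically clnt_clts_kcallit()):
 *
 *	sender				receiver
 *	------				--------
 *	clnt_clts_dispatch_send()	clnt_clts_dispatch_notify()
 *	  hashes the xid with		  recovers the xid from the reply,
 *	  call_hash() and enters the	  looks it up with call_table_find(),
 *	  calllist_t into clts_call_ht	  attaches the reply mblk, and
 *	caller blocks on cp->call_cv	  cv_signal()s cp->call_cv
 *	until notified or timed out
 */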
/*
 * RPC request dispatch routine.  Constructs a datagram message and wraps it
 * around the RPC request to pass downstream.
 */
static int
clnt_clts_dispatch_send(queue_t *q, mblk_t *mp, struct netbuf *addr,
	calllist_t *cp, uint_t xid)
{
	mblk_t *bp;
	int msgsz;
	struct T_unitdata_req *udreq;

	/*
	 * Set up the call record.
	 */
	cp->call_wq = q;
	cp->call_xid = xid;
	cp->call_status = RPC_TIMEDOUT;
	cp->call_notified = FALSE;
	RPCLOG(64,
	    "clnt_clts_dispatch_send: putting xid 0x%x on "
	    "dispatch list\n", xid);
	cp->call_hash = call_hash(xid, clnt_clts_hash_size);
	cp->call_bucket = &clts_call_ht[cp->call_hash];
	call_table_enter(cp);

	/*
	 * Construct the datagram
	 */
	msgsz = (int)TUNITDATAREQSZ;
	while (!(bp = allocb(msgsz + addr->len, BPRI_LO))) {
		if (strwaitbuf(msgsz + addr->len, BPRI_LO))
			return (ENOSR);
	}

	udreq = (struct T_unitdata_req *)bp->b_wptr;
	udreq->PRIM_type = T_UNITDATA_REQ;
	udreq->DEST_length = addr->len;

	if (addr->len) {
		bcopy(addr->buf, bp->b_wptr + msgsz, addr->len);
		udreq->DEST_offset = (t_scalar_t)msgsz;
		msgsz += addr->len;
	} else
		udreq->DEST_offset = 0;
	udreq->OPT_length = 0;
	udreq->OPT_offset = 0;

	bp->b_datap->db_type = M_PROTO;
	bp->b_wptr += msgsz;

	/*
	 * Link the datagram header with the actual data
	 */
	linkb(bp, mp);

	/*
	 * Send downstream.
	 */
	put(cp->call_wq, bp);

	return (0);
}
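
/*
 * The mblk chain handed to put() by clnt_clts_dispatch_send() looks like
 * this (illustrative layout):
 *
 *	bp (M_PROTO):	struct T_unitdata_req followed by the destination
 *			address; PRIM_type = T_UNITDATA_REQ,
 *			DEST_offset = TUNITDATAREQSZ, DEST_length = addr->len,
 *			OPT_length = OPT_offset = 0
 *	bp->b_cont:	mp, the serialized RPC request, attached with linkb()
 */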
/*
 * RPC response delivery routine.  Deliver the response to the waiting
 * thread by matching the xid.
 */
void
clnt_clts_dispatch_notify(mblk_t *mp, int resp_off, zoneid_t zoneid)
{
	calllist_t *e = NULL;
	call_table_t *chtp;
	uint32_t xid;
	uint_t hash;
	unsigned char *hdr_offset;
	mblk_t *resp;

	/*
	 * If the RPC response is not contained in the same mblk as the
	 * datagram header, then move to the next mblk.
	 */
	hdr_offset = mp->b_rptr;
	resp = mp;
	if ((mp->b_wptr - (mp->b_rptr + resp_off)) == 0)
		resp = mp->b_cont;
	else
		resp->b_rptr += resp_off;

	ASSERT(resp != NULL);

	if ((IS_P2ALIGNED(resp->b_rptr, sizeof (uint32_t))) &&
	    (resp->b_wptr - resp->b_rptr) >= sizeof (xid))
		xid = *((uint32_t *)resp->b_rptr);
	else {
		int i = 0;
		unsigned char *p = (unsigned char *)&xid;
		unsigned char *rptr;
		mblk_t *tmp = resp;

		/*
		 * Copy the xid, byte-by-byte into xid.
		 */
		while (tmp) {
			rptr = tmp->b_rptr;
			while (rptr < tmp->b_wptr) {
				*p++ = *rptr++;
				if (++i >= sizeof (xid))
					goto done_xid_copy;
			}
			tmp = tmp->b_cont;
		}

		/*
		 * If we got here, we ran out of mblk space before the
		 * xid could be copied.
		 */
		ASSERT(tmp == NULL && i < sizeof (xid));

		RPCLOG0(1,
		    "clnt_dispatch_notify(clts): message less than "
		    "size of xid\n");

		freemsg(mp);
		return;
	}

done_xid_copy:

	/*
	 * Reset the read pointer back to the beginning of the protocol
	 * header if we moved it.
	 */
	if (mp->b_rptr != hdr_offset)
		mp->b_rptr = hdr_offset;

	hash = call_hash(xid, clnt_clts_hash_size);
	chtp = &clts_call_ht[hash];
	/* call_table_find returns with the hash bucket locked */
	call_table_find(chtp, xid, e);

	if (e != NULL) {
		mutex_enter(&e->call_lock);
		/*
		 * Found a thread waiting for this reply; discard any stale
		 * reply still attached to it before delivering the new one.
		 */
		if (e->call_reply) {
			RPCLOG(8,
			    "clnt_dispatch_notify (clts): discarding old "
			    "reply for xid 0x%x\n",
			    xid);
			freemsg(e->call_reply);
		}
		e->call_notified = TRUE;
		e->call_reply = mp;
		e->call_status = RPC_SUCCESS;
		cv_signal(&e->call_cv);
		mutex_exit(&e->call_lock);
		mutex_exit(&chtp->ct_lock);
	} else {
		zone_t *zone;
		struct rpcstat *rpcstat;

		mutex_exit(&chtp->ct_lock);
		RPCLOG(8, "clnt_dispatch_notify (clts): no caller for reply "
		    "0x%x\n", xid);
		freemsg(mp);
		/*
		 * This is unfortunate, but we need to lookup the zone so we
		 * can increment its "rcbadxids" counter.
		 */
		zone = zone_find_by_id(zoneid);
		if (zone == NULL) {
			/*
			 * The zone went away...
			 */
			return;
		}
		rpcstat = zone_getspecific(rpcstat_zone_key, zone);
		if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN) {
			/*
			 * Not interested
			 */
			zone_rele(zone);
			return;
		}
		RCSTAT_INCR(rpcstat->rpc_clts_client, rcbadxids);
		zone_rele(zone);
	}
}

/*
 * Init routine.  Called when rpcmod is loaded.
 */
void
clnt_clts_init(void)
{
	endpnt_cache = kmem_cache_create("clnt_clts_endpnt_cache",
	    sizeof (struct endpnt), 0, NULL, NULL, endpnt_repossess, NULL,
	    NULL, 0);

	rw_init(&endpnt_type_lock, NULL, RW_DEFAULT, NULL);

	/*
	 * Perform simple bounds checking to make sure that the setting is
	 * reasonable
	 */
	if (clnt_clts_max_endpoints <= 0) {
		if (clnt_clts_do_bindresvport)
			clnt_clts_max_endpoints = RESERVED_PORTSPACE;
		else
			clnt_clts_max_endpoints = NONRESERVED_PORTSPACE;
	}

	if (clnt_clts_do_bindresvport &&
	    clnt_clts_max_endpoints > RESERVED_PORTSPACE)
		clnt_clts_max_endpoints = RESERVED_PORTSPACE;
	else if (clnt_clts_max_endpoints > NONRESERVED_PORTSPACE)
		clnt_clts_max_endpoints = NONRESERVED_PORTSPACE;

	if (clnt_clts_hash_size < DEFAULT_MIN_HASH_SIZE)
		clnt_clts_hash_size = DEFAULT_MIN_HASH_SIZE;

	/*
	 * Defer creating the taskq until rpcmod gets pushed.  If we are
	 * in diskless boot mode, rpcmod will get loaded early even before
	 * thread_create() is available.
	 */
	endpnt_taskq = NULL;
	taskq_created = FALSE;
	mutex_init(&endpnt_taskq_lock, NULL, MUTEX_DEFAULT, NULL);

	if (clnt_clts_endpoint_reap_interval < DEFAULT_ENDPOINT_REAP_INTERVAL)
		clnt_clts_endpoint_reap_interval =
		    DEFAULT_ENDPOINT_REAP_INTERVAL;

	/*
	 * Dispatch the taskq at an interval which is offset from the
	 * interval that the endpoints should be reaped.
	 */
	clnt_clts_taskq_dispatch_interval =
	    (clnt_clts_endpoint_reap_interval + DEFAULT_INTERVAL_SHIFT) * hz;

	/*
	 * Initialize the completion queue
	 */
	clts_call_ht = call_table_init(clnt_clts_hash_size);
	/*
	 * Initialize the zone destructor callback.
	 */
	zone_key_create(&endpnt_destructor_key, NULL, NULL, endpnt_destructor);
}

void
clnt_clts_fini(void)
{
	(void) zone_key_delete(endpnt_destructor_key);
}
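
/*
 * Example: the endpoint tunables bounds-checked in clnt_clts_init() can
 * typically be set from /etc/system before the module loads (the rpcmod
 * module name and the values below are illustrative assumptions):
 *
 *	set rpcmod:clnt_clts_max_endpoints = 128
 *	set rpcmod:clnt_clts_hash_size = 1024
 *	set rpcmod:clnt_clts_endpoint_reap_interval = 120
 *
 * Out-of-range settings are silently clamped by clnt_clts_init() rather
 * than rejected.
 */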