/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
 * All Rights Reserved
 */

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * Implements a kernel-based, client-side RPC over Connection Oriented
 * Transports (COTS).
 */

/*
 * Much of this file has been re-written to let NFS work better over slow
 * transports. A description follows.
 *
 * One of the annoying things about kRPC/COTS is that it will temporarily
 * create more than one connection between a client and server. This
 * happens because when a connection is made, the end-point's entry in the
 * linked list of connections (headed by cm_hd) is removed so that other
 * threads don't mess with it. We went ahead and bit the bullet by keeping
 * the end-point on the connection list and introducing state bits,
 * condition variables, etc. to the connection entry data structure (struct
 * cm_xprt).
 *
 * Here is a summary of the changes to cm_xprt:
 *
 *	x_ctime is the timestamp of when the end-point was last
 *	connected or disconnected. If an end-point is ever disconnected
 *	or re-connected, then any outstanding RPC request is presumed
 *	lost, telling clnt_cots_kcallit that it needs to re-send the
 *	request, not just wait for the original request's reply to
 *	arrive.
 *
 *	x_thread flag which tells us if a thread is doing a connection
 *	attempt.
 *
 *	x_waitdis flag which tells us we are waiting for a disconnect ACK.
 *
 *	x_needdis flag which tells us we need to send a T_DISCONN_REQ
 *	to kill the connection.
 *
 *	x_needrel flag which tells us we need to send a T_ORDREL_REQ to
 *	gracefully close the connection.
 *
 *	#defined bitmasks for all the b_* bits so that more
 *	efficient (and at times less clumsy) masks can be used to
 *	manipulate state in cases where multiple bits have to be
 *	set/cleared/checked in the same critical section.
 *
 *	x_conn_cv and x_dis_cv are new condition variables to let
 *	threads know when the connection attempt is done, and to let
 *	the connecting thread know when the disconnect handshake is
 *	done.
 *
 * Added the CONN_HOLD() macro so that all reference holds have the same
 * look and feel.
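 *
 * As a hedged illustration (this sketch is not from the original text;
 * it restates how CONN_HOLD() pairs with connmgr_release() elsewhere
 * in this file):
 *
 *	mutex_enter(&connmgr_lock);
 *	... locate cm_entry on the cm_hd list ...
 *	CONN_HOLD(cm_entry);		(bumps x_ref under x_lock)
 *	mutex_exit(&connmgr_lock);
 *	... use the transport, e.g. cm_entry->x_wq ...
 *	connmgr_release(cm_entry);	(drops x_ref, may signal waiters)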
 *
 * In the private (cku_private) portion of the client handle,
 *
 *	cku_flags replaces cku_sent, a boolean. cku_flags keeps
 *	track of whether a request has been sent, and whether the
 *	client handle's call record is on the dispatch list (so that
 *	the reply can be matched by XID to the right client handle).
 *	The idea of CKU_ONQUEUE is that we can exit clnt_cots_kcallit()
 *	and still have the response find the right client handle so
 *	that the retry of CLNT_CALL() gets the result. Testing found
 *	situations where, if the timeout was increased, performance
 *	degraded. This was due to us hitting a window where the thread
 *	was back in rfscall() (probably printing "server not responding")
 *	while the response came back but had no place to go.
 *
 *	cku_ctime is just a cache of x_ctime. If they match,
 *	clnt_cots_kcallit() won't send a retry (unless the maximum
 *	receive count limit has been reached). If they don't match, then
 *	we assume the request has been lost, and a retry of the request
 *	is needed.
 *
 *	cku_recv_attempts counts the number of receive attempts made
 *	after one try is sent on the wire.
 *
 * Added the clnt_delay() routine so that interruptible and
 * noninterruptible delays are possible.
 *
 * CLNT_MIN_TIMEOUT has been bumped to 10 seconds from 3. This is used to
 * control how long the client delays before returning after getting
 * ECONNREFUSED. At 3 seconds, 8 client threads per mount really do bash
 * a server that may be booting and not yet running nfsd.
 *
 * CLNT_MAXRECV_WITHOUT_RETRY is a new macro (value of 3, with a tunable)
 * that limits how many times we will try to receive a reply before
 * re-sending the request. Why don't we just wait forever (receive an
 * infinite # of times)? Because the server may have rebooted. More
 * insidious is that some servers (ours) will drop NFS/TCP requests in
 * some cases. This is bad, but it is a reality.
 *
 * The case of a server doing orderly release really messes up the
 * client's recovery, especially if the server's TCP implementation is
 * buggy. It was found that the kRPC/COTS client was breaking some
 * TPI rules, such as not waiting for the acknowledgement of a
 * T_DISCON_REQ (hence the added case statements T_ERROR_ACK, T_OK_ACK and
 * T_DISCON_REQ in clnt_dispatch_notifyall()).
 *
 * One of the things that we've seen is that a kRPC TCP endpoint goes into
 * TIMEWAIT, and thus a reconnect takes a long time to satisfy because
 * the TIMEWAIT state takes a while to finish. If a server sends a
 * T_ORDREL_IND, there is little point in an RPC client doing a
 * T_ORDREL_REQ, because the RPC request isn't going to make it (the
 * server is saying that it won't accept any more data). So kRPC was
 * changed to send a T_DISCON_REQ when we get a T_ORDREL_IND. So now the
 * connection skips the TIMEWAIT state and goes straight to a bound state
 * that kRPC can quickly switch to connected.
 *
 * Code that issues TPI requests must use waitforack() to wait for the
 * corresponding ack (assuming there is one) in any future modifications.
 * This works around problems that may be introduced by breaking TPI rules
 * (by submitting new calls before earlier requests have been acked) in the
 * case of a signal or other early return. waitforack() depends on
 * clnt_dispatch_notifyconn() to issue the wakeup when the ack
 * arrives, so adding new TPI calls may require corresponding changes
 * to clnt_dispatch_notifyconn(). Presently, the timeout period is based on
 * CLNT_MIN_TIMEOUT, which is 10 seconds. If you modify this value, be
 * sure not to set it too low or TPI ACKs will be lost.
 */
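
/*
 * For orientation, a hedged sketch of a consumer (the variable names
 * and calling context are assumed, not taken from this file): a kernel
 * caller creates a COTS handle with clnt_cots_kcreate() and then
 * drives it through the standard CLNT_* macros, which dispatch through
 * the tcp_ops vector defined below.
 *
 *	CLIENT *h;
 *	struct timeval wait = { 30, 0 };
 *	enum clnt_stat stat;
 *
 *	if (clnt_cots_kcreate(dev, &addr, AF_INET, prog, vers,
 *	    0, cred, &h) == 0) {
 *		stat = CLNT_CALL(h, proc, xdr_args, (caddr_t)&args,
 *		    xdr_res, (caddr_t)&res, wait);
 *		CLNT_DESTROY(h);
 *	}
 */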

#include <sys/param.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/file.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/timod.h>
#include <sys/tiuser.h>
#include <sys/tihdr.h>
#include <sys/t_kuser.h>
#include <sys/fcntl.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/kstat.h>
#include <sys/t_lock.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/isa_defs.h>
#include <sys/callb.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/sdt.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <rpc/types.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>

#define	COTS_DEFAULT_ALLOCSIZE	2048

#define	WIRE_HDR_SIZE	20	/* serialized call header, sans proc number */
#define	MSG_OFFSET	128	/* offset of call into the mblk */

const char *kinet_ntop6(uchar_t *, char *, size_t);

static int	clnt_cots_ksettimers(CLIENT *, struct rpc_timers *,
    struct rpc_timers *, int, void (*)(int, int, caddr_t), caddr_t, uint32_t);
static enum clnt_stat	clnt_cots_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
    caddr_t, xdrproc_t, caddr_t, struct timeval);
static void	clnt_cots_kabort(CLIENT *);
static void	clnt_cots_kerror(CLIENT *, struct rpc_err *);
static bool_t	clnt_cots_kfreeres(CLIENT *, xdrproc_t, caddr_t);
static void	clnt_cots_kdestroy(CLIENT *);
static bool_t	clnt_cots_kcontrol(CLIENT *, int, char *);


/* List of transports managed by the connection manager. */
struct cm_xprt {
	TIUSER		*x_tiptr;	/* transport handle */
	queue_t		*x_wq;		/* send queue */
	clock_t		x_time;		/* last time we handed this xprt out */
	clock_t		x_ctime;	/* time we went to CONNECTED */
	int		x_tidu_size;	/* TIDU size of this transport */
	union {
	    struct {
		unsigned int
#ifdef _BIT_FIELDS_HTOL
		b_closing:	1,	/* we've sent a ord rel on this conn */
		b_dead:		1,	/* transport is closed or disconn */
		b_doomed:	1,	/* too many conns, let this go idle */
		b_connected:	1,	/* this connection is connected */

		b_ordrel:	1,	/* do an orderly release? */
		b_thread:	1,	/* thread doing connect */
		b_waitdis:	1,	/* waiting for disconnect ACK */
		b_needdis:	1,	/* need T_DISCON_REQ */

		b_needrel:	1,	/* need T_ORDREL_REQ */
		b_early_disc:	1,	/* got a T_ORDREL_IND or T_DISCON_IND */
					/* disconnect during connect */

		b_pad:		22;

#endif

#ifdef _BIT_FIELDS_LTOH
		b_pad:		22,

		b_early_disc:	1,	/* got a T_ORDREL_IND or T_DISCON_IND */
					/* disconnect during connect */
		b_needrel:	1,	/* need T_ORDREL_REQ */

		b_needdis:	1,	/* need T_DISCON_REQ */
		b_waitdis:	1,	/* waiting for disconnect ACK */
		b_thread:	1,	/* thread doing connect */
		b_ordrel:	1,	/* do an orderly release?
*/ 252 253 b_connected: 1, /* this connection is connected */ 254 b_doomed: 1, /* too many conns, let this go idle */ 255 b_dead: 1, /* transport is closed or disconn */ 256 b_closing: 1; /* we've sent a ord rel on this conn */ 257 #endif 258 } bit; unsigned int word; 259 260 #define x_closing x_state.bit.b_closing 261 #define x_dead x_state.bit.b_dead 262 #define x_doomed x_state.bit.b_doomed 263 #define x_connected x_state.bit.b_connected 264 265 #define x_ordrel x_state.bit.b_ordrel 266 #define x_thread x_state.bit.b_thread 267 #define x_waitdis x_state.bit.b_waitdis 268 #define x_needdis x_state.bit.b_needdis 269 270 #define x_needrel x_state.bit.b_needrel 271 #define x_early_disc x_state.bit.b_early_disc 272 273 #define x_state_flags x_state.word 274 275 #define X_CLOSING 0x80000000 276 #define X_DEAD 0x40000000 277 #define X_DOOMED 0x20000000 278 #define X_CONNECTED 0x10000000 279 280 #define X_ORDREL 0x08000000 281 #define X_THREAD 0x04000000 282 #define X_WAITDIS 0x02000000 283 #define X_NEEDDIS 0x01000000 284 285 #define X_NEEDREL 0x00800000 286 #define X_EARLYDISC 0x00400000 287 288 #define X_BADSTATES (X_CLOSING | X_DEAD | X_DOOMED) 289 290 } x_state; 291 int x_ref; /* number of users of this xprt */ 292 int x_family; /* address family of transport */ 293 dev_t x_rdev; /* device number of transport */ 294 struct cm_xprt *x_next; 295 296 struct netbuf x_server; /* destination address */ 297 struct netbuf x_src; /* src address (for retries) */ 298 kmutex_t x_lock; /* lock on this entry */ 299 kcondvar_t x_cv; /* to signal when can be closed */ 300 kcondvar_t x_conn_cv; /* to signal when connection attempt */ 301 /* is complete */ 302 kstat_t *x_ksp; 303 304 kcondvar_t x_dis_cv; /* to signal when disconnect attempt */ 305 /* is complete */ 306 zoneid_t x_zoneid; /* zone this xprt belongs to */ 307 }; 308 309 typedef struct cm_kstat_xprt { 310 kstat_named_t x_wq; 311 kstat_named_t x_server; 312 kstat_named_t x_family; 313 kstat_named_t x_rdev; 314 kstat_named_t x_time; 315 kstat_named_t x_state; 316 kstat_named_t x_ref; 317 kstat_named_t x_port; 318 } cm_kstat_xprt_t; 319 320 static cm_kstat_xprt_t cm_kstat_template = { 321 { "write_queue", KSTAT_DATA_UINT32 }, 322 { "server", KSTAT_DATA_STRING }, 323 { "addr_family", KSTAT_DATA_UINT32 }, 324 { "device", KSTAT_DATA_UINT32 }, 325 { "time_stamp", KSTAT_DATA_UINT32 }, 326 { "status", KSTAT_DATA_UINT32 }, 327 { "ref_count", KSTAT_DATA_INT32 }, 328 { "port", KSTAT_DATA_UINT32 }, 329 }; 330 331 /* 332 * The inverse of this is connmgr_release(). 333 */ 334 #define CONN_HOLD(Cm_entry) {\ 335 mutex_enter(&(Cm_entry)->x_lock); \ 336 (Cm_entry)->x_ref++; \ 337 mutex_exit(&(Cm_entry)->x_lock); \ 338 } 339 340 341 /* 342 * Private data per rpc handle. This structure is allocated by 343 * clnt_cots_kcreate, and freed by clnt_cots_kdestroy. 
344 */ 345 typedef struct cku_private_s { 346 CLIENT cku_client; /* client handle */ 347 calllist_t cku_call; /* for dispatching calls */ 348 struct rpc_err cku_err; /* error status */ 349 350 struct netbuf cku_srcaddr; /* source address for retries */ 351 int cku_addrfmly; /* for binding port */ 352 struct netbuf cku_addr; /* remote address */ 353 dev_t cku_device; /* device to use */ 354 uint_t cku_flags; 355 #define CKU_ONQUEUE 0x1 356 #define CKU_SENT 0x2 357 358 bool_t cku_progress; /* for CLSET_PROGRESS */ 359 uint32_t cku_xid; /* current XID */ 360 clock_t cku_ctime; /* time stamp of when */ 361 /* connection was created */ 362 uint_t cku_recv_attempts; 363 XDR cku_outxdr; /* xdr routine for output */ 364 XDR cku_inxdr; /* xdr routine for input */ 365 char cku_rpchdr[WIRE_HDR_SIZE + 4]; 366 /* pre-serialized rpc header */ 367 368 uint_t cku_outbuflen; /* default output mblk length */ 369 struct cred *cku_cred; /* credentials */ 370 bool_t cku_nodelayonerr; 371 /* for CLSET_NODELAYONERR */ 372 int cku_useresvport; /* Use reserved port */ 373 struct rpc_cots_client *cku_stats; /* stats for zone */ 374 } cku_private_t; 375 376 static struct cm_xprt *connmgr_wrapconnect(struct cm_xprt *, 377 const struct timeval *, struct netbuf *, int, struct netbuf *, 378 struct rpc_err *, bool_t, bool_t, cred_t *); 379 380 static bool_t connmgr_connect(struct cm_xprt *, queue_t *, struct netbuf *, 381 int, calllist_t *, int *, bool_t reconnect, 382 const struct timeval *, bool_t, cred_t *); 383 384 static bool_t connmgr_setopt(queue_t *, int, int, calllist_t *, cred_t *cr); 385 static void connmgr_sndrel(struct cm_xprt *); 386 static void connmgr_snddis(struct cm_xprt *); 387 static void connmgr_close(struct cm_xprt *); 388 static void connmgr_release(struct cm_xprt *); 389 static struct cm_xprt *connmgr_wrapget(struct netbuf *, const struct timeval *, 390 cku_private_t *); 391 392 static struct cm_xprt *connmgr_get(struct netbuf *, const struct timeval *, 393 struct netbuf *, int, struct netbuf *, struct rpc_err *, dev_t, 394 bool_t, int, cred_t *); 395 396 static void connmgr_cancelconn(struct cm_xprt *); 397 static enum clnt_stat connmgr_cwait(struct cm_xprt *, const struct timeval *, 398 bool_t); 399 static void connmgr_dis_and_wait(struct cm_xprt *); 400 401 static int clnt_dispatch_send(queue_t *, mblk_t *, calllist_t *, uint_t, 402 uint_t); 403 404 static int clnt_delay(clock_t, bool_t); 405 406 static int waitforack(calllist_t *, t_scalar_t, const struct timeval *, bool_t); 407 408 /* 409 * Operations vector for TCP/IP based RPC 410 */ 411 static struct clnt_ops tcp_ops = { 412 clnt_cots_kcallit, /* do rpc call */ 413 clnt_cots_kabort, /* abort call */ 414 clnt_cots_kerror, /* return error status */ 415 clnt_cots_kfreeres, /* free results */ 416 clnt_cots_kdestroy, /* destroy rpc handle */ 417 clnt_cots_kcontrol, /* the ioctl() of rpc */ 418 clnt_cots_ksettimers, /* set retry timers */ 419 }; 420 421 static int rpc_kstat_instance = 0; /* keeps the current instance */ 422 /* number for the next kstat_create */ 423 424 static struct cm_xprt *cm_hd = NULL; 425 static kmutex_t connmgr_lock; /* for connection mngr's list of transports */ 426 427 extern kmutex_t clnt_max_msg_lock; 428 429 static calllist_t *clnt_pending = NULL; 430 extern kmutex_t clnt_pending_lock; 431 432 static int clnt_cots_hash_size = DEFAULT_HASH_SIZE; 433 434 static call_table_t *cots_call_ht; 435 436 static const struct rpc_cots_client { 437 kstat_named_t rccalls; 438 kstat_named_t rcbadcalls; 439 kstat_named_t rcbadxids; 
	kstat_named_t	rctimeouts;
	kstat_named_t	rcnewcreds;
	kstat_named_t	rcbadverfs;
	kstat_named_t	rctimers;
	kstat_named_t	rccantconn;
	kstat_named_t	rcnomem;
	kstat_named_t	rcintrs;
} cots_rcstat_tmpl = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "badxids",	KSTAT_DATA_UINT64 },
	{ "timeouts",	KSTAT_DATA_UINT64 },
	{ "newcreds",	KSTAT_DATA_UINT64 },
	{ "badverfs",	KSTAT_DATA_UINT64 },
	{ "timers",	KSTAT_DATA_UINT64 },
	{ "cantconn",	KSTAT_DATA_UINT64 },
	{ "nomem",	KSTAT_DATA_UINT64 },
	{ "interrupts", KSTAT_DATA_UINT64 }
};

#define	COTSRCSTAT_INCR(p, x)	\
	atomic_add_64(&(p)->x.value.ui64, 1)

#define	CLNT_MAX_CONNS	1	/* concurrent connections between clnt/srvr */
int clnt_max_conns = CLNT_MAX_CONNS;

#define	CLNT_MIN_TIMEOUT	10	/* seconds to wait after we get a */
					/* connection reset */
#define	CLNT_MIN_CONNTIMEOUT	5	/* seconds to wait for a connection */


int clnt_cots_min_tout = CLNT_MIN_TIMEOUT;
int clnt_cots_min_conntout = CLNT_MIN_CONNTIMEOUT;

/*
 * Limit the number of times we will attempt to receive a reply without
 * re-sending the request.
 */
#define	CLNT_MAXRECV_WITHOUT_RETRY	3
uint_t clnt_cots_maxrecv = CLNT_MAXRECV_WITHOUT_RETRY;

uint_t *clnt_max_msg_sizep;
void (*clnt_stop_idle)(queue_t *wq);

#define	ptoh(p)	(&((p)->cku_client))
#define	htop(h)	((cku_private_t *)((h)->cl_private))

/*
 * Times to retry
 */
#define	REFRESHES	2	/* authentication refreshes */

/*
 * The following is used to determine the global default behavior for
 * COTS when binding to a local port.
 *
 * If the value is set to 1 the default will be to select a reserved
 * (aka privileged) port; if the value is zero the default will be to
 * use non-reserved ports. Users of kRPC may override this by using
 * CLNT_CONTROL() and CLSET_BINDRESVPORT.
 */
int clnt_cots_do_bindresvport = 1;

static zone_key_t zone_cots_key;

/*
 * We need to do this after all kernel threads in the zone have exited.
 */
/* ARGSUSED */
static void
clnt_zone_destroy(zoneid_t zoneid, void *unused)
{
	struct cm_xprt **cmp;
	struct cm_xprt *cm_entry;
	struct cm_xprt *freelist = NULL;

	mutex_enter(&connmgr_lock);
	cmp = &cm_hd;
	while ((cm_entry = *cmp) != NULL) {
		if (cm_entry->x_zoneid == zoneid) {
			*cmp = cm_entry->x_next;
			cm_entry->x_next = freelist;
			freelist = cm_entry;
		} else {
			cmp = &cm_entry->x_next;
		}
	}
	mutex_exit(&connmgr_lock);
	while ((cm_entry = freelist) != NULL) {
		freelist = cm_entry->x_next;
		connmgr_close(cm_entry);
	}
}

int
clnt_cots_kcreate(dev_t dev, struct netbuf *addr, int family, rpcprog_t prog,
    rpcvers_t vers, uint_t max_msgsize, cred_t *cred, CLIENT **ncl)
{
	CLIENT *h;
	cku_private_t *p;
	struct rpc_msg call_msg;
	struct rpcstat *rpcstat;

	RPCLOG(8, "clnt_cots_kcreate: prog %u\n", prog);

	rpcstat = zone_getspecific(rpcstat_zone_key, rpc_zone());
	ASSERT(rpcstat != NULL);

	/* Allocate and initialize the client handle.
*/ 549 p = kmem_zalloc(sizeof (*p), KM_SLEEP); 550 551 h = ptoh(p); 552 553 h->cl_private = (caddr_t)p; 554 h->cl_auth = authkern_create(); 555 h->cl_ops = &tcp_ops; 556 557 cv_init(&p->cku_call.call_cv, NULL, CV_DEFAULT, NULL); 558 mutex_init(&p->cku_call.call_lock, NULL, MUTEX_DEFAULT, NULL); 559 560 /* 561 * If the current sanity check size in rpcmod is smaller 562 * than the size needed, then increase the sanity check. 563 */ 564 if (max_msgsize != 0 && clnt_max_msg_sizep != NULL && 565 max_msgsize > *clnt_max_msg_sizep) { 566 mutex_enter(&clnt_max_msg_lock); 567 if (max_msgsize > *clnt_max_msg_sizep) 568 *clnt_max_msg_sizep = max_msgsize; 569 mutex_exit(&clnt_max_msg_lock); 570 } 571 572 p->cku_outbuflen = COTS_DEFAULT_ALLOCSIZE; 573 574 /* Preserialize the call message header */ 575 576 call_msg.rm_xid = 0; 577 call_msg.rm_direction = CALL; 578 call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; 579 call_msg.rm_call.cb_prog = prog; 580 call_msg.rm_call.cb_vers = vers; 581 582 xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, WIRE_HDR_SIZE, XDR_ENCODE); 583 584 if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) { 585 RPCLOG0(1, "clnt_cots_kcreate - Fatal header serialization " 586 "error\n"); 587 auth_destroy(h->cl_auth); 588 kmem_free(p, sizeof (cku_private_t)); 589 RPCLOG0(1, "clnt_cots_kcreate: create failed error EINVAL\n"); 590 return (EINVAL); /* XXX */ 591 } 592 593 /* 594 * The zalloc initialized the fields below. 595 * p->cku_xid = 0; 596 * p->cku_flags = 0; 597 * p->cku_srcaddr.len = 0; 598 * p->cku_srcaddr.maxlen = 0; 599 */ 600 601 p->cku_cred = cred; 602 p->cku_device = dev; 603 p->cku_addrfmly = family; 604 p->cku_addr.buf = kmem_zalloc(addr->maxlen, KM_SLEEP); 605 p->cku_addr.maxlen = addr->maxlen; 606 p->cku_addr.len = addr->len; 607 bcopy(addr->buf, p->cku_addr.buf, addr->len); 608 p->cku_stats = rpcstat->rpc_cots_client; 609 p->cku_useresvport = -1; /* value is has not been set */ 610 611 *ncl = h; 612 return (0); 613 } 614 615 /*ARGSUSED*/ 616 static void 617 clnt_cots_kabort(CLIENT *h) 618 { 619 } 620 621 /* 622 * Return error info on this handle. 
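 * Callers normally reach this through the CLNT_GETERR() macro; a
 * hedged example of the usual caller-side pattern (h is an assumed
 * handle, not a name from this file):
 *
 *	struct rpc_err err;
 *
 *	CLNT_GETERR(h, &err);
 *	if (err.re_status != RPC_SUCCESS)
 *		return (err.re_errno);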
623 */ 624 static void 625 clnt_cots_kerror(CLIENT *h, struct rpc_err *err) 626 { 627 /* LINTED pointer alignment */ 628 cku_private_t *p = htop(h); 629 630 *err = p->cku_err; 631 } 632 633 static bool_t 634 clnt_cots_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr) 635 { 636 /* LINTED pointer alignment */ 637 cku_private_t *p = htop(h); 638 XDR *xdrs; 639 640 xdrs = &(p->cku_outxdr); 641 xdrs->x_op = XDR_FREE; 642 return ((*xdr_res)(xdrs, res_ptr)); 643 } 644 645 static bool_t 646 clnt_cots_kcontrol(CLIENT *h, int cmd, char *arg) 647 { 648 cku_private_t *p = htop(h); 649 650 switch (cmd) { 651 case CLSET_PROGRESS: 652 p->cku_progress = TRUE; 653 return (TRUE); 654 655 case CLSET_XID: 656 if (arg == NULL) 657 return (FALSE); 658 659 p->cku_xid = *((uint32_t *)arg); 660 return (TRUE); 661 662 case CLGET_XID: 663 if (arg == NULL) 664 return (FALSE); 665 666 *((uint32_t *)arg) = p->cku_xid; 667 return (TRUE); 668 669 case CLSET_NODELAYONERR: 670 if (arg == NULL) 671 return (FALSE); 672 673 if (*((bool_t *)arg) == TRUE) { 674 p->cku_nodelayonerr = TRUE; 675 return (TRUE); 676 } 677 if (*((bool_t *)arg) == FALSE) { 678 p->cku_nodelayonerr = FALSE; 679 return (TRUE); 680 } 681 return (FALSE); 682 683 case CLGET_NODELAYONERR: 684 if (arg == NULL) 685 return (FALSE); 686 687 *((bool_t *)arg) = p->cku_nodelayonerr; 688 return (TRUE); 689 690 case CLSET_BINDRESVPORT: 691 if (arg == NULL) 692 return (FALSE); 693 694 if (*(int *)arg != 1 && *(int *)arg != 0) 695 return (FALSE); 696 697 p->cku_useresvport = *(int *)arg; 698 699 return (TRUE); 700 701 case CLGET_BINDRESVPORT: 702 if (arg == NULL) 703 return (FALSE); 704 705 *(int *)arg = p->cku_useresvport; 706 707 return (TRUE); 708 709 default: 710 return (FALSE); 711 } 712 } 713 714 /* 715 * Destroy rpc handle. Frees the space used for output buffer, 716 * private data, and handle structure. 717 */ 718 static void 719 clnt_cots_kdestroy(CLIENT *h) 720 { 721 /* LINTED pointer alignment */ 722 cku_private_t *p = htop(h); 723 calllist_t *call = &p->cku_call; 724 725 RPCLOG(8, "clnt_cots_kdestroy h: %p\n", (void *)h); 726 RPCLOG(8, "clnt_cots_kdestroy h: xid=0x%x\n", p->cku_xid); 727 728 if (p->cku_flags & CKU_ONQUEUE) { 729 RPCLOG(64, "clnt_cots_kdestroy h: removing call for xid 0x%x " 730 "from dispatch list\n", p->cku_xid); 731 call_table_remove(call); 732 } 733 734 if (call->call_reply) 735 freemsg(call->call_reply); 736 cv_destroy(&call->call_cv); 737 mutex_destroy(&call->call_lock); 738 739 kmem_free(p->cku_srcaddr.buf, p->cku_srcaddr.maxlen); 740 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen); 741 kmem_free(p, sizeof (*p)); 742 } 743 744 static int clnt_cots_pulls; 745 #define RM_HDR_SIZE 4 /* record mark header size */ 746 747 /* 748 * Call remote procedure. 
749 */ 750 static enum clnt_stat 751 clnt_cots_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args, 752 caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, struct timeval wait) 753 { 754 /* LINTED pointer alignment */ 755 cku_private_t *p = htop(h); 756 calllist_t *call = &p->cku_call; 757 XDR *xdrs; 758 struct rpc_msg reply_msg; 759 mblk_t *mp; 760 #ifdef RPCDEBUG 761 clock_t time_sent; 762 #endif 763 struct netbuf *retryaddr; 764 struct cm_xprt *cm_entry = NULL; 765 queue_t *wq; 766 int len, waitsecs, max_waitsecs; 767 int mpsize; 768 int refreshes = REFRESHES; 769 int interrupted; 770 int tidu_size; 771 enum clnt_stat status; 772 struct timeval cwait; 773 bool_t delay_first = FALSE; 774 clock_t ticks; 775 776 RPCLOG(2, "clnt_cots_kcallit, procnum %u\n", procnum); 777 COTSRCSTAT_INCR(p->cku_stats, rccalls); 778 779 RPCLOG(2, "clnt_cots_kcallit: wait.tv_sec: %ld\n", wait.tv_sec); 780 RPCLOG(2, "clnt_cots_kcallit: wait.tv_usec: %ld\n", wait.tv_usec); 781 /* 782 * Bug ID 1240234: 783 * Look out for zero length timeouts. We don't want to 784 * wait zero seconds for a connection to be established. 785 */ 786 if (wait.tv_sec < clnt_cots_min_conntout) { 787 cwait.tv_sec = clnt_cots_min_conntout; 788 cwait.tv_usec = 0; 789 RPCLOG(8, "clnt_cots_kcallit: wait.tv_sec (%ld) too low,", 790 wait.tv_sec); 791 RPCLOG(8, " setting to: %d\n", clnt_cots_min_conntout); 792 } else { 793 cwait = wait; 794 } 795 796 call_again: 797 if (cm_entry) { 798 connmgr_release(cm_entry); 799 cm_entry = NULL; 800 } 801 802 mp = NULL; 803 804 /* 805 * If the call is not a retry, allocate a new xid and cache it 806 * for future retries. 807 * Bug ID 1246045: 808 * Treat call as a retry for purposes of binding the source 809 * port only if we actually attempted to send anything on 810 * the previous call. 811 */ 812 if (p->cku_xid == 0) { 813 p->cku_xid = alloc_xid(); 814 call->call_zoneid = rpc_zoneid(); 815 816 /* 817 * We need to ASSERT here that our xid != 0 because this 818 * determines whether or not our call record gets placed on 819 * the hash table or the linked list. By design, we mandate 820 * that RPC calls over cots must have xid's != 0, so we can 821 * ensure proper management of the hash table. 822 */ 823 ASSERT(p->cku_xid != 0); 824 825 retryaddr = NULL; 826 p->cku_flags &= ~CKU_SENT; 827 828 if (p->cku_flags & CKU_ONQUEUE) { 829 RPCLOG(8, "clnt_cots_kcallit: new call, dequeuing old" 830 " one (%p)\n", (void *)call); 831 call_table_remove(call); 832 p->cku_flags &= ~CKU_ONQUEUE; 833 RPCLOG(64, "clnt_cots_kcallit: removing call from " 834 "dispatch list because xid was zero (now 0x%x)\n", 835 p->cku_xid); 836 } 837 838 if (call->call_reply != NULL) { 839 freemsg(call->call_reply); 840 call->call_reply = NULL; 841 } 842 } else if (p->cku_srcaddr.buf == NULL || p->cku_srcaddr.len == 0) { 843 retryaddr = NULL; 844 845 } else if (p->cku_flags & CKU_SENT) { 846 retryaddr = &p->cku_srcaddr; 847 848 } else { 849 /* 850 * Bug ID 1246045: Nothing was sent, so set retryaddr to 851 * NULL and let connmgr_get() bind to any source port it 852 * can get. 
853 */ 854 retryaddr = NULL; 855 } 856 857 RPCLOG(64, "clnt_cots_kcallit: xid = 0x%x", p->cku_xid); 858 RPCLOG(64, " flags = 0x%x\n", p->cku_flags); 859 860 p->cku_err.re_status = RPC_TIMEDOUT; 861 p->cku_err.re_errno = p->cku_err.re_terrno = 0; 862 863 cm_entry = connmgr_wrapget(retryaddr, &cwait, p); 864 865 if (cm_entry == NULL) { 866 RPCLOG(1, "clnt_cots_kcallit: can't connect status %s\n", 867 clnt_sperrno(p->cku_err.re_status)); 868 869 /* 870 * The reasons why we fail to create a connection are 871 * varied. In most cases we don't want the caller to 872 * immediately retry. This could have one or more 873 * bad effects. This includes flooding the net with 874 * connect requests to ports with no listener; a hard 875 * kernel loop due to all the "reserved" TCP ports being 876 * in use. 877 */ 878 delay_first = TRUE; 879 880 /* 881 * Even if we end up returning EINTR, we still count a 882 * a "can't connect", because the connection manager 883 * might have been committed to waiting for or timing out on 884 * a connection. 885 */ 886 COTSRCSTAT_INCR(p->cku_stats, rccantconn); 887 switch (p->cku_err.re_status) { 888 case RPC_INTR: 889 p->cku_err.re_errno = EINTR; 890 891 /* 892 * No need to delay because a UNIX signal(2) 893 * interrupted us. The caller likely won't 894 * retry the CLNT_CALL() and even if it does, 895 * we assume the caller knows what it is doing. 896 */ 897 delay_first = FALSE; 898 break; 899 900 case RPC_TIMEDOUT: 901 p->cku_err.re_errno = ETIMEDOUT; 902 903 /* 904 * No need to delay because timed out already 905 * on the connection request and assume that the 906 * transport time out is longer than our minimum 907 * timeout, or least not too much smaller. 908 */ 909 delay_first = FALSE; 910 break; 911 912 case RPC_SYSTEMERROR: 913 case RPC_TLIERROR: 914 /* 915 * We want to delay here because a transient 916 * system error has a better chance of going away 917 * if we delay a bit. If it's not transient, then 918 * we don't want end up in a hard kernel loop 919 * due to retries. 920 */ 921 ASSERT(p->cku_err.re_errno != 0); 922 break; 923 924 925 case RPC_CANTCONNECT: 926 /* 927 * RPC_CANTCONNECT is set on T_ERROR_ACK which 928 * implies some error down in the TCP layer or 929 * below. If cku_nodelayonerror is set then we 930 * assume the caller knows not to try too hard. 931 */ 932 RPCLOG0(8, "clnt_cots_kcallit: connection failed,"); 933 RPCLOG0(8, " re_status=RPC_CANTCONNECT,"); 934 RPCLOG(8, " re_errno=%d,", p->cku_err.re_errno); 935 RPCLOG(8, " cku_nodelayonerr=%d", p->cku_nodelayonerr); 936 if (p->cku_nodelayonerr == TRUE) 937 delay_first = FALSE; 938 939 p->cku_err.re_errno = EIO; 940 941 break; 942 943 case RPC_XPRTFAILED: 944 /* 945 * We want to delay here because we likely 946 * got a refused connection. 947 */ 948 if (p->cku_err.re_errno == 0) 949 p->cku_err.re_errno = EIO; 950 951 RPCLOG(1, "clnt_cots_kcallit: transport failed: %d\n", 952 p->cku_err.re_errno); 953 954 break; 955 956 default: 957 /* 958 * We delay here because it is better to err 959 * on the side of caution. If we got here then 960 * status could have been RPC_SUCCESS, but we 961 * know that we did not get a connection, so 962 * force the rpc status to RPC_CANTCONNECT. 
963 */ 964 p->cku_err.re_status = RPC_CANTCONNECT; 965 p->cku_err.re_errno = EIO; 966 break; 967 } 968 if (delay_first == TRUE) 969 ticks = clnt_cots_min_tout * drv_usectohz(1000000); 970 goto cots_done; 971 } 972 973 /* 974 * If we've never sent any request on this connection (send count 975 * is zero, or the connection has been reset), cache the 976 * the connection's create time and send a request (possibly a retry) 977 */ 978 if ((p->cku_flags & CKU_SENT) == 0 || 979 p->cku_ctime != cm_entry->x_ctime) { 980 p->cku_ctime = cm_entry->x_ctime; 981 982 } else if ((p->cku_flags & CKU_SENT) && (p->cku_flags & CKU_ONQUEUE) && 983 (call->call_reply != NULL || 984 p->cku_recv_attempts < clnt_cots_maxrecv)) { 985 986 /* 987 * If we've sent a request and our call is on the dispatch 988 * queue and we haven't made too many receive attempts, then 989 * don't re-send, just receive. 990 */ 991 p->cku_recv_attempts++; 992 goto read_again; 993 } 994 995 /* 996 * Now we create the RPC request in a STREAMS message. We have to do 997 * this after the call to connmgr_get so that we have the correct 998 * TIDU size for the transport. 999 */ 1000 tidu_size = cm_entry->x_tidu_size; 1001 len = MSG_OFFSET + MAX(tidu_size, RM_HDR_SIZE + WIRE_HDR_SIZE); 1002 1003 while ((mp = allocb(len, BPRI_MED)) == NULL) { 1004 if (strwaitbuf(len, BPRI_MED)) { 1005 p->cku_err.re_status = RPC_SYSTEMERROR; 1006 p->cku_err.re_errno = ENOSR; 1007 COTSRCSTAT_INCR(p->cku_stats, rcnomem); 1008 goto cots_done; 1009 } 1010 } 1011 xdrs = &p->cku_outxdr; 1012 xdrmblk_init(xdrs, mp, XDR_ENCODE, tidu_size); 1013 mpsize = MBLKSIZE(mp); 1014 ASSERT(mpsize >= len); 1015 ASSERT(mp->b_rptr == mp->b_datap->db_base); 1016 1017 /* 1018 * If the size of mblk is not appreciably larger than what we 1019 * asked, then resize the mblk to exactly len bytes. The reason for 1020 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 1021 * (from TCP over ethernet), and the arguments to the RPC require 1022 * 2800 bytes. Ideally we want the protocol to render two 1023 * ~1400 byte segments over the wire. However if allocb() gives us a 2k 1024 * mblk, and we allocate a second mblk for the remainder, the protocol 1025 * module may generate 3 segments over the wire: 1026 * 1460 bytes for the first, 448 (2048 - 1600) for the second, and 1027 * 892 for the third. If we "waste" 448 bytes in the first mblk, 1028 * the XDR encoding will generate two ~1400 byte mblks, and the 1029 * protocol module is more likely to produce properly sized segments. 1030 */ 1031 if ((mpsize >> 1) <= len) 1032 mp->b_rptr += (mpsize - len); 1033 1034 /* 1035 * Adjust b_rptr to reserve space for the non-data protocol headers 1036 * any downstream modules might like to add, and for the 1037 * record marking header. 1038 */ 1039 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 1040 1041 if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) { 1042 /* Copy in the preserialized RPC header information. */ 1043 bcopy(p->cku_rpchdr, mp->b_rptr, WIRE_HDR_SIZE); 1044 1045 /* Use XDR_SETPOS() to set the b_wptr to past the RPC header. */ 1046 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base + 1047 WIRE_HDR_SIZE)); 1048 1049 ASSERT((mp->b_wptr - mp->b_rptr) == WIRE_HDR_SIZE); 1050 1051 /* Serialize the procedure number and the arguments. 
*/ 1052 if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) || 1053 (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) || 1054 (!(*xdr_args)(xdrs, argsp))) { 1055 p->cku_err.re_status = RPC_CANTENCODEARGS; 1056 p->cku_err.re_errno = EIO; 1057 goto cots_done; 1058 } 1059 1060 (*(uint32_t *)(mp->b_rptr)) = p->cku_xid; 1061 } else { 1062 uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[WIRE_HDR_SIZE]; 1063 IXDR_PUT_U_INT32(uproc, procnum); 1064 1065 (*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid; 1066 1067 /* Use XDR_SETPOS() to set the b_wptr. */ 1068 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 1069 1070 /* Serialize the procedure number and the arguments. */ 1071 if (!AUTH_WRAP(h->cl_auth, p->cku_rpchdr, WIRE_HDR_SIZE+4, 1072 xdrs, xdr_args, argsp)) { 1073 p->cku_err.re_status = RPC_CANTENCODEARGS; 1074 p->cku_err.re_errno = EIO; 1075 goto cots_done; 1076 } 1077 } 1078 1079 RPCLOG(2, "clnt_cots_kcallit: connected, sending call, tidu_size %d\n", 1080 tidu_size); 1081 1082 wq = cm_entry->x_wq; 1083 waitsecs = 0; 1084 1085 dispatch_again: 1086 status = clnt_dispatch_send(wq, mp, call, p->cku_xid, 1087 (p->cku_flags & CKU_ONQUEUE)); 1088 1089 if ((status == RPC_CANTSEND) && (call->call_reason == ENOBUFS)) { 1090 /* 1091 * QFULL condition, allow some time for queue to drain 1092 * and try again. Give up after waiting for all timeout 1093 * specified for the call, or zone is going away. 1094 */ 1095 max_waitsecs = wait.tv_sec ? wait.tv_sec : clnt_cots_min_tout; 1096 if ((waitsecs++ < max_waitsecs) && 1097 !(zone_status_get(curproc->p_zone) >= 1098 ZONE_IS_SHUTTING_DOWN)) { 1099 1100 /* wait 1 sec for queue to drain */ 1101 if (clnt_delay(drv_usectohz(1000000), 1102 h->cl_nosignal) == EINTR) { 1103 p->cku_err.re_errno = EINTR; 1104 p->cku_err.re_status = RPC_INTR; 1105 1106 goto cots_done; 1107 } 1108 1109 /* and try again */ 1110 goto dispatch_again; 1111 } 1112 p->cku_err.re_status = status; 1113 p->cku_err.re_errno = call->call_reason; 1114 DTRACE_PROBE(krpc__e__clntcots__kcallit__cantsend); 1115 1116 goto cots_done; 1117 } 1118 1119 if (waitsecs) { 1120 /* adjust timeout to account for time wait to send */ 1121 wait.tv_sec -= waitsecs; 1122 if (wait.tv_sec < 0) { 1123 /* pick up reply on next retry */ 1124 wait.tv_sec = 0; 1125 } 1126 DTRACE_PROBE2(clnt_cots__sendwait, CLIENT *, h, 1127 int, waitsecs); 1128 } 1129 1130 RPCLOG(64, "clnt_cots_kcallit: sent call for xid 0x%x\n", 1131 (uint_t)p->cku_xid); 1132 p->cku_flags = (CKU_ONQUEUE|CKU_SENT); 1133 p->cku_recv_attempts = 1; 1134 1135 #ifdef RPCDEBUG 1136 time_sent = lbolt; 1137 #endif 1138 1139 /* 1140 * Wait for a reply or a timeout. If there is no error or timeout, 1141 * (both indicated by call_status), call->call_reply will contain 1142 * the RPC reply message. 1143 */ 1144 read_again: 1145 mutex_enter(&call->call_lock); 1146 interrupted = 0; 1147 if (call->call_status == RPC_TIMEDOUT) { 1148 /* 1149 * Indicate that the lwp is not to be stopped while waiting 1150 * for this network traffic. This is to avoid deadlock while 1151 * debugging a process via /proc and also to avoid recursive 1152 * mutex_enter()s due to NFS page faults while stopping 1153 * (NFS holds locks when it calls here). 
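	 * The pattern, restated from the code below, is simply:
	 *
	 *	if ((lwp = ttolwp(curthread)) != NULL)
	 *		lwp->lwp_nostop++;
	 *	... cv_timedwait() / cv_timedwait_sig() retry loop ...
	 *	if (lwp != NULL)
	 *		lwp->lwp_nostop--;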
1154 */ 1155 clock_t cv_wait_ret; 1156 clock_t timout; 1157 clock_t oldlbolt; 1158 1159 klwp_t *lwp = ttolwp(curthread); 1160 1161 if (lwp != NULL) 1162 lwp->lwp_nostop++; 1163 1164 oldlbolt = lbolt; 1165 timout = wait.tv_sec * drv_usectohz(1000000) + 1166 drv_usectohz(wait.tv_usec) + oldlbolt; 1167 /* 1168 * Iterate until the call_status is changed to something 1169 * other that RPC_TIMEDOUT, or if cv_timedwait_sig() returns 1170 * something <=0 zero. The latter means that we timed 1171 * out. 1172 */ 1173 if (h->cl_nosignal) 1174 while ((cv_wait_ret = cv_timedwait(&call->call_cv, 1175 &call->call_lock, timout)) > 0 && 1176 call->call_status == RPC_TIMEDOUT) 1177 ; 1178 else 1179 while ((cv_wait_ret = cv_timedwait_sig( 1180 &call->call_cv, 1181 &call->call_lock, timout)) > 0 && 1182 call->call_status == RPC_TIMEDOUT) 1183 ; 1184 1185 switch (cv_wait_ret) { 1186 case 0: 1187 /* 1188 * If we got out of the above loop with 1189 * cv_timedwait_sig() returning 0, then we were 1190 * interrupted regardless what call_status is. 1191 */ 1192 interrupted = 1; 1193 break; 1194 case -1: 1195 /* cv_timedwait_sig() timed out */ 1196 break; 1197 default: 1198 1199 /* 1200 * We were cv_signaled(). If we didn't 1201 * get a successful call_status and returned 1202 * before time expired, delay up to clnt_cots_min_tout 1203 * seconds so that the caller doesn't immediately 1204 * try to call us again and thus force the 1205 * same condition that got us here (such 1206 * as a RPC_XPRTFAILED due to the server not 1207 * listening on the end-point. 1208 */ 1209 if (call->call_status != RPC_SUCCESS) { 1210 clock_t curlbolt; 1211 clock_t diff; 1212 1213 curlbolt = ddi_get_lbolt(); 1214 ticks = clnt_cots_min_tout * 1215 drv_usectohz(1000000); 1216 diff = curlbolt - oldlbolt; 1217 if (diff < ticks) { 1218 delay_first = TRUE; 1219 if (diff > 0) 1220 ticks -= diff; 1221 } 1222 } 1223 break; 1224 } 1225 1226 if (lwp != NULL) 1227 lwp->lwp_nostop--; 1228 } 1229 /* 1230 * Get the reply message, if any. This will be freed at the end 1231 * whether or not an error occurred. 1232 */ 1233 mp = call->call_reply; 1234 call->call_reply = NULL; 1235 1236 /* 1237 * call_err is the error info when the call is on dispatch queue. 1238 * cku_err is the error info returned to the caller. 1239 * Sync cku_err with call_err for local message processing. 
1240 */ 1241 1242 status = call->call_status; 1243 p->cku_err = call->call_err; 1244 mutex_exit(&call->call_lock); 1245 1246 if (status != RPC_SUCCESS) { 1247 switch (status) { 1248 case RPC_TIMEDOUT: 1249 if (interrupted) { 1250 COTSRCSTAT_INCR(p->cku_stats, rcintrs); 1251 p->cku_err.re_status = RPC_INTR; 1252 p->cku_err.re_errno = EINTR; 1253 RPCLOG(1, "clnt_cots_kcallit: xid 0x%x", 1254 p->cku_xid); 1255 RPCLOG(1, "signal interrupted at %ld", lbolt); 1256 RPCLOG(1, ", was sent at %ld\n", time_sent); 1257 } else { 1258 COTSRCSTAT_INCR(p->cku_stats, rctimeouts); 1259 p->cku_err.re_errno = ETIMEDOUT; 1260 RPCLOG(1, "clnt_cots_kcallit: timed out at %ld", 1261 lbolt); 1262 RPCLOG(1, ", was sent at %ld\n", time_sent); 1263 } 1264 break; 1265 1266 case RPC_XPRTFAILED: 1267 if (p->cku_err.re_errno == 0) 1268 p->cku_err.re_errno = EIO; 1269 1270 RPCLOG(1, "clnt_cots_kcallit: transport failed: %d\n", 1271 p->cku_err.re_errno); 1272 break; 1273 1274 case RPC_SYSTEMERROR: 1275 ASSERT(p->cku_err.re_errno); 1276 RPCLOG(1, "clnt_cots_kcallit: system error: %d\n", 1277 p->cku_err.re_errno); 1278 break; 1279 1280 default: 1281 p->cku_err.re_status = RPC_SYSTEMERROR; 1282 p->cku_err.re_errno = EIO; 1283 RPCLOG(1, "clnt_cots_kcallit: error: %s\n", 1284 clnt_sperrno(status)); 1285 break; 1286 } 1287 if (p->cku_err.re_status != RPC_TIMEDOUT) { 1288 1289 if (p->cku_flags & CKU_ONQUEUE) { 1290 call_table_remove(call); 1291 p->cku_flags &= ~CKU_ONQUEUE; 1292 } 1293 1294 RPCLOG(64, "clnt_cots_kcallit: non TIMEOUT so xid 0x%x " 1295 "taken off dispatch list\n", p->cku_xid); 1296 if (call->call_reply) { 1297 freemsg(call->call_reply); 1298 call->call_reply = NULL; 1299 } 1300 } else if (wait.tv_sec != 0) { 1301 /* 1302 * We've sent the request over TCP and so we have 1303 * every reason to believe it will get 1304 * delivered. In which case returning a timeout is not 1305 * appropriate. 1306 */ 1307 if (p->cku_progress == TRUE && 1308 p->cku_recv_attempts < clnt_cots_maxrecv) { 1309 p->cku_err.re_status = RPC_INPROGRESS; 1310 } 1311 } 1312 goto cots_done; 1313 } 1314 1315 xdrs = &p->cku_inxdr; 1316 xdrmblk_init(xdrs, mp, XDR_DECODE, 0); 1317 1318 reply_msg.rm_direction = REPLY; 1319 reply_msg.rm_reply.rp_stat = MSG_ACCEPTED; 1320 reply_msg.acpted_rply.ar_stat = SUCCESS; 1321 1322 reply_msg.acpted_rply.ar_verf = _null_auth; 1323 /* 1324 * xdr_results will be done in AUTH_UNWRAP. 1325 */ 1326 reply_msg.acpted_rply.ar_results.where = NULL; 1327 reply_msg.acpted_rply.ar_results.proc = xdr_void; 1328 1329 if (xdr_replymsg(xdrs, &reply_msg)) { 1330 enum clnt_stat re_status; 1331 1332 _seterr_reply(&reply_msg, &p->cku_err); 1333 1334 re_status = p->cku_err.re_status; 1335 if (re_status == RPC_SUCCESS) { 1336 /* 1337 * Reply is good, check auth. 
1338 */ 1339 if (!AUTH_VALIDATE(h->cl_auth, 1340 &reply_msg.acpted_rply.ar_verf)) { 1341 COTSRCSTAT_INCR(p->cku_stats, rcbadverfs); 1342 RPCLOG0(1, "clnt_cots_kcallit: validation " 1343 "failure\n"); 1344 freemsg(mp); 1345 (void) xdr_rpc_free_verifier(xdrs, &reply_msg); 1346 mutex_enter(&call->call_lock); 1347 if (call->call_reply == NULL) 1348 call->call_status = RPC_TIMEDOUT; 1349 mutex_exit(&call->call_lock); 1350 goto read_again; 1351 } else if (!AUTH_UNWRAP(h->cl_auth, xdrs, 1352 xdr_results, resultsp)) { 1353 RPCLOG0(1, "clnt_cots_kcallit: validation " 1354 "failure (unwrap)\n"); 1355 p->cku_err.re_status = RPC_CANTDECODERES; 1356 p->cku_err.re_errno = EIO; 1357 } 1358 } else { 1359 /* set errno in case we can't recover */ 1360 if (re_status != RPC_VERSMISMATCH && 1361 re_status != RPC_AUTHERROR && 1362 re_status != RPC_PROGVERSMISMATCH) 1363 p->cku_err.re_errno = EIO; 1364 1365 if (re_status == RPC_AUTHERROR) { 1366 /* 1367 * Maybe our credential need to be refreshed 1368 */ 1369 if (cm_entry) { 1370 /* 1371 * There is the potential that the 1372 * cm_entry has/will be marked dead, 1373 * so drop the connection altogether, 1374 * force REFRESH to establish new 1375 * connection. 1376 */ 1377 connmgr_cancelconn(cm_entry); 1378 cm_entry = NULL; 1379 } 1380 1381 if ((refreshes > 0) && 1382 AUTH_REFRESH(h->cl_auth, &reply_msg, 1383 p->cku_cred)) { 1384 refreshes--; 1385 (void) xdr_rpc_free_verifier(xdrs, 1386 &reply_msg); 1387 freemsg(mp); 1388 mp = NULL; 1389 1390 if (p->cku_flags & CKU_ONQUEUE) { 1391 call_table_remove(call); 1392 p->cku_flags &= ~CKU_ONQUEUE; 1393 } 1394 1395 RPCLOG(64, 1396 "clnt_cots_kcallit: AUTH_ERROR, xid" 1397 " 0x%x removed off dispatch list\n", 1398 p->cku_xid); 1399 if (call->call_reply) { 1400 freemsg(call->call_reply); 1401 call->call_reply = NULL; 1402 } 1403 1404 COTSRCSTAT_INCR(p->cku_stats, 1405 rcbadcalls); 1406 COTSRCSTAT_INCR(p->cku_stats, 1407 rcnewcreds); 1408 goto call_again; 1409 } 1410 1411 /* 1412 * We have used the client handle to 1413 * do an AUTH_REFRESH and the RPC status may 1414 * be set to RPC_SUCCESS; Let's make sure to 1415 * set it to RPC_AUTHERROR. 1416 */ 1417 p->cku_err.re_status = RPC_AUTHERROR; 1418 1419 /* 1420 * Map recoverable and unrecoverable 1421 * authentication errors to appropriate errno 1422 */ 1423 switch (p->cku_err.re_why) { 1424 case AUTH_TOOWEAK: 1425 /* 1426 * This could be a failure where the 1427 * server requires use of a reserved 1428 * port, check and optionally set the 1429 * client handle useresvport trying 1430 * one more time. Next go round we 1431 * fall out with the tooweak error. 1432 */ 1433 if (p->cku_useresvport != 1) { 1434 p->cku_useresvport = 1; 1435 p->cku_xid = 0; 1436 (void) xdr_rpc_free_verifier 1437 (xdrs, &reply_msg); 1438 freemsg(mp); 1439 goto call_again; 1440 } 1441 /* FALLTHRU */ 1442 case AUTH_BADCRED: 1443 case AUTH_BADVERF: 1444 case AUTH_INVALIDRESP: 1445 case AUTH_FAILED: 1446 case RPCSEC_GSS_NOCRED: 1447 case RPCSEC_GSS_FAILED: 1448 p->cku_err.re_errno = EACCES; 1449 break; 1450 case AUTH_REJECTEDCRED: 1451 case AUTH_REJECTEDVERF: 1452 default: p->cku_err.re_errno = EIO; 1453 break; 1454 } 1455 RPCLOG(1, "clnt_cots_kcallit : authentication" 1456 " failed with RPC_AUTHERROR of type %d\n", 1457 (int)p->cku_err.re_why); 1458 } 1459 } 1460 } else { 1461 /* reply didn't decode properly. 
*/ 1462 p->cku_err.re_status = RPC_CANTDECODERES; 1463 p->cku_err.re_errno = EIO; 1464 RPCLOG0(1, "clnt_cots_kcallit: decode failure\n"); 1465 } 1466 1467 (void) xdr_rpc_free_verifier(xdrs, &reply_msg); 1468 1469 if (p->cku_flags & CKU_ONQUEUE) { 1470 call_table_remove(call); 1471 p->cku_flags &= ~CKU_ONQUEUE; 1472 } 1473 1474 RPCLOG(64, "clnt_cots_kcallit: xid 0x%x taken off dispatch list", 1475 p->cku_xid); 1476 RPCLOG(64, " status is %s\n", clnt_sperrno(p->cku_err.re_status)); 1477 cots_done: 1478 if (cm_entry) 1479 connmgr_release(cm_entry); 1480 1481 if (mp != NULL) 1482 freemsg(mp); 1483 if ((p->cku_flags & CKU_ONQUEUE) == 0 && call->call_reply) { 1484 freemsg(call->call_reply); 1485 call->call_reply = NULL; 1486 } 1487 if (p->cku_err.re_status != RPC_SUCCESS) { 1488 RPCLOG0(1, "clnt_cots_kcallit: tail-end failure\n"); 1489 COTSRCSTAT_INCR(p->cku_stats, rcbadcalls); 1490 } 1491 1492 /* 1493 * No point in delaying if the zone is going away. 1494 */ 1495 if (delay_first == TRUE && 1496 !(zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)) { 1497 if (clnt_delay(ticks, h->cl_nosignal) == EINTR) { 1498 p->cku_err.re_errno = EINTR; 1499 p->cku_err.re_status = RPC_INTR; 1500 } 1501 } 1502 return (p->cku_err.re_status); 1503 } 1504 1505 /* 1506 * Kinit routine for cots. This sets up the correct operations in 1507 * the client handle, as the handle may have previously been a clts 1508 * handle, and clears the xid field so there is no way a new call 1509 * could be mistaken for a retry. It also sets in the handle the 1510 * information that is passed at create/kinit time but needed at 1511 * call time, as cots creates the transport at call time - device, 1512 * address of the server, protocol family. 1513 */ 1514 void 1515 clnt_cots_kinit(CLIENT *h, dev_t dev, int family, struct netbuf *addr, 1516 int max_msgsize, cred_t *cred) 1517 { 1518 /* LINTED pointer alignment */ 1519 cku_private_t *p = htop(h); 1520 calllist_t *call = &p->cku_call; 1521 1522 h->cl_ops = &tcp_ops; 1523 if (p->cku_flags & CKU_ONQUEUE) { 1524 call_table_remove(call); 1525 p->cku_flags &= ~CKU_ONQUEUE; 1526 RPCLOG(64, "clnt_cots_kinit: removing call for xid 0x%x from" 1527 " dispatch list\n", p->cku_xid); 1528 } 1529 1530 if (call->call_reply != NULL) { 1531 freemsg(call->call_reply); 1532 call->call_reply = NULL; 1533 } 1534 1535 call->call_bucket = NULL; 1536 call->call_hash = 0; 1537 1538 /* 1539 * We don't clear cku_flags here, because clnt_cots_kcallit() 1540 * takes care of handling the cku_flags reset. 1541 */ 1542 p->cku_xid = 0; 1543 p->cku_device = dev; 1544 p->cku_addrfmly = family; 1545 p->cku_cred = cred; 1546 1547 if (p->cku_addr.maxlen < addr->len) { 1548 if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL) 1549 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen); 1550 p->cku_addr.buf = kmem_zalloc(addr->maxlen, KM_SLEEP); 1551 p->cku_addr.maxlen = addr->maxlen; 1552 } 1553 1554 p->cku_addr.len = addr->len; 1555 bcopy(addr->buf, p->cku_addr.buf, addr->len); 1556 1557 /* 1558 * If the current sanity check size in rpcmod is smaller 1559 * than the size needed, then increase the sanity check. 1560 */ 1561 if (max_msgsize != 0 && clnt_max_msg_sizep != NULL && 1562 max_msgsize > *clnt_max_msg_sizep) { 1563 mutex_enter(&clnt_max_msg_lock); 1564 if (max_msgsize > *clnt_max_msg_sizep) 1565 *clnt_max_msg_sizep = max_msgsize; 1566 mutex_exit(&clnt_max_msg_lock); 1567 } 1568 } 1569 1570 /* 1571 * ksettimers is a no-op for cots, with the exception of setting the xid. 
1572 */ 1573 /* ARGSUSED */ 1574 static int 1575 clnt_cots_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all, 1576 int minimum, void (*feedback)(int, int, caddr_t), caddr_t arg, 1577 uint32_t xid) 1578 { 1579 /* LINTED pointer alignment */ 1580 cku_private_t *p = htop(h); 1581 1582 if (xid) 1583 p->cku_xid = xid; 1584 COTSRCSTAT_INCR(p->cku_stats, rctimers); 1585 return (0); 1586 } 1587 1588 extern void rpc_poptimod(struct vnode *); 1589 extern int kstr_push(struct vnode *, char *); 1590 1591 int 1592 conn_kstat_update(kstat_t *ksp, int rw) 1593 { 1594 struct cm_xprt *cm_entry; 1595 struct cm_kstat_xprt *cm_ksp_data; 1596 uchar_t *b; 1597 char *fbuf; 1598 1599 if (rw == KSTAT_WRITE) 1600 return (EACCES); 1601 if (ksp == NULL || ksp->ks_private == NULL) 1602 return (EIO); 1603 cm_entry = (struct cm_xprt *)ksp->ks_private; 1604 cm_ksp_data = (struct cm_kstat_xprt *)ksp->ks_data; 1605 1606 cm_ksp_data->x_wq.value.ui32 = (uint32_t)(uintptr_t)cm_entry->x_wq; 1607 cm_ksp_data->x_family.value.ui32 = cm_entry->x_family; 1608 cm_ksp_data->x_rdev.value.ui32 = (uint32_t)cm_entry->x_rdev; 1609 cm_ksp_data->x_time.value.ui32 = cm_entry->x_time; 1610 cm_ksp_data->x_ref.value.ui32 = cm_entry->x_ref; 1611 cm_ksp_data->x_state.value.ui32 = cm_entry->x_state_flags; 1612 1613 if (cm_entry->x_server.buf) { 1614 fbuf = cm_ksp_data->x_server.value.str.addr.ptr; 1615 if (cm_entry->x_family == AF_INET && 1616 cm_entry->x_server.len == 1617 sizeof (struct sockaddr_in)) { 1618 struct sockaddr_in *sa; 1619 sa = (struct sockaddr_in *) 1620 cm_entry->x_server.buf; 1621 b = (uchar_t *)&sa->sin_addr; 1622 (void) sprintf(fbuf, 1623 "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF, 1624 b[2] & 0xFF, b[3] & 0xFF); 1625 cm_ksp_data->x_port.value.ui32 = 1626 (uint32_t)sa->sin_port; 1627 } else if (cm_entry->x_family == AF_INET6 && 1628 cm_entry->x_server.len >= 1629 sizeof (struct sockaddr_in6)) { 1630 /* extract server IP address & port */ 1631 struct sockaddr_in6 *sin6; 1632 sin6 = (struct sockaddr_in6 *)cm_entry->x_server.buf; 1633 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr, fbuf, 1634 INET6_ADDRSTRLEN); 1635 cm_ksp_data->x_port.value.ui32 = sin6->sin6_port; 1636 } else { 1637 struct sockaddr_in *sa; 1638 1639 sa = (struct sockaddr_in *)cm_entry->x_server.buf; 1640 b = (uchar_t *)&sa->sin_addr; 1641 (void) sprintf(fbuf, 1642 "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF, 1643 b[2] & 0xFF, b[3] & 0xFF); 1644 } 1645 KSTAT_NAMED_STR_BUFLEN(&cm_ksp_data->x_server) = 1646 strlen(fbuf) + 1; 1647 } 1648 1649 return (0); 1650 } 1651 1652 1653 /* 1654 * We want a version of delay which is interruptible by a UNIX signal 1655 * Return EINTR if an interrupt occured. 1656 */ 1657 static int 1658 clnt_delay(clock_t ticks, bool_t nosignal) 1659 { 1660 if (nosignal == TRUE) { 1661 delay(ticks); 1662 return (0); 1663 } 1664 return (delay_sig(ticks)); 1665 } 1666 1667 /* 1668 * Wait for a connection until a timeout, or until we are 1669 * signalled that there has been a connection state change. 1670 */ 1671 static enum clnt_stat 1672 connmgr_cwait(struct cm_xprt *cm_entry, const struct timeval *waitp, 1673 bool_t nosignal) 1674 { 1675 bool_t interrupted; 1676 clock_t timout, cv_stat; 1677 enum clnt_stat clstat; 1678 unsigned int old_state; 1679 1680 ASSERT(MUTEX_HELD(&connmgr_lock)); 1681 /* 1682 * We wait for the transport connection to be made, or an 1683 * indication that it could not be made. 
1684 */ 1685 clstat = RPC_TIMEDOUT; 1686 interrupted = FALSE; 1687 1688 old_state = cm_entry->x_state_flags; 1689 /* 1690 * Now loop until cv_timedwait{_sig} returns because of 1691 * a signal(0) or timeout(-1) or cv_signal(>0). But it may be 1692 * cv_signalled for various other reasons too. So loop 1693 * until there is a state change on the connection. 1694 */ 1695 1696 timout = waitp->tv_sec * drv_usectohz(1000000) + 1697 drv_usectohz(waitp->tv_usec) + lbolt; 1698 1699 if (nosignal) { 1700 while ((cv_stat = cv_timedwait(&cm_entry->x_conn_cv, 1701 &connmgr_lock, timout)) > 0 && 1702 cm_entry->x_state_flags == old_state) 1703 ; 1704 } else { 1705 while ((cv_stat = cv_timedwait_sig(&cm_entry->x_conn_cv, 1706 &connmgr_lock, timout)) > 0 && 1707 cm_entry->x_state_flags == old_state) 1708 ; 1709 1710 if (cv_stat == 0) /* got intr signal? */ 1711 interrupted = TRUE; 1712 } 1713 1714 if ((cm_entry->x_state_flags & (X_BADSTATES|X_CONNECTED)) == 1715 X_CONNECTED) { 1716 clstat = RPC_SUCCESS; 1717 } else { 1718 if (interrupted == TRUE) 1719 clstat = RPC_INTR; 1720 RPCLOG(1, "connmgr_cwait: can't connect, error: %s\n", 1721 clnt_sperrno(clstat)); 1722 } 1723 1724 return (clstat); 1725 } 1726 1727 /* 1728 * Primary interface for how RPC grabs a connection. 1729 */ 1730 static struct cm_xprt * 1731 connmgr_wrapget( 1732 struct netbuf *retryaddr, 1733 const struct timeval *waitp, 1734 cku_private_t *p) 1735 { 1736 struct cm_xprt *cm_entry; 1737 1738 cm_entry = connmgr_get(retryaddr, waitp, &p->cku_addr, p->cku_addrfmly, 1739 &p->cku_srcaddr, &p->cku_err, p->cku_device, 1740 p->cku_client.cl_nosignal, p->cku_useresvport, p->cku_cred); 1741 1742 if (cm_entry == NULL) { 1743 /* 1744 * Re-map the call status to RPC_INTR if the err code is 1745 * EINTR. This can happen if calls status is RPC_TLIERROR. 1746 * However, don't re-map if signalling has been turned off. 1747 * XXX Really need to create a separate thread whenever 1748 * there isn't an existing connection. 1749 */ 1750 if (p->cku_err.re_errno == EINTR) { 1751 if (p->cku_client.cl_nosignal == TRUE) 1752 p->cku_err.re_errno = EIO; 1753 else 1754 p->cku_err.re_status = RPC_INTR; 1755 } 1756 } 1757 1758 return (cm_entry); 1759 } 1760 1761 /* 1762 * Obtains a transport to the server specified in addr. If a suitable transport 1763 * does not already exist in the list of cached transports, a new connection 1764 * is created, connected, and added to the list. The connection is for sending 1765 * only - the reply message may come back on another transport connection. 1766 * 1767 * To implement round-robin load balancing with multiple client connections, 1768 * the last entry on the list is always selected. Once the entry is selected 1769 * it's re-inserted to the head of the list. 1770 */ 1771 static struct cm_xprt * 1772 connmgr_get( 1773 struct netbuf *retryaddr, 1774 const struct timeval *waitp, /* changed to a ptr to converse stack */ 1775 struct netbuf *destaddr, 1776 int addrfmly, 1777 struct netbuf *srcaddr, 1778 struct rpc_err *rpcerr, 1779 dev_t device, 1780 bool_t nosignal, 1781 int useresvport, 1782 cred_t *cr) 1783 { 1784 struct cm_xprt *cm_entry; 1785 struct cm_xprt *lru_entry; 1786 struct cm_xprt **cmp, **prev; 1787 queue_t *wq; 1788 TIUSER *tiptr; 1789 int i; 1790 int retval; 1791 int tidu_size; 1792 bool_t connected; 1793 zoneid_t zoneid = rpc_zoneid(); 1794 1795 /* 1796 * If the call is not a retry, look for a transport entry that 1797 * goes to the server of interest. 
1798 */ 1799 mutex_enter(&connmgr_lock); 1800 1801 if (retryaddr == NULL) { 1802 use_new_conn: 1803 i = 0; 1804 cm_entry = lru_entry = NULL; 1805 1806 prev = cmp = &cm_hd; 1807 while ((cm_entry = *cmp) != NULL) { 1808 ASSERT(cm_entry != cm_entry->x_next); 1809 /* 1810 * Garbage collect conections that are marked 1811 * for needs disconnect. 1812 */ 1813 if (cm_entry->x_needdis) { 1814 CONN_HOLD(cm_entry); 1815 connmgr_dis_and_wait(cm_entry); 1816 connmgr_release(cm_entry); 1817 /* 1818 * connmgr_lock could have been 1819 * dropped for the disconnect 1820 * processing so start over. 1821 */ 1822 goto use_new_conn; 1823 } 1824 1825 /* 1826 * Garbage collect the dead connections that have 1827 * no threads working on them. 1828 */ 1829 if ((cm_entry->x_state_flags & (X_DEAD|X_THREAD)) == 1830 X_DEAD) { 1831 mutex_enter(&cm_entry->x_lock); 1832 if (cm_entry->x_ref != 0) { 1833 /* 1834 * Currently in use. 1835 * Cleanup later. 1836 */ 1837 cmp = &cm_entry->x_next; 1838 mutex_exit(&cm_entry->x_lock); 1839 continue; 1840 } 1841 mutex_exit(&cm_entry->x_lock); 1842 *cmp = cm_entry->x_next; 1843 mutex_exit(&connmgr_lock); 1844 connmgr_close(cm_entry); 1845 mutex_enter(&connmgr_lock); 1846 goto use_new_conn; 1847 } 1848 1849 1850 if ((cm_entry->x_state_flags & X_BADSTATES) == 0 && 1851 cm_entry->x_zoneid == zoneid && 1852 cm_entry->x_rdev == device && 1853 destaddr->len == cm_entry->x_server.len && 1854 bcmp(destaddr->buf, cm_entry->x_server.buf, 1855 destaddr->len) == 0) { 1856 /* 1857 * If the matching entry isn't connected, 1858 * attempt to reconnect it. 1859 */ 1860 if (cm_entry->x_connected == FALSE) { 1861 /* 1862 * We don't go through trying 1863 * to find the least recently 1864 * used connected because 1865 * connmgr_reconnect() briefly 1866 * dropped the connmgr_lock, 1867 * allowing a window for our 1868 * accounting to be messed up. 1869 * In any case, a re-connected 1870 * connection is as good as 1871 * a LRU connection. 1872 */ 1873 return (connmgr_wrapconnect(cm_entry, 1874 waitp, destaddr, addrfmly, srcaddr, 1875 rpcerr, TRUE, nosignal, cr)); 1876 } 1877 i++; 1878 1879 /* keep track of the last entry */ 1880 lru_entry = cm_entry; 1881 prev = cmp; 1882 } 1883 cmp = &cm_entry->x_next; 1884 } 1885 1886 if (i > clnt_max_conns) { 1887 RPCLOG(8, "connmgr_get: too many conns, dooming entry" 1888 " %p\n", (void *)lru_entry->x_tiptr); 1889 lru_entry->x_doomed = TRUE; 1890 goto use_new_conn; 1891 } 1892 1893 /* 1894 * If we are at the maximum number of connections to 1895 * the server, hand back the least recently used one. 1896 */ 1897 if (i == clnt_max_conns) { 1898 /* 1899 * Copy into the handle the source address of 1900 * the connection, which we will use in case of 1901 * a later retry. 1902 */ 1903 if (srcaddr->len != lru_entry->x_src.len) { 1904 if (srcaddr->len > 0) 1905 kmem_free(srcaddr->buf, 1906 srcaddr->maxlen); 1907 srcaddr->buf = kmem_zalloc( 1908 lru_entry->x_src.len, KM_SLEEP); 1909 srcaddr->maxlen = srcaddr->len = 1910 lru_entry->x_src.len; 1911 } 1912 bcopy(lru_entry->x_src.buf, srcaddr->buf, srcaddr->len); 1913 RPCLOG(2, "connmgr_get: call going out on %p\n", 1914 (void *)lru_entry); 1915 lru_entry->x_time = lbolt; 1916 CONN_HOLD(lru_entry); 1917 1918 if ((i > 1) && (prev != &cm_hd)) { 1919 /* 1920 * remove and re-insert entry at head of list. 

			mutex_exit(&connmgr_lock);
			return (lru_entry);
		}

	} else {
		/*
		 * This is the retry case (retryaddr != NULL). Retries must
		 * be sent on the same source port as the original call.
		 */

		/*
		 * Walk the list looking for a connection with a source address
		 * that matches the retry address.
		 */
start_retry_loop:
		cmp = &cm_hd;
		while ((cm_entry = *cmp) != NULL) {
			ASSERT(cm_entry != cm_entry->x_next);

			/*
			 * Determine if this connection matches the passed
			 * in retry address. If it does not match, advance
			 * to the next element on the list.
			 */
			if (zoneid != cm_entry->x_zoneid ||
			    device != cm_entry->x_rdev ||
			    retryaddr->len != cm_entry->x_src.len ||
			    bcmp(retryaddr->buf, cm_entry->x_src.buf,
			    retryaddr->len) != 0) {
				cmp = &cm_entry->x_next;
				continue;
			}
			/*
			 * Garbage collect connections that are marked
			 * as needing a disconnect.
			 */
			if (cm_entry->x_needdis) {
				CONN_HOLD(cm_entry);
				connmgr_dis_and_wait(cm_entry);
				connmgr_release(cm_entry);
				/*
				 * connmgr_lock could have been
				 * dropped for the disconnect
				 * processing so start over.
				 */
				goto start_retry_loop;
			}
			/*
			 * Garbage collect the dead connections that have
			 * no threads working on them.
			 */
			if ((cm_entry->x_state_flags & (X_DEAD|X_THREAD)) ==
			    X_DEAD) {
				mutex_enter(&cm_entry->x_lock);
				if (cm_entry->x_ref != 0) {
					/*
					 * Currently in use.
					 * Cleanup later.
					 */
					cmp = &cm_entry->x_next;
					mutex_exit(&cm_entry->x_lock);
					continue;
				}
				mutex_exit(&cm_entry->x_lock);
				*cmp = cm_entry->x_next;
				mutex_exit(&connmgr_lock);
				connmgr_close(cm_entry);
				mutex_enter(&connmgr_lock);
				goto start_retry_loop;
			}

			/*
			 * Sanity check: if the connection with our source
			 * port is going to some other server, something went
			 * wrong, as we never delete connections (i.e. release
			 * ports) unless they have been idle. In this case,
			 * it is probably better to send the call out using
			 * a new source address than to fail it altogether,
			 * since that port may never be released.
			 */
			if (destaddr->len != cm_entry->x_server.len ||
			    bcmp(destaddr->buf, cm_entry->x_server.buf,
			    destaddr->len) != 0) {
				RPCLOG(1, "connmgr_get: tiptr %p"
				    " is going to a different server"
				    " with the port that belongs"
				    " to us!\n", (void *)cm_entry->x_tiptr);
				retryaddr = NULL;
				goto use_new_conn;
			}

			/*
			 * If the connection of interest is not connected and we
			 * can't reconnect it, then the server is probably
			 * still down. Return NULL to the caller and let it
			 * retry later if it wants to. We have a delay so the
			 * machine doesn't go into a tight retry loop. If the
			 * entry was already connected, or the reconnect was
			 * successful, return this entry.
			 */
			if (cm_entry->x_connected == FALSE) {
				return (connmgr_wrapconnect(cm_entry,
				    waitp, destaddr, addrfmly, NULL,
				    rpcerr, TRUE, nosignal, cr));
			} else {
				CONN_HOLD(cm_entry);

				cm_entry->x_time = lbolt;
				mutex_exit(&connmgr_lock);
				RPCLOG(2, "connmgr_get: found old "
				    "transport %p for retry\n",
				    (void *)cm_entry);
				return (cm_entry);
			}
		}

		/*
		 * We cannot find an entry in the list for this retry.
		 * Either the entry has been removed temporarily to be
		 * reconnected by another thread, or the original call
		 * got a port but never got connected,
		 * and hence the transport never got put in the
		 * list. Fall through to the "create new connection" code -
		 * the former case will fail there trying to rebind the port,
		 * and the latter case (and any other pathological cases) will
		 * rebind and reconnect and not hang the client machine.
		 */
		RPCLOG0(8, "connmgr_get: no entry in list for retry\n");
	}
	/*
	 * Set up a transport entry in the connection manager's list.
	 */
	cm_entry = (struct cm_xprt *)
	    kmem_zalloc(sizeof (struct cm_xprt), KM_SLEEP);

	cm_entry->x_server.buf = kmem_zalloc(destaddr->len, KM_SLEEP);
	bcopy(destaddr->buf, cm_entry->x_server.buf, destaddr->len);
	cm_entry->x_server.len = cm_entry->x_server.maxlen = destaddr->len;

	cm_entry->x_state_flags = X_THREAD;
	cm_entry->x_ref = 1;
	cm_entry->x_family = addrfmly;
	cm_entry->x_rdev = device;
	cm_entry->x_zoneid = zoneid;
	mutex_init(&cm_entry->x_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&cm_entry->x_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&cm_entry->x_conn_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&cm_entry->x_dis_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Note that we add this partially initialized entry to the
	 * connection list. This is so that we don't end up with multiple
	 * connections to the same server.
	 *
	 * Note that x_src is not initialized at this point. This is because
	 * retryaddr might be NULL in which case x_src is whatever
	 * t_kbind/bindresvport gives us. If another thread wants a
	 * connection to the same server, seemingly we have an issue, but we
	 * don't. If the other thread comes in with retryaddr == NULL, then it
	 * will never look at x_src, and it will end up waiting in
	 * connmgr_cwait() for the first thread to finish the connection
	 * attempt. If the other thread comes in with retryaddr != NULL, then
	 * that means there was a request sent on a connection, in which case
	 * the connection should already exist. Thus the first thread
	 * never gets here ... it finds the connection to its server in the
	 * connection list.
	 *
	 * But even if the theory is wrong, in the retryaddr != NULL case, the
	 * 2nd thread will skip us because x_src.len == 0.
	 */
	cm_entry->x_next = cm_hd;
	cm_hd = cm_entry;
	mutex_exit(&connmgr_lock);
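
	/*
	 * Illustrative timeline (sketch only) of how this partially
	 * initialized entry serializes concurrent connection attempts
	 * to the same server:
	 *
	 *	thread 1			thread 2
	 *	--------			--------
	 *	add entry, X_THREAD set
	 *	drop connmgr_lock
	 *	connmgr_connect() ...		connmgr_get() finds the entry,
	 *					sees x_thread set, and blocks
	 *					in connmgr_cwait()
	 *	cv_broadcast(&x_conn_cv)
	 *					wakes up and uses the entry
	 */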

	/*
	 * Either we didn't find an entry to the server of interest, or we
	 * don't have the maximum number of connections to that server -
	 * create a new connection.
	 */
	RPCLOG0(8, "connmgr_get: creating new connection\n");
	rpcerr->re_status = RPC_TLIERROR;

	i = t_kopen(NULL, device, FREAD|FWRITE|FNDELAY, &tiptr, zone_kcred());
	if (i) {
		RPCLOG(1, "connmgr_get: can't open cots device, error %d\n", i);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}
	rpc_poptimod(tiptr->fp->f_vnode);

	if (i = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"rpcmod", 0,
	    K_TO_K, kcred, &retval)) {
		RPCLOG(1, "connmgr_get: can't push cots module, %d\n", i);
		(void) t_kclose(tiptr, 1);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}

	if (i = strioctl(tiptr->fp->f_vnode, RPC_CLIENT, 0, 0, K_TO_K,
	    kcred, &retval)) {
		RPCLOG(1, "connmgr_get: can't set client status with cots "
		    "module, %d\n", i);
		(void) t_kclose(tiptr, 1);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}

	mutex_enter(&connmgr_lock);

	wq = tiptr->fp->f_vnode->v_stream->sd_wrq->q_next;
	cm_entry->x_wq = wq;

	mutex_exit(&connmgr_lock);

	if (i = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"timod", 0,
	    K_TO_K, kcred, &retval)) {
		RPCLOG(1, "connmgr_get: can't push timod, %d\n", i);
		(void) t_kclose(tiptr, 1);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}
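
	/*
	 * At this point the stream for the new connection looks like this
	 * (illustrative sketch only):
	 *
	 *	stream head
	 *	     |
	 *	   timod	(TPI interface module)
	 *	     |
	 *	  rpcmod	(RPC record marking and dispatch)
	 *	     |
	 *	 transport	(the COTS device opened above, e.g. TCP)
	 */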

	/*
	 * If the caller has not specified reserved port usage then
	 * take the system default.
	 */
	if (useresvport == -1)
		useresvport = clnt_cots_do_bindresvport;

	if ((useresvport || retryaddr != NULL) &&
	    (addrfmly == AF_INET || addrfmly == AF_INET6)) {
		bool_t alloc_src = FALSE;

		if (srcaddr->len != destaddr->len) {
			kmem_free(srcaddr->buf, srcaddr->maxlen);
			srcaddr->buf = kmem_zalloc(destaddr->len, KM_SLEEP);
			srcaddr->maxlen = destaddr->len;
			srcaddr->len = destaddr->len;
			alloc_src = TRUE;
		}

		if ((i = bindresvport(tiptr, retryaddr, srcaddr, TRUE)) != 0) {
			(void) t_kclose(tiptr, 1);
			RPCLOG(1, "connmgr_get: couldn't bind, retryaddr: "
			    "%p\n", (void *)retryaddr);

			/*
			 * 1225408: If we allocated a source address, then it
			 * is either garbage or all zeroes. In that case
			 * we need to clear srcaddr.
			 */
			if (alloc_src == TRUE) {
				kmem_free(srcaddr->buf, srcaddr->maxlen);
				srcaddr->maxlen = srcaddr->len = 0;
				srcaddr->buf = NULL;
			}
			rpcerr->re_errno = i;
			connmgr_cancelconn(cm_entry);
			return (NULL);
		}
	} else {
		if ((i = t_kbind(tiptr, NULL, NULL)) != 0) {
			RPCLOG(1, "clnt_cots_kcreate: t_kbind: %d\n", i);
			(void) t_kclose(tiptr, 1);
			rpcerr->re_errno = i;
			connmgr_cancelconn(cm_entry);
			return (NULL);
		}
	}

	{
		/*
		 * Keep the kernel stack lean. Don't move this call
		 * declaration to the top of this function because a
		 * call is declared in connmgr_wrapconnect().
		 */
		calllist_t call;

		bzero(&call, sizeof (call));
		cv_init(&call.call_cv, NULL, CV_DEFAULT, NULL);

		/*
		 * This is a bound end-point so don't close its stream.
		 */
		connected = connmgr_connect(cm_entry, wq, destaddr, addrfmly,
		    &call, &tidu_size, FALSE, waitp, nosignal, cr);
		*rpcerr = call.call_err;
		cv_destroy(&call.call_cv);
	}

	mutex_enter(&connmgr_lock);

	/*
	 * Set up a transport entry in the connection manager's list.
	 */
	cm_entry->x_src.buf = kmem_zalloc(srcaddr->len, KM_SLEEP);
	bcopy(srcaddr->buf, cm_entry->x_src.buf, srcaddr->len);
	cm_entry->x_src.len = cm_entry->x_src.maxlen = srcaddr->len;

	cm_entry->x_tiptr = tiptr;
	cm_entry->x_time = lbolt;

	if (tiptr->tp_info.servtype == T_COTS_ORD)
		cm_entry->x_ordrel = TRUE;
	else
		cm_entry->x_ordrel = FALSE;

	cm_entry->x_tidu_size = tidu_size;

	if (cm_entry->x_early_disc) {
		/*
		 * We need to check if a disconnect request has come
		 * in while we were connecting; if so, we need to
		 * set rpcerr->re_status appropriately before returning
		 * NULL to the caller.
		 */
		if (rpcerr->re_status == RPC_SUCCESS)
			rpcerr->re_status = RPC_XPRTFAILED;
		cm_entry->x_connected = FALSE;
	} else
		cm_entry->x_connected = connected;

	/*
	 * There could be a discrepancy here such that
	 * x_early_disc is TRUE yet connected is TRUE as well
	 * and the connection is actually connected. In that case
	 * let's be conservative and declare the connection as not
	 * connected.
	 */
	cm_entry->x_early_disc = FALSE;
	cm_entry->x_needdis = (cm_entry->x_connected == FALSE);
	cm_entry->x_ctime = lbolt;

	/*
	 * Notify any threads waiting that the connection attempt is done.
	 */
	cm_entry->x_thread = FALSE;
	cv_broadcast(&cm_entry->x_conn_cv);

	if (cm_entry->x_connected == FALSE) {
		mutex_exit(&connmgr_lock);
		connmgr_release(cm_entry);
		return (NULL);
	}

	mutex_exit(&connmgr_lock);

	return (cm_entry);
}
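
/*
 * Illustrative timeline (sketch only) of the x_early_disc case handled
 * above and in connmgr_wrapconnect(): a disconnect indication arrives
 * while the connecting thread is in connmgr_connect() without holding
 * connmgr_lock.
 *
 *	connecting thread		rpcmod/TPI event
 *	-----------------		----------------
 *	drop connmgr_lock
 *	connmgr_connect() succeeds
 *					T_DISCON_IND arrives;
 *					clnt_dispatch_notifyall() sees
 *					x_thread set and sets x_early_disc
 *	re-take connmgr_lock
 *	x_early_disc is set, so the
 *	entry is marked unconnected and
 *	RPC_XPRTFAILED is returned
 */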

/*
 * Keep the cm_xprt entry on the connection list when making a connection. This
 * is to prevent multiple connections to a slow server from appearing.
 * We use the bit field x_thread to tell if a thread is doing a connection
 * attempt, which keeps other interested threads from messing with the
 * connection. Those other threads just wait if x_thread is set.
 *
 * If x_thread is not set, then we do the actual work of connecting via
 * connmgr_connect().
 *
 * mutex convention: called with connmgr_lock held, returns with it released.
 */
static struct cm_xprt *
connmgr_wrapconnect(
	struct cm_xprt *cm_entry,
	const struct timeval *waitp,
	struct netbuf *destaddr,
	int addrfmly,
	struct netbuf *srcaddr,
	struct rpc_err *rpcerr,
	bool_t reconnect,
	bool_t nosignal,
	cred_t *cr)
{
	ASSERT(MUTEX_HELD(&connmgr_lock));
	/*
	 * Hold this entry as we are about to drop connmgr_lock.
	 */
	CONN_HOLD(cm_entry);

	/*
	 * If there is a thread already making a connection for us, then
	 * wait for it to complete the connection.
	 */
	if (cm_entry->x_thread == TRUE) {
		rpcerr->re_status = connmgr_cwait(cm_entry, waitp, nosignal);

		if (rpcerr->re_status != RPC_SUCCESS) {
			mutex_exit(&connmgr_lock);
			connmgr_release(cm_entry);
			return (NULL);
		}
	} else {
		bool_t connected;
		calllist_t call;

		cm_entry->x_thread = TRUE;

		while (cm_entry->x_needrel == TRUE) {
			cm_entry->x_needrel = FALSE;

			connmgr_sndrel(cm_entry);
			delay(drv_usectohz(1000000));

			mutex_enter(&connmgr_lock);
		}

		/*
		 * If we need to send a T_DISCON_REQ, send one.
		 */
		connmgr_dis_and_wait(cm_entry);

		mutex_exit(&connmgr_lock);

		bzero(&call, sizeof (call));
		cv_init(&call.call_cv, NULL, CV_DEFAULT, NULL);

		connected = connmgr_connect(cm_entry, cm_entry->x_wq,
		    destaddr, addrfmly, &call, &cm_entry->x_tidu_size,
		    reconnect, waitp, nosignal, cr);

		*rpcerr = call.call_err;
		cv_destroy(&call.call_cv);

		mutex_enter(&connmgr_lock);

		if (cm_entry->x_early_disc) {
			/*
			 * We need to check if a disconnect request has come
			 * in while we were connecting; if so, we need to
			 * set rpcerr->re_status appropriately before returning
			 * NULL to the caller.
			 */
			if (rpcerr->re_status == RPC_SUCCESS)
				rpcerr->re_status = RPC_XPRTFAILED;
			cm_entry->x_connected = FALSE;
		} else
			cm_entry->x_connected = connected;

		/*
		 * There could be a discrepancy here such that
		 * x_early_disc is TRUE yet connected is TRUE as well
		 * and the connection is actually connected. In that case
		 * let's be conservative and declare the connection as not
		 * connected.
		 */
		cm_entry->x_early_disc = FALSE;
		cm_entry->x_needdis = (cm_entry->x_connected == FALSE);

		/*
		 * connmgr_connect() may have given up before the connection
		 * actually timed out. So ensure that before the next
		 * connection attempt we do a disconnect.
		 */
		cm_entry->x_ctime = lbolt;
		cm_entry->x_thread = FALSE;

		cv_broadcast(&cm_entry->x_conn_cv);

		if (cm_entry->x_connected == FALSE) {
			mutex_exit(&connmgr_lock);
			connmgr_release(cm_entry);
			return (NULL);
		}
	}

	if (srcaddr != NULL) {
		/*
		 * Copy into the handle the source address of
		 * the connection, which we will use in case of
		 * a later retry.
		 */
		if (srcaddr->len != cm_entry->x_src.len) {
			if (srcaddr->maxlen > 0)
				kmem_free(srcaddr->buf, srcaddr->maxlen);
			srcaddr->buf = kmem_zalloc(cm_entry->x_src.len,
			    KM_SLEEP);
			srcaddr->maxlen = srcaddr->len =
			    cm_entry->x_src.len;
		}
		bcopy(cm_entry->x_src.buf, srcaddr->buf, srcaddr->len);
	}
	cm_entry->x_time = lbolt;
	mutex_exit(&connmgr_lock);
	return (cm_entry);
}
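
/*
 * Illustrative TPI message flow (sketch only) for the disconnect
 * handshake driven by connmgr_dis_and_wait() below:
 *
 *	kRPC client			transport provider
 *	-----------			------------------
 *	T_DISCON_REQ	 -------->
 *			 <--------	T_OK_ACK (or T_ERROR_ACK)
 *
 * x_waitdis is TRUE while the ack is outstanding; clnt_dispatch_notifyall()
 * clears it and signals x_dis_cv when the ack arrives.
 */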

/*
 * If we need to send a T_DISCON_REQ, send one.
 */
static void
connmgr_dis_and_wait(struct cm_xprt *cm_entry)
{
	ASSERT(MUTEX_HELD(&connmgr_lock));
	for (;;) {
		while (cm_entry->x_needdis == TRUE) {
			RPCLOG(8, "connmgr_dis_and_wait: need "
			    "T_DISCON_REQ for connection 0x%p\n",
			    (void *)cm_entry);
			cm_entry->x_needdis = FALSE;
			cm_entry->x_waitdis = TRUE;

			connmgr_snddis(cm_entry);

			mutex_enter(&connmgr_lock);
		}

		if (cm_entry->x_waitdis == TRUE) {
			clock_t curlbolt;
			clock_t timout;

			RPCLOG(8, "connmgr_dis_and_wait: waiting for "
			    "T_DISCON_REQ's ACK for connection %p\n",
			    (void *)cm_entry);
			curlbolt = ddi_get_lbolt();

			timout = clnt_cots_min_conntout *
			    drv_usectohz(1000000) + curlbolt;

			/*
			 * The TPI spec says that the T_DISCON_REQ
			 * will get acknowledged, but in practice
			 * the ACK may never get sent. So don't
			 * block forever.
			 */
			(void) cv_timedwait(&cm_entry->x_dis_cv,
			    &connmgr_lock, timout);
		}
		/*
		 * If we got the ACK, break. If we didn't,
		 * then send another T_DISCON_REQ.
		 */
		if (cm_entry->x_waitdis == FALSE) {
			break;
		} else {
			RPCLOG(8, "connmgr_dis_and_wait: did "
			    "not get T_DISCON_REQ's ACK for "
			    "connection %p\n", (void *)cm_entry);
			cm_entry->x_needdis = TRUE;
		}
	}
}

static void
connmgr_cancelconn(struct cm_xprt *cm_entry)
{
	/*
	 * Mark the connection table entry as dead; the next thread that
	 * goes through connmgr_release() will notice this and deal with it.
	 */
	mutex_enter(&connmgr_lock);
	cm_entry->x_dead = TRUE;

	/*
	 * Notify any threads waiting for the connection that it isn't
	 * going to happen.
	 */
	cm_entry->x_thread = FALSE;
	cv_broadcast(&cm_entry->x_conn_cv);
	mutex_exit(&connmgr_lock);

	connmgr_release(cm_entry);
}

static void
connmgr_close(struct cm_xprt *cm_entry)
{
	mutex_enter(&cm_entry->x_lock);
	while (cm_entry->x_ref != 0) {
		/*
		 * Must be a noninterruptible wait.
		 */
		cv_wait(&cm_entry->x_cv, &cm_entry->x_lock);
	}

	if (cm_entry->x_tiptr != NULL)
		(void) t_kclose(cm_entry->x_tiptr, 1);

	mutex_exit(&cm_entry->x_lock);
	if (cm_entry->x_ksp != NULL) {
		mutex_enter(&connmgr_lock);
		cm_entry->x_ksp->ks_private = NULL;
		mutex_exit(&connmgr_lock);

		/*
		 * Must free the buffer we allocated for the
		 * server address in the update function.
		 */
		if (((struct cm_kstat_xprt *)(cm_entry->x_ksp->ks_data))->
		    x_server.value.str.addr.ptr != NULL)
			kmem_free(((struct cm_kstat_xprt *)(cm_entry->x_ksp->
			    ks_data))->x_server.value.str.addr.ptr,
			    INET6_ADDRSTRLEN);
		kmem_free(cm_entry->x_ksp->ks_data,
		    cm_entry->x_ksp->ks_data_size);
		kstat_delete(cm_entry->x_ksp);
	}

	mutex_destroy(&cm_entry->x_lock);
	cv_destroy(&cm_entry->x_cv);
	cv_destroy(&cm_entry->x_conn_cv);
	cv_destroy(&cm_entry->x_dis_cv);

	if (cm_entry->x_server.buf != NULL)
		kmem_free(cm_entry->x_server.buf, cm_entry->x_server.maxlen);
	if (cm_entry->x_src.buf != NULL)
		kmem_free(cm_entry->x_src.buf, cm_entry->x_src.maxlen);
	kmem_free(cm_entry, sizeof (struct cm_xprt));
}
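
/*
 * Illustrative sketch of the reference counting seen above: CONN_HOLD()
 * bumps x_ref, connmgr_release() drops it and signals x_cv when the last
 * hold is gone, and connmgr_close() blocks until the count drains:
 *
 *	CONN_HOLD(cm_entry);		(x_ref: 1 -> 2)
 *	... use the connection ...
 *	connmgr_release(cm_entry);	(x_ref: 2 -> 1; cv_signal at 0)
 *	...
 *	connmgr_close(cm_entry);	(waits for x_ref == 0, then frees)
 */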

/*
 * Called by KRPC after sending the call message to release the connection
 * it was using.
 */
static void
connmgr_release(struct cm_xprt *cm_entry)
{
	mutex_enter(&cm_entry->x_lock);
	cm_entry->x_ref--;
	if (cm_entry->x_ref == 0)
		cv_signal(&cm_entry->x_cv);
	mutex_exit(&cm_entry->x_lock);
}

/*
 * Given an open stream, connect to the remote. Returns true if connected,
 * false otherwise.
 */
static bool_t
connmgr_connect(
	struct cm_xprt *cm_entry,
	queue_t *wq,
	struct netbuf *addr,
	int addrfmly,
	calllist_t *e,
	int *tidu_ptr,
	bool_t reconnect,
	const struct timeval *waitp,
	bool_t nosignal,
	cred_t *cr)
{
	mblk_t *mp;
	struct T_conn_req *tcr;
	struct T_info_ack *tinfo;
	int interrupted, error;
	int tidu_size, kstat_instance;

	/* if it's a reconnect, flush any lingering data messages */
	if (reconnect)
		(void) putctl1(wq, M_FLUSH, FLUSHRW);

	/*
	 * Note: if the receiver uses SCM_UCRED/getpeerucred the pid will
	 * appear as -1.
	 */
	mp = allocb_cred(sizeof (*tcr) + addr->len, cr, NOPID);
	if (mp == NULL) {
		/*
		 * This is unfortunate, but we need to look up the stats for
		 * this zone to increment the "memory allocation failed"
		 * counter. curproc->p_zone is safe since we're initiating a
		 * connection and not in some strange streams context.
		 */
		struct rpcstat *rpcstat;

		rpcstat = zone_getspecific(rpcstat_zone_key, rpc_zone());
		ASSERT(rpcstat != NULL);

		RPCLOG0(1, "connmgr_connect: cannot alloc mp for "
		    "sending conn request\n");
		COTSRCSTAT_INCR(rpcstat->rpc_cots_client, rcnomem);
		e->call_status = RPC_SYSTEMERROR;
		e->call_reason = ENOSR;
		return (FALSE);
	}

	mp->b_datap->db_type = M_PROTO;
	tcr = (struct T_conn_req *)mp->b_rptr;
	bzero(tcr, sizeof (*tcr));
	tcr->PRIM_type = T_CONN_REQ;
	tcr->DEST_length = addr->len;
	tcr->DEST_offset = sizeof (struct T_conn_req);
	mp->b_wptr = mp->b_rptr + sizeof (*tcr);

	bcopy(addr->buf, mp->b_wptr, tcr->DEST_length);
	mp->b_wptr += tcr->DEST_length;
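
	/*
	 * Illustrative layout (sketch only) of the connect request
	 * message just built:
	 *
	 *	b_rptr ->  +-------------------------------+
	 *	           | struct T_conn_req             |
	 *	           |  PRIM_type   = T_CONN_REQ     |
	 *	           |  DEST_length = addr->len      |
	 *	           |  DEST_offset = sizeof (*tcr)  |
	 *	           +-------------------------------+
	 *	           | server address bytes          |
	 *	b_wptr ->  +-------------------------------+
	 */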

	RPCLOG(8, "connmgr_connect: sending conn request on queue "
	    "%p", (void *)wq);
	RPCLOG(8, " call %p\n", (void *)wq);
	/*
	 * We use the entry in the handle that is normally used for
	 * waiting for RPC replies to wait for the connection accept.
	 */
	if (clnt_dispatch_send(wq, mp, e, 0, 0) != RPC_SUCCESS) {
		DTRACE_PROBE(krpc__e__connmgr__connect__cantsend);
		freemsg(mp);
		return (FALSE);
	}

	mutex_enter(&clnt_pending_lock);

	/*
	 * We wait for the transport connection to be made, or an
	 * indication that it could not be made.
	 */
	interrupted = 0;

	/*
	 * waitforack should have been called with T_OK_ACK, but the
	 * present implementation needs to be passed T_INFO_ACK to
	 * work correctly.
	 */
	error = waitforack(e, T_INFO_ACK, waitp, nosignal);
	if (error == EINTR)
		interrupted = 1;
	if (zone_status_get(curproc->p_zone) >= ZONE_IS_EMPTY) {
		/*
		 * No time to lose; we essentially have been signaled to
		 * quit.
		 */
		interrupted = 1;
	}
#ifdef RPCDEBUG
	if (error == ETIME)
		RPCLOG0(8, "connmgr_connect: giving up "
		    "on connection attempt; "
		    "clnt_dispatch_notifyconn "
		    "diagnostic 'no one waiting for "
		    "connection' should not be "
		    "unexpected\n");
#endif
	if (e->call_prev)
		e->call_prev->call_next = e->call_next;
	else
		clnt_pending = e->call_next;
	if (e->call_next)
		e->call_next->call_prev = e->call_prev;
	mutex_exit(&clnt_pending_lock);

	if (e->call_status != RPC_SUCCESS || error != 0) {
		if (interrupted)
			e->call_status = RPC_INTR;
		else if (error == ETIME)
			e->call_status = RPC_TIMEDOUT;
		else if (error == EPROTO) {
			e->call_status = RPC_SYSTEMERROR;
			e->call_reason = EPROTO;
		}

		RPCLOG(8, "connmgr_connect: can't connect, status: "
		    "%s\n", clnt_sperrno(e->call_status));

		if (e->call_reply) {
			freemsg(e->call_reply);
			e->call_reply = NULL;
		}

		return (FALSE);
	}
	/*
	 * The result of the "connection accept" is a T_info_ack
	 * in the call_reply field.
	 */
	ASSERT(e->call_reply != NULL);
	mp = e->call_reply;
	e->call_reply = NULL;
	tinfo = (struct T_info_ack *)mp->b_rptr;

	tidu_size = tinfo->TIDU_size;
	tidu_size -= (tidu_size % BYTES_PER_XDR_UNIT);
	if (tidu_size > COTS_DEFAULT_ALLOCSIZE || (tidu_size <= 0))
		tidu_size = COTS_DEFAULT_ALLOCSIZE;
	*tidu_ptr = tidu_size;

	freemsg(mp);

	/*
	 * Set up the pertinent options. NODELAY is so the transport doesn't
	 * buffer up RPC messages on either end. This may not be valid for
	 * all transports. Failure to set this option is not cause to
	 * bail out, so we return success anyway. Note that lack of NODELAY
	 * or some other way to flush the message on both ends will cause
	 * lots of retries and terrible performance.
	 */
	if (addrfmly == AF_INET || addrfmly == AF_INET6) {
		(void) connmgr_setopt(wq, IPPROTO_TCP, TCP_NODELAY, e, cr);
		if (e->call_status == RPC_XPRTFAILED)
			return (FALSE);
	}

	/*
	 * Since we have a connection, we now need to figure out if
	 * we need to create a kstat. If x_ksp is not NULL then we
	 * are reusing a connection and so we do not need to create
	 * another kstat -- let's just return.
	 */
	if (cm_entry->x_ksp != NULL)
		return (TRUE);

	/*
	 * We need to increment rpc_kstat_instance atomically to prevent
	 * two kstats being created with the same instance.
	 */
	kstat_instance = atomic_add_32_nv((uint32_t *)&rpc_kstat_instance, 1);

	if ((cm_entry->x_ksp = kstat_create_zone("unix", kstat_instance,
	    "rpc_cots_connections", "rpc", KSTAT_TYPE_NAMED,
	    (uint_t)(sizeof (cm_kstat_xprt_t) / sizeof (kstat_named_t)),
	    KSTAT_FLAG_VIRTUAL, cm_entry->x_zoneid)) == NULL) {
		return (TRUE);
	}

	cm_entry->x_ksp->ks_lock = &connmgr_lock;
	cm_entry->x_ksp->ks_private = cm_entry;
	cm_entry->x_ksp->ks_data_size = ((INET6_ADDRSTRLEN * sizeof (char))
	    + sizeof (cm_kstat_template));
	cm_entry->x_ksp->ks_data = kmem_alloc(cm_entry->x_ksp->ks_data_size,
	    KM_SLEEP);
	bcopy(&cm_kstat_template, cm_entry->x_ksp->ks_data,
	    cm_entry->x_ksp->ks_data_size);
	((struct cm_kstat_xprt *)(cm_entry->x_ksp->ks_data))->
	    x_server.value.str.addr.ptr =
	    kmem_alloc(INET6_ADDRSTRLEN, KM_SLEEP);

	cm_entry->x_ksp->ks_update = conn_kstat_update;
	kstat_install(cm_entry->x_ksp);
	return (TRUE);
}
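
/*
 * Observability note: assuming the standard kstat(1M) utility, the
 * per-connection statistics installed above can be inspected from
 * userland with an invocation along the lines of:
 *
 *	$ kstat -m unix -n rpc_cots_connections
 *
 * (illustrative only; the exact fields depend on cm_kstat_template).
 */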

/*
 * Called by connmgr_connect to set an option on the new stream.
 */
static bool_t
connmgr_setopt(queue_t *wq, int level, int name, calllist_t *e, cred_t *cr)
{
	mblk_t *mp;
	struct opthdr *opt;
	struct T_optmgmt_req *tor;
	struct timeval waitp;
	int error;

	mp = allocb_cred(sizeof (struct T_optmgmt_req) +
	    sizeof (struct opthdr) + sizeof (int), cr, NOPID);
	if (mp == NULL) {
		RPCLOG0(1, "connmgr_setopt: cannot alloc mp for option "
		    "request\n");
		return (FALSE);
	}

	mp->b_datap->db_type = M_PROTO;
	tor = (struct T_optmgmt_req *)(mp->b_rptr);
	tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
	tor->MGMT_flags = T_NEGOTIATE;
	tor->OPT_length = sizeof (struct opthdr) + sizeof (int);
	tor->OPT_offset = sizeof (struct T_optmgmt_req);

	opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req));
	opt->level = level;
	opt->name = name;
	opt->len = sizeof (int);
	*(int *)((char *)opt + sizeof (*opt)) = 1;
	mp->b_wptr += sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) +
	    sizeof (int);

	/*
	 * We will use this connection regardless
	 * of whether or not the option is settable.
	 */
	if (clnt_dispatch_send(wq, mp, e, 0, 0) != RPC_SUCCESS) {
		DTRACE_PROBE(krpc__e__connmgr__setopt__cantsend);
		freemsg(mp);
		return (FALSE);
	}

	mutex_enter(&clnt_pending_lock);

	waitp.tv_sec = clnt_cots_min_conntout;
	waitp.tv_usec = 0;
	error = waitforack(e, T_OPTMGMT_ACK, &waitp, 1);

	if (e->call_prev)
		e->call_prev->call_next = e->call_next;
	else
		clnt_pending = e->call_next;
	if (e->call_next)
		e->call_next->call_prev = e->call_prev;
	mutex_exit(&clnt_pending_lock);

	if (e->call_reply != NULL) {
		freemsg(e->call_reply);
		e->call_reply = NULL;
	}

	if (e->call_status != RPC_SUCCESS || error != 0) {
		RPCLOG(1, "connmgr_setopt: can't set option: %d\n", name);
		return (FALSE);
	}
	RPCLOG(8, "connmgr_setopt: successfully set option: %d\n", name);
	return (TRUE);
}

#ifdef DEBUG

/*
 * This is a knob to let us force code coverage in the allocation failure
 * case.
 */
static int connmgr_failsnd;
#define	CONN_SND_ALLOC(Size, Pri) \
	((connmgr_failsnd-- > 0) ? NULL : allocb(Size, Pri))

#else

#define	CONN_SND_ALLOC(Size, Pri)	allocb(Size, Pri)

#endif

/*
 * Sends an orderly release on the specified queue.
 * Entered with connmgr_lock held; exits without connmgr_lock held.
 */
static void
connmgr_sndrel(struct cm_xprt *cm_entry)
{
	struct T_ordrel_req *torr;
	mblk_t *mp;
	queue_t *q = cm_entry->x_wq;

	ASSERT(MUTEX_HELD(&connmgr_lock));
	mp = CONN_SND_ALLOC(sizeof (struct T_ordrel_req), BPRI_LO);
	if (mp == NULL) {
		cm_entry->x_needrel = TRUE;
		mutex_exit(&connmgr_lock);
		RPCLOG(1, "connmgr_sndrel: cannot alloc mp for sending ordrel "
		    "to queue %p\n", (void *)q);
		return;
	}
	mutex_exit(&connmgr_lock);

	mp->b_datap->db_type = M_PROTO;
	torr = (struct T_ordrel_req *)(mp->b_rptr);
	torr->PRIM_type = T_ORDREL_REQ;
	mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_req);

	RPCLOG(8, "connmgr_sndrel: sending ordrel to queue %p\n", (void *)q);
	put(q, mp);
}

/*
 * Sends a disconnect on the specified queue.
 * Entered with connmgr_lock held; exits without connmgr_lock held.
 */
static void
connmgr_snddis(struct cm_xprt *cm_entry)
{
	struct T_discon_req *tdis;
	mblk_t *mp;
	queue_t *q = cm_entry->x_wq;

	ASSERT(MUTEX_HELD(&connmgr_lock));
	mp = CONN_SND_ALLOC(sizeof (*tdis), BPRI_LO);
	if (mp == NULL) {
		cm_entry->x_needdis = TRUE;
		mutex_exit(&connmgr_lock);
		RPCLOG(1, "connmgr_snddis: cannot alloc mp for sending discon "
		    "to queue %p\n", (void *)q);
		return;
	}
	mutex_exit(&connmgr_lock);

	mp->b_datap->db_type = M_PROTO;
	tdis = (struct T_discon_req *)mp->b_rptr;
	tdis->PRIM_type = T_DISCON_REQ;
	mp->b_wptr = mp->b_rptr + sizeof (*tdis);

	RPCLOG(8, "connmgr_snddis: sending discon to queue %p\n", (void *)q);
	put(q, mp);
}

/*
 * Sets up the entry for receiving replies, and calls rpcmod's write put proc
 * (through put) to send the call.
 */
static int
clnt_dispatch_send(queue_t *q, mblk_t *mp, calllist_t *e, uint_t xid,
    uint_t queue_flag)
{
	ASSERT(e != NULL);

	e->call_status = RPC_TIMEDOUT;	/* optimistic, eh? */
	e->call_reason = 0;
	e->call_wq = q;
	e->call_xid = xid;
	e->call_notified = FALSE;

	if (!canput(q)) {
		e->call_status = RPC_CANTSEND;
		e->call_reason = ENOBUFS;
		return (RPC_CANTSEND);
	}

	/*
	 * If queue_flag is set then the calllist_t is already on the hash
	 * queue. In this case just send the message and return.
	 */
	if (queue_flag) {
		put(q, mp);
		return (RPC_SUCCESS);
	}

	/*
	 * Set up calls for RPC requests (with XID != 0) on the hash
	 * queue for fast lookups and place other calls (i.e.
	 * connection management) on the linked list.
	 */
	if (xid != 0) {
		RPCLOG(64, "clnt_dispatch_send: putting xid 0x%x on "
		    "dispatch list\n", xid);
		e->call_hash = call_hash(xid, clnt_cots_hash_size);
		e->call_bucket = &cots_call_ht[e->call_hash];
		call_table_enter(e);
	} else {
		mutex_enter(&clnt_pending_lock);
		if (clnt_pending)
			clnt_pending->call_prev = e;
		e->call_next = clnt_pending;
		e->call_prev = NULL;
		clnt_pending = e;
		mutex_exit(&clnt_pending_lock);
	}

	put(q, mp);
	return (RPC_SUCCESS);
}
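
/*
 * Illustrative sketch of how a reply finds its caller: the send side
 * above hashes the XID into cots_call_ht, and the receive side
 * (clnt_dispatch_notify() below) recomputes the same hash to locate
 * the waiting calllist_t:
 *
 *	send:	e->call_hash = call_hash(xid, clnt_cots_hash_size);
 *		call_table_enter(e);
 *
 *	reply:	hash = call_hash(xid, clnt_cots_hash_size);
 *		call_table_find(&cots_call_ht[hash], xid, e);
 *		cv_signal(&e->call_cv);
 */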

/*
 * Called by rpcmod to notify a client with a clnt_pending call that its reply
 * has arrived. If we can't find a client waiting for this reply, we log
 * the error and return.
 */
bool_t
clnt_dispatch_notify(mblk_t *mp, zoneid_t zoneid)
{
	calllist_t *e = NULL;
	call_table_t *chtp;
	uint32_t xid;
	uint_t hash;

	if ((IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) &&
	    (mp->b_wptr - mp->b_rptr) >= sizeof (xid))
		xid = *((uint32_t *)mp->b_rptr);
	else {
		int i = 0;
		unsigned char *p = (unsigned char *)&xid;
		unsigned char *rptr;
		mblk_t *tmp = mp;

		/*
		 * Copy the xid, byte by byte, out of the mblk chain.
		 */
		while (tmp) {
			rptr = tmp->b_rptr;
			while (rptr < tmp->b_wptr) {
				*p++ = *rptr++;
				if (++i >= sizeof (xid))
					goto done_xid_copy;
			}
			tmp = tmp->b_cont;
		}

		/*
		 * If we got here, we ran out of mblk space before the
		 * xid could be copied.
		 */
		ASSERT(tmp == NULL && i < sizeof (xid));

		RPCLOG0(1,
		    "clnt_dispatch_notify: message less than size of xid\n");
		return (FALSE);
	}
done_xid_copy:

	hash = call_hash(xid, clnt_cots_hash_size);
	chtp = &cots_call_ht[hash];
	/* call_table_find returns with the hash bucket locked */
	call_table_find(chtp, xid, e);

	if (e != NULL) {
		/*
		 * Found a thread waiting for this reply.
		 */
		mutex_enter(&e->call_lock);

		/*
		 * Verify that the reply is coming in on
		 * the same zone that it was sent from.
		 */
		if (e->call_zoneid != zoneid) {
			mutex_exit(&e->call_lock);
			mutex_exit(&chtp->ct_lock);
			RPCLOG0(1, "clnt_dispatch_notify: incorrect zoneid\n");
			return (FALSE);
		}

		if (e->call_reply)
			/*
			 * This can happen under the following scenario:
			 * clnt_cots_kcallit() times out on the response,
			 * rfscall() repeats the CLNT_CALL() with
			 * the same xid, clnt_cots_kcallit() sends the retry,
			 * thereby putting the clnt handle on the pending list,
			 * the first response arrives, signalling the thread
			 * in clnt_cots_kcallit(). Before that thread is
			 * dispatched, the second response arrives as well,
			 * and clnt_dispatch_notify still finds the handle on
			 * the pending list, with call_reply set. So free the
			 * old reply now.
			 *
			 * It is also possible for a response intended for
			 * an RPC call with a different xid to reside here.
			 * This can happen if the thread that owned this
			 * client handle prior to the current owner bailed
			 * out and left its call record on the dispatch
			 * queue. A window exists where the response can
			 * arrive before the current owner dispatches its
			 * RPC call.
			 *
			 * In any case, this is the very last point where we
			 * can safely check the call_reply field before
			 * placing the new response there.
			 */
			freemsg(e->call_reply);
		e->call_reply = mp;
		e->call_status = RPC_SUCCESS;
		e->call_notified = TRUE;
		cv_signal(&e->call_cv);
		mutex_exit(&e->call_lock);
		mutex_exit(&chtp->ct_lock);
		return (TRUE);
	} else {
		zone_t *zone;
		struct rpcstat *rpcstat;

		mutex_exit(&chtp->ct_lock);
		RPCLOG(65, "clnt_dispatch_notify: no caller for reply 0x%x\n",
		    xid);
		/*
		 * This is unfortunate, but we need to look up the zone so we
		 * can increment its "rcbadxids" counter.
		 */
		zone = zone_find_by_id(zoneid);
		if (zone == NULL) {
			/*
			 * The zone went away...
			 */
			return (FALSE);
		}
		rpcstat = zone_getspecific(rpcstat_zone_key, zone);
		if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN) {
			/*
			 * Not interested.
			 */
			zone_rele(zone);
			return (FALSE);
		}
		COTSRCSTAT_INCR(rpcstat->rpc_cots_client, rcbadxids);
		zone_rele(zone);
	}
	return (FALSE);
}

/*
 * Called by rpcmod when a non-data indication arrives. The ones in which we
 * are interested are connection indications and options acks. We dispatch
 * based on the queue the indication came in on. If we are not interested in
 * what came in, we return false to rpcmod, who will then pass it upstream.
 */
bool_t
clnt_dispatch_notifyconn(queue_t *q, mblk_t *mp)
{
	calllist_t *e;
	int type;

	ASSERT((q->q_flag & QREADR) == 0);

	type = ((union T_primitives *)mp->b_rptr)->type;
	RPCLOG(8, "clnt_dispatch_notifyconn: prim type: [%s]\n",
	    rpc_tpiprim2name(type));
	mutex_enter(&clnt_pending_lock);
	for (e = clnt_pending; /* NO CONDITION */; e = e->call_next) {
		if (e == NULL) {
			mutex_exit(&clnt_pending_lock);
			RPCLOG(1, "clnt_dispatch_notifyconn: no one waiting "
			    "for connection on queue 0x%p\n", (void *)q);
			return (FALSE);
		}
		if (e->call_wq == q)
			break;
	}

	switch (type) {
	case T_CONN_CON:
		/*
		 * The transport is now connected; send a T_INFO_REQ to get
		 * the tidu size.
		 */
		mutex_exit(&clnt_pending_lock);
		ASSERT(mp->b_datap->db_lim - mp->b_datap->db_base >=
		    sizeof (struct T_info_req));
		mp->b_rptr = mp->b_datap->db_base;
		((union T_primitives *)mp->b_rptr)->type = T_INFO_REQ;
		mp->b_wptr = mp->b_rptr + sizeof (struct T_info_req);
		mp->b_datap->db_type = M_PCPROTO;
		put(q, mp);
		return (TRUE);
	case T_INFO_ACK:
	case T_OPTMGMT_ACK:
		e->call_status = RPC_SUCCESS;
		e->call_reply = mp;
		e->call_notified = TRUE;
		cv_signal(&e->call_cv);
		break;
	case T_ERROR_ACK:
		e->call_status = RPC_CANTCONNECT;
		e->call_reply = mp;
		e->call_notified = TRUE;
		cv_signal(&e->call_cv);
		break;
	case T_OK_ACK:
		/*
		 * Great, but we are really waiting for a T_CONN_CON.
		 */
		freemsg(mp);
		break;
	default:
		mutex_exit(&clnt_pending_lock);
		RPCLOG(1, "clnt_dispatch_notifyconn: bad type %d\n", type);
		return (FALSE);
	}

	mutex_exit(&clnt_pending_lock);
	return (TRUE);
}

/*
 * Called by rpcmod when the transport is (or should be) going away. Informs
 * all callers waiting for replies and marks the entry in the connection
 * manager's list as unconnected, and either closing (close handshake in
 * progress) or dead.
 */
void
clnt_dispatch_notifyall(queue_t *q, int32_t msg_type, int32_t reason)
{
	calllist_t *e;
	call_table_t *ctp;
	struct cm_xprt *cm_entry;
	int have_connmgr_lock;
	int i;

	ASSERT((q->q_flag & QREADR) == 0);

	RPCLOG(1, "clnt_dispatch_notifyall on queue %p", (void *)q);
	RPCLOG(1, " received a notification prim type [%s]",
	    rpc_tpiprim2name(msg_type));
	RPCLOG(1, " and reason %d\n", reason);

	/*
	 * Find the transport entry in the connection manager's list, close
	 * the transport and delete the entry. In the case where rpcmod's
	 * idle timer goes off, it sends us a T_ORDREL_REQ, indicating we
	 * should gracefully close the connection.
	 */
	have_connmgr_lock = 1;
	mutex_enter(&connmgr_lock);
	for (cm_entry = cm_hd; cm_entry; cm_entry = cm_entry->x_next) {
		ASSERT(cm_entry != cm_entry->x_next);
		if (cm_entry->x_wq == q) {
			ASSERT(MUTEX_HELD(&connmgr_lock));
			ASSERT(have_connmgr_lock == 1);
			switch (msg_type) {
			case T_ORDREL_REQ:

				if (cm_entry->x_dead) {
					RPCLOG(1, "idle timeout on dead "
					    "connection: %p\n",
					    (void *)cm_entry);
					if (clnt_stop_idle != NULL)
						(*clnt_stop_idle)(q);
					break;
				}

				/*
				 * Only mark the connection as dead if it is
				 * connected and idle.
				 * An unconnected connection has probably
				 * gone idle because the server is down,
				 * and when it comes back up there will be
				 * retries that need to use that connection.
				 */
				if (cm_entry->x_connected ||
				    cm_entry->x_doomed) {
					if (cm_entry->x_ordrel) {
						if (cm_entry->x_closing ==
						    TRUE) {
							/*
							 * The connection is
							 * obviously wedged due
							 * to a bug or problem
							 * with the transport.
							 * Mark it as dead.
							 * Otherwise we can
							 * leak connections.
							 */
							cm_entry->x_dead = TRUE;
							mutex_exit(
							    &connmgr_lock);
							have_connmgr_lock = 0;
							if (clnt_stop_idle !=
							    NULL)
								(*clnt_stop_idle)(q);
							break;
						}
						cm_entry->x_closing = TRUE;
						connmgr_sndrel(cm_entry);
						have_connmgr_lock = 0;
					} else {
						cm_entry->x_dead = TRUE;
						mutex_exit(&connmgr_lock);
						have_connmgr_lock = 0;
						if (clnt_stop_idle != NULL)
							(*clnt_stop_idle)(q);
					}
				} else {
					/*
					 * We don't mark the connection
					 * as dead, but we turn off the
					 * idle timer.
					 */
					mutex_exit(&connmgr_lock);
					have_connmgr_lock = 0;
					if (clnt_stop_idle != NULL)
						(*clnt_stop_idle)(q);
					RPCLOG(1, "clnt_dispatch_notifyall:"
					    " ignoring timeout from rpcmod"
					    " (q %p) because we are not"
					    " connected\n", (void *)q);
				}
				break;
			case T_ORDREL_IND:
				/*
				 * If this entry is marked closing, then we are
				 * completing a close handshake, and the
				 * connection is dead. Otherwise, the server is
				 * trying to close. Since the server will not
				 * be sending any more RPC replies, we abort
				 * the connection, including flushing
				 * any RPC requests that are in-transit.
				 * In either case, mark the entry as dead so
				 * that it can be closed by the connection
				 * manager's garbage collector.
				 */
				cm_entry->x_dead = TRUE;
				if (cm_entry->x_closing) {
					mutex_exit(&connmgr_lock);
					have_connmgr_lock = 0;
					if (clnt_stop_idle != NULL)
						(*clnt_stop_idle)(q);
				} else {
					/*
					 * If we're getting a disconnect
					 * before we've finished our
					 * connect attempt, mark it for
					 * later processing.
					 */
					if (cm_entry->x_thread)
						cm_entry->x_early_disc = TRUE;
					else
						cm_entry->x_connected = FALSE;
					cm_entry->x_waitdis = TRUE;
					connmgr_snddis(cm_entry);
					have_connmgr_lock = 0;
				}
				break;

			case T_ERROR_ACK:
			case T_OK_ACK:
				cm_entry->x_waitdis = FALSE;
				cv_signal(&cm_entry->x_dis_cv);
				mutex_exit(&connmgr_lock);
				return;

			case T_DISCON_REQ:
				if (cm_entry->x_thread)
					cm_entry->x_early_disc = TRUE;
				else
					cm_entry->x_connected = FALSE;
				cm_entry->x_waitdis = TRUE;

				connmgr_snddis(cm_entry);
				have_connmgr_lock = 0;
				break;

			case T_DISCON_IND:
			default:
				/*
				 * If we're getting a disconnect before
				 * we've finished our connect attempt,
				 * mark it for later processing.
				 */
				if (cm_entry->x_closing) {
					cm_entry->x_dead = TRUE;
					mutex_exit(&connmgr_lock);
					have_connmgr_lock = 0;
					if (clnt_stop_idle != NULL)
						(*clnt_stop_idle)(q);
				} else {
					if (cm_entry->x_thread) {
						cm_entry->x_early_disc = TRUE;
					} else {
						cm_entry->x_dead = TRUE;
						cm_entry->x_connected = FALSE;
					}
				}
				break;
			}
			break;
		}
	}

	if (have_connmgr_lock)
		mutex_exit(&connmgr_lock);

	if (msg_type == T_ERROR_ACK || msg_type == T_OK_ACK) {
		RPCLOG(1, "clnt_dispatch_notifyall: (wq %p) could not find "
		    "connmgr entry for discon ack\n", (void *)q);
		return;
	}

	/*
	 * Then kick all the clnt_pending calls out of their wait. There
	 * should be no clnt_pending calls in the case of rpcmod's idle
	 * timer firing.
	 */
	for (i = 0; i < clnt_cots_hash_size; i++) {
		ctp = &cots_call_ht[i];
		mutex_enter(&ctp->ct_lock);
		for (e = ctp->ct_call_next;
		    e != (calllist_t *)ctp;
		    e = e->call_next) {
			if (e->call_wq == q && e->call_notified == FALSE) {
				RPCLOG(1,
				    "clnt_dispatch_notifyall for queue %p ",
				    (void *)q);
				RPCLOG(1, "aborting clnt_pending call %p\n",
				    (void *)e);

				if (msg_type == T_DISCON_IND)
					e->call_reason = reason;
				e->call_notified = TRUE;
				e->call_status = RPC_XPRTFAILED;
				cv_signal(&e->call_cv);
			}
		}
		mutex_exit(&ctp->ct_lock);
	}

	mutex_enter(&clnt_pending_lock);
	for (e = clnt_pending; e; e = e->call_next) {
		/*
		 * Only signal those RPC handles that haven't been
		 * signalled yet. Otherwise we can get a bogus call_reason.
		 * This can happen if thread A is making a call over a
		 * connection. If the server is killed, it will cause a
		 * reset, and reason will default to EIO as a result of
		 * a T_ORDREL_IND. Thread B then attempts to recreate
		 * the connection but gets a T_DISCON_IND. If we set the
		 * call_reason code for all threads, then if thread A
		 * hasn't been dispatched yet, it will get the wrong
		 * reason. The bogus call_reason can make it harder to
		 * discriminate between calls that fail because the
		 * connection attempt failed versus those where the call
		 * may have been executed on the server.
		 */
		if (e->call_wq == q && e->call_notified == FALSE) {
			RPCLOG(1, "clnt_dispatch_notifyall for queue %p ",
			    (void *)q);
			RPCLOG(1, " aborting clnt_pending call %p\n",
			    (void *)e);

			if (msg_type == T_DISCON_IND)
				e->call_reason = reason;
			e->call_notified = TRUE;
			/*
			 * Let the caller time out, else it will retry
			 * immediately.
			 */
			e->call_status = RPC_XPRTFAILED;

			/*
			 * We used to just signal those threads
			 * waiting for a connection (call_xid = 0).
			 * That meant that threads waiting for a response
			 * waited until their timeout expired. This
			 * could be a long time if they've specified a
			 * maximum timeout (2^31 - 1), so we now
			 * signal all threads.
			 */
			cv_signal(&e->call_cv);
		}
	}
	mutex_exit(&clnt_pending_lock);
}

/*ARGSUSED*/
/*
 * After resuming a system that's been suspended for longer than the
 * NFS server's idle timeout (svc_idle_timeout for Solaris 2), rfscall()
 * generates "NFS server X not responding" and "NFS server X ok" messages;
 * here we reset inet connections to cause a re-connect and avoid those
 * NFS messages. See bug 4045054.
 */
boolean_t
connmgr_cpr_reset(void *arg, int code)
{
	struct cm_xprt *cxp;

	if (code == CB_CODE_CPR_CHKPT)
		return (B_TRUE);

	if (mutex_tryenter(&connmgr_lock) == 0)
		return (B_FALSE);
	for (cxp = cm_hd; cxp; cxp = cxp->x_next) {
		if ((cxp->x_family == AF_INET || cxp->x_family == AF_INET6) &&
		    cxp->x_connected == TRUE) {
			if (cxp->x_thread)
				cxp->x_early_disc = TRUE;
			else
				cxp->x_connected = FALSE;
			cxp->x_needdis = TRUE;
		}
	}
	mutex_exit(&connmgr_lock);
	return (B_TRUE);
}

void
clnt_cots_stats_init(zoneid_t zoneid, struct rpc_cots_client **statsp)
{
	*statsp = (struct rpc_cots_client *)rpcstat_zone_init_common(zoneid,
	    "unix", "rpc_cots_client", (const kstat_named_t *)&cots_rcstat_tmpl,
	    sizeof (cots_rcstat_tmpl));
}

void
clnt_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_client **statsp)
{
	rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_client");
	kmem_free(*statsp, sizeof (cots_rcstat_tmpl));
}

void
clnt_cots_init(void)
{
	mutex_init(&connmgr_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&clnt_pending_lock, NULL, MUTEX_DEFAULT, NULL);

	if (clnt_cots_hash_size < DEFAULT_MIN_HASH_SIZE)
		clnt_cots_hash_size = DEFAULT_MIN_HASH_SIZE;

	cots_call_ht = call_table_init(clnt_cots_hash_size);
	zone_key_create(&zone_cots_key, NULL, NULL, clnt_zone_destroy);
}

void
clnt_cots_fini(void)
{
	(void) zone_key_delete(zone_cots_key);
}

/*
 * Wait for a TPI ack; returns success only if the expected ack is received
 * within the timeout period.
 */
static int
waitforack(calllist_t *e, t_scalar_t ack_prim, const struct timeval *waitp,
    bool_t nosignal)
{
	union T_primitives *tpr;
	clock_t timout;
	int cv_stat = 1;

	ASSERT(MUTEX_HELD(&clnt_pending_lock));
	while (e->call_reply == NULL) {
		if (waitp != NULL) {
			timout = waitp->tv_sec * drv_usectohz(MICROSEC) +
			    drv_usectohz(waitp->tv_usec) + lbolt;
			if (nosignal)
				cv_stat = cv_timedwait(&e->call_cv,
				    &clnt_pending_lock, timout);
			else
				cv_stat = cv_timedwait_sig(&e->call_cv,
				    &clnt_pending_lock, timout);
		} else {
			if (nosignal)
				cv_wait(&e->call_cv, &clnt_pending_lock);
			else
				cv_stat = cv_wait_sig(&e->call_cv,
				    &clnt_pending_lock);
		}
		if (cv_stat == -1)
			return (ETIME);
		if (cv_stat == 0)
			return (EINTR);
		/*
		 * If we received an error from the server and we know a reply
		 * is not going to be sent, do not wait for the full timeout;
		 * return now.
		 */
		if (e->call_status == RPC_XPRTFAILED)
			return (e->call_reason);
	}
	tpr = (union T_primitives *)e->call_reply->b_rptr;
	if (tpr->type == ack_prim)
		return (0);	/* Success */

	if (tpr->type == T_ERROR_ACK) {
		if (tpr->error_ack.TLI_error == TSYSERR)
			return (tpr->error_ack.UNIX_error);
		else
			return (t_tlitosyserr(tpr->error_ack.TLI_error));
	}

	return (EPROTO);	/* unknown or unexpected primitive */
}
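
/*
 * Illustrative caller pattern (sketch only) for waitforack(), following
 * its use in connmgr_connect() and connmgr_setopt() above: the calllist_t
 * is queued via clnt_dispatch_send() with an XID of 0, and
 * clnt_pending_lock is held across the wait:
 *
 *	if (clnt_dispatch_send(wq, mp, e, 0, 0) != RPC_SUCCESS)
 *		return (FALSE);
 *	mutex_enter(&clnt_pending_lock);
 *	error = waitforack(e, T_OPTMGMT_ACK, &waitp, 1);
 *	... unlink e from the clnt_pending list ...
 *	mutex_exit(&clnt_pending_lock);
 */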