/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
 * All Rights Reserved
 */

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Implements a kernel based, client side RPC over Connection Oriented
 * Transports (COTS).
 */

/*
 * Much of this file has been re-written to let NFS work better over slow
 * transports.  A description follows.
 *
 * One of the annoying things about kRPC/COTS is that it will temporarily
 * create more than one connection between a client and server.  This
 * happens because when a connection is made, the end-point's entry in the
 * linked list of connections (headed by cm_hd) is removed so that other
 * threads don't mess with it.  We went ahead and bit the bullet by keeping
 * the endpoint on the connection list and introducing state bits,
 * condition variables, etc. to the connection entry data structure (struct
 * cm_xprt).
 *
 * Here is a summary of the changes to cm_xprt:
 *
 *	x_ctime is the timestamp of when the endpoint was last
 *	connected or disconnected.  If an end-point is ever disconnected
 *	or re-connected, then any outstanding RPC request is presumed
 *	lost, telling clnt_cots_kcallit that it needs to re-send the
 *	request, not just wait for the original request's reply to
 *	arrive.
 *
 *	x_thread flag which tells us if a thread is doing a connection attempt.
 *
 *	x_waitdis flag which tells us we are waiting for a disconnect ACK.
 *
 *	x_needdis flag which tells us we need to send a T_DISCON_REQ
 *	to kill the connection.
 *
 *	x_needrel flag which tells us we need to send a T_ORDREL_REQ to
 *	gracefully close the connection.
 *
 *	#defined bitmasks for all the b_* bits so that more
 *	efficient (and at times less clumsy) masks can be used to
 *	manipulate state in cases where multiple bits have to be
 *	set/cleared/checked in the same critical section.
 *
 *	x_conn_cv and x_dis_cv are new condition variables to let
 *	threads know when the connection attempt is done, and to let
 *	the connecting thread know when the disconnect handshake is
 *	done.
 *
 * Added the CONN_HOLD() macro so that all reference holds have the same
 * look and feel.
 *
 * In the private (cku_private) portion of the client handle,
 *
 *	cku_flags replaces cku_sent, a boolean.  cku_flags keeps
 *	track of whether a request has been sent, and whether the
 *	client handle's call record is on the dispatch list (so that
 *	the reply can be matched by XID to the right client handle).
 *	The idea of CKU_ONQUEUE is that we can exit clnt_cots_kcallit()
 *	and still have the response find the right client handle so
 *	that the retry of CLNT_CALL() gets the result.  Testing found
 *	situations where if the timeout was increased, performance
 *	degraded.  This was due to us hitting a window where the thread
 *	was back in rfscall() (probably printing "server not responding")
 *	while the response came back but there was no place to put it.
 *
 *	cku_ctime is just a cache of x_ctime.  If they match,
 *	clnt_cots_kcallit() won't send a retry (unless the maximum
 *	receive count limit has been reached).  If they don't match, then
 *	we assume the request has been lost, and a retry of the request
 *	is needed.
 *
 *	cku_recv_attempts counts the number of receive attempts made
 *	after one try is sent on the wire.
 *
 * Added the clnt_delay() routine so that interruptible and
 * noninterruptible delays are possible.
 *
 * CLNT_MIN_TIMEOUT has been bumped to 10 seconds from 3.  This is used to
 * control how long the client delays before returning after getting
 * ECONNREFUSED.  At 3 seconds, 8 client threads per mount really does bash
 * a server that may be booting and not yet have started nfsd.
 *
 * CLNT_MAXRECV_WITHOUT_RETRY is a new macro (value of 3) (with a tunable).
 * Why don't we just wait forever (receive an infinite # of times)?
 * Because the server may have rebooted.  More insidious is that some
 * servers (ours) will drop NFS/TCP requests in some cases.  This is bad,
 * but it is a reality.
 *
 * The case of a server doing orderly release really messes up the
 * client's recovery, especially if the server's TCP implementation is
 * buggy.  It was found that the kRPC/COTS client was breaking some
 * TPI rules, such as not waiting for the acknowledgement of a
 * T_DISCON_REQ (hence the added case statements T_ERROR_ACK, T_OK_ACK and
 * T_DISCON_REQ in clnt_dispatch_notifyall()).
 *
 * One of the things that we've seen is that a kRPC TCP endpoint goes into
 * TIMEWAIT and thus a reconnect takes a long time to satisfy because
 * the TIMEWAIT state takes a while to finish.  If a server sends a
 * T_ORDREL_IND, there is little point in an RPC client doing a
 * T_ORDREL_REQ, because the RPC request isn't going to make it (the
 * server is saying that it won't accept any more data).  So kRPC was
 * changed to send a T_DISCON_REQ when we get a T_ORDREL_IND.  So now the
 * connection skips the TIMEWAIT state and goes straight to a bound state
 * that kRPC can quickly switch to connected.
 *
 * Code that issues TPI requests must use waitforack() to wait for the
 * corresponding ack (assuming there is one) in any future modifications.
 * This works around problems that may be introduced by breaking TPI rules
 * (by submitting new calls before earlier requests have been acked) in the
 * case of a signal or other early return.  waitforack() depends on
 * clnt_dispatch_notifyconn() to issue the wakeup when the ack
 * arrives, so adding new TPI calls may require corresponding changes
 * to clnt_dispatch_notifyconn().  Presently, the timeout period is based on
 * CLNT_MIN_TIMEOUT which is 10 seconds.  If you modify this value, be sure
 * not to set it too low or TPI ACKS will be lost.
 */
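/*
 * Added illustration (not original code): the general shape of the
 * ack-wait described above.  Names and details are simplified
 * assumptions; see waitforack() and clnt_dispatch_notifyconn() for
 * the real logic.
 *
 *	mutex_enter(&call->call_lock);
 *	while (call->call_status == RPC_TIMEDOUT) {
 *		(clnt_dispatch_notifyconn() cv_signals us when the
 *		T_OK_ACK or T_ERROR_ACK arrives)
 *		if (cv_timedwait(&call->call_cv, &call->call_lock,
 *		    lbolt + clnt_cots_min_tout * drv_usectohz(1000000)) < 0)
 *			break;	(timed out; the ack was presumably lost)
 *	}
 *	mutex_exit(&call->call_lock);
 */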
#include <sys/param.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/file.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/timod.h>
#include <sys/tiuser.h>
#include <sys/tihdr.h>
#include <sys/t_kuser.h>
#include <sys/fcntl.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/kstat.h>
#include <sys/t_lock.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/isa_defs.h>
#include <sys/callb.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <rpc/types.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>

#define	COTS_DEFAULT_ALLOCSIZE	2048

#define	WIRE_HDR_SIZE	20	/* serialized call header, sans proc number */
#define	MSG_OFFSET	128	/* offset of call into the mblk */
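/*
 * Added note, based on the standard RPC call header layout: the header
 * that xdr_callhdr() preserializes is five 4-byte XDR words, with the
 * procedure number (a sixth word) filled in at call time:
 *
 *	xid(4) + direction(4) + rpcvers(4) + prog(4) + vers(4) = 20 bytes
 *
 * hence WIRE_HDR_SIZE is 20, and cku_rpchdr below is WIRE_HDR_SIZE + 4.
 */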
const char *kinet_ntop6(uchar_t *, char *, size_t);

static int	clnt_cots_ksettimers(CLIENT *, struct rpc_timers *,
    struct rpc_timers *, int, void (*)(int, int, caddr_t), caddr_t, uint32_t);
static enum clnt_stat	clnt_cots_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
    caddr_t, xdrproc_t, caddr_t, struct timeval);
static void	clnt_cots_kabort(CLIENT *);
static void	clnt_cots_kerror(CLIENT *, struct rpc_err *);
static bool_t	clnt_cots_kfreeres(CLIENT *, xdrproc_t, caddr_t);
static void	clnt_cots_kdestroy(CLIENT *);
static bool_t	clnt_cots_kcontrol(CLIENT *, int, char *);


/* List of transports managed by the connection manager. */
struct cm_xprt {
	TIUSER		*x_tiptr;	/* transport handle */
	queue_t		*x_wq;		/* send queue */
	clock_t		x_time;		/* last time we handed this xprt out */
	clock_t		x_ctime;	/* time we went to CONNECTED */
	int		x_tidu_size;	/* TIDU size of this transport */
	union {
	    struct {
		unsigned int
#ifdef	_BIT_FIELDS_HTOL
		b_closing:	1,	/* we've sent a ord rel on this conn */
		b_dead:		1,	/* transport is closed or disconn */
		b_doomed:	1,	/* too many conns, let this go idle */
		b_connected:	1,	/* this connection is connected */

		b_ordrel:	1,	/* do an orderly release? */
		b_thread:	1,	/* thread doing connect */
		b_waitdis:	1,	/* waiting for disconnect ACK */
		b_needdis:	1,	/* need T_DISCON_REQ */

		b_needrel:	1,	/* need T_ORDREL_REQ */
		b_early_disc:	1,	/* got a T_ORDREL_IND or T_DISCON_IND */
					/* disconnect during connect */

		b_pad:		22;

#endif

#ifdef	_BIT_FIELDS_LTOH
		b_pad:		22,

		b_early_disc:	1,	/* got a T_ORDREL_IND or T_DISCON_IND */
					/* disconnect during connect */
		b_needrel:	1,	/* need T_ORDREL_REQ */

		b_needdis:	1,	/* need T_DISCON_REQ */
		b_waitdis:	1,	/* waiting for disconnect ACK */
		b_thread:	1,	/* thread doing connect */
		b_ordrel:	1,	/* do an orderly release? */

		b_connected:	1,	/* this connection is connected */
		b_doomed:	1,	/* too many conns, let this go idle */
		b_dead:		1,	/* transport is closed or disconn */
		b_closing:	1;	/* we've sent a ord rel on this conn */
#endif
	    } bit;
	    unsigned int word;

#define	x_closing	x_state.bit.b_closing
#define	x_dead		x_state.bit.b_dead
#define	x_doomed	x_state.bit.b_doomed
#define	x_connected	x_state.bit.b_connected

#define	x_ordrel	x_state.bit.b_ordrel
#define	x_thread	x_state.bit.b_thread
#define	x_waitdis	x_state.bit.b_waitdis
#define	x_needdis	x_state.bit.b_needdis

#define	x_needrel	x_state.bit.b_needrel
#define	x_early_disc	x_state.bit.b_early_disc

#define	x_state_flags	x_state.word

#define	X_CLOSING	0x80000000
#define	X_DEAD		0x40000000
#define	X_DOOMED	0x20000000
#define	X_CONNECTED	0x10000000

#define	X_ORDREL	0x08000000
#define	X_THREAD	0x04000000
#define	X_WAITDIS	0x02000000
#define	X_NEEDDIS	0x01000000

#define	X_NEEDREL	0x00800000
#define	X_EARLYDISC	0x00400000

#define	X_BADSTATES	(X_CLOSING | X_DEAD | X_DOOMED)

	} x_state;
	int		x_ref;		/* number of users of this xprt */
	int		x_family;	/* address family of transport */
	dev_t		x_rdev;		/* device number of transport */
	struct cm_xprt	*x_next;

	struct netbuf	x_server;	/* destination address */
	struct netbuf	x_src;		/* src address (for retries) */
	kmutex_t	x_lock;		/* lock on this entry */
	kcondvar_t	x_cv;		/* to signal when can be closed */
	kcondvar_t	x_conn_cv;	/* to signal when connection attempt */
					/* is complete */
	kstat_t		*x_ksp;

	kcondvar_t	x_dis_cv;	/* to signal when disconnect attempt */
					/* is complete */
	zoneid_t	x_zoneid;	/* zone this xprt belongs to */
};
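/*
 * Added note: the union above allows the same state to be manipulated
 * either one bit at a time (e.g. x_connected = TRUE) or wholesale via
 * the X_* masks over x_state_flags.  A minimal sketch of the mask
 * style, assuming the appropriate lock is held:
 *
 *	if (cm_entry->x_state_flags & X_BADSTATES)
 *		reject this transport (closing, dead, or doomed);
 *	cm_entry->x_state_flags |= (X_NEEDDIS | X_WAITDIS);
 *	cm_entry->x_state_flags &= ~X_CONNECTED;
 */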
typedef struct cm_kstat_xprt {
	kstat_named_t	x_wq;
	kstat_named_t	x_server;
	kstat_named_t	x_family;
	kstat_named_t	x_rdev;
	kstat_named_t	x_time;
	kstat_named_t	x_state;
	kstat_named_t	x_ref;
	kstat_named_t	x_port;
} cm_kstat_xprt_t;

static cm_kstat_xprt_t cm_kstat_template = {
	{ "write_queue", KSTAT_DATA_UINT32 },
	{ "server", KSTAT_DATA_STRING },
	{ "addr_family", KSTAT_DATA_UINT32 },
	{ "device", KSTAT_DATA_UINT32 },
	{ "time_stamp", KSTAT_DATA_UINT32 },
	{ "status", KSTAT_DATA_UINT32 },
	{ "ref_count", KSTAT_DATA_INT32 },
	{ "port", KSTAT_DATA_UINT32 },
};

/*
 * The inverse of this is connmgr_release().
 */
#define	CONN_HOLD(Cm_entry)	{\
	mutex_enter(&(Cm_entry)->x_lock);	\
	(Cm_entry)->x_ref++;	\
	mutex_exit(&(Cm_entry)->x_lock);	\
}


/*
 * Private data per rpc handle.  This structure is allocated by
 * clnt_cots_kcreate, and freed by clnt_cots_kdestroy.
 */
typedef struct cku_private_s {
	CLIENT			cku_client;	/* client handle */
	calllist_t		cku_call;	/* for dispatching calls */
	struct rpc_err		cku_err;	/* error status */

	struct netbuf		cku_srcaddr;	/* source address for retries */
	int			cku_addrfmly;	/* for binding port */
	struct netbuf		cku_addr;	/* remote address */
	dev_t			cku_device;	/* device to use */
	uint_t			cku_flags;
#define	CKU_ONQUEUE		0x1
#define	CKU_SENT		0x2

	bool_t			cku_progress;	/* for CLSET_PROGRESS */
	uint32_t		cku_xid;	/* current XID */
	clock_t			cku_ctime;	/* time stamp of when */
						/* connection was created */
	uint_t			cku_recv_attempts;
	XDR			cku_outxdr;	/* xdr routine for output */
	XDR			cku_inxdr;	/* xdr routine for input */
	char			cku_rpchdr[WIRE_HDR_SIZE + 4];
						/* pre-serialized rpc header */

	uint_t			cku_outbuflen;	/* default output mblk length */
	struct cred		*cku_cred;	/* credentials */
	bool_t			cku_nodelayonerr;
						/* for CLSET_NODELAYONERR */
	int			cku_useresvport; /* Use reserved port */
	struct rpc_cots_client	*cku_stats;	/* stats for zone */
} cku_private_t;
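/*
 * Added usage note: CONN_HOLD() (defined above) must be balanced by a
 * later connmgr_release().  A hedged sketch of the typical pairing:
 *
 *	CONN_HOLD(cm_entry);		(x_ref++ under x_lock)
 *	... use cm_entry->x_wq to send ...
 *	connmgr_release(cm_entry);	(x_ref--; wake any waiters)
 */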
static struct cm_xprt *connmgr_wrapconnect(struct cm_xprt *,
	const struct timeval *, struct netbuf *, int, struct netbuf *,
	struct rpc_err *, bool_t, bool_t);

static bool_t	connmgr_connect(struct cm_xprt *, queue_t *, struct netbuf *,
				int, calllist_t *, int *, bool_t reconnect,
				const struct timeval *, bool_t);

static bool_t	connmgr_setopt(queue_t *, int, int, calllist_t *);
static void	connmgr_sndrel(struct cm_xprt *);
static void	connmgr_snddis(struct cm_xprt *);
static void	connmgr_close(struct cm_xprt *);
static void	connmgr_release(struct cm_xprt *);
static struct cm_xprt *connmgr_wrapget(struct netbuf *, const struct timeval *,
	cku_private_t *);

static struct cm_xprt *connmgr_get(struct netbuf *, const struct timeval *,
	struct netbuf *, int, struct netbuf *, struct rpc_err *, dev_t,
	bool_t, int);

static void connmgr_cancelconn(struct cm_xprt *);
static enum clnt_stat connmgr_cwait(struct cm_xprt *, const struct timeval *,
	bool_t);
static void connmgr_dis_and_wait(struct cm_xprt *);

static void	clnt_dispatch_send(queue_t *, mblk_t *, calllist_t *, uint_t,
					uint_t);

static int clnt_delay(clock_t, bool_t);

static int waitforack(calllist_t *, t_scalar_t, const struct timeval *, bool_t);

/*
 * Operations vector for TCP/IP based RPC
 */
static struct clnt_ops tcp_ops = {
	clnt_cots_kcallit,	/* do rpc call */
	clnt_cots_kabort,	/* abort call */
	clnt_cots_kerror,	/* return error status */
	clnt_cots_kfreeres,	/* free results */
	clnt_cots_kdestroy,	/* destroy rpc handle */
	clnt_cots_kcontrol,	/* the ioctl() of rpc */
	clnt_cots_ksettimers,	/* set retry timers */
};

static int rpc_kstat_instance = 0;	/* keeps the current instance */
					/* number for the next kstat_create */

static struct cm_xprt *cm_hd = NULL;
static kmutex_t connmgr_lock;	/* for connection mngr's list of transports */

extern kmutex_t clnt_max_msg_lock;

static calllist_t *clnt_pending = NULL;
extern kmutex_t clnt_pending_lock;

static int clnt_cots_hash_size = DEFAULT_HASH_SIZE;

static call_table_t *cots_call_ht;

static const struct rpc_cots_client {
	kstat_named_t	rccalls;
	kstat_named_t	rcbadcalls;
	kstat_named_t	rcbadxids;
	kstat_named_t	rctimeouts;
	kstat_named_t	rcnewcreds;
	kstat_named_t	rcbadverfs;
	kstat_named_t	rctimers;
	kstat_named_t	rccantconn;
	kstat_named_t	rcnomem;
	kstat_named_t	rcintrs;
} cots_rcstat_tmpl = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "badxids",	KSTAT_DATA_UINT64 },
	{ "timeouts",	KSTAT_DATA_UINT64 },
	{ "newcreds",	KSTAT_DATA_UINT64 },
	{ "badverfs",	KSTAT_DATA_UINT64 },
	{ "timers",	KSTAT_DATA_UINT64 },
	{ "cantconn",	KSTAT_DATA_UINT64 },
	{ "nomem",	KSTAT_DATA_UINT64 },
	{ "interrupts", KSTAT_DATA_UINT64 }
};

#define	COTSRCSTAT_INCR(p, x)	\
	atomic_add_64(&(p)->x.value.ui64, 1)

#define	CLNT_MAX_CONNS	1	/* concurrent connections between clnt/srvr */
static int clnt_max_conns = CLNT_MAX_CONNS;

#define	CLNT_MIN_TIMEOUT	10	/* seconds to wait after we get a */
					/* connection reset */
#define	CLNT_MIN_CONNTIMEOUT	5	/* seconds to wait for a connection */


static int clnt_cots_min_tout = CLNT_MIN_TIMEOUT;
static int clnt_cots_min_conntout = CLNT_MIN_CONNTIMEOUT;

/*
 * Limit the number of times we will attempt to receive a reply without
 * re-sending a request.
 */
#define	CLNT_MAXRECV_WITHOUT_RETRY	3
static uint_t clnt_cots_maxrecv = CLNT_MAXRECV_WITHOUT_RETRY;

uint_t *clnt_max_msg_sizep;
void (*clnt_stop_idle)(queue_t *wq);

#define	ptoh(p)		(&((p)->cku_client))
#define	htop(h)		((cku_private_t *)((h)->cl_private))

/*
 * Times to retry
 */
#define	REFRESHES	2	/* authentication refreshes */

/*
 * The following is used to determine the global default behavior for
 * COTS when binding to a local port.
 *
 * If the value is set to 1 the default will be to select a reserved
 * (aka privileged) port, if the value is zero the default will be to
 * use non-reserved ports.  Users of kRPC may override this by using
 * CLNT_CONTROL() and CLSET_BINDRESVPORT.
 */
static int clnt_cots_do_bindresvport = 1;

static zone_key_t zone_cots_key;

/*
 * We need to do this after all kernel threads in the zone have exited.
 */
/* ARGSUSED */
static void
clnt_zone_destroy(zoneid_t zoneid, void *unused)
{
	struct cm_xprt **cmp;
	struct cm_xprt *cm_entry;
	struct cm_xprt *freelist = NULL;

	mutex_enter(&connmgr_lock);
	cmp = &cm_hd;
	while ((cm_entry = *cmp) != NULL) {
		if (cm_entry->x_zoneid == zoneid) {
			*cmp = cm_entry->x_next;
			cm_entry->x_next = freelist;
			freelist = cm_entry;
		} else {
			cmp = &cm_entry->x_next;
		}
	}
	mutex_exit(&connmgr_lock);
	while ((cm_entry = freelist) != NULL) {
		freelist = cm_entry->x_next;
		connmgr_close(cm_entry);
	}
}
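/*
 * Added commentary: clnt_zone_destroy() above runs as the zone_cots_key
 * destructor.  The registration is outside this excerpt; presumably it
 * happens at module initialization, along the lines of this
 * hypothetical sketch:
 *
 *	zone_key_create(&zone_cots_key, NULL, NULL, clnt_zone_destroy);
 */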
int
clnt_cots_kcreate(dev_t dev, struct netbuf *addr, int family, rpcprog_t prog,
	rpcvers_t vers, uint_t max_msgsize, cred_t *cred, CLIENT **ncl)
{
	CLIENT *h;
	cku_private_t *p;
	struct rpc_msg call_msg;
	struct rpcstat *rpcstat;

	RPCLOG(8, "clnt_cots_kcreate: prog %u\n", prog);

	rpcstat = zone_getspecific(rpcstat_zone_key, rpc_zone());
	ASSERT(rpcstat != NULL);

	/* Allocate and initialize the client handle. */
	p = kmem_zalloc(sizeof (*p), KM_SLEEP);

	h = ptoh(p);

	h->cl_private = (caddr_t)p;
	h->cl_auth = authkern_create();
	h->cl_ops = &tcp_ops;

	cv_init(&p->cku_call.call_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&p->cku_call.call_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * If the current sanity check size in rpcmod is smaller
	 * than the size needed, then increase the sanity check.
	 */
	if (max_msgsize != 0 && clnt_max_msg_sizep != NULL &&
	    max_msgsize > *clnt_max_msg_sizep) {
		mutex_enter(&clnt_max_msg_lock);
		if (max_msgsize > *clnt_max_msg_sizep)
			*clnt_max_msg_sizep = max_msgsize;
		mutex_exit(&clnt_max_msg_lock);
	}

	p->cku_outbuflen = COTS_DEFAULT_ALLOCSIZE;

	/* Preserialize the call message header */

	call_msg.rm_xid = 0;
	call_msg.rm_direction = CALL;
	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
	call_msg.rm_call.cb_prog = prog;
	call_msg.rm_call.cb_vers = vers;

	xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, WIRE_HDR_SIZE, XDR_ENCODE);

	if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
		RPCLOG0(1, "clnt_cots_kcreate - Fatal header serialization "
		    "error\n");
		auth_destroy(h->cl_auth);
		kmem_free(p, sizeof (cku_private_t));
		RPCLOG0(1, "clnt_cots_kcreate: create failed error EINVAL\n");
		return (EINVAL);		/* XXX */
	}

	/*
	 * The zalloc initialized the fields below.
	 * p->cku_xid = 0;
	 * p->cku_flags = 0;
	 * p->cku_srcaddr.len = 0;
	 * p->cku_srcaddr.maxlen = 0;
	 */

	p->cku_cred = cred;
	p->cku_device = dev;
	p->cku_addrfmly = family;
	p->cku_addr.buf = kmem_zalloc(addr->maxlen, KM_SLEEP);
	p->cku_addr.maxlen = addr->maxlen;
	p->cku_addr.len = addr->len;
	bcopy(addr->buf, p->cku_addr.buf, addr->len);
	p->cku_stats = rpcstat->rpc_cots_client;
	p->cku_useresvport = -1; /* value has not been set */

	*ncl = h;
	return (0);
}

/*ARGSUSED*/
static void
clnt_cots_kabort(CLIENT *h)
{
}
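/*
 * Added usage sketch (hypothetical caller, for illustration only): a
 * consumer such as NFS would obtain and use a handle roughly like
 * this, with dev, srvaddr and cred values it already holds:
 *
 *	CLIENT *client;
 *
 *	if (clnt_cots_kcreate(dev, &srvaddr, AF_INET, NFS_PROGRAM,
 *	    NFS_VERSION, 0, cred, &client) == 0) {
 *		... CLNT_CALL(client, proc, xargs, argsp,
 *		    xres, resp, timeout) ...
 *		CLNT_DESTROY(client);
 *	}
 */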
623 */ 624 static void 625 clnt_cots_kerror(CLIENT *h, struct rpc_err *err) 626 { 627 /* LINTED pointer alignment */ 628 cku_private_t *p = htop(h); 629 630 *err = p->cku_err; 631 } 632 633 static bool_t 634 clnt_cots_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr) 635 { 636 /* LINTED pointer alignment */ 637 cku_private_t *p = htop(h); 638 XDR *xdrs; 639 640 xdrs = &(p->cku_outxdr); 641 xdrs->x_op = XDR_FREE; 642 return ((*xdr_res)(xdrs, res_ptr)); 643 } 644 645 static bool_t 646 clnt_cots_kcontrol(CLIENT *h, int cmd, char *arg) 647 { 648 cku_private_t *p = htop(h); 649 650 switch (cmd) { 651 case CLSET_PROGRESS: 652 p->cku_progress = TRUE; 653 return (TRUE); 654 655 case CLSET_XID: 656 if (arg == NULL) 657 return (FALSE); 658 659 p->cku_xid = *((uint32_t *)arg); 660 return (TRUE); 661 662 case CLGET_XID: 663 if (arg == NULL) 664 return (FALSE); 665 666 *((uint32_t *)arg) = p->cku_xid; 667 return (TRUE); 668 669 case CLSET_NODELAYONERR: 670 if (arg == NULL) 671 return (FALSE); 672 673 if (*((bool_t *)arg) == TRUE) { 674 p->cku_nodelayonerr = TRUE; 675 return (TRUE); 676 } 677 if (*((bool_t *)arg) == FALSE) { 678 p->cku_nodelayonerr = FALSE; 679 return (TRUE); 680 } 681 return (FALSE); 682 683 case CLGET_NODELAYONERR: 684 if (arg == NULL) 685 return (FALSE); 686 687 *((bool_t *)arg) = p->cku_nodelayonerr; 688 return (TRUE); 689 690 case CLSET_BINDRESVPORT: 691 if (arg == NULL) 692 return (FALSE); 693 694 if (*(int *)arg != 1 && *(int *)arg != 0) 695 return (FALSE); 696 697 p->cku_useresvport = *(int *)arg; 698 699 return (TRUE); 700 701 case CLGET_BINDRESVPORT: 702 if (arg == NULL) 703 return (FALSE); 704 705 *(int *)arg = p->cku_useresvport; 706 707 return (TRUE); 708 709 default: 710 return (FALSE); 711 } 712 } 713 714 /* 715 * Destroy rpc handle. Frees the space used for output buffer, 716 * private data, and handle structure. 717 */ 718 static void 719 clnt_cots_kdestroy(CLIENT *h) 720 { 721 /* LINTED pointer alignment */ 722 cku_private_t *p = htop(h); 723 calllist_t *call = &p->cku_call; 724 725 RPCLOG(8, "clnt_cots_kdestroy h: %p\n", (void *)h); 726 RPCLOG(8, "clnt_cots_kdestroy h: xid=0x%x\n", p->cku_xid); 727 728 if (p->cku_flags & CKU_ONQUEUE) { 729 RPCLOG(64, "clnt_cots_kdestroy h: removing call for xid 0x%x " 730 "from dispatch list\n", p->cku_xid); 731 call_table_remove(call); 732 } 733 734 if (call->call_reply) 735 freemsg(call->call_reply); 736 cv_destroy(&call->call_cv); 737 mutex_destroy(&call->call_lock); 738 739 kmem_free(p->cku_srcaddr.buf, p->cku_srcaddr.maxlen); 740 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen); 741 kmem_free(p, sizeof (*p)); 742 } 743 744 static int clnt_cots_pulls; 745 #define RM_HDR_SIZE 4 /* record mark header size */ 746 747 /* 748 * Call remote procedure. 
749 */ 750 static enum clnt_stat 751 clnt_cots_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args, 752 caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, struct timeval wait) 753 { 754 /* LINTED pointer alignment */ 755 cku_private_t *p = htop(h); 756 calllist_t *call = &p->cku_call; 757 XDR *xdrs; 758 struct rpc_msg reply_msg; 759 mblk_t *mp; 760 #ifdef RPCDEBUG 761 clock_t time_sent; 762 #endif 763 struct netbuf *retryaddr; 764 struct cm_xprt *cm_entry = NULL; 765 queue_t *wq; 766 int len; 767 int mpsize; 768 int refreshes = REFRESHES; 769 int interrupted; 770 int tidu_size; 771 enum clnt_stat status; 772 struct timeval cwait; 773 bool_t delay_first = FALSE; 774 clock_t ticks; 775 776 RPCLOG(2, "clnt_cots_kcallit, procnum %u\n", procnum); 777 COTSRCSTAT_INCR(p->cku_stats, rccalls); 778 779 RPCLOG(2, "clnt_cots_kcallit: wait.tv_sec: %ld\n", wait.tv_sec); 780 RPCLOG(2, "clnt_cots_kcallit: wait.tv_usec: %ld\n", wait.tv_usec); 781 782 /* 783 * Bug ID 1240234: 784 * Look out for zero length timeouts. We don't want to 785 * wait zero seconds for a connection to be established. 786 */ 787 if (wait.tv_sec < clnt_cots_min_conntout) { 788 cwait.tv_sec = clnt_cots_min_conntout; 789 cwait.tv_usec = 0; 790 RPCLOG(8, "clnt_cots_kcallit: wait.tv_sec (%ld) too low,", 791 wait.tv_sec); 792 RPCLOG(8, " setting to: %d\n", clnt_cots_min_conntout); 793 } else { 794 cwait = wait; 795 } 796 797 call_again: 798 if (cm_entry) { 799 connmgr_release(cm_entry); 800 cm_entry = NULL; 801 } 802 803 mp = NULL; 804 805 /* 806 * If the call is not a retry, allocate a new xid and cache it 807 * for future retries. 808 * Bug ID 1246045: 809 * Treat call as a retry for purposes of binding the source 810 * port only if we actually attempted to send anything on 811 * the previous call. 812 */ 813 if (p->cku_xid == 0) { 814 p->cku_xid = alloc_xid(); 815 /* 816 * We need to ASSERT here that our xid != 0 because this 817 * determines whether or not our call record gets placed on 818 * the hash table or the linked list. By design, we mandate 819 * that RPC calls over cots must have xid's != 0, so we can 820 * ensure proper management of the hash table. 821 */ 822 ASSERT(p->cku_xid != 0); 823 824 retryaddr = NULL; 825 p->cku_flags &= ~CKU_SENT; 826 827 if (p->cku_flags & CKU_ONQUEUE) { 828 RPCLOG(8, "clnt_cots_kcallit: new call, dequeuing old" 829 " one (%p)\n", (void *)call); 830 call_table_remove(call); 831 p->cku_flags &= ~CKU_ONQUEUE; 832 RPCLOG(64, "clnt_cots_kcallit: removing call from " 833 "dispatch list because xid was zero (now 0x%x)\n", 834 p->cku_xid); 835 } 836 837 if (call->call_reply != NULL) { 838 freemsg(call->call_reply); 839 call->call_reply = NULL; 840 } 841 } else if (p->cku_srcaddr.buf == NULL || p->cku_srcaddr.len == 0) { 842 retryaddr = NULL; 843 844 } else if (p->cku_flags & CKU_SENT) { 845 retryaddr = &p->cku_srcaddr; 846 847 } else { 848 /* 849 * Bug ID 1246045: Nothing was sent, so set retryaddr to 850 * NULL and let connmgr_get() bind to any source port it 851 * can get. 852 */ 853 retryaddr = NULL; 854 } 855 856 RPCLOG(64, "clnt_cots_kcallit: xid = 0x%x", p->cku_xid); 857 RPCLOG(64, " flags = 0x%x\n", p->cku_flags); 858 859 p->cku_err.re_status = RPC_TIMEDOUT; 860 p->cku_err.re_errno = p->cku_err.re_terrno = 0; 861 862 cm_entry = connmgr_wrapget(retryaddr, &cwait, p); 863 864 if (cm_entry == NULL) { 865 RPCLOG(1, "clnt_cots_kcallit: can't connect status %s\n", 866 clnt_sperrno(p->cku_err.re_status)); 867 868 /* 869 * The reasons why we fail to create a connection are 870 * varied. 
	cm_entry = connmgr_wrapget(retryaddr, &cwait, p);

	if (cm_entry == NULL) {
		RPCLOG(1, "clnt_cots_kcallit: can't connect status %s\n",
		    clnt_sperrno(p->cku_err.re_status));

		/*
		 * The reasons why we fail to create a connection are
		 * varied.  In most cases we don't want the caller to
		 * immediately retry.  This could have one or more
		 * bad effects, including flooding the net with
		 * connect requests to ports with no listener, or a hard
		 * kernel loop due to all the "reserved" TCP ports being
		 * in use.
		 */
		delay_first = TRUE;

		/*
		 * Even if we end up returning EINTR, we still count a
		 * "can't connect", because the connection manager
		 * might have been committed to waiting for or timing out on
		 * a connection.
		 */
		COTSRCSTAT_INCR(p->cku_stats, rccantconn);
		switch (p->cku_err.re_status) {
		case RPC_INTR:
			p->cku_err.re_errno = EINTR;

			/*
			 * No need to delay because a UNIX signal(2)
			 * interrupted us.  The caller likely won't
			 * retry the CLNT_CALL() and even if it does,
			 * we assume the caller knows what it is doing.
			 */
			delay_first = FALSE;
			break;

		case RPC_TIMEDOUT:
			p->cku_err.re_errno = ETIMEDOUT;

			/*
			 * No need to delay because we timed out already
			 * on the connection request and assume that the
			 * transport timeout is longer than our minimum
			 * timeout, or at least not too much smaller.
			 */
			delay_first = FALSE;
			break;

		case RPC_SYSTEMERROR:
		case RPC_TLIERROR:
			/*
			 * We want to delay here because a transient
			 * system error has a better chance of going away
			 * if we delay a bit.  If it's not transient, then
			 * we don't want to end up in a hard kernel loop
			 * due to retries.
			 */
			ASSERT(p->cku_err.re_errno != 0);
			break;


		case RPC_CANTCONNECT:
			/*
			 * RPC_CANTCONNECT is set on T_ERROR_ACK which
			 * implies some error down in the TCP layer or
			 * below.  If cku_nodelayonerr is set then we
			 * assume the caller knows not to try too hard.
			 */
			RPCLOG0(8, "clnt_cots_kcallit: connection failed,");
			RPCLOG0(8, " re_status=RPC_CANTCONNECT,");
			RPCLOG(8, " re_errno=%d,", p->cku_err.re_errno);
			RPCLOG(8, " cku_nodelayonerr=%d", p->cku_nodelayonerr);
			if (p->cku_nodelayonerr == TRUE)
				delay_first = FALSE;

			p->cku_err.re_errno = EIO;

			break;

		case RPC_XPRTFAILED:
			/*
			 * We want to delay here because we likely
			 * got a refused connection.
			 */
			if (p->cku_err.re_errno != 0)
				break;

			/* fall thru */

		default:
			/*
			 * We delay here because it is better to err
			 * on the side of caution.  If we got here then
			 * status could have been RPC_SUCCESS, but we
			 * know that we did not get a connection, so
			 * force the rpc status to RPC_CANTCONNECT.
			 */
			p->cku_err.re_status = RPC_CANTCONNECT;
			p->cku_err.re_errno = EIO;
			break;
		}
		if (delay_first == TRUE)
			ticks = clnt_cots_min_tout * drv_usectohz(1000000);
		goto cots_done;
	}

	/*
	 * If we've never sent any request on this connection (send count
	 * is zero, or the connection has been reset), cache the
	 * connection's create time and send a request (possibly a retry).
	 */
	if ((p->cku_flags & CKU_SENT) == 0 ||
	    p->cku_ctime != cm_entry->x_ctime) {
		p->cku_ctime = cm_entry->x_ctime;

	} else if ((p->cku_flags & CKU_SENT) && (p->cku_flags & CKU_ONQUEUE) &&
	    (call->call_reply != NULL ||
	    p->cku_recv_attempts < clnt_cots_maxrecv)) {

		/*
		 * If we've sent a request and our call is on the dispatch
		 * queue and we haven't made too many receive attempts, then
		 * don't re-send, just receive.
		 */
		p->cku_recv_attempts++;
		goto read_again;
	}
986 */ 987 p->cku_recv_attempts++; 988 goto read_again; 989 } 990 991 /* 992 * Now we create the RPC request in a STREAMS message. We have to do 993 * this after the call to connmgr_get so that we have the correct 994 * TIDU size for the transport. 995 */ 996 tidu_size = cm_entry->x_tidu_size; 997 len = MSG_OFFSET + MAX(tidu_size, RM_HDR_SIZE + WIRE_HDR_SIZE); 998 999 while ((mp = allocb(len, BPRI_MED)) == NULL) { 1000 if (strwaitbuf(len, BPRI_MED)) { 1001 p->cku_err.re_status = RPC_SYSTEMERROR; 1002 p->cku_err.re_errno = ENOSR; 1003 COTSRCSTAT_INCR(p->cku_stats, rcnomem); 1004 goto cots_done; 1005 } 1006 } 1007 xdrs = &p->cku_outxdr; 1008 xdrmblk_init(xdrs, mp, XDR_ENCODE, tidu_size); 1009 mpsize = MBLKSIZE(mp); 1010 ASSERT(mpsize >= len); 1011 ASSERT(mp->b_rptr == mp->b_datap->db_base); 1012 1013 /* 1014 * If the size of mblk is not appreciably larger than what we 1015 * asked, then resize the mblk to exactly len bytes. The reason for 1016 * this: suppose len is 1600 bytes, the tidu is 1460 bytes 1017 * (from TCP over ethernet), and the arguments to the RPC require 1018 * 2800 bytes. Ideally we want the protocol to render two 1019 * ~1400 byte segments over the wire. However if allocb() gives us a 2k 1020 * mblk, and we allocate a second mblk for the remainder, the protocol 1021 * module may generate 3 segments over the wire: 1022 * 1460 bytes for the first, 448 (2048 - 1600) for the second, and 1023 * 892 for the third. If we "waste" 448 bytes in the first mblk, 1024 * the XDR encoding will generate two ~1400 byte mblks, and the 1025 * protocol module is more likely to produce properly sized segments. 1026 */ 1027 if ((mpsize >> 1) <= len) 1028 mp->b_rptr += (mpsize - len); 1029 1030 /* 1031 * Adjust b_rptr to reserve space for the non-data protocol headers 1032 * any downstream modules might like to add, and for the 1033 * record marking header. 1034 */ 1035 mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE); 1036 1037 if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) { 1038 /* Copy in the preserialized RPC header information. */ 1039 bcopy(p->cku_rpchdr, mp->b_rptr, WIRE_HDR_SIZE); 1040 1041 /* Use XDR_SETPOS() to set the b_wptr to past the RPC header. */ 1042 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base + 1043 WIRE_HDR_SIZE)); 1044 1045 ASSERT((mp->b_wptr - mp->b_rptr) == WIRE_HDR_SIZE); 1046 1047 /* Serialize the procedure number and the arguments. */ 1048 if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) || 1049 (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) || 1050 (!(*xdr_args)(xdrs, argsp))) { 1051 p->cku_err.re_status = RPC_CANTENCODEARGS; 1052 p->cku_err.re_errno = EIO; 1053 goto cots_done; 1054 } 1055 1056 (*(uint32_t *)(mp->b_rptr)) = p->cku_xid; 1057 } else { 1058 uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[WIRE_HDR_SIZE]; 1059 IXDR_PUT_U_INT32(uproc, procnum); 1060 1061 (*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid; 1062 1063 /* Use XDR_SETPOS() to set the b_wptr. */ 1064 XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base)); 1065 1066 /* Serialize the procedure number and the arguments. 
	/*
	 * Adjust b_rptr to reserve space for the non-data protocol headers
	 * any downstream modules might like to add, and for the
	 * record marking header.
	 */
	mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE);

	if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
		/* Copy in the preserialized RPC header information. */
		bcopy(p->cku_rpchdr, mp->b_rptr, WIRE_HDR_SIZE);

		/* Use XDR_SETPOS() to set the b_wptr to past the RPC header. */
		XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base +
		    WIRE_HDR_SIZE));

		ASSERT((mp->b_wptr - mp->b_rptr) == WIRE_HDR_SIZE);

		/* Serialize the procedure number and the arguments. */
		if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
		    (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
		    (!(*xdr_args)(xdrs, argsp))) {
			p->cku_err.re_status = RPC_CANTENCODEARGS;
			p->cku_err.re_errno = EIO;
			goto cots_done;
		}

		(*(uint32_t *)(mp->b_rptr)) = p->cku_xid;
	} else {
		uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[WIRE_HDR_SIZE];
		IXDR_PUT_U_INT32(uproc, procnum);

		(*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;

		/* Use XDR_SETPOS() to set the b_wptr. */
		XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base));

		/* Serialize the procedure number and the arguments. */
		if (!AUTH_WRAP(h->cl_auth, p->cku_rpchdr, WIRE_HDR_SIZE+4,
		    xdrs, xdr_args, argsp)) {
			p->cku_err.re_status = RPC_CANTENCODEARGS;
			p->cku_err.re_errno = EIO;
			goto cots_done;
		}
	}

	RPCLOG(2, "clnt_cots_kcallit: connected, sending call, tidu_size %d\n",
	    tidu_size);

	wq = cm_entry->x_wq;
	clnt_dispatch_send(wq, mp, call, p->cku_xid,
	    (p->cku_flags & CKU_ONQUEUE));

	RPCLOG(64, "clnt_cots_kcallit: sent call for xid 0x%x\n",
	    (uint_t)p->cku_xid);
	p->cku_flags = (CKU_ONQUEUE|CKU_SENT);
	p->cku_recv_attempts = 1;

#ifdef	RPCDEBUG
	time_sent = lbolt;
#endif
	/*
	 * Wait for a reply or a timeout.  If there is no error or timeout,
	 * (both indicated by call_status), call->call_reply will contain
	 * the RPC reply message.
	 */
read_again:
	mutex_enter(&call->call_lock);
	interrupted = 0;
	if (call->call_status == RPC_TIMEDOUT) {
		/*
		 * Indicate that the lwp is not to be stopped while waiting
		 * for this network traffic.  This is to avoid deadlock while
		 * debugging a process via /proc and also to avoid recursive
		 * mutex_enter()s due to NFS page faults while stopping
		 * (NFS holds locks when it calls here).
		 */
		clock_t cv_wait_ret;
		clock_t timout;
		clock_t oldlbolt;

		klwp_t *lwp = ttolwp(curthread);

		if (lwp != NULL)
			lwp->lwp_nostop++;

		oldlbolt = lbolt;
		timout = wait.tv_sec * drv_usectohz(1000000) +
		    drv_usectohz(wait.tv_usec) + oldlbolt;
		/*
		 * Iterate until the call_status is changed to something
		 * other than RPC_TIMEDOUT, or until cv_timedwait_sig() returns
		 * something <= 0.  The latter means that we timed out.
		 */
		if (h->cl_nosignal)
			while ((cv_wait_ret = cv_timedwait(&call->call_cv,
			    &call->call_lock, timout)) > 0 &&
			    call->call_status == RPC_TIMEDOUT)
				;
		else
			while ((cv_wait_ret = cv_timedwait_sig(
			    &call->call_cv,
			    &call->call_lock, timout)) > 0 &&
			    call->call_status == RPC_TIMEDOUT)
				;

		switch (cv_wait_ret) {
		case 0:
			/*
			 * If we got out of the above loop with
			 * cv_timedwait_sig() returning 0, then we were
			 * interrupted regardless what call_status is.
			 */
			interrupted = 1;
			break;
		case -1:
			/* cv_timedwait_sig() timed out */
			break;
		default:

			/*
			 * We were cv_signaled().  If we didn't
			 * get a successful call_status and returned
			 * before time expired, delay up to clnt_cots_min_tout
			 * seconds so that the caller doesn't immediately
			 * try to call us again and thus force the
			 * same condition that got us here (such
			 * as an RPC_XPRTFAILED due to the server not
			 * listening on the end-point).
			 */
			if (call->call_status != RPC_SUCCESS) {
				clock_t curlbolt;
				clock_t diff;

				curlbolt = ddi_get_lbolt();
				ticks = clnt_cots_min_tout *
				    drv_usectohz(1000000);
				diff = curlbolt - oldlbolt;
				if (diff < ticks) {
					delay_first = TRUE;
					if (diff > 0)
						ticks -= diff;
				}
			}
			break;
		}

		if (lwp != NULL)
			lwp->lwp_nostop--;
	}
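	/*
	 * Added note on the tick arithmetic above: drv_usectohz(1000000) is
	 * the number of clock ticks per second, so on a typical 100 Hz clock
	 * a wait of { tv_sec = 30, tv_usec = 0 } yields a timout of
	 * 30 * 100 + 0 = 3000 ticks past the lbolt value sampled before the
	 * wait began.
	 */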
1288 */ 1289 if (!AUTH_VALIDATE(h->cl_auth, 1290 &reply_msg.acpted_rply.ar_verf)) { 1291 COTSRCSTAT_INCR(p->cku_stats, rcbadverfs); 1292 RPCLOG0(1, "clnt_cots_kcallit: validation " 1293 "failure\n"); 1294 freemsg(mp); 1295 (void) xdr_rpc_free_verifier(xdrs, &reply_msg); 1296 mutex_enter(&call->call_lock); 1297 if (call->call_reply == NULL) 1298 call->call_status = RPC_TIMEDOUT; 1299 mutex_exit(&call->call_lock); 1300 goto read_again; 1301 } else if (!AUTH_UNWRAP(h->cl_auth, xdrs, 1302 xdr_results, resultsp)) { 1303 RPCLOG0(1, "clnt_cots_kcallit: validation " 1304 "failure (unwrap)\n"); 1305 p->cku_err.re_status = RPC_CANTDECODERES; 1306 p->cku_err.re_errno = EIO; 1307 } 1308 } else { 1309 /* set errno in case we can't recover */ 1310 if (re_status != RPC_VERSMISMATCH && 1311 re_status != RPC_AUTHERROR && 1312 re_status != RPC_PROGVERSMISMATCH) 1313 p->cku_err.re_errno = EIO; 1314 1315 if (re_status == RPC_AUTHERROR) { 1316 /* 1317 * Maybe our credential need to be refreshed 1318 */ 1319 if (cm_entry) { 1320 /* 1321 * There is the potential that the 1322 * cm_entry has/will be marked dead, 1323 * so drop the connection altogether, 1324 * force REFRESH to establish new 1325 * connection. 1326 */ 1327 connmgr_cancelconn(cm_entry); 1328 cm_entry = NULL; 1329 } 1330 1331 if ((refreshes > 0) && 1332 AUTH_REFRESH(h->cl_auth, &reply_msg, 1333 p->cku_cred)) { 1334 refreshes--; 1335 (void) xdr_rpc_free_verifier(xdrs, 1336 &reply_msg); 1337 freemsg(mp); 1338 mp = NULL; 1339 1340 if (p->cku_flags & CKU_ONQUEUE) { 1341 call_table_remove(call); 1342 p->cku_flags &= ~CKU_ONQUEUE; 1343 } 1344 1345 RPCLOG(64, 1346 "clnt_cots_kcallit: AUTH_ERROR, xid" 1347 " 0x%x removed off dispatch list\n", 1348 p->cku_xid); 1349 if (call->call_reply) { 1350 freemsg(call->call_reply); 1351 call->call_reply = NULL; 1352 } 1353 1354 COTSRCSTAT_INCR(p->cku_stats, 1355 rcbadcalls); 1356 COTSRCSTAT_INCR(p->cku_stats, 1357 rcnewcreds); 1358 goto call_again; 1359 } 1360 1361 /* 1362 * We have used the client handle to 1363 * do an AUTH_REFRESH and the RPC status may 1364 * be set to RPC_SUCCESS; Let's make sure to 1365 * set it to RPC_AUTHERROR. 1366 */ 1367 p->cku_err.re_status = RPC_AUTHERROR; 1368 1369 /* 1370 * Map recoverable and unrecoverable 1371 * authentication errors to appropriate errno 1372 */ 1373 switch (p->cku_err.re_why) { 1374 case AUTH_TOOWEAK: 1375 /* 1376 * This could be a failure where the 1377 * server requires use of a reserved 1378 * port, check and optionally set the 1379 * client handle useresvport trying 1380 * one more time. Next go round we 1381 * fall out with the tooweak error. 1382 */ 1383 if (p->cku_useresvport != 1) { 1384 p->cku_useresvport = 1; 1385 p->cku_xid = 0; 1386 (void) xdr_rpc_free_verifier 1387 (xdrs, &reply_msg); 1388 freemsg(mp); 1389 goto call_again; 1390 } 1391 /* FALLTHRU */ 1392 case AUTH_BADCRED: 1393 case AUTH_BADVERF: 1394 case AUTH_INVALIDRESP: 1395 case AUTH_FAILED: 1396 case RPCSEC_GSS_NOCRED: 1397 case RPCSEC_GSS_FAILED: 1398 p->cku_err.re_errno = EACCES; 1399 break; 1400 case AUTH_REJECTEDCRED: 1401 case AUTH_REJECTEDVERF: 1402 default: p->cku_err.re_errno = EIO; 1403 break; 1404 } 1405 RPCLOG(1, "clnt_cots_kcallit : authentication" 1406 " failed with RPC_AUTHERROR of type %d\n", 1407 (int)p->cku_err.re_why); 1408 } 1409 } 1410 } else { 1411 /* reply didn't decode properly. 
	if (xdr_replymsg(xdrs, &reply_msg)) {
		enum clnt_stat re_status;

		_seterr_reply(&reply_msg, &p->cku_err);

		re_status = p->cku_err.re_status;
		if (re_status == RPC_SUCCESS) {
			/*
			 * Reply is good, check auth.
			 */
			if (!AUTH_VALIDATE(h->cl_auth,
			    &reply_msg.acpted_rply.ar_verf)) {
				COTSRCSTAT_INCR(p->cku_stats, rcbadverfs);
				RPCLOG0(1, "clnt_cots_kcallit: validation "
				    "failure\n");
				freemsg(mp);
				(void) xdr_rpc_free_verifier(xdrs, &reply_msg);
				mutex_enter(&call->call_lock);
				if (call->call_reply == NULL)
					call->call_status = RPC_TIMEDOUT;
				mutex_exit(&call->call_lock);
				goto read_again;
			} else if (!AUTH_UNWRAP(h->cl_auth, xdrs,
			    xdr_results, resultsp)) {
				RPCLOG0(1, "clnt_cots_kcallit: validation "
				    "failure (unwrap)\n");
				p->cku_err.re_status = RPC_CANTDECODERES;
				p->cku_err.re_errno = EIO;
			}
		} else {
			/* set errno in case we can't recover */
			if (re_status != RPC_VERSMISMATCH &&
			    re_status != RPC_AUTHERROR &&
			    re_status != RPC_PROGVERSMISMATCH)
				p->cku_err.re_errno = EIO;

			if (re_status == RPC_AUTHERROR) {
				/*
				 * Maybe our credentials need to be refreshed
				 */
				if (cm_entry) {
					/*
					 * There is the potential that the
					 * cm_entry has/will be marked dead,
					 * so drop the connection altogether,
					 * force REFRESH to establish new
					 * connection.
					 */
					connmgr_cancelconn(cm_entry);
					cm_entry = NULL;
				}

				if ((refreshes > 0) &&
				    AUTH_REFRESH(h->cl_auth, &reply_msg,
				    p->cku_cred)) {
					refreshes--;
					(void) xdr_rpc_free_verifier(xdrs,
					    &reply_msg);
					freemsg(mp);
					mp = NULL;

					if (p->cku_flags & CKU_ONQUEUE) {
						call_table_remove(call);
						p->cku_flags &= ~CKU_ONQUEUE;
					}

					RPCLOG(64,
					    "clnt_cots_kcallit: AUTH_ERROR, xid"
					    " 0x%x removed off dispatch list\n",
					    p->cku_xid);
					if (call->call_reply) {
						freemsg(call->call_reply);
						call->call_reply = NULL;
					}

					COTSRCSTAT_INCR(p->cku_stats,
					    rcbadcalls);
					COTSRCSTAT_INCR(p->cku_stats,
					    rcnewcreds);
					goto call_again;
				}

				/*
				 * We have used the client handle to
				 * do an AUTH_REFRESH and the RPC status may
				 * be set to RPC_SUCCESS; Let's make sure to
				 * set it to RPC_AUTHERROR.
				 */
				p->cku_err.re_status = RPC_AUTHERROR;

				/*
				 * Map recoverable and unrecoverable
				 * authentication errors to appropriate errno
				 */
				switch (p->cku_err.re_why) {
				case AUTH_TOOWEAK:
					/*
					 * This could be a failure where the
					 * server requires use of a reserved
					 * port; check and optionally set the
					 * client handle useresvport, trying
					 * one more time.  Next go round we
					 * fall out with the tooweak error.
					 */
					if (p->cku_useresvport != 1) {
						p->cku_useresvport = 1;
						p->cku_xid = 0;
						(void) xdr_rpc_free_verifier
						    (xdrs, &reply_msg);
						freemsg(mp);
						goto call_again;
					}
					/* FALLTHRU */
				case AUTH_BADCRED:
				case AUTH_BADVERF:
				case AUTH_INVALIDRESP:
				case AUTH_FAILED:
				case RPCSEC_GSS_NOCRED:
				case RPCSEC_GSS_FAILED:
					p->cku_err.re_errno = EACCES;
					break;
				case AUTH_REJECTEDCRED:
				case AUTH_REJECTEDVERF:
				default:
					p->cku_err.re_errno = EIO;
					break;
				}
				RPCLOG(1, "clnt_cots_kcallit : authentication"
				    " failed with RPC_AUTHERROR of type %d\n",
				    (int)p->cku_err.re_why);
			}
		}
	} else {
		/* reply didn't decode properly. */
		p->cku_err.re_status = RPC_CANTDECODERES;
		p->cku_err.re_errno = EIO;
		RPCLOG0(1, "clnt_cots_kcallit: decode failure\n");
	}

	(void) xdr_rpc_free_verifier(xdrs, &reply_msg);

	if (p->cku_flags & CKU_ONQUEUE) {
		call_table_remove(call);
		p->cku_flags &= ~CKU_ONQUEUE;
	}

	RPCLOG(64, "clnt_cots_kcallit: xid 0x%x taken off dispatch list",
	    p->cku_xid);
	RPCLOG(64, " status is %s\n", clnt_sperrno(p->cku_err.re_status));
cots_done:
	if (cm_entry)
		connmgr_release(cm_entry);

	if (mp != NULL)
		freemsg(mp);
	if ((p->cku_flags & CKU_ONQUEUE) == 0 && call->call_reply) {
		freemsg(call->call_reply);
		call->call_reply = NULL;
	}
	if (p->cku_err.re_status != RPC_SUCCESS) {
		RPCLOG0(1, "clnt_cots_kcallit: tail-end failure\n");
		COTSRCSTAT_INCR(p->cku_stats, rcbadcalls);
	}

	/*
	 * No point in delaying if the zone is going away.
	 */
	if (delay_first == TRUE &&
	    !(zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)) {
		if (clnt_delay(ticks, h->cl_nosignal) == EINTR) {
			p->cku_err.re_errno = EINTR;
			p->cku_err.re_status = RPC_INTR;
		}
	}
	return (p->cku_err.re_status);
}
1522 */ 1523 /* ARGSUSED */ 1524 static int 1525 clnt_cots_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all, 1526 int minimum, void (*feedback)(int, int, caddr_t), caddr_t arg, 1527 uint32_t xid) 1528 { 1529 /* LINTED pointer alignment */ 1530 cku_private_t *p = htop(h); 1531 1532 if (xid) 1533 p->cku_xid = xid; 1534 COTSRCSTAT_INCR(p->cku_stats, rctimers); 1535 return (0); 1536 } 1537 1538 extern void rpc_poptimod(struct vnode *); 1539 extern int kstr_push(struct vnode *, char *); 1540 1541 int 1542 conn_kstat_update(kstat_t *ksp, int rw) 1543 { 1544 struct cm_xprt *cm_entry; 1545 struct cm_kstat_xprt *cm_ksp_data; 1546 uchar_t *b; 1547 char *fbuf; 1548 1549 if (rw == KSTAT_WRITE) 1550 return (EACCES); 1551 if (ksp == NULL || ksp->ks_private == NULL) 1552 return (EIO); 1553 cm_entry = (struct cm_xprt *)ksp->ks_private; 1554 cm_ksp_data = (struct cm_kstat_xprt *)ksp->ks_data; 1555 1556 cm_ksp_data->x_wq.value.ui32 = (uint32_t)(uintptr_t)cm_entry->x_wq; 1557 cm_ksp_data->x_family.value.ui32 = cm_entry->x_family; 1558 cm_ksp_data->x_rdev.value.ui32 = (uint32_t)cm_entry->x_rdev; 1559 cm_ksp_data->x_time.value.ui32 = cm_entry->x_time; 1560 cm_ksp_data->x_ref.value.ui32 = cm_entry->x_ref; 1561 cm_ksp_data->x_state.value.ui32 = cm_entry->x_state_flags; 1562 1563 if (cm_entry->x_server.buf) { 1564 fbuf = cm_ksp_data->x_server.value.str.addr.ptr; 1565 if (cm_entry->x_family == AF_INET && 1566 cm_entry->x_server.len == 1567 sizeof (struct sockaddr_in)) { 1568 struct sockaddr_in *sa; 1569 sa = (struct sockaddr_in *) 1570 cm_entry->x_server.buf; 1571 b = (uchar_t *)&sa->sin_addr; 1572 (void) sprintf(fbuf, 1573 "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF, 1574 b[2] & 0xFF, b[3] & 0xFF); 1575 cm_ksp_data->x_port.value.ui32 = 1576 (uint32_t)sa->sin_port; 1577 } else if (cm_entry->x_family == AF_INET6 && 1578 cm_entry->x_server.len >= 1579 sizeof (struct sockaddr_in6)) { 1580 /* extract server IP address & port */ 1581 struct sockaddr_in6 *sin6; 1582 sin6 = (struct sockaddr_in6 *)cm_entry->x_server.buf; 1583 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr, fbuf, 1584 INET6_ADDRSTRLEN); 1585 cm_ksp_data->x_port.value.ui32 = sin6->sin6_port; 1586 } else { 1587 struct sockaddr_in *sa; 1588 1589 sa = (struct sockaddr_in *)cm_entry->x_server.buf; 1590 b = (uchar_t *)&sa->sin_addr; 1591 (void) sprintf(fbuf, 1592 "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF, 1593 b[2] & 0xFF, b[3] & 0xFF); 1594 } 1595 KSTAT_NAMED_STR_BUFLEN(&cm_ksp_data->x_server) = 1596 strlen(fbuf) + 1; 1597 } 1598 1599 return (0); 1600 } 1601 1602 1603 /* 1604 * We want a version of delay which is interruptible by a UNIX signal 1605 * Return EINTR if an interrupt occured. 1606 */ 1607 static int 1608 clnt_delay(clock_t ticks, bool_t nosignal) 1609 { 1610 if (nosignal == TRUE) { 1611 delay(ticks); 1612 return (0); 1613 } 1614 return (delay_sig(ticks)); 1615 } 1616 1617 /* 1618 * Wait for a connection until a timeout, or until we are 1619 * signalled that there has been a connection state change. 1620 */ 1621 static enum clnt_stat 1622 connmgr_cwait(struct cm_xprt *cm_entry, const struct timeval *waitp, 1623 bool_t nosignal) 1624 { 1625 bool_t interrupted; 1626 clock_t timout, cv_stat; 1627 enum clnt_stat clstat; 1628 unsigned int old_state; 1629 1630 ASSERT(MUTEX_HELD(&connmgr_lock)); 1631 /* 1632 * We wait for the transport connection to be made, or an 1633 * indication that it could not be made. 
1634 */ 1635 clstat = RPC_TIMEDOUT; 1636 interrupted = FALSE; 1637 1638 old_state = cm_entry->x_state_flags; 1639 /* 1640 * Now loop until cv_timedwait{_sig} returns because of 1641 * a signal(0) or timeout(-1) or cv_signal(>0). But it may be 1642 * cv_signalled for various other reasons too. So loop 1643 * until there is a state change on the connection. 1644 */ 1645 1646 timout = waitp->tv_sec * drv_usectohz(1000000) + 1647 drv_usectohz(waitp->tv_usec) + lbolt; 1648 1649 if (nosignal) { 1650 while ((cv_stat = cv_timedwait(&cm_entry->x_conn_cv, 1651 &connmgr_lock, timout)) > 0 && 1652 cm_entry->x_state_flags == old_state) 1653 ; 1654 } else { 1655 while ((cv_stat = cv_timedwait_sig(&cm_entry->x_conn_cv, 1656 &connmgr_lock, timout)) > 0 && 1657 cm_entry->x_state_flags == old_state) 1658 ; 1659 1660 if (cv_stat == 0) /* got intr signal? */ 1661 interrupted = TRUE; 1662 } 1663 1664 if ((cm_entry->x_state_flags & (X_BADSTATES|X_CONNECTED)) == 1665 X_CONNECTED) { 1666 clstat = RPC_SUCCESS; 1667 } else { 1668 if (interrupted == TRUE) 1669 clstat = RPC_INTR; 1670 RPCLOG(1, "connmgr_cwait: can't connect, error: %s\n", 1671 clnt_sperrno(clstat)); 1672 } 1673 1674 return (clstat); 1675 } 1676 1677 /* 1678 * Primary interface for how RPC grabs a connection. 1679 */ 1680 static struct cm_xprt * 1681 connmgr_wrapget( 1682 struct netbuf *retryaddr, 1683 const struct timeval *waitp, 1684 cku_private_t *p) 1685 { 1686 struct cm_xprt *cm_entry; 1687 1688 cm_entry = connmgr_get(retryaddr, waitp, &p->cku_addr, p->cku_addrfmly, 1689 &p->cku_srcaddr, &p->cku_err, p->cku_device, 1690 p->cku_client.cl_nosignal, p->cku_useresvport); 1691 1692 if (cm_entry == NULL) { 1693 /* 1694 * Re-map the call status to RPC_INTR if the err code is 1695 * EINTR. This can happen if calls status is RPC_TLIERROR. 1696 * However, don't re-map if signalling has been turned off. 1697 * XXX Really need to create a separate thread whenever 1698 * there isn't an existing connection. 1699 */ 1700 if (p->cku_err.re_errno == EINTR) { 1701 if (p->cku_client.cl_nosignal == TRUE) 1702 p->cku_err.re_errno = EIO; 1703 else 1704 p->cku_err.re_status = RPC_INTR; 1705 } 1706 } 1707 1708 return (cm_entry); 1709 } 1710 1711 /* 1712 * Obtains a transport to the server specified in addr. If a suitable transport 1713 * does not already exist in the list of cached transports, a new connection 1714 * is created, connected, and added to the list. The connection is for sending 1715 * only - the reply message may come back on another transport connection. 1716 */ 1717 static struct cm_xprt * 1718 connmgr_get( 1719 struct netbuf *retryaddr, 1720 const struct timeval *waitp, /* changed to a ptr to converse stack */ 1721 struct netbuf *destaddr, 1722 int addrfmly, 1723 struct netbuf *srcaddr, 1724 struct rpc_err *rpcerr, 1725 dev_t device, 1726 bool_t nosignal, 1727 int useresvport) 1728 { 1729 struct cm_xprt *cm_entry; 1730 struct cm_xprt *lru_entry; 1731 struct cm_xprt **cmp; 1732 queue_t *wq; 1733 TIUSER *tiptr; 1734 int i; 1735 int retval; 1736 clock_t prev_time; 1737 int tidu_size; 1738 bool_t connected; 1739 zoneid_t zoneid = rpc_zoneid(); 1740 1741 /* 1742 * If the call is not a retry, look for a transport entry that 1743 * goes to the server of interest. 
1744 */ 1745 mutex_enter(&connmgr_lock); 1746 1747 if (retryaddr == NULL) { 1748 use_new_conn: 1749 i = 0; 1750 cm_entry = lru_entry = NULL; 1751 prev_time = lbolt; 1752 1753 cmp = &cm_hd; 1754 while ((cm_entry = *cmp) != NULL) { 1755 ASSERT(cm_entry != cm_entry->x_next); 1756 /* 1757 * Garbage collect conections that are marked 1758 * for needs disconnect. 1759 */ 1760 if (cm_entry->x_needdis) { 1761 CONN_HOLD(cm_entry); 1762 connmgr_dis_and_wait(cm_entry); 1763 connmgr_release(cm_entry); 1764 /* 1765 * connmgr_lock could have been 1766 * dropped for the disconnect 1767 * processing so start over. 1768 */ 1769 goto use_new_conn; 1770 } 1771 1772 /* 1773 * Garbage collect the dead connections that have 1774 * no threads working on them. 1775 */ 1776 if ((cm_entry->x_state_flags & (X_DEAD|X_THREAD)) == 1777 X_DEAD) { 1778 mutex_enter(&cm_entry->x_lock); 1779 if (cm_entry->x_ref != 0) { 1780 /* 1781 * Currently in use. 1782 * Cleanup later. 1783 */ 1784 cmp = &cm_entry->x_next; 1785 mutex_exit(&cm_entry->x_lock); 1786 continue; 1787 } 1788 mutex_exit(&cm_entry->x_lock); 1789 *cmp = cm_entry->x_next; 1790 mutex_exit(&connmgr_lock); 1791 connmgr_close(cm_entry); 1792 mutex_enter(&connmgr_lock); 1793 goto use_new_conn; 1794 } 1795 1796 1797 if ((cm_entry->x_state_flags & X_BADSTATES) == 0 && 1798 cm_entry->x_zoneid == zoneid && 1799 cm_entry->x_rdev == device && 1800 destaddr->len == cm_entry->x_server.len && 1801 bcmp(destaddr->buf, cm_entry->x_server.buf, 1802 destaddr->len) == 0) { 1803 /* 1804 * If the matching entry isn't connected, 1805 * attempt to reconnect it. 1806 */ 1807 if (cm_entry->x_connected == FALSE) { 1808 /* 1809 * We don't go through trying 1810 * to find the least recently 1811 * used connected because 1812 * connmgr_reconnect() briefly 1813 * dropped the connmgr_lock, 1814 * allowing a window for our 1815 * accounting to be messed up. 1816 * In any case, a re-connected 1817 * connection is as good as 1818 * a LRU connection. 1819 */ 1820 return (connmgr_wrapconnect(cm_entry, 1821 waitp, destaddr, addrfmly, srcaddr, 1822 rpcerr, TRUE, nosignal)); 1823 } 1824 i++; 1825 if (cm_entry->x_time - prev_time <= 0 || 1826 lru_entry == NULL) { 1827 prev_time = cm_entry->x_time; 1828 lru_entry = cm_entry; 1829 } 1830 } 1831 cmp = &cm_entry->x_next; 1832 } 1833 1834 if (i > clnt_max_conns) { 1835 RPCLOG(8, "connmgr_get: too many conns, dooming entry" 1836 " %p\n", (void *)lru_entry->x_tiptr); 1837 lru_entry->x_doomed = TRUE; 1838 goto use_new_conn; 1839 } 1840 1841 /* 1842 * If we are at the maximum number of connections to 1843 * the server, hand back the least recently used one. 1844 */ 1845 if (i == clnt_max_conns) { 1846 /* 1847 * Copy into the handle the source address of 1848 * the connection, which we will use in case of 1849 * a later retry. 1850 */ 1851 if (srcaddr->len != lru_entry->x_src.len) { 1852 if (srcaddr->len > 0) 1853 kmem_free(srcaddr->buf, 1854 srcaddr->maxlen); 1855 srcaddr->buf = kmem_zalloc( 1856 lru_entry->x_src.len, KM_SLEEP); 1857 srcaddr->maxlen = srcaddr->len = 1858 lru_entry->x_src.len; 1859 } 1860 bcopy(lru_entry->x_src.buf, srcaddr->buf, srcaddr->len); 1861 RPCLOG(2, "connmgr_get: call going out on %p\n", 1862 (void *)lru_entry); 1863 lru_entry->x_time = lbolt; 1864 CONN_HOLD(lru_entry); 1865 mutex_exit(&connmgr_lock); 1866 return (lru_entry); 1867 } 1868 1869 } else { 1870 /* 1871 * This is the retry case (retryaddr != NULL). Retries must 1872 * be sent on the same source port as the original call. 

		if (i > clnt_max_conns) {
			RPCLOG(8, "connmgr_get: too many conns, dooming entry"
			    " %p\n", (void *)lru_entry->x_tiptr);
			lru_entry->x_doomed = TRUE;
			goto use_new_conn;
		}

		/*
		 * If we are at the maximum number of connections to
		 * the server, hand back the least recently used one.
		 */
		if (i == clnt_max_conns) {
			/*
			 * Copy into the handle the source address of
			 * the connection, which we will use in case of
			 * a later retry.
			 */
			if (srcaddr->len != lru_entry->x_src.len) {
				if (srcaddr->len > 0)
					kmem_free(srcaddr->buf,
					    srcaddr->maxlen);
				srcaddr->buf = kmem_zalloc(
				    lru_entry->x_src.len, KM_SLEEP);
				srcaddr->maxlen = srcaddr->len =
				    lru_entry->x_src.len;
			}
			bcopy(lru_entry->x_src.buf, srcaddr->buf, srcaddr->len);
			RPCLOG(2, "connmgr_get: call going out on %p\n",
			    (void *)lru_entry);
			lru_entry->x_time = lbolt;
			CONN_HOLD(lru_entry);
			mutex_exit(&connmgr_lock);
			return (lru_entry);
		}

	} else {
		/*
		 * This is the retry case (retryaddr != NULL). Retries must
		 * be sent on the same source port as the original call.
		 */

		/*
		 * Walk the list looking for a connection with a source address
		 * that matches the retry address.
		 */
		cmp = &cm_hd;
		while ((cm_entry = *cmp) != NULL) {
			ASSERT(cm_entry != cm_entry->x_next);
			if (zoneid != cm_entry->x_zoneid ||
			    device != cm_entry->x_rdev ||
			    retryaddr->len != cm_entry->x_src.len ||
			    bcmp(retryaddr->buf, cm_entry->x_src.buf,
			    retryaddr->len) != 0) {
				cmp = &cm_entry->x_next;
				continue;
			}

			/*
			 * Sanity check: if the connection with our source
			 * port is going to some other server, something went
			 * wrong, as we never delete connections (i.e. release
			 * ports) unless they have been idle. In this case,
			 * it is probably better to send the call out using
			 * a new source address than to fail it altogether,
			 * since that port may never be released.
			 */
			if (destaddr->len != cm_entry->x_server.len ||
			    bcmp(destaddr->buf, cm_entry->x_server.buf,
			    destaddr->len) != 0) {
				RPCLOG(1, "connmgr_get: tiptr %p"
				    " is going to a different server"
				    " with the port that belongs"
				    " to us!\n", (void *)cm_entry->x_tiptr);
				retryaddr = NULL;
				goto use_new_conn;
			}

			/*
			 * If the connection of interest is not connected and
			 * we can't reconnect it, then the server is probably
			 * still down. Return NULL to the caller and let it
			 * retry later if it wants to. We have a delay so the
			 * machine doesn't go into a tight retry loop. If the
			 * entry was already connected, or the reconnect was
			 * successful, return this entry.
			 */
			if (cm_entry->x_connected == FALSE) {
				return (connmgr_wrapconnect(cm_entry,
				    waitp, destaddr, addrfmly, NULL,
				    rpcerr, TRUE, nosignal));
			} else {
				CONN_HOLD(cm_entry);

				cm_entry->x_time = lbolt;
				mutex_exit(&connmgr_lock);
				RPCLOG(2, "connmgr_get: found old "
				    "transport %p for retry\n",
				    (void *)cm_entry);
				return (cm_entry);
			}
		}

		/*
		 * We cannot find an entry in the list for this retry.
		 * Either the entry has been removed temporarily to be
		 * reconnected by another thread, or the original call
		 * got a port but never got connected,
		 * and hence the transport never got put in the
		 * list. Fall through to the "create new connection" code -
		 * the former case will fail there trying to rebind the port,
		 * and the latter case (and any other pathological cases) will
		 * rebind and reconnect and not hang the client machine.
		 */
		RPCLOG0(8, "connmgr_get: no entry in list for retry\n");
	}
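
	/*
	 * Editorial sketch (illustration only): both walks above use the
	 * pointer-to-pointer idiom, keeping cmp aimed at the link that
	 * points to the current entry, so unlinking needs no special case
	 * for the list head:
	 *
	 *	cmp = &cm_hd;
	 *	while ((cm_entry = *cmp) != NULL) {
	 *		if (should_unlink(cm_entry))
	 *			*cmp = cm_entry->x_next;
	 *		else
	 *			cmp = &cm_entry->x_next;
	 *	}
	 *
	 * where should_unlink() is a hypothetical stand-in for the X_DEAD
	 * test used above.
	 */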

	/*
	 * Set up a transport entry in the connection manager's list.
	 */
	cm_entry = (struct cm_xprt *)
	    kmem_zalloc(sizeof (struct cm_xprt), KM_SLEEP);

	cm_entry->x_server.buf = kmem_zalloc(destaddr->len, KM_SLEEP);
	bcopy(destaddr->buf, cm_entry->x_server.buf, destaddr->len);
	cm_entry->x_server.len = cm_entry->x_server.maxlen = destaddr->len;

	cm_entry->x_state_flags = X_THREAD;
	cm_entry->x_ref = 1;
	cm_entry->x_family = addrfmly;
	cm_entry->x_rdev = device;
	cm_entry->x_zoneid = zoneid;
	mutex_init(&cm_entry->x_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&cm_entry->x_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&cm_entry->x_conn_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&cm_entry->x_dis_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Note that we add this partially initialized entry to the
	 * connection list. This is so that we don't end up with multiple
	 * connections to the same server.
	 *
	 * Note that x_src is not initialized at this point. This is because
	 * retryaddr might be NULL in which case x_src is whatever
	 * t_kbind/bindresvport gives us. If another thread wants a
	 * connection to the same server, seemingly we have an issue, but we
	 * don't. If the other thread comes in with retryaddr == NULL, then it
	 * will never look at x_src, and it will end up waiting in
	 * connmgr_cwait() for the first thread to finish the connection
	 * attempt. If the other thread comes in with retryaddr != NULL, then
	 * that means there was a request sent on a connection, in which case
	 * the connection should already exist. Thus the first thread
	 * never gets here ... it finds the connection to its server in the
	 * connection list.
	 *
	 * But even if the theory is wrong, in the retryaddr != NULL case, the
	 * 2nd thread will skip us because x_src.len == 0.
	 */
	cm_entry->x_next = cm_hd;
	cm_hd = cm_entry;
	mutex_exit(&connmgr_lock);
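
	/*
	 * Editorial note (illustration only): the code below builds the
	 * client's STREAMS stack one module at a time. Assuming the usual
	 * TCP device, the resulting stream looks roughly like:
	 *
	 *	kRPC (stream head)
	 *	  timod		TLI/TPI interface module
	 *	  rpcmod	RPC record marking and reply dispatch
	 *	  tcp		the transport itself
	 *
	 * The timod instance autopushed by t_kopen() is popped first so
	 * that rpcmod can sit directly above the transport.
	 */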

	/*
	 * Either we didn't find an entry to the server of interest, or we
	 * don't have the maximum number of connections to that server -
	 * create a new connection.
	 */
	RPCLOG0(8, "connmgr_get: creating new connection\n");
	rpcerr->re_status = RPC_TLIERROR;

	i = t_kopen(NULL, device, FREAD|FWRITE|FNDELAY, &tiptr, zone_kcred());
	if (i) {
		RPCLOG(1, "connmgr_get: can't open cots device, error %d\n", i);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}
	rpc_poptimod(tiptr->fp->f_vnode);

	if (i = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"rpcmod", 0,
	    K_TO_K, kcred, &retval)) {
		RPCLOG(1, "connmgr_get: can't push cots module, %d\n", i);
		(void) t_kclose(tiptr, 1);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}

	if (i = strioctl(tiptr->fp->f_vnode, RPC_CLIENT, 0, 0, K_TO_K,
	    kcred, &retval)) {
		RPCLOG(1, "connmgr_get: can't set client status with cots "
		    "module, %d\n", i);
		(void) t_kclose(tiptr, 1);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}

	mutex_enter(&connmgr_lock);

	wq = tiptr->fp->f_vnode->v_stream->sd_wrq->q_next;
	cm_entry->x_wq = wq;

	mutex_exit(&connmgr_lock);

	if (i = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"timod", 0,
	    K_TO_K, kcred, &retval)) {
		RPCLOG(1, "connmgr_get: can't push timod, %d\n", i);
		(void) t_kclose(tiptr, 1);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}

	/*
	 * If the caller has not specified reserved port usage then
	 * take the system default.
	 */
	if (useresvport == -1)
		useresvport = clnt_cots_do_bindresvport;

	if ((useresvport || retryaddr != NULL) &&
	    (addrfmly == AF_INET || addrfmly == AF_INET6)) {
		bool_t alloc_src = FALSE;

		if (srcaddr->len != destaddr->len) {
			kmem_free(srcaddr->buf, srcaddr->maxlen);
			srcaddr->buf = kmem_zalloc(destaddr->len, KM_SLEEP);
			srcaddr->maxlen = destaddr->len;
			srcaddr->len = destaddr->len;
			alloc_src = TRUE;
		}

		if ((i = bindresvport(tiptr, retryaddr, srcaddr, TRUE)) != 0) {
			(void) t_kclose(tiptr, 1);
			RPCLOG(1, "connmgr_get: couldn't bind, retryaddr: "
			    "%p\n", (void *)retryaddr);

			/*
			 * 1225408: If we allocated a source address, then it
			 * is either garbage or all zeroes. In that case
			 * we need to clear srcaddr.
			 */
			if (alloc_src == TRUE) {
				kmem_free(srcaddr->buf, srcaddr->maxlen);
				srcaddr->maxlen = srcaddr->len = 0;
				srcaddr->buf = NULL;
			}
			rpcerr->re_errno = i;
			connmgr_cancelconn(cm_entry);
			return (NULL);
		}
	} else {
		if ((i = t_kbind(tiptr, NULL, NULL)) != 0) {
			RPCLOG(1, "clnt_cots_kcreate: t_kbind: %d\n", i);
			(void) t_kclose(tiptr, 1);
			rpcerr->re_errno = i;
			connmgr_cancelconn(cm_entry);
			return (NULL);
		}
	}
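
	/*
	 * Editorial note (illustration only): bindresvport() is taken when
	 * a privileged ("reserved") source port is wanted - for AF_INET
	 * and AF_INET6 a port below 1024 - or when a retry must reuse the
	 * original source port so the server can match the retransmission
	 * to the earlier request. Everything else goes down the t_kbind()
	 * path and lets the transport pick an anonymous port.
	 */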

	{
		/*
		 * Keep the kernel stack lean. Don't move this calllist_t
		 * declaration to the top of this function because one is
		 * also declared in connmgr_wrapconnect().
		 */
		calllist_t call;

		bzero(&call, sizeof (call));
		cv_init(&call.call_cv, NULL, CV_DEFAULT, NULL);

		/*
		 * This is a bound end-point so don't close its stream.
		 */
		connected = connmgr_connect(cm_entry, wq, destaddr, addrfmly,
		    &call, &tidu_size, FALSE, waitp,
		    nosignal);
		*rpcerr = call.call_err;
		cv_destroy(&call.call_cv);
	}

	mutex_enter(&connmgr_lock);

	/*
	 * Finish setting up the transport entry in the connection
	 * manager's list.
	 */
	cm_entry->x_src.buf = kmem_zalloc(srcaddr->len, KM_SLEEP);
	bcopy(srcaddr->buf, cm_entry->x_src.buf, srcaddr->len);
	cm_entry->x_src.len = cm_entry->x_src.maxlen = srcaddr->len;

	cm_entry->x_tiptr = tiptr;
	cm_entry->x_time = lbolt;

	if (tiptr->tp_info.servtype == T_COTS_ORD)
		cm_entry->x_ordrel = TRUE;
	else
		cm_entry->x_ordrel = FALSE;

	cm_entry->x_tidu_size = tidu_size;

	if (cm_entry->x_early_disc)
		cm_entry->x_connected = FALSE;
	else
		cm_entry->x_connected = connected;

	/*
	 * There could be a discrepancy here such that
	 * x_early_disc is TRUE yet connected is TRUE as well
	 * and the connection is actually connected. In that case
	 * let's be conservative and declare the connection as not
	 * connected.
	 */
	cm_entry->x_early_disc = FALSE;
	cm_entry->x_needdis = (cm_entry->x_connected == FALSE);
	cm_entry->x_ctime = lbolt;

	/*
	 * Notify any threads waiting that the connection attempt is done.
	 */
	cm_entry->x_thread = FALSE;
	cv_broadcast(&cm_entry->x_conn_cv);

	mutex_exit(&connmgr_lock);

	if (cm_entry->x_connected == FALSE) {
		connmgr_release(cm_entry);
		return (NULL);
	}
	return (cm_entry);
}
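
/*
 * Editorial note (illustration only): connmgr_get() thus has a simple
 * contract with its callers: on failure it returns NULL with *rpcerr
 * filled in, and on success it returns a cm_xprt entry holding one
 * reference that the caller must eventually drop with connmgr_release().
 */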

/*
 * Keep the cm_xprt entry on the connection list when making a connection. This
 * is to prevent multiple connections to a slow server from appearing.
 * We use the bit field x_thread to tell if a thread is doing a connection
 * attempt, which keeps other interested threads from messing with the
 * connection. Those other threads just wait if x_thread is set.
 *
 * If x_thread is not set, then we do the actual work of connecting via
 * connmgr_connect().
 *
 * mutex convention: called with connmgr_lock held, returns with it released.
 */
static struct cm_xprt *
connmgr_wrapconnect(
	struct cm_xprt *cm_entry,
	const struct timeval *waitp,
	struct netbuf *destaddr,
	int addrfmly,
	struct netbuf *srcaddr,
	struct rpc_err *rpcerr,
	bool_t reconnect,
	bool_t nosignal)
{
	ASSERT(MUTEX_HELD(&connmgr_lock));
	/*
	 * Hold this entry as we are about to drop connmgr_lock.
	 */
	CONN_HOLD(cm_entry);

	/*
	 * If there is a thread already making a connection for us, then
	 * wait for it to complete the connection.
	 */
	if (cm_entry->x_thread == TRUE) {
		rpcerr->re_status = connmgr_cwait(cm_entry, waitp, nosignal);

		if (rpcerr->re_status != RPC_SUCCESS) {
			mutex_exit(&connmgr_lock);
			connmgr_release(cm_entry);
			return (NULL);
		}
	} else {
		bool_t connected;
		calllist_t call;

		cm_entry->x_thread = TRUE;

		while (cm_entry->x_needrel == TRUE) {
			cm_entry->x_needrel = FALSE;

			connmgr_sndrel(cm_entry);
			delay(drv_usectohz(1000000));

			mutex_enter(&connmgr_lock);
		}

		/*
		 * If we need to send a T_DISCON_REQ, send one.
		 */
		connmgr_dis_and_wait(cm_entry);

		mutex_exit(&connmgr_lock);

		bzero(&call, sizeof (call));
		cv_init(&call.call_cv, NULL, CV_DEFAULT, NULL);

		connected = connmgr_connect(cm_entry, cm_entry->x_wq,
		    destaddr, addrfmly, &call,
		    &cm_entry->x_tidu_size,
		    reconnect, waitp, nosignal);

		*rpcerr = call.call_err;
		cv_destroy(&call.call_cv);

		mutex_enter(&connmgr_lock);

		if (cm_entry->x_early_disc)
			cm_entry->x_connected = FALSE;
		else
			cm_entry->x_connected = connected;

		/*
		 * There could be a discrepancy here such that
		 * x_early_disc is TRUE yet connected is TRUE as well
		 * and the connection is actually connected. In that case
		 * let's be conservative and declare the connection as not
		 * connected.
		 */
		cm_entry->x_early_disc = FALSE;
		cm_entry->x_needdis = (cm_entry->x_connected == FALSE);

		/*
		 * connmgr_connect() may have given up before the connection
		 * actually timed out. So ensure that before the next
		 * connection attempt we do a disconnect.
		 */
		cm_entry->x_ctime = lbolt;
		cm_entry->x_thread = FALSE;

		cv_broadcast(&cm_entry->x_conn_cv);

		if (cm_entry->x_connected == FALSE) {
			mutex_exit(&connmgr_lock);
			connmgr_release(cm_entry);
			return (NULL);
		}
	}

	if (srcaddr != NULL) {
		/*
		 * Copy into the handle the source address of the
		 * connection, which we will use in case of a later retry.
		 */
		if (srcaddr->len != cm_entry->x_src.len) {
			if (srcaddr->maxlen > 0)
				kmem_free(srcaddr->buf, srcaddr->maxlen);
			srcaddr->buf = kmem_zalloc(cm_entry->x_src.len,
			    KM_SLEEP);
			srcaddr->maxlen = srcaddr->len =
			    cm_entry->x_src.len;
		}
		bcopy(cm_entry->x_src.buf, srcaddr->buf, srcaddr->len);
	}
	cm_entry->x_time = lbolt;
	mutex_exit(&connmgr_lock);
	return (cm_entry);
}
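
/*
 * Editorial sketch (illustration only): the disconnect handshake driven
 * by connmgr_dis_and_wait() below looks roughly like this:
 *
 *	this thread				rpcmod/transport
 *	-----------				----------------
 *	x_needdis = FALSE, x_waitdis = TRUE
 *	connmgr_snddis()	-- T_DISCON_REQ -->
 *	cv_timedwait(x_dis_cv)
 *				<-- T_OK_ACK / T_ERROR_ACK --
 *						clnt_dispatch_notifyall()
 *						clears x_waitdis and
 *						signals x_dis_cv
 *
 * If the ack never arrives, the wait times out and the T_DISCON_REQ is
 * simply re-sent.
 */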

/*
 * If we need to send a T_DISCON_REQ, send one.
 */
static void
connmgr_dis_and_wait(struct cm_xprt *cm_entry)
{
	ASSERT(MUTEX_HELD(&connmgr_lock));
	for (;;) {
		while (cm_entry->x_needdis == TRUE) {
			RPCLOG(8, "connmgr_dis_and_wait: need "
			    "T_DISCON_REQ for connection 0x%p\n",
			    (void *)cm_entry);
			cm_entry->x_needdis = FALSE;
			cm_entry->x_waitdis = TRUE;

			connmgr_snddis(cm_entry);

			mutex_enter(&connmgr_lock);
		}

		if (cm_entry->x_waitdis == TRUE) {
			clock_t curlbolt;
			clock_t timout;

			RPCLOG(8, "connmgr_dis_and_wait waiting for "
			    "T_DISCON_REQ's ACK for connection %p\n",
			    (void *)cm_entry);
			curlbolt = ddi_get_lbolt();

			timout = clnt_cots_min_conntout *
			    drv_usectohz(1000000) + curlbolt;

			/*
			 * The TPI spec says that the T_DISCON_REQ
			 * will get acknowledged, but in practice
			 * the ACK may never get sent. So don't
			 * block forever.
			 */
			(void) cv_timedwait(&cm_entry->x_dis_cv,
			    &connmgr_lock, timout);
		}
		/*
		 * If we got the ACK, break. If we didn't,
		 * then send another T_DISCON_REQ.
		 */
		if (cm_entry->x_waitdis == FALSE) {
			break;
		} else {
			RPCLOG(8, "connmgr_dis_and_wait: did "
			    "not get T_DISCON_REQ's ACK for "
			    "connection %p\n", (void *)cm_entry);
			cm_entry->x_needdis = TRUE;
		}
	}
}

static void
connmgr_cancelconn(struct cm_xprt *cm_entry)
{
	/*
	 * Mark the connection table entry as dead; the next thread that
	 * goes through connmgr_release() will notice this and deal with it.
	 */
	mutex_enter(&connmgr_lock);
	cm_entry->x_dead = TRUE;

	/*
	 * Notify any threads waiting for the connection that it isn't
	 * going to happen.
	 */
	cm_entry->x_thread = FALSE;
	cv_broadcast(&cm_entry->x_conn_cv);
	mutex_exit(&connmgr_lock);

	connmgr_release(cm_entry);
}

static void
connmgr_close(struct cm_xprt *cm_entry)
{
	mutex_enter(&cm_entry->x_lock);
	while (cm_entry->x_ref != 0) {
		/*
		 * Must be a noninterruptible wait.
		 */
		cv_wait(&cm_entry->x_cv, &cm_entry->x_lock);
	}

	if (cm_entry->x_tiptr != NULL)
		(void) t_kclose(cm_entry->x_tiptr, 1);

	mutex_exit(&cm_entry->x_lock);
	if (cm_entry->x_ksp != NULL) {
		mutex_enter(&connmgr_lock);
		cm_entry->x_ksp->ks_private = NULL;
		mutex_exit(&connmgr_lock);

		/*
		 * Must free the buffer we allocated for the
		 * server address in the update function.
		 */
		if (((struct cm_kstat_xprt *)(cm_entry->x_ksp->ks_data))->
		    x_server.value.str.addr.ptr != NULL)
			kmem_free(((struct cm_kstat_xprt *)(cm_entry->x_ksp->
			    ks_data))->x_server.value.str.addr.ptr,
			    INET6_ADDRSTRLEN);
		kmem_free(cm_entry->x_ksp->ks_data,
		    cm_entry->x_ksp->ks_data_size);
		kstat_delete(cm_entry->x_ksp);
	}

	mutex_destroy(&cm_entry->x_lock);
	cv_destroy(&cm_entry->x_cv);
	cv_destroy(&cm_entry->x_conn_cv);
	cv_destroy(&cm_entry->x_dis_cv);

	if (cm_entry->x_server.buf != NULL)
		kmem_free(cm_entry->x_server.buf, cm_entry->x_server.maxlen);
	if (cm_entry->x_src.buf != NULL)
		kmem_free(cm_entry->x_src.buf, cm_entry->x_src.maxlen);
	kmem_free(cm_entry, sizeof (struct cm_xprt));
}

/*
 * Called by KRPC after sending the call message to release the connection
 * it was using.
 */
static void
connmgr_release(struct cm_xprt *cm_entry)
{
	mutex_enter(&cm_entry->x_lock);
	cm_entry->x_ref--;
	if (cm_entry->x_ref == 0)
		cv_signal(&cm_entry->x_cv);
	mutex_exit(&cm_entry->x_lock);
}
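
/*
 * Editorial note (illustration only): x_ref is the entry's reference
 * count. Every CONN_HOLD() must be balanced by a connmgr_release(), and
 * connmgr_close() above waits for x_ref to drain to zero before tearing
 * the entry down, so a held entry can never be freed out from under its
 * user.
 */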

/*
 * Given an open stream, connect to the remote. Returns true if connected,
 * false otherwise.
 */
static bool_t
connmgr_connect(
	struct cm_xprt *cm_entry,
	queue_t *wq,
	struct netbuf *addr,
	int addrfmly,
	calllist_t *e,
	int *tidu_ptr,
	bool_t reconnect,
	const struct timeval *waitp,
	bool_t nosignal)
{
	mblk_t *mp;
	struct T_conn_req *tcr;
	struct T_info_ack *tinfo;
	int interrupted, error;
	int tidu_size, kstat_instance;

	/* if it's a reconnect, flush any lingering data messages */
	if (reconnect)
		(void) putctl1(wq, M_FLUSH, FLUSHRW);

	mp = allocb(sizeof (*tcr) + addr->len, BPRI_LO);
	if (mp == NULL) {
		/*
		 * This is unfortunate, but we need to look up the stats for
		 * this zone to increment the "memory allocation failed"
		 * counter. curproc->p_zone is safe since we're initiating a
		 * connection and not in some strange streams context.
		 */
		struct rpcstat *rpcstat;

		rpcstat = zone_getspecific(rpcstat_zone_key, rpc_zone());
		ASSERT(rpcstat != NULL);

		RPCLOG0(1, "connmgr_connect: cannot alloc mp for "
		    "sending conn request\n");
		COTSRCSTAT_INCR(rpcstat->rpc_cots_client, rcnomem);
		e->call_status = RPC_SYSTEMERROR;
		e->call_reason = ENOSR;
		return (FALSE);
	}

	mp->b_datap->db_type = M_PROTO;
	tcr = (struct T_conn_req *)mp->b_rptr;
	bzero(tcr, sizeof (*tcr));
	tcr->PRIM_type = T_CONN_REQ;
	tcr->DEST_length = addr->len;
	tcr->DEST_offset = sizeof (struct T_conn_req);
	mp->b_wptr = mp->b_rptr + sizeof (*tcr);

	bcopy(addr->buf, mp->b_wptr, tcr->DEST_length);
	mp->b_wptr += tcr->DEST_length;

	RPCLOG(8, "connmgr_connect: sending conn request on queue "
	    "%p", (void *)wq);
	RPCLOG(8, " call %p\n", (void *)wq);
	/*
	 * We use the entry in the handle that is normally used for
	 * waiting for RPC replies to wait for the connection accept.
	 */
	clnt_dispatch_send(wq, mp, e, 0, 0);

	mutex_enter(&clnt_pending_lock);

	/*
	 * We wait for the transport connection to be made, or an
	 * indication that it could not be made.
	 */
	interrupted = 0;

	/*
	 * waitforack should have been called with T_OK_ACK, but the
	 * present implementation needs to be passed T_INFO_ACK to
	 * work correctly.
	 */
	error = waitforack(e, T_INFO_ACK, waitp, nosignal);
	if (error == EINTR)
		interrupted = 1;
	if (zone_status_get(curproc->p_zone) >= ZONE_IS_EMPTY) {
		/*
		 * No time to lose; we essentially have been signaled to
		 * quit.
		 */
		interrupted = 1;
	}
#ifdef RPCDEBUG
	if (error == ETIME)
		RPCLOG0(8, "connmgr_connect: giving up "
		    "on connection attempt; "
		    "clnt_dispatch_notifyconn "
		    "diagnostic 'no one waiting for "
		    "connection' should not be "
		    "unexpected\n");
#endif
	if (e->call_prev)
		e->call_prev->call_next = e->call_next;
	else
		clnt_pending = e->call_next;
	if (e->call_next)
		e->call_next->call_prev = e->call_prev;
	mutex_exit(&clnt_pending_lock);

	if (e->call_status != RPC_SUCCESS || error != 0) {
		if (interrupted)
			e->call_status = RPC_INTR;
		else if (error == ETIME)
			e->call_status = RPC_TIMEDOUT;
		else if (error == EPROTO)
			e->call_status = RPC_SYSTEMERROR;

		RPCLOG(8, "connmgr_connect: can't connect, status: "
		    "%s\n", clnt_sperrno(e->call_status));

		if (e->call_reply) {
			freemsg(e->call_reply);
			e->call_reply = NULL;
		}

		return (FALSE);
	}
	/*
	 * The result of the "connection accept" is a T_info_ack
	 * in the call_reply field.
	 */
	ASSERT(e->call_reply != NULL);
	mp = e->call_reply;
	e->call_reply = NULL;
	tinfo = (struct T_info_ack *)mp->b_rptr;

	tidu_size = tinfo->TIDU_size;
	tidu_size -= (tidu_size % BYTES_PER_XDR_UNIT);
	if (tidu_size > COTS_DEFAULT_ALLOCSIZE || (tidu_size <= 0))
		tidu_size = COTS_DEFAULT_ALLOCSIZE;
	*tidu_ptr = tidu_size;

	freemsg(mp);
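
	/*
	 * Editorial example (illustration only): tidu_size is rounded
	 * down to a multiple of BYTES_PER_XDR_UNIT, so e.g. a transport
	 * TIDU of 1461 bytes becomes 1460; non-positive values (TPI uses
	 * negative sentinels such as T_INFINITE) and values larger than
	 * COTS_DEFAULT_ALLOCSIZE are replaced by the default.
	 */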

	/*
	 * Set up the pertinent options. NODELAY is so the transport doesn't
	 * buffer up RPC messages on either end. This may not be valid for
	 * all transports. Failure to set this option is not cause to
	 * bail out so we return success anyway. Note that lack of NODELAY
	 * or some other way to flush the message on both ends will cause
	 * lots of retries and terrible performance.
	 */
	if (addrfmly == AF_INET || addrfmly == AF_INET6) {
		(void) connmgr_setopt(wq, IPPROTO_TCP, TCP_NODELAY, e);
		if (e->call_status == RPC_XPRTFAILED)
			return (FALSE);
	}

	/*
	 * Since we have a connection, we now need to figure out if
	 * we need to create a kstat. If x_ksp is not NULL then we
	 * are reusing a connection and so we do not need to create
	 * another kstat -- let's just return.
	 */
	if (cm_entry->x_ksp != NULL)
		return (TRUE);

	/*
	 * We need to increment rpc_kstat_instance atomically to prevent
	 * two kstats being created with the same instance.
	 */
	kstat_instance = atomic_add_32_nv((uint32_t *)&rpc_kstat_instance, 1);

	if ((cm_entry->x_ksp = kstat_create_zone("unix", kstat_instance,
	    "rpc_cots_connections", "rpc", KSTAT_TYPE_NAMED,
	    (uint_t)(sizeof (cm_kstat_xprt_t) / sizeof (kstat_named_t)),
	    KSTAT_FLAG_VIRTUAL, cm_entry->x_zoneid)) == NULL) {
		return (TRUE);
	}

	cm_entry->x_ksp->ks_lock = &connmgr_lock;
	cm_entry->x_ksp->ks_private = cm_entry;
	cm_entry->x_ksp->ks_data_size = ((INET6_ADDRSTRLEN * sizeof (char))
	    + sizeof (cm_kstat_template));
	cm_entry->x_ksp->ks_data = kmem_alloc(cm_entry->x_ksp->ks_data_size,
	    KM_SLEEP);
	bcopy(&cm_kstat_template, cm_entry->x_ksp->ks_data,
	    cm_entry->x_ksp->ks_data_size);
	((struct cm_kstat_xprt *)(cm_entry->x_ksp->ks_data))->
	    x_server.value.str.addr.ptr =
	    kmem_alloc(INET6_ADDRSTRLEN, KM_SLEEP);

	cm_entry->x_ksp->ks_update = conn_kstat_update;
	kstat_install(cm_entry->x_ksp);
	return (TRUE);
}
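
/*
 * Editorial sketch (illustration only): the option request built by
 * connmgr_setopt() below is a single M_PROTO message laid out as:
 *
 *	+------------------------+ b_rptr
 *	| struct T_optmgmt_req	 |  T_SVR4_OPTMGMT_REQ / T_NEGOTIATE
 *	+------------------------+
 *	| struct opthdr		 |  level, name, len = sizeof (int)
 *	+------------------------+
 *	| int			 |  option value, always 1 ("on")
 *	+------------------------+ b_wptr
 */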

/*
 * Called by connmgr_connect to set an option on the new stream.
 */
static bool_t
connmgr_setopt(queue_t *wq, int level, int name, calllist_t *e)
{
	mblk_t *mp;
	struct opthdr *opt;
	struct T_optmgmt_req *tor;
	struct timeval waitp;
	int error;

	mp = allocb(sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) +
	    sizeof (int), BPRI_LO);
	if (mp == NULL) {
		RPCLOG0(1, "connmgr_setopt: cannot alloc mp for option "
		    "request\n");
		return (FALSE);
	}

	mp->b_datap->db_type = M_PROTO;
	tor = (struct T_optmgmt_req *)(mp->b_rptr);
	tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
	tor->MGMT_flags = T_NEGOTIATE;
	tor->OPT_length = sizeof (struct opthdr) + sizeof (int);
	tor->OPT_offset = sizeof (struct T_optmgmt_req);

	opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req));
	opt->level = level;
	opt->name = name;
	opt->len = sizeof (int);
	*(int *)((char *)opt + sizeof (*opt)) = 1;
	mp->b_wptr += sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) +
	    sizeof (int);

	/*
	 * We will use this connection regardless
	 * of whether or not the option is settable.
	 */
	clnt_dispatch_send(wq, mp, e, 0, 0);
	mutex_enter(&clnt_pending_lock);

	waitp.tv_sec = clnt_cots_min_conntout;
	waitp.tv_usec = 0;
	error = waitforack(e, T_OPTMGMT_ACK, &waitp, 1);

	if (e->call_prev)
		e->call_prev->call_next = e->call_next;
	else
		clnt_pending = e->call_next;
	if (e->call_next)
		e->call_next->call_prev = e->call_prev;
	mutex_exit(&clnt_pending_lock);

	if (e->call_reply != NULL) {
		freemsg(e->call_reply);
		e->call_reply = NULL;
	}

	if (e->call_status != RPC_SUCCESS || error != 0) {
		RPCLOG(1, "connmgr_setopt: can't set option: %d\n", name);
		return (FALSE);
	}
	RPCLOG(8, "connmgr_setopt: successfully set option: %d\n", name);
	return (TRUE);
}

#ifdef DEBUG

/*
 * This is a knob to let us force code coverage in the allocation failure
 * case.
 */
static int connmgr_failsnd;
#define	CONN_SND_ALLOC(Size, Pri)	\
	((connmgr_failsnd-- > 0) ? NULL : allocb(Size, Pri))

#else

#define	CONN_SND_ALLOC(Size, Pri)	allocb(Size, Pri)

#endif

/*
 * Sends an orderly release on the specified queue.
 * Entered with connmgr_lock held; exits with it released.
 */
static void
connmgr_sndrel(struct cm_xprt *cm_entry)
{
	struct T_ordrel_req *torr;
	mblk_t *mp;
	queue_t *q = cm_entry->x_wq;

	ASSERT(MUTEX_HELD(&connmgr_lock));
	mp = CONN_SND_ALLOC(sizeof (struct T_ordrel_req), BPRI_LO);
	if (mp == NULL) {
		cm_entry->x_needrel = TRUE;
		mutex_exit(&connmgr_lock);
		RPCLOG(1, "connmgr_sndrel: cannot alloc mp for sending ordrel "
		    "to queue %p\n", (void *)q);
		return;
	}
	mutex_exit(&connmgr_lock);

	mp->b_datap->db_type = M_PROTO;
	torr = (struct T_ordrel_req *)(mp->b_rptr);
	torr->PRIM_type = T_ORDREL_REQ;
	mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_req);

	RPCLOG(8, "connmgr_sndrel: sending ordrel to queue %p\n", (void *)q);
	put(q, mp);
}

/*
 * Sends a disconnect on the specified queue.
 * Entered with connmgr_lock held; exits with it released.
 */
static void
connmgr_snddis(struct cm_xprt *cm_entry)
{
	struct T_discon_req *tdis;
	mblk_t *mp;
	queue_t *q = cm_entry->x_wq;

	ASSERT(MUTEX_HELD(&connmgr_lock));
	mp = CONN_SND_ALLOC(sizeof (*tdis), BPRI_LO);
	if (mp == NULL) {
		cm_entry->x_needdis = TRUE;
		mutex_exit(&connmgr_lock);
		RPCLOG(1, "connmgr_snddis: cannot alloc mp for sending discon "
		    "to queue %p\n", (void *)q);
		return;
	}
	mutex_exit(&connmgr_lock);

	mp->b_datap->db_type = M_PROTO;
	tdis = (struct T_discon_req *)mp->b_rptr;
	tdis->PRIM_type = T_DISCON_REQ;
	mp->b_wptr = mp->b_rptr + sizeof (*tdis);

	RPCLOG(8, "connmgr_snddis: sending discon to queue %p\n", (void *)q);
	put(q, mp);
}
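
/*
 * Editorial note (illustration only): two different lookup structures
 * feed the dispatch code below. RPC requests (xid != 0) go into the
 * XID-hashed table cots_call_ht, so a reply can be matched in roughly
 * constant time via call_hash(xid, clnt_cots_hash_size); connection
 * management exchanges (xid == 0) go onto the flat clnt_pending list,
 * which is searched by write queue instead.
 */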

/*
 * Sets up the entry for receiving replies, and calls rpcmod's write put proc
 * (through put) to send the call.
 */
static void
clnt_dispatch_send(queue_t *q, mblk_t *mp, calllist_t *e, uint_t xid,
	uint_t queue_flag)
{
	ASSERT(e != NULL);

	e->call_status = RPC_TIMEDOUT;	/* optimistic, eh? */
	e->call_reason = 0;
	e->call_wq = q;
	e->call_xid = xid;
	e->call_notified = FALSE;

	/*
	 * If queue_flag is set then the calllist_t is already on the hash
	 * queue. In this case just send the message and return.
	 */
	if (queue_flag) {
		put(q, mp);
		return;
	}

	/*
	 * Set up calls for RPC requests (with XID != 0) on the hash
	 * queue for fast lookups and place other calls (i.e.
	 * connection management) on the linked list.
	 */
	if (xid != 0) {
		RPCLOG(64, "clnt_dispatch_send: putting xid 0x%x on "
		    "dispatch list\n", xid);
		e->call_hash = call_hash(xid, clnt_cots_hash_size);
		e->call_bucket = &cots_call_ht[e->call_hash];
		call_table_enter(e);
	} else {
		mutex_enter(&clnt_pending_lock);
		if (clnt_pending)
			clnt_pending->call_prev = e;
		e->call_next = clnt_pending;
		e->call_prev = NULL;
		clnt_pending = e;
		mutex_exit(&clnt_pending_lock);
	}

	put(q, mp);
}
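
/*
 * Editorial note (illustration only): clnt_dispatch_send() only enqueues
 * the calllist entry; it never dequeues it. Callers such as
 * connmgr_connect() and connmgr_setopt() above are responsible for
 * unlinking the entry from clnt_pending (or the hash bucket) once the
 * wait is over, whether or not a reply ever arrived.
 */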

/*
 * Called by rpcmod to notify a client with a clnt_pending call that its reply
 * has arrived. If we can't find a client waiting for this reply, we log
 * the error and return.
 */
bool_t
clnt_dispatch_notify(mblk_t *mp, zoneid_t zoneid)
{
	calllist_t *e = NULL;
	call_table_t *chtp;
	uint32_t xid;
	uint_t hash;

	if ((IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) &&
	    (mp->b_wptr - mp->b_rptr) >= sizeof (xid))
		xid = *((uint32_t *)mp->b_rptr);
	else {
		int i = 0;
		unsigned char *p = (unsigned char *)&xid;
		unsigned char *rptr;
		mblk_t *tmp = mp;

		/*
		 * Copy the xid out of the message, byte by byte.
		 */
		while (tmp) {
			rptr = tmp->b_rptr;
			while (rptr < tmp->b_wptr) {
				*p++ = *rptr++;
				if (++i >= sizeof (xid))
					goto done_xid_copy;
			}
			tmp = tmp->b_cont;
		}

		/*
		 * If we got here, we ran out of mblk space before the
		 * xid could be copied.
		 */
		ASSERT(tmp == NULL && i < sizeof (xid));

		RPCLOG0(1,
		    "clnt_dispatch_notify: message less than size of xid\n");
		return (FALSE);

	}
done_xid_copy:

	hash = call_hash(xid, clnt_cots_hash_size);
	chtp = &cots_call_ht[hash];
	/* call_table_find returns with the hash bucket locked */
	call_table_find(chtp, xid, e);

	if (e != NULL) {
		/*
		 * Found thread waiting for this reply
		 */
		mutex_enter(&e->call_lock);
		if (e->call_reply)
			/*
			 * This can happen under the following scenario:
			 * clnt_cots_kcallit() times out on the response,
			 * rfscall() repeats the CLNT_CALL() with
			 * the same xid, clnt_cots_kcallit() sends the retry,
			 * thereby putting the clnt handle on the pending list,
			 * the first response arrives, signalling the thread
			 * in clnt_cots_kcallit(). Before that thread is
			 * dispatched, the second response arrives as well,
			 * and clnt_dispatch_notify still finds the handle on
			 * the pending list, with call_reply set. So free the
			 * old reply now.
			 *
			 * It is also possible for a response intended for
			 * an RPC call with a different xid to reside here.
			 * This can happen if the thread that owned this
			 * client handle prior to the current owner bailed
			 * out and left its call record on the dispatch
			 * queue. A window exists where the response can
			 * arrive before the current owner dispatches its
			 * RPC call.
			 *
			 * In any case, this is the very last point where we
			 * can safely check the call_reply field before
			 * placing the new response there.
			 */
			freemsg(e->call_reply);
		e->call_reply = mp;
		e->call_status = RPC_SUCCESS;
		e->call_notified = TRUE;
		cv_signal(&e->call_cv);
		mutex_exit(&e->call_lock);
		mutex_exit(&chtp->ct_lock);
		return (TRUE);
	} else {
		zone_t *zone;
		struct rpcstat *rpcstat;

		mutex_exit(&chtp->ct_lock);
		RPCLOG(65, "clnt_dispatch_notify: no caller for reply 0x%x\n",
		    xid);
		/*
		 * This is unfortunate, but we need to lookup the zone so we
		 * can increment its "rcbadxids" counter.
		 */
		zone = zone_find_by_id(zoneid);
		if (zone == NULL) {
			/*
			 * The zone went away...
			 */
			return (FALSE);
		}
		rpcstat = zone_getspecific(rpcstat_zone_key, zone);
		if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN) {
			/*
			 * Not interested
			 */
			zone_rele(zone);
			return (FALSE);
		}
		COTSRCSTAT_INCR(rpcstat->rpc_cots_client, rcbadxids);
		zone_rele(zone);
	}
	return (FALSE);
}

/*
 * Called by rpcmod when a non-data indication arrives. The ones in which we
 * are interested are connection indications and options acks. We dispatch
 * based on the queue the indication came in on. If we are not interested in
 * what came in, we return false to rpcmod, who will then pass it upstream.
 */
bool_t
clnt_dispatch_notifyconn(queue_t *q, mblk_t *mp)
{
	calllist_t *e;
	int type;

	ASSERT((q->q_flag & QREADR) == 0);

	type = ((union T_primitives *)mp->b_rptr)->type;
	RPCLOG(8, "clnt_dispatch_notifyconn: prim type: [%s]\n",
	    rpc_tpiprim2name(type));
	mutex_enter(&clnt_pending_lock);
	for (e = clnt_pending; /* NO CONDITION */; e = e->call_next) {
		if (e == NULL) {
			mutex_exit(&clnt_pending_lock);
			RPCLOG(1, "clnt_dispatch_notifyconn: no one waiting "
			    "for connection on queue 0x%p\n", (void *)q);
			return (FALSE);
		}
		if (e->call_wq == q)
			break;
	}

	switch (type) {
	case T_CONN_CON:
		/*
		 * The transport is now connected, send a T_INFO_REQ to get
		 * the tidu size.
		 */
		mutex_exit(&clnt_pending_lock);
		ASSERT(mp->b_datap->db_lim - mp->b_datap->db_base >=
		    sizeof (struct T_info_req));
		mp->b_rptr = mp->b_datap->db_base;
		((union T_primitives *)mp->b_rptr)->type = T_INFO_REQ;
		mp->b_wptr = mp->b_rptr + sizeof (struct T_info_req);
		mp->b_datap->db_type = M_PCPROTO;
		put(q, mp);
		return (TRUE);
	case T_INFO_ACK:
	case T_OPTMGMT_ACK:
		e->call_status = RPC_SUCCESS;
		e->call_reply = mp;
		e->call_notified = TRUE;
		cv_signal(&e->call_cv);
		break;
	case T_ERROR_ACK:
		e->call_status = RPC_CANTCONNECT;
		e->call_reply = mp;
		e->call_notified = TRUE;
		cv_signal(&e->call_cv);
		break;
	case T_OK_ACK:
		/*
		 * Great, but we are really waiting for a T_CONN_CON
		 */
		freemsg(mp);
		break;
	default:
		mutex_exit(&clnt_pending_lock);
		RPCLOG(1, "clnt_dispatch_notifyconn: bad type %d\n", type);
		return (FALSE);
	}

	mutex_exit(&clnt_pending_lock);
	return (TRUE);
}
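
/*
 * Editorial summary (illustration only) of how clnt_dispatch_notifyall()
 * below reacts to each primitive:
 *
 *	T_ORDREL_REQ	rpcmod's idle timer fired; start a graceful
 *			close or mark the entry dead
 *	T_ORDREL_IND	the peer is closing; unless we are already in a
 *			close handshake, answer with a T_DISCON_REQ
 *	T_ERROR_ACK /
 *	T_OK_ACK	acknowledgement of our T_DISCON_REQ; wake the
 *			thread waiting in connmgr_dis_and_wait()
 *	T_DISCON_REQ /
 *	T_DISCON_IND	the connection is gone; mark the entry
 *			unconnected (or dead) and fail pending calls
 */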

/*
 * Called by rpcmod when the transport is (or should be) going away. Informs
 * all callers waiting for replies and marks the entry in the connection
 * manager's list as unconnected, and either closing (close handshake in
 * progress) or dead.
 */
void
clnt_dispatch_notifyall(queue_t *q, int32_t msg_type, int32_t reason)
{
	calllist_t *e;
	call_table_t *ctp;
	struct cm_xprt *cm_entry;
	int have_connmgr_lock;
	int i;

	ASSERT((q->q_flag & QREADR) == 0);

	RPCLOG(1, "clnt_dispatch_notifyall on queue %p", (void *)q);
	RPCLOG(1, " received a notification prim type [%s]",
	    rpc_tpiprim2name(msg_type));
	RPCLOG(1, " and reason %d\n", reason);

	/*
	 * Find the transport entry in the connection manager's list, close
	 * the transport and delete the entry. In the case where rpcmod's
	 * idle timer goes off, it sends us a T_ORDREL_REQ, indicating we
	 * should gracefully close the connection.
	 */
	have_connmgr_lock = 1;
	mutex_enter(&connmgr_lock);
	for (cm_entry = cm_hd; cm_entry; cm_entry = cm_entry->x_next) {
		ASSERT(cm_entry != cm_entry->x_next);
		if (cm_entry->x_wq == q) {
			ASSERT(MUTEX_HELD(&connmgr_lock));
			ASSERT(have_connmgr_lock == 1);
			switch (msg_type) {
			case T_ORDREL_REQ:

				if (cm_entry->x_dead) {
					RPCLOG(1, "idle timeout on dead "
					    "connection: %p\n",
					    (void *)cm_entry);
					if (clnt_stop_idle != NULL)
						(*clnt_stop_idle)(q);
					break;
				}

				/*
				 * Only mark the connection as dead if it is
				 * connected and idle.
				 * An unconnected connection has probably
				 * gone idle because the server is down,
				 * and when it comes back up there will be
				 * retries that need to use that connection.
				 */
				if (cm_entry->x_connected ||
				    cm_entry->x_doomed) {
					if (cm_entry->x_ordrel) {
						if (cm_entry->x_closing ==
						    TRUE) {
							/*
							 * The connection is
							 * obviously wedged due
							 * to a bug or problem
							 * with the transport.
							 * Mark it as dead.
							 * Otherwise we can
							 * leak connections.
							 */
							cm_entry->x_dead = TRUE;
							mutex_exit(
							    &connmgr_lock);
							have_connmgr_lock = 0;
							if (clnt_stop_idle !=
							    NULL)
								(*clnt_stop_idle)(q);
							break;
						}
						cm_entry->x_closing = TRUE;
						connmgr_sndrel(cm_entry);
						have_connmgr_lock = 0;
					} else {
						cm_entry->x_dead = TRUE;
						mutex_exit(&connmgr_lock);
						have_connmgr_lock = 0;
						if (clnt_stop_idle != NULL)
							(*clnt_stop_idle)(q);
					}
				} else {
					/*
					 * We don't mark the connection
					 * as dead, but we turn off the
					 * idle timer.
					 */
					mutex_exit(&connmgr_lock);
					have_connmgr_lock = 0;
					if (clnt_stop_idle != NULL)
						(*clnt_stop_idle)(q);
					RPCLOG(1, "clnt_dispatch_notifyall:"
					    " ignoring timeout from rpcmod"
					    " (q %p) because we are not "
					    " connected\n", (void *)q);
				}
				break;
			case T_ORDREL_IND:
				/*
				 * If this entry is marked closing, then we are
				 * completing a close handshake, and the
				 * connection is dead. Otherwise, the server is
				 * trying to close. Since the server will not
				 * be sending any more RPC replies, we abort
				 * the connection, including flushing
				 * any RPC requests that are in-transit.
				 */
				if (cm_entry->x_closing) {
					cm_entry->x_dead = TRUE;
					mutex_exit(&connmgr_lock);
					have_connmgr_lock = 0;
					if (clnt_stop_idle != NULL)
						(*clnt_stop_idle)(q);
				} else {
					/*
					 * if we're getting a disconnect
					 * before we've finished our
					 * connect attempt, mark it for
					 * later processing
					 */
					if (cm_entry->x_thread)
						cm_entry->x_early_disc = TRUE;
					else
						cm_entry->x_connected = FALSE;
					cm_entry->x_waitdis = TRUE;
					connmgr_snddis(cm_entry);
					have_connmgr_lock = 0;
				}
				break;

			case T_ERROR_ACK:
			case T_OK_ACK:
				cm_entry->x_waitdis = FALSE;
				cv_signal(&cm_entry->x_dis_cv);
				mutex_exit(&connmgr_lock);
				return;

			case T_DISCON_REQ:
				if (cm_entry->x_thread)
					cm_entry->x_early_disc = TRUE;
				else
					cm_entry->x_connected = FALSE;
				cm_entry->x_waitdis = TRUE;

				connmgr_snddis(cm_entry);
				have_connmgr_lock = 0;
				break;

			case T_DISCON_IND:
			default:
				/*
				 * if we're getting a disconnect before
				 * we've finished our connect attempt,
				 * mark it for later processing
				 */
				if (cm_entry->x_closing) {
					cm_entry->x_dead = TRUE;
					mutex_exit(&connmgr_lock);
					have_connmgr_lock = 0;
					if (clnt_stop_idle != NULL)
						(*clnt_stop_idle)(q);
				} else {
					if (cm_entry->x_thread) {
						cm_entry->x_early_disc = TRUE;
					} else {
						cm_entry->x_dead = TRUE;
						cm_entry->x_connected = FALSE;
					}
				}
				break;
			}
			break;
		}
	}

	if (have_connmgr_lock)
		mutex_exit(&connmgr_lock);

	if (msg_type == T_ERROR_ACK || msg_type == T_OK_ACK) {
		RPCLOG(1, "clnt_dispatch_notifyall: (wq %p) could not find "
		    "connmgr entry for discon ack\n", (void *)q);
		return;
	}

	/*
	 * Then kick all the clnt_pending calls out of their wait. There
	 * should be no clnt_pending calls in the case of rpcmod's idle
	 * timer firing.
	 */
	for (i = 0; i < clnt_cots_hash_size; i++) {
		ctp = &cots_call_ht[i];
		mutex_enter(&ctp->ct_lock);
		for (e = ctp->ct_call_next;
		    e != (calllist_t *)ctp;
		    e = e->call_next) {
			if (e->call_wq == q && e->call_notified == FALSE) {
				RPCLOG(1,
				    "clnt_dispatch_notifyall for queue %p ",
				    (void *)q);
				RPCLOG(1, "aborting clnt_pending call %p\n",
				    (void *)e);

				if (msg_type == T_DISCON_IND)
					e->call_reason = reason;
				e->call_notified = TRUE;
				e->call_status = RPC_XPRTFAILED;
				cv_signal(&e->call_cv);
			}
		}
		mutex_exit(&ctp->ct_lock);
	}

	mutex_enter(&clnt_pending_lock);
	for (e = clnt_pending; e; e = e->call_next) {
		/*
		 * Only signal those RPC handles that haven't been
		 * signalled yet. Otherwise we can get a bogus call_reason.
		 * This can happen if thread A is making a call over a
		 * connection. If the server is killed, it will cause a
		 * reset, and reason will default to EIO as a result of
		 * a T_ORDREL_IND. Thread B then attempts to recreate
		 * the connection but gets a T_DISCON_IND. If we set the
		 * call_reason code for all threads, then if thread A
		 * hasn't been dispatched yet, it will get the wrong
		 * reason. The bogus call_reason can make it harder to
		 * discriminate between calls that fail because the
		 * connection attempt failed versus those where the call
		 * may have been executed on the server.
		 */
		if (e->call_wq == q && e->call_notified == FALSE) {
			RPCLOG(1, "clnt_dispatch_notifyall for queue %p ",
			    (void *)q);
			RPCLOG(1, " aborting clnt_pending call %p\n",
			    (void *)e);

			if (msg_type == T_DISCON_IND)
				e->call_reason = reason;
			e->call_notified = TRUE;
			/*
			 * Let the caller timeout, else it will retry
			 * immediately.
			 */
			e->call_status = RPC_XPRTFAILED;

			/*
			 * We used to just signal those threads
			 * waiting for a connection (call_xid = 0).
			 * That meant that threads waiting for a response
			 * waited till their timeout expired. This
			 * could be a long time if they've specified a
			 * maximum timeout (2^31 - 1), so we
			 * signal all threads now.
			 */
			cv_signal(&e->call_cv);
		}
	}
	mutex_exit(&clnt_pending_lock);
}


/*ARGSUSED*/
/*
 * After resuming a system that's been suspended for longer than the
 * NFS server's idle timeout (svc_idle_timeout for Solaris 2), rfscall()
 * generates "NFS server X not responding" and "NFS server X ok" messages;
 * here we reset inet connections to cause a re-connect and avoid those
 * NFS messages. See 4045054.
 */
boolean_t
connmgr_cpr_reset(void *arg, int code)
{
	struct cm_xprt *cxp;

	if (code == CB_CODE_CPR_CHKPT)
		return (B_TRUE);

	if (mutex_tryenter(&connmgr_lock) == 0)
		return (B_FALSE);
	for (cxp = cm_hd; cxp; cxp = cxp->x_next) {
		if ((cxp->x_family == AF_INET || cxp->x_family == AF_INET6) &&
		    cxp->x_connected == TRUE) {
			if (cxp->x_thread)
				cxp->x_early_disc = TRUE;
			else
				cxp->x_connected = FALSE;
			cxp->x_needdis = TRUE;
		}
	}
	mutex_exit(&connmgr_lock);
	return (B_TRUE);
}

void
clnt_cots_stats_init(zoneid_t zoneid, struct rpc_cots_client **statsp)
{

	*statsp = (struct rpc_cots_client *)rpcstat_zone_init_common(zoneid,
	    "unix", "rpc_cots_client", (const kstat_named_t *)&cots_rcstat_tmpl,
	    sizeof (cots_rcstat_tmpl));
}

void
clnt_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_client **statsp)
{
	rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_client");
	kmem_free(*statsp, sizeof (cots_rcstat_tmpl));
}

void
clnt_cots_init(void)
{
	mutex_init(&connmgr_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&clnt_pending_lock, NULL, MUTEX_DEFAULT, NULL);

	if (clnt_cots_hash_size < DEFAULT_MIN_HASH_SIZE)
		clnt_cots_hash_size = DEFAULT_MIN_HASH_SIZE;

	cots_call_ht = call_table_init(clnt_cots_hash_size);
	zone_key_create(&zone_cots_key, NULL, NULL, clnt_zone_destroy);
}

void
clnt_cots_fini(void)
{
	(void) zone_key_delete(zone_cots_key);
}

/*
 * Wait for a TPI ack; returns success only if the expected ack is
 * received within the timeout period.
 */
static int
waitforack(calllist_t *e, t_scalar_t ack_prim, const struct timeval *waitp,
	bool_t nosignal)
{
	union T_primitives *tpr;
	clock_t timout;
	int cv_stat = 1;

	ASSERT(MUTEX_HELD(&clnt_pending_lock));
	while (e->call_reply == NULL) {
		if (waitp != NULL) {
			timout = waitp->tv_sec * drv_usectohz(MICROSEC) +
			    drv_usectohz(waitp->tv_usec) + lbolt;
			if (nosignal)
				cv_stat = cv_timedwait(&e->call_cv,
				    &clnt_pending_lock, timout);
			else
				cv_stat = cv_timedwait_sig(&e->call_cv,
				    &clnt_pending_lock, timout);
		} else {
			if (nosignal)
				cv_wait(&e->call_cv, &clnt_pending_lock);
			else
				cv_stat = cv_wait_sig(&e->call_cv,
				    &clnt_pending_lock);
		}
		if (cv_stat == -1)
			return (ETIME);
		if (cv_stat == 0)
			return (EINTR);
	}
	tpr = (union T_primitives *)e->call_reply->b_rptr;
	if (tpr->type == ack_prim)
		return (0);	/* Success */

	if (tpr->type == T_ERROR_ACK) {
		if (tpr->error_ack.TLI_error == TSYSERR)
			return (tpr->error_ack.UNIX_error);
		else
			return (t_tlitosyserr(tpr->error_ack.TLI_error));
	}

	return (EPROTO);	/* unknown or unexpected primitive */
}
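
/*
 * Editorial sketch (illustration only): a typical waitforack() caller
 * follows the pattern used by connmgr_setopt() above -
 *
 *	clnt_dispatch_send(wq, mp, e, 0, 0);
 *	mutex_enter(&clnt_pending_lock);
 *	error = waitforack(e, T_OPTMGMT_ACK, &waitp, nosignal);
 *	... unlink e from clnt_pending ...
 *	mutex_exit(&clnt_pending_lock);
 *
 * i.e. the entry is queued first, clnt_pending_lock is taken before the
 * wait, and the caller unlinks the entry afterwards regardless of the
 * return value.
 */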