/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
 * All Rights Reserved
 */

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Implements a kernel based, client side RPC over Connection Oriented
 * Transports (COTS).
 */

/*
 * Much of this file has been re-written to let NFS work better over slow
 * transports. A description follows.
 *
 * One of the annoying things about kRPC/COTS is that it will temporarily
 * create more than one connection between a client and server. This
 * happens because when a connection is made, the endpoint's entry in the
 * linked list of connections (headed by cm_hd) is removed so that other
 * threads don't mess with it. We went ahead and bit the bullet by keeping
 * the endpoint on the connection list and introducing state bits,
 * condition variables etc. to the connection entry data structure (struct
 * cm_xprt).
 *
 * Here is a summary of the changes to cm_xprt:
 *
 *	x_ctime is the timestamp of when the endpoint was last
 *	connected or disconnected. If an end-point is ever disconnected
 *	or re-connected, then any outstanding RPC request is presumed
 *	lost, telling clnt_cots_kcallit that it needs to re-send the
 *	request, not just wait for the original request's reply to
 *	arrive.
 *
 *	x_thread flag which tells us if a thread is doing a connection attempt.
 *
 *	x_waitdis flag which tells us we are waiting for a disconnect ACK.
 *
 *	x_needdis flag which tells us we need to send a T_DISCON_REQ
 *	to kill the connection.
 *
 *	x_needrel flag which tells us we need to send a T_ORDREL_REQ to
 *	gracefully close the connection.
 *
 *	#defined bitmasks for all the b_* bits so that more
 *	efficient (and at times less clumsy) masks can be used to
 *	manipulate state in cases where multiple bits have to be
 *	set/cleared/checked in the same critical section.
 *
 *	x_conn_cv and x_dis_cv are new condition variables to let
 *	threads know when the connection attempt is done, and to let
 *	the connecting thread know when the disconnect handshake is
 *	done.
 *
 * Added the CONN_HOLD() macro so that all reference holds have the same
 * look and feel.
 *
 * In the private (cku_private) portion of the client handle,
 *
 *	cku_flags replaces the boolean cku_sent.
 *	cku_flags keeps track of whether a request has been sent, and
 *	whether the client handle's call record is on the dispatch list
 *	(so that the reply can be matched by XID to the right client
 *	handle). The idea of CKU_ONQUEUE is that we can exit
 *	clnt_cots_kcallit() and still have the response find the right
 *	client handle so that the retry of CLNT_CALL() gets the result.
 *	Testing found situations where if the timeout was increased,
 *	performance degraded. This was due to us hitting a window where
 *	the thread was back in rfscall() (probably printing server not
 *	responding) while the response came back but there was no place
 *	to put it.
 *
 *	cku_ctime is just a cache of x_ctime. If they match,
 *	clnt_cots_kcallit() won't send a retry (unless the maximum
 *	receive count limit has been reached). If they don't match, then
 *	we assume the request has been lost, and a retry of the request
 *	is needed.
 *
 *	cku_recv_attempts counts the number of receive attempts made
 *	after one try is sent on the wire.
 *
 * Added the clnt_delay() routine so that interruptible and
 * noninterruptible delays are possible.
 *
 * CLNT_MIN_TIMEOUT has been bumped to 10 seconds from 3. This is used to
 * control how long the client delays before returning after getting
 * ECONNREFUSED. At 3 seconds, 8 client threads per mount really does bash
 * a server that may be booting and not yet started nfsd.
 *
 * CLNT_MAXRECV_WITHOUT_RETRY is a new macro (value of 3, with a tunable).
 * Why don't we just wait forever (receive an infinite # of times)?
 * Because the server may have rebooted. More insidious is that some
 * servers (ours) will drop NFS/TCP requests in some cases. This is bad,
 * but it is a reality.
 *
 * The case of a server doing orderly release really messes up the
 * client's recovery, especially if the server's TCP implementation is
 * buggy.  It was found that the kRPC/COTS client was breaking some
 * TPI rules, such as not waiting for the acknowledgement of a
 * T_DISCON_REQ (hence the added case statements T_ERROR_ACK, T_OK_ACK and
 * T_DISCON_REQ in clnt_dispatch_notifyall()).
 *
 * One of the things that we've seen is that a kRPC TCP endpoint goes into
 * TIMEWAIT and thus a reconnect takes a long time to satisfy because
 * the TIMEWAIT state takes a while to finish.  If a server sends a
 * T_ORDREL_IND, there is little point in an RPC client doing a
 * T_ORDREL_REQ, because the RPC request isn't going to make it (the
 * server is saying that it won't accept any more data). So kRPC was
 * changed to send a T_DISCON_REQ when we get a T_ORDREL_IND. So now the
 * connection skips the TIMEWAIT state and goes straight to a bound state
 * that kRPC can quickly switch to connected.
 *
 * Code that issues TPI requests must use waitforack() to wait for the
 * corresponding ack (assuming there is one) in any future modifications.
 * This works around problems that may be introduced by breaking TPI rules
 * (by submitting new calls before earlier requests have been acked) in the
 * case of a signal or other early return.  waitforack() depends on
 * clnt_dispatch_notifyconn() to issue the wakeup when the ack
 * arrives, so adding new TPI calls may require corresponding changes
 * to clnt_dispatch_notifyconn(). Presently, the timeout period is based on
 * CLNT_MIN_TIMEOUT, which is 10 seconds. If you modify this value, be sure
 * not to set it too low or TPI ACKs will be lost.
 */
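/*
 * Illustrative sketch (added commentary, not original code): the
 * cku_ctime/x_ctime retry decision described above reduces to roughly
 * the following, which clnt_cots_kcallit() implements for real below:
 *
 *	if ((p->cku_flags & CKU_SENT) == 0 ||
 *	    p->cku_ctime != cm_entry->x_ctime) {
 *		p->cku_ctime = cm_entry->x_ctime;	<- (re)send request
 *	} else if (p->cku_recv_attempts < clnt_cots_maxrecv) {
 *		p->cku_recv_attempts++;			<- just wait again
 *	}
 */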
#include <sys/param.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/file.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/timod.h>
#include <sys/tiuser.h>
#include <sys/tihdr.h>
#include <sys/t_kuser.h>
#include <sys/fcntl.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/kstat.h>
#include <sys/t_lock.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/isa_defs.h>
#include <sys/callb.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <rpc/types.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>

#define	COTS_DEFAULT_ALLOCSIZE	2048

#define	WIRE_HDR_SIZE	20	/* serialized call header, sans proc number */
#define	MSG_OFFSET	128	/* offset of call into the mblk */

const char *kinet_ntop6(uchar_t *, char *, size_t);

static int	clnt_cots_ksettimers(CLIENT *, struct rpc_timers *,
    struct rpc_timers *, int, void (*)(int, int, caddr_t), caddr_t, uint32_t);
static enum clnt_stat	clnt_cots_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
    caddr_t, xdrproc_t, caddr_t, struct timeval);
static void	clnt_cots_kabort(CLIENT *);
static void	clnt_cots_kerror(CLIENT *, struct rpc_err *);
static bool_t	clnt_cots_kfreeres(CLIENT *, xdrproc_t, caddr_t);
static void	clnt_cots_kdestroy(CLIENT *);
static bool_t	clnt_cots_kcontrol(CLIENT *, int, char *);


/* List of transports managed by the connection manager. */
struct cm_xprt {
	TIUSER		*x_tiptr;	/* transport handle */
	queue_t		*x_wq;		/* send queue */
	clock_t		x_time;		/* last time we handed this xprt out */
	clock_t		x_ctime;	/* time we went to CONNECTED */
	int		x_tidu_size;	/* TIDU size of this transport */
	union {
	    struct {
		unsigned int
#ifdef	_BIT_FIELDS_HTOL
		b_closing:	1,	/* we've sent a ord rel on this conn */
		b_dead:		1,	/* transport is closed or disconn */
		b_doomed:	1,	/* too many conns, let this go idle */
		b_connected:	1,	/* this connection is connected */

		b_ordrel:	1,	/* do an orderly release? */
		b_thread:	1,	/* thread doing connect */
		b_waitdis:	1,	/* waiting for disconnect ACK */
		b_needdis:	1,	/* need T_DISCON_REQ */

		b_needrel:	1,	/* need T_ORDREL_REQ */
		b_early_disc:	1,	/* got a T_ORDREL_IND or T_DISCON_IND */
					/* disconnect during connect */

		b_pad:		22;

#endif

#ifdef	_BIT_FIELDS_LTOH
		b_pad:		22,

		b_early_disc:	1,	/* got a T_ORDREL_IND or T_DISCON_IND */
					/* disconnect during connect */
		b_needrel:	1,	/* need T_ORDREL_REQ */

		b_needdis:	1,	/* need T_DISCON_REQ */
		b_waitdis:	1,	/* waiting for disconnect ACK */
		b_thread:	1,	/* thread doing connect */
		b_ordrel:	1,	/* do an orderly release? */
		b_connected:	1,	/* this connection is connected */
		b_doomed:	1,	/* too many conns, let this go idle */
		b_dead:		1,	/* transport is closed or disconn */
		b_closing:	1;	/* we've sent a ord rel on this conn */
#endif
	    } bit;	unsigned int word;

#define	x_closing	x_state.bit.b_closing
#define	x_dead		x_state.bit.b_dead
#define	x_doomed	x_state.bit.b_doomed
#define	x_connected	x_state.bit.b_connected

#define	x_ordrel	x_state.bit.b_ordrel
#define	x_thread	x_state.bit.b_thread
#define	x_waitdis	x_state.bit.b_waitdis
#define	x_needdis	x_state.bit.b_needdis

#define	x_needrel	x_state.bit.b_needrel
#define	x_early_disc	x_state.bit.b_early_disc

#define	x_state_flags	x_state.word

#define	X_CLOSING	0x80000000
#define	X_DEAD		0x40000000
#define	X_DOOMED	0x20000000
#define	X_CONNECTED	0x10000000

#define	X_ORDREL	0x08000000
#define	X_THREAD	0x04000000
#define	X_WAITDIS	0x02000000
#define	X_NEEDDIS	0x01000000

#define	X_NEEDREL	0x00800000
#define	X_EARLYDISC	0x00400000

#define	X_BADSTATES	(X_CLOSING | X_DEAD | X_DOOMED)

	} x_state;
	int		x_ref;		/* number of users of this xprt */
	int		x_family;	/* address family of transport */
	dev_t		x_rdev;		/* device number of transport */
	struct cm_xprt	*x_next;

	struct netbuf	x_server;	/* destination address */
	struct netbuf	x_src;		/* src address (for retries) */
	kmutex_t	x_lock;		/* lock on this entry */
	kcondvar_t	x_cv;		/* to signal when can be closed */
	kcondvar_t	x_conn_cv;	/* to signal when connection attempt */
					/* is complete */
	kstat_t		*x_ksp;

	kcondvar_t	x_dis_cv;	/* to signal when disconnect attempt */
					/* is complete */
	zoneid_t	x_zoneid;	/* zone this xprt belongs to */
};

typedef struct cm_kstat_xprt {
	kstat_named_t	x_wq;
	kstat_named_t	x_server;
	kstat_named_t	x_family;
	kstat_named_t	x_rdev;
	kstat_named_t	x_time;
	kstat_named_t	x_state;
	kstat_named_t	x_ref;
	kstat_named_t	x_port;
} cm_kstat_xprt_t;

static cm_kstat_xprt_t cm_kstat_template = {
	{ "write_queue", KSTAT_DATA_UINT32 },
	{ "server", KSTAT_DATA_STRING },
	{ "addr_family", KSTAT_DATA_UINT32 },
	{ "device", KSTAT_DATA_UINT32 },
	{ "time_stamp", KSTAT_DATA_UINT32 },
	{ "status", KSTAT_DATA_UINT32 },
	{ "ref_count", KSTAT_DATA_INT32 },
	{ "port", KSTAT_DATA_UINT32 },
};

/*
 * The inverse of this is connmgr_release().
 */
#define	CONN_HOLD(Cm_entry)	{\
	mutex_enter(&(Cm_entry)->x_lock);	\
	(Cm_entry)->x_ref++;	\
	mutex_exit(&(Cm_entry)->x_lock);	\
}
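/*
 * Added note (not in the original source): every CONN_HOLD() is expected
 * to be balanced by a connmgr_release(), giving the usual pattern:
 *
 *	CONN_HOLD(cm_entry);		<- take a reference under x_lock
 *	... use the transport ...
 *	connmgr_release(cm_entry);	<- drop the reference again
 */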
/*
 * Private data per rpc handle.  This structure is allocated by
 * clnt_cots_kcreate, and freed by clnt_cots_kdestroy.
 */
typedef struct cku_private_s {
	CLIENT			cku_client;	/* client handle */
	calllist_t		cku_call;	/* for dispatching calls */
	struct rpc_err		cku_err;	/* error status */

	struct netbuf		cku_srcaddr;	/* source address for retries */
	int			cku_addrfmly;	/* for binding port */
	struct netbuf		cku_addr;	/* remote address */
	dev_t			cku_device;	/* device to use */
	uint_t			cku_flags;
#define	CKU_ONQUEUE		0x1
#define	CKU_SENT		0x2

	bool_t			cku_progress;	/* for CLSET_PROGRESS */
	uint32_t		cku_xid;	/* current XID */
	clock_t			cku_ctime;	/* time stamp of when */
						/* connection was created */
	uint_t			cku_recv_attempts;
	XDR			cku_outxdr;	/* xdr routine for output */
	XDR			cku_inxdr;	/* xdr routine for input */
	char			cku_rpchdr[WIRE_HDR_SIZE + 4];
						/* pre-serialized rpc header */

	uint_t			cku_outbuflen;	/* default output mblk length */
	struct cred		*cku_cred;	/* credentials */
	bool_t			cku_nodelayonerr;
						/* for CLSET_NODELAYONERR */
	int			cku_useresvport; /* Use reserved port */
	struct rpc_cots_client	*cku_stats;	/* stats for zone */
} cku_private_t;

static struct cm_xprt *connmgr_wrapconnect(struct cm_xprt *,
    const struct timeval *, struct netbuf *, int, struct netbuf *,
    struct rpc_err *, bool_t, bool_t);

static bool_t	connmgr_connect(struct cm_xprt *, queue_t *, struct netbuf *,
    int, calllist_t *, int *, bool_t reconnect,
    const struct timeval *, bool_t);

static bool_t	connmgr_setopt(queue_t *, int, int, calllist_t *);
static void	connmgr_sndrel(struct cm_xprt *);
static void	connmgr_snddis(struct cm_xprt *);
static void	connmgr_close(struct cm_xprt *);
static void	connmgr_release(struct cm_xprt *);
static struct cm_xprt *connmgr_wrapget(struct netbuf *, const struct timeval *,
    cku_private_t *);

static struct cm_xprt *connmgr_get(struct netbuf *, const struct timeval *,
    struct netbuf *, int, struct netbuf *, struct rpc_err *, dev_t,
    bool_t, int);

static void connmgr_cancelconn(struct cm_xprt *);
static enum clnt_stat connmgr_cwait(struct cm_xprt *, const struct timeval *,
    bool_t);
static void connmgr_dis_and_wait(struct cm_xprt *);

static void	clnt_dispatch_send(queue_t *, mblk_t *, calllist_t *, uint_t,
    uint_t);

static int clnt_delay(clock_t, bool_t);

static int waitforack(calllist_t *, t_scalar_t, const struct timeval *, bool_t);

/*
 * Operations vector for TCP/IP based RPC
 */
static struct clnt_ops tcp_ops = {
	clnt_cots_kcallit,	/* do rpc call */
	clnt_cots_kabort,	/* abort call */
	clnt_cots_kerror,	/* return error status */
	clnt_cots_kfreeres,	/* free results */
	clnt_cots_kdestroy,	/* destroy rpc handle */
	clnt_cots_kcontrol,	/* the ioctl() of rpc */
	clnt_cots_ksettimers,	/* set retry timers */
};

static int rpc_kstat_instance = 0;	/* keeps the current instance */
					/* number for the next kstat_create */

static struct cm_xprt *cm_hd = NULL;
static kmutex_t connmgr_lock;	/* for connection mngr's list of transports */

extern kmutex_t clnt_max_msg_lock;

static calllist_t *clnt_pending = NULL;
extern kmutex_t clnt_pending_lock;

static int clnt_cots_hash_size = DEFAULT_HASH_SIZE;

static call_table_t *cots_call_ht;
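/*
 * Added note (not in the original source): callers do not invoke these
 * routines directly; they go through the CLNT_* macros in <rpc/clnt.h>,
 * which dispatch through the handle's cl_ops vector. For a handle set up
 * by clnt_cots_kcreate(), CLNT_CALL(h, ...) therefore lands in
 * clnt_cots_kcallit() below.
 */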
static const struct rpc_cots_client {
	kstat_named_t	rccalls;
	kstat_named_t	rcbadcalls;
	kstat_named_t	rcbadxids;
	kstat_named_t	rctimeouts;
	kstat_named_t	rcnewcreds;
	kstat_named_t	rcbadverfs;
	kstat_named_t	rctimers;
	kstat_named_t	rccantconn;
	kstat_named_t	rcnomem;
	kstat_named_t	rcintrs;
} cots_rcstat_tmpl = {
	{ "calls", KSTAT_DATA_UINT64 },
	{ "badcalls", KSTAT_DATA_UINT64 },
	{ "badxids", KSTAT_DATA_UINT64 },
	{ "timeouts", KSTAT_DATA_UINT64 },
	{ "newcreds", KSTAT_DATA_UINT64 },
	{ "badverfs", KSTAT_DATA_UINT64 },
	{ "timers", KSTAT_DATA_UINT64 },
	{ "cantconn", KSTAT_DATA_UINT64 },
	{ "nomem", KSTAT_DATA_UINT64 },
	{ "interrupts", KSTAT_DATA_UINT64 }
};

#define	COTSRCSTAT_INCR(p, x)	\
	atomic_add_64(&(p)->x.value.ui64, 1)

#define	CLNT_MAX_CONNS	1	/* concurrent connections between clnt/srvr */
static int clnt_max_conns = CLNT_MAX_CONNS;

#define	CLNT_MIN_TIMEOUT	10	/* seconds to wait after we get a */
					/* connection reset */
#define	CLNT_MIN_CONNTIMEOUT	5	/* seconds to wait for a connection */


static int clnt_cots_min_tout = CLNT_MIN_TIMEOUT;
static int clnt_cots_min_conntout = CLNT_MIN_CONNTIMEOUT;

/*
 * Limit the number of times we will attempt to receive a reply without
 * re-sending the request.
 */
#define	CLNT_MAXRECV_WITHOUT_RETRY	3
static uint_t clnt_cots_maxrecv = CLNT_MAXRECV_WITHOUT_RETRY;

uint_t *clnt_max_msg_sizep;
void (*clnt_stop_idle)(queue_t *wq);

#define	ptoh(p)		(&((p)->cku_client))
#define	htop(h)		((cku_private_t *)((h)->cl_private))

/*
 * Times to retry
 */
#define	REFRESHES	2	/* authentication refreshes */

/*
 * The following is used to determine the global default behavior for
 * COTS when binding to a local port.
 *
 * If the value is set to 1 the default will be to select a reserved
 * (aka privileged) port, if the value is zero the default will be to
 * use non-reserved ports.  Users of kRPC may override this by using
 * CLNT_CONTROL() and CLSET_BINDRESVPORT.
 */
static int clnt_cots_do_bindresvport = 1;

static zone_key_t zone_cots_key;

/*
 * We need to do this after all kernel threads in the zone have exited.
 */
/* ARGSUSED */
static void
clnt_zone_destroy(zoneid_t zoneid, void *unused)
{
	struct cm_xprt **cmp;
	struct cm_xprt *cm_entry;
	struct cm_xprt *freelist = NULL;

	mutex_enter(&connmgr_lock);
	cmp = &cm_hd;
	while ((cm_entry = *cmp) != NULL) {
		if (cm_entry->x_zoneid == zoneid) {
			*cmp = cm_entry->x_next;
			cm_entry->x_next = freelist;
			freelist = cm_entry;
		} else {
			cmp = &cm_entry->x_next;
		}
	}
	mutex_exit(&connmgr_lock);
	while ((cm_entry = freelist) != NULL) {
		freelist = cm_entry->x_next;
		connmgr_close(cm_entry);
	}
}
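/*
 * Added note (not in the original source): the function above first
 * splices matching entries onto a private freelist under connmgr_lock,
 * then calls connmgr_close() only after the lock is dropped, presumably
 * because closing a transport can block and must not be done while
 * holding the list lock.
 */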
int
clnt_cots_kcreate(dev_t dev, struct netbuf *addr, int family, rpcprog_t prog,
	rpcvers_t vers, uint_t max_msgsize, cred_t *cred, CLIENT **ncl)
{
	CLIENT *h;
	cku_private_t *p;
	struct rpc_msg call_msg;
	struct rpcstat *rpcstat;

	RPCLOG(8, "clnt_cots_kcreate: prog %u\n", prog);

	rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone);
	ASSERT(rpcstat != NULL);

	/* Allocate and initialize the client handle. */
	p = kmem_zalloc(sizeof (*p), KM_SLEEP);

	h = ptoh(p);

	h->cl_private = (caddr_t)p;
	h->cl_auth = authkern_create();
	h->cl_ops = &tcp_ops;

	cv_init(&p->cku_call.call_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&p->cku_call.call_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * If the current sanity check size in rpcmod is smaller
	 * than the size needed, then increase the sanity check.
	 */
	if (max_msgsize != 0 && clnt_max_msg_sizep != NULL &&
	    max_msgsize > *clnt_max_msg_sizep) {
		mutex_enter(&clnt_max_msg_lock);
		if (max_msgsize > *clnt_max_msg_sizep)
			*clnt_max_msg_sizep = max_msgsize;
		mutex_exit(&clnt_max_msg_lock);
	}

	p->cku_outbuflen = COTS_DEFAULT_ALLOCSIZE;

	/* Preserialize the call message header */

	call_msg.rm_xid = 0;
	call_msg.rm_direction = CALL;
	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
	call_msg.rm_call.cb_prog = prog;
	call_msg.rm_call.cb_vers = vers;

	xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, WIRE_HDR_SIZE, XDR_ENCODE);

	if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
		RPCLOG0(1, "clnt_cots_kcreate - Fatal header serialization "
		    "error\n");
		auth_destroy(h->cl_auth);
		kmem_free(p, sizeof (cku_private_t));
		RPCLOG0(1, "clnt_cots_kcreate: create failed error EINVAL\n");
		return (EINVAL);		/* XXX */
	}

	/*
	 * The zalloc initialized the fields below.
	 * p->cku_xid = 0;
	 * p->cku_flags = 0;
	 * p->cku_srcaddr.len = 0;
	 * p->cku_srcaddr.maxlen = 0;
	 */

	p->cku_cred = cred;
	p->cku_device = dev;
	p->cku_addrfmly = family;
	p->cku_addr.buf = kmem_zalloc(addr->maxlen, KM_SLEEP);
	p->cku_addr.maxlen = addr->maxlen;
	p->cku_addr.len = addr->len;
	bcopy(addr->buf, p->cku_addr.buf, addr->len);
	p->cku_stats = rpcstat->rpc_cots_client;
	p->cku_useresvport = -1; /* value has not been set */

	*ncl = h;
	return (0);
}

/*ARGSUSED*/
static void
clnt_cots_kabort(CLIENT *h)
{
}
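/*
 * Added usage sketch (not in the original source; the NFS_PROGRAM and
 * NFS_VERSION names are only an example): a typical caller creates and
 * later destroys a handle roughly like this, error handling omitted:
 *
 *	CLIENT *client;
 *
 *	if (clnt_cots_kcreate(dev, &srvaddr, AF_INET, NFS_PROGRAM,
 *	    NFS_VERSION, 0, cred, &client) == 0) {
 *		... CLNT_CALL(client, proc, xdrargs, argsp,
 *		    xdrres, resp, timeout) ...
 *		CLNT_DESTROY(client);
 *	}
 */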
/*
 * Return error info on this handle.
 */
static void
clnt_cots_kerror(CLIENT *h, struct rpc_err *err)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);

	*err = p->cku_err;
}

static bool_t
clnt_cots_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);
	XDR *xdrs;

	xdrs = &(p->cku_outxdr);
	xdrs->x_op = XDR_FREE;
	return ((*xdr_res)(xdrs, res_ptr));
}

static bool_t
clnt_cots_kcontrol(CLIENT *h, int cmd, char *arg)
{
	cku_private_t *p = htop(h);

	switch (cmd) {
	case CLSET_PROGRESS:
		p->cku_progress = TRUE;
		return (TRUE);

	case CLSET_XID:
		if (arg == NULL)
			return (FALSE);

		p->cku_xid = *((uint32_t *)arg);
		return (TRUE);

	case CLGET_XID:
		if (arg == NULL)
			return (FALSE);

		*((uint32_t *)arg) = p->cku_xid;
		return (TRUE);

	case CLSET_NODELAYONERR:
		if (arg == NULL)
			return (FALSE);

		if (*((bool_t *)arg) == TRUE) {
			p->cku_nodelayonerr = TRUE;
			return (TRUE);
		}
		if (*((bool_t *)arg) == FALSE) {
			p->cku_nodelayonerr = FALSE;
			return (TRUE);
		}
		return (FALSE);

	case CLGET_NODELAYONERR:
		if (arg == NULL)
			return (FALSE);

		*((bool_t *)arg) = p->cku_nodelayonerr;
		return (TRUE);

	case CLSET_BINDRESVPORT:
		if (arg == NULL)
			return (FALSE);

		if (*(int *)arg != 1 && *(int *)arg != 0)
			return (FALSE);

		p->cku_useresvport = *(int *)arg;

		return (TRUE);

	case CLGET_BINDRESVPORT:
		if (arg == NULL)
			return (FALSE);

		*(int *)arg = p->cku_useresvport;

		return (TRUE);

	default:
		return (FALSE);
	}
}

/*
 * Destroy rpc handle.  Frees the space used for output buffer,
 * private data, and handle structure.
 */
static void
clnt_cots_kdestroy(CLIENT *h)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);
	calllist_t *call = &p->cku_call;

	RPCLOG(8, "clnt_cots_kdestroy h: %p\n", (void *)h);
	RPCLOG(8, "clnt_cots_kdestroy h: xid=0x%x\n", p->cku_xid);

	if (p->cku_flags & CKU_ONQUEUE) {
		RPCLOG(64, "clnt_cots_kdestroy h: removing call for xid 0x%x "
		    "from dispatch list\n", p->cku_xid);
		call_table_remove(call);
	}

	if (call->call_reply)
		freemsg(call->call_reply);
	cv_destroy(&call->call_cv);
	mutex_destroy(&call->call_lock);

	kmem_free(p->cku_srcaddr.buf, p->cku_srcaddr.maxlen);
	kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
	kmem_free(p, sizeof (*p));
}

static int clnt_cots_pulls;
#define	RM_HDR_SIZE	4	/* record mark header size */
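/*
 * Added usage sketch (not in the original source): the knobs handled by
 * clnt_cots_kcontrol() are reached via CLNT_CONTROL(), e.g. forcing a
 * reserved source port on a handle:
 *
 *	int resv = 1;
 *
 *	if (!CLNT_CONTROL(client, CLSET_BINDRESVPORT, (char *)&resv))
 *		... the handle rejected the request ...
 */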
/*
 * Call remote procedure.
 */
static enum clnt_stat
clnt_cots_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
    caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, struct timeval wait)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);
	calllist_t *call = &p->cku_call;
	XDR *xdrs;
	struct rpc_msg reply_msg;
	mblk_t *mp;
#ifdef	RPCDEBUG
	clock_t time_sent;
#endif
	struct netbuf *retryaddr;
	struct cm_xprt *cm_entry = NULL;
	queue_t *wq;
	int len;
	int mpsize;
	int refreshes = REFRESHES;
	int interrupted;
	int tidu_size;
	enum clnt_stat status;
	struct timeval cwait;
	bool_t delay_first = FALSE;
	clock_t ticks;

	RPCLOG(2, "clnt_cots_kcallit, procnum %u\n", procnum);
	COTSRCSTAT_INCR(p->cku_stats, rccalls);

	RPCLOG(2, "clnt_cots_kcallit: wait.tv_sec: %ld\n", wait.tv_sec);
	RPCLOG(2, "clnt_cots_kcallit: wait.tv_usec: %ld\n", wait.tv_usec);

	/*
	 * Bug ID 1240234:
	 * Look out for zero length timeouts. We don't want to
	 * wait zero seconds for a connection to be established.
	 */
	if (wait.tv_sec < clnt_cots_min_conntout) {
		cwait.tv_sec = clnt_cots_min_conntout;
		cwait.tv_usec = 0;
		RPCLOG(8, "clnt_cots_kcallit: wait.tv_sec (%ld) too low,",
		    wait.tv_sec);
		RPCLOG(8, " setting to: %d\n", clnt_cots_min_conntout);
	} else {
		cwait = wait;
	}

call_again:
	if (cm_entry) {
		connmgr_release(cm_entry);
		cm_entry = NULL;
	}

	mp = NULL;

	/*
	 * If the call is not a retry, allocate a new xid and cache it
	 * for future retries.
	 * Bug ID 1246045:
	 * Treat call as a retry for purposes of binding the source
	 * port only if we actually attempted to send anything on
	 * the previous call.
	 */
	if (p->cku_xid == 0) {
		p->cku_xid = alloc_xid();
		/*
		 * We need to ASSERT here that our xid != 0 because this
		 * determines whether or not our call record gets placed on
		 * the hash table or the linked list.  By design, we mandate
		 * that RPC calls over cots must have xid's != 0, so we can
		 * ensure proper management of the hash table.
		 */
		ASSERT(p->cku_xid != 0);

		retryaddr = NULL;
		p->cku_flags &= ~CKU_SENT;

		if (p->cku_flags & CKU_ONQUEUE) {
			RPCLOG(8, "clnt_cots_kcallit: new call, dequeuing old"
			    " one (%p)\n", (void *)call);
			call_table_remove(call);
			p->cku_flags &= ~CKU_ONQUEUE;
			RPCLOG(64, "clnt_cots_kcallit: removing call from "
			    "dispatch list because xid was zero (now 0x%x)\n",
			    p->cku_xid);
		}

		if (call->call_reply != NULL) {
			freemsg(call->call_reply);
			call->call_reply = NULL;
		}
	} else if (p->cku_srcaddr.buf == NULL || p->cku_srcaddr.len == 0) {
		retryaddr = NULL;

	} else if (p->cku_flags & CKU_SENT) {
		retryaddr = &p->cku_srcaddr;

	} else {
		/*
		 * Bug ID 1246045: Nothing was sent, so set retryaddr to
		 * NULL and let connmgr_get() bind to any source port it
		 * can get.
		 */
		retryaddr = NULL;
	}

	RPCLOG(64, "clnt_cots_kcallit: xid = 0x%x", p->cku_xid);
	RPCLOG(64, " flags = 0x%x\n", p->cku_flags);

	p->cku_err.re_status = RPC_TIMEDOUT;
	p->cku_err.re_errno = p->cku_err.re_terrno = 0;

	cm_entry = connmgr_wrapget(retryaddr, &cwait, p);

	if (cm_entry == NULL) {
		RPCLOG(1, "clnt_cots_kcallit: can't connect status %s\n",
		    clnt_sperrno(p->cku_err.re_status));
		/*
		 * The reasons why we fail to create a connection are
		 * varied. In most cases we don't want the caller to
		 * immediately retry. This could have one or more
		 * bad effects, including flooding the net with
		 * connect requests to ports with no listener, or a hard
		 * kernel loop due to all the "reserved" TCP ports being
		 * in use.
		 */
		delay_first = TRUE;

		/*
		 * Even if we end up returning EINTR, we still count a
		 * "can't connect", because the connection manager
		 * might have been committed to waiting for or timing out on
		 * a connection.
		 */
		COTSRCSTAT_INCR(p->cku_stats, rccantconn);
		switch (p->cku_err.re_status) {
		case RPC_INTR:
			p->cku_err.re_errno = EINTR;

			/*
			 * No need to delay because a UNIX signal(2)
			 * interrupted us. The caller likely won't
			 * retry the CLNT_CALL() and even if it does,
			 * we assume the caller knows what it is doing.
			 */
			delay_first = FALSE;
			break;

		case RPC_TIMEDOUT:
			p->cku_err.re_errno = ETIMEDOUT;

			/*
			 * No need to delay because we already timed out
			 * on the connection request, and we assume that the
			 * transport timeout is longer than our minimum
			 * timeout, or at least not too much smaller.
			 */
			delay_first = FALSE;
			break;

		case RPC_SYSTEMERROR:
		case RPC_TLIERROR:
			/*
			 * We want to delay here because a transient
			 * system error has a better chance of going away
			 * if we delay a bit. If it's not transient, then
			 * we don't want to end up in a hard kernel loop
			 * due to retries.
			 */
			ASSERT(p->cku_err.re_errno != 0);
			break;


		case RPC_CANTCONNECT:
			/*
			 * RPC_CANTCONNECT is set on T_ERROR_ACK which
			 * implies some error down in the TCP layer or
			 * below. If cku_nodelayonerr is set then we
			 * assume the caller knows not to try too hard.
			 */
			RPCLOG0(8, "clnt_cots_kcallit: connection failed,");
			RPCLOG0(8, " re_status=RPC_CANTCONNECT,");
			RPCLOG(8, " re_errno=%d,", p->cku_err.re_errno);
			RPCLOG(8, " cku_nodelayonerr=%d", p->cku_nodelayonerr);
			if (p->cku_nodelayonerr == TRUE)
				delay_first = FALSE;

			p->cku_err.re_errno = EIO;

			break;

		case RPC_XPRTFAILED:
			/*
			 * We want to delay here because we likely
			 * got a refused connection.
			 */
			if (p->cku_err.re_errno != 0)
				break;

			/* fall thru */

		default:
			/*
			 * We delay here because it is better to err
			 * on the side of caution. If we got here then
			 * status could have been RPC_SUCCESS, but we
			 * know that we did not get a connection, so
			 * force the rpc status to RPC_CANTCONNECT.
			 */
			p->cku_err.re_status = RPC_CANTCONNECT;
			p->cku_err.re_errno = EIO;
			break;
		}
		if (delay_first == TRUE)
			ticks = clnt_cots_min_tout * drv_usectohz(1000000);
		goto cots_done;
	}

	/*
	 * If we've never sent any request on this connection (send count
	 * is zero, or the connection has been reset), cache the
	 * connection's create time and send a request (possibly a retry).
	 */
	if ((p->cku_flags & CKU_SENT) == 0 ||
	    p->cku_ctime != cm_entry->x_ctime) {
		p->cku_ctime = cm_entry->x_ctime;

	} else if ((p->cku_flags & CKU_SENT) && (p->cku_flags & CKU_ONQUEUE) &&
	    (call->call_reply != NULL ||
	    p->cku_recv_attempts < clnt_cots_maxrecv)) {

		/*
		 * If we've sent a request and our call is on the dispatch
		 * queue and we haven't made too many receive attempts, then
		 * don't re-send, just receive.
		 */
		p->cku_recv_attempts++;
		goto read_again;
	}

	/*
	 * Now we create the RPC request in a STREAMS message. We have to do
	 * this after the call to connmgr_get so that we have the correct
	 * TIDU size for the transport.
	 */
	tidu_size = cm_entry->x_tidu_size;
	len = MSG_OFFSET + MAX(tidu_size, RM_HDR_SIZE + WIRE_HDR_SIZE);

	while ((mp = allocb(len, BPRI_MED)) == NULL) {
		if (strwaitbuf(len, BPRI_MED)) {
			p->cku_err.re_status = RPC_SYSTEMERROR;
			p->cku_err.re_errno = ENOSR;
			COTSRCSTAT_INCR(p->cku_stats, rcnomem);
			goto cots_done;
		}
	}
	xdrs = &p->cku_outxdr;
	xdrmblk_init(xdrs, mp, XDR_ENCODE, tidu_size);
	mpsize = MBLKSIZE(mp);
	ASSERT(mpsize >= len);
	ASSERT(mp->b_rptr == mp->b_datap->db_base);

	/*
	 * If the size of mblk is not appreciably larger than what we
	 * asked, then resize the mblk to exactly len bytes. The reason for
	 * this: suppose len is 1600 bytes, the tidu is 1460 bytes
	 * (from TCP over ethernet), and the arguments to the RPC require
	 * 2800 bytes. Ideally we want the protocol to render two
	 * ~1400 byte segments over the wire. However if allocb() gives us a 2k
	 * mblk, and we allocate a second mblk for the remainder, the protocol
	 * module may generate 3 segments over the wire:
	 * 1460 bytes for the first, 448 (2048 - 1600) for the second, and
	 * 892 for the third. If we "waste" 448 bytes in the first mblk,
	 * the XDR encoding will generate two ~1400 byte mblks, and the
	 * protocol module is more likely to produce properly sized segments.
	 */
	if ((mpsize >> 1) <= len)
		mp->b_rptr += (mpsize - len);

	/*
	 * Adjust b_rptr to reserve space for the non-data protocol headers
	 * any downstream modules might like to add, and for the
	 * record marking header.
	 */
	mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE);

	if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
		/* Copy in the preserialized RPC header information. */
		bcopy(p->cku_rpchdr, mp->b_rptr, WIRE_HDR_SIZE);

		/* Use XDR_SETPOS() to set the b_wptr to past the RPC header. */
		XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base +
		    WIRE_HDR_SIZE));

		ASSERT((mp->b_wptr - mp->b_rptr) == WIRE_HDR_SIZE);

		/* Serialize the procedure number and the arguments. */
		if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
		    (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
		    (!(*xdr_args)(xdrs, argsp))) {
			p->cku_err.re_status = RPC_CANTENCODEARGS;
			p->cku_err.re_errno = EIO;
			goto cots_done;
		}

		(*(uint32_t *)(mp->b_rptr)) = p->cku_xid;
	} else {
		uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[WIRE_HDR_SIZE];
		IXDR_PUT_U_INT32(uproc, procnum);

		(*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;

		/* Use XDR_SETPOS() to set the b_wptr. */
		XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base));

		/* Serialize the procedure number and the arguments. */
		if (!AUTH_WRAP(h->cl_auth, p->cku_rpchdr, WIRE_HDR_SIZE+4,
		    xdrs, xdr_args, argsp)) {
			p->cku_err.re_status = RPC_CANTENCODEARGS;
			p->cku_err.re_errno = EIO;
			goto cots_done;
		}
	}

	RPCLOG(2, "clnt_cots_kcallit: connected, sending call, tidu_size %d\n",
	    tidu_size);

	wq = cm_entry->x_wq;
	clnt_dispatch_send(wq, mp, call, p->cku_xid,
	    (p->cku_flags & CKU_ONQUEUE));

	RPCLOG(64, "clnt_cots_kcallit: sent call for xid 0x%x\n",
	    (uint_t)p->cku_xid);
	p->cku_flags = (CKU_ONQUEUE|CKU_SENT);
	p->cku_recv_attempts = 1;

#ifdef	RPCDEBUG
	time_sent = lbolt;
#endif

	/*
	 * Wait for a reply or a timeout.  If there is no error or timeout,
	 * (both indicated by call_status), call->call_reply will contain
	 * the RPC reply message.
	 */
read_again:
	mutex_enter(&call->call_lock);
	interrupted = 0;
	if (call->call_status == RPC_TIMEDOUT) {
		/*
		 * Indicate that the lwp is not to be stopped while waiting
		 * for this network traffic.  This is to avoid deadlock while
		 * debugging a process via /proc and also to avoid recursive
		 * mutex_enter()s due to NFS page faults while stopping
		 * (NFS holds locks when it calls here).
		 */
		clock_t cv_wait_ret;
		clock_t timout;
		clock_t oldlbolt;

		klwp_t *lwp = ttolwp(curthread);

		if (lwp != NULL)
			lwp->lwp_nostop++;

		oldlbolt = lbolt;
		timout = wait.tv_sec * drv_usectohz(1000000) +
		    drv_usectohz(wait.tv_usec) + oldlbolt;
		/*
		 * Iterate until the call_status is changed to something
		 * other than RPC_TIMEDOUT, or until cv_timedwait_sig() returns
		 * something <= 0. The latter means that we timed out.
		 */
		if (h->cl_nosignal)
			while ((cv_wait_ret = cv_timedwait(&call->call_cv,
			    &call->call_lock, timout)) > 0 &&
			    call->call_status == RPC_TIMEDOUT);
		else
			while ((cv_wait_ret = cv_timedwait_sig(
			    &call->call_cv,
			    &call->call_lock, timout)) > 0 &&
			    call->call_status == RPC_TIMEDOUT);

		switch (cv_wait_ret) {
		case 0:
			/*
			 * If we got out of the above loop with
			 * cv_timedwait_sig() returning 0, then we were
			 * interrupted regardless what call_status is.
			 */
			interrupted = 1;
			break;
		case -1:
			/* cv_timedwait_sig() timed out */
			break;
		default:

			/*
			 * We were cv_signaled(). If we didn't
			 * get a successful call_status and returned
			 * before time expired, delay up to clnt_cots_min_tout
			 * seconds so that the caller doesn't immediately
			 * try to call us again and thus force the
			 * same condition that got us here (such
			 * as a RPC_XPRTFAILED due to the server not
			 * listening on the end-point).
			 */
			if (call->call_status != RPC_SUCCESS) {
				clock_t curlbolt;
				clock_t diff;

				curlbolt = ddi_get_lbolt();
				ticks = clnt_cots_min_tout *
				    drv_usectohz(1000000);
				diff = curlbolt - oldlbolt;
				if (diff < ticks) {
					delay_first = TRUE;
					if (diff > 0)
						ticks -= diff;
				}
			}
			break;
		}

		if (lwp != NULL)
			lwp->lwp_nostop--;
	}
	/*
	 * Get the reply message, if any.  This will be freed at the end
	 * whether or not an error occurred.
	 */
	mp = call->call_reply;
	call->call_reply = NULL;

	/*
	 * call_err is the error info when the call is on dispatch queue.
	 * cku_err is the error info returned to the caller.
	 * Sync cku_err with call_err for local message processing.
	 */

	status = call->call_status;
	p->cku_err = call->call_err;
	mutex_exit(&call->call_lock);

	if (status != RPC_SUCCESS) {
		switch (status) {
		case RPC_TIMEDOUT:
			if (interrupted) {
				COTSRCSTAT_INCR(p->cku_stats, rcintrs);
				p->cku_err.re_status = RPC_INTR;
				p->cku_err.re_errno = EINTR;
				RPCLOG(1, "clnt_cots_kcallit: xid 0x%x",
				    p->cku_xid);
				RPCLOG(1, "signal interrupted at %ld", lbolt);
				RPCLOG(1, ", was sent at %ld\n", time_sent);
			} else {
				COTSRCSTAT_INCR(p->cku_stats, rctimeouts);
				p->cku_err.re_errno = ETIMEDOUT;
				RPCLOG(1, "clnt_cots_kcallit: timed out at %ld",
				    lbolt);
				RPCLOG(1, ", was sent at %ld\n", time_sent);
			}
			break;

		case RPC_XPRTFAILED:
			if (p->cku_err.re_errno == 0)
				p->cku_err.re_errno = EIO;

			RPCLOG(1, "clnt_cots_kcallit: transport failed: %d\n",
			    p->cku_err.re_errno);
			break;

		case RPC_SYSTEMERROR:
			ASSERT(p->cku_err.re_errno);
			RPCLOG(1, "clnt_cots_kcallit: system error: %d\n",
			    p->cku_err.re_errno);
			break;

		default:
			p->cku_err.re_status = RPC_SYSTEMERROR;
			p->cku_err.re_errno = EIO;
			RPCLOG(1, "clnt_cots_kcallit: error: %s\n",
			    clnt_sperrno(status));
			break;
		}
		if (p->cku_err.re_status != RPC_TIMEDOUT) {

			if (p->cku_flags & CKU_ONQUEUE) {
				call_table_remove(call);
				p->cku_flags &= ~CKU_ONQUEUE;
			}

			RPCLOG(64, "clnt_cots_kcallit: non TIMEOUT so xid 0x%x "
			    "taken off dispatch list\n", p->cku_xid);
			if (call->call_reply) {
				freemsg(call->call_reply);
				call->call_reply = NULL;
			}
		} else if (wait.tv_sec != 0) {
			/*
			 * We've sent the request over TCP and so we have
			 * every reason to believe it will get
			 * delivered. In which case returning a timeout is not
			 * appropriate.
			 */
			if (p->cku_progress == TRUE &&
			    p->cku_recv_attempts < clnt_cots_maxrecv) {
				p->cku_err.re_status = RPC_INPROGRESS;
			}
		}
		goto cots_done;
	}

	xdrs = &p->cku_inxdr;
	xdrmblk_init(xdrs, mp, XDR_DECODE, 0);

	reply_msg.rm_direction = REPLY;
	reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
	reply_msg.acpted_rply.ar_stat = SUCCESS;

	reply_msg.acpted_rply.ar_verf = _null_auth;
	/*
	 * xdr_results will be done in AUTH_UNWRAP.
	 */
	reply_msg.acpted_rply.ar_results.where = NULL;
	reply_msg.acpted_rply.ar_results.proc = xdr_void;

	if (xdr_replymsg(xdrs, &reply_msg)) {
		enum clnt_stat re_status;

		_seterr_reply(&reply_msg, &p->cku_err);

		re_status = p->cku_err.re_status;
		if (re_status == RPC_SUCCESS) {
			/*
			 * Reply is good, check auth.
			 */
			if (!AUTH_VALIDATE(h->cl_auth,
			    &reply_msg.acpted_rply.ar_verf)) {
				COTSRCSTAT_INCR(p->cku_stats, rcbadverfs);
				RPCLOG0(1, "clnt_cots_kcallit: validation "
				    "failure\n");
				freemsg(mp);
				(void) xdr_rpc_free_verifier(xdrs, &reply_msg);
				mutex_enter(&call->call_lock);
				if (call->call_reply == NULL)
					call->call_status = RPC_TIMEDOUT;
				mutex_exit(&call->call_lock);
				goto read_again;
			} else if (!AUTH_UNWRAP(h->cl_auth, xdrs,
			    xdr_results, resultsp)) {
				RPCLOG0(1, "clnt_cots_kcallit: validation "
				    "failure (unwrap)\n");
				p->cku_err.re_status = RPC_CANTDECODERES;
				p->cku_err.re_errno = EIO;
			}
		} else {
			/* set errno in case we can't recover */
			if (re_status != RPC_VERSMISMATCH &&
			    re_status != RPC_AUTHERROR &&
			    re_status != RPC_PROGVERSMISMATCH)
				p->cku_err.re_errno = EIO;

			if (re_status == RPC_AUTHERROR) {
				/*
				 * Maybe our credentials need to be refreshed
				 */
				if (cm_entry) {
					/*
					 * There is the potential that the
					 * cm_entry has/will be marked dead,
					 * so drop the connection altogether,
					 * force REFRESH to establish new
					 * connection.
					 */
					connmgr_cancelconn(cm_entry);
					cm_entry = NULL;
				}

				if ((refreshes > 0) &&
				    AUTH_REFRESH(h->cl_auth, &reply_msg,
				    p->cku_cred)) {
					refreshes--;
					(void) xdr_rpc_free_verifier(xdrs,
					    &reply_msg);
					freemsg(mp);
					mp = NULL;

					if (p->cku_flags & CKU_ONQUEUE) {
						call_table_remove(call);
						p->cku_flags &= ~CKU_ONQUEUE;
					}

					RPCLOG(64,
					    "clnt_cots_kcallit: AUTH_ERROR, xid"
					    " 0x%x removed off dispatch list\n",
					    p->cku_xid);
					if (call->call_reply) {
						freemsg(call->call_reply);
						call->call_reply = NULL;
					}

					COTSRCSTAT_INCR(p->cku_stats,
					    rcbadcalls);
					COTSRCSTAT_INCR(p->cku_stats,
					    rcnewcreds);
					goto call_again;
				}

				/*
				 * We have used the client handle to
				 * do an AUTH_REFRESH and the RPC status may
				 * be set to RPC_SUCCESS; Let's make sure to
				 * set it to RPC_AUTHERROR.
				 */
				p->cku_err.re_status = RPC_AUTHERROR;

				/*
				 * Map recoverable and unrecoverable
				 * authentication errors to appropriate errno
				 */
				switch (p->cku_err.re_why) {
				case AUTH_TOOWEAK:
					/*
					 * This could be a failure where the
					 * server requires use of a reserved
					 * port, check and optionally set the
					 * client handle useresvport trying
					 * one more time. Next go round we
					 * fall out with the tooweak error.
					 */
					if (p->cku_useresvport != 1) {
						p->cku_useresvport = 1;
						p->cku_xid = 0;
						(void) xdr_rpc_free_verifier
						    (xdrs, &reply_msg);
						freemsg(mp);
						goto call_again;
					}
					/* FALLTHRU */
				case AUTH_BADCRED:
				case AUTH_BADVERF:
				case AUTH_INVALIDRESP:
				case AUTH_FAILED:
				case RPCSEC_GSS_NOCRED:
				case RPCSEC_GSS_FAILED:
					p->cku_err.re_errno = EACCES;
					break;
				case AUTH_REJECTEDCRED:
				case AUTH_REJECTEDVERF:
				default:
					p->cku_err.re_errno = EIO;
					break;
				}
				RPCLOG(1, "clnt_cots_kcallit : authentication"
				    " failed with RPC_AUTHERROR of type %d\n",
				    (int)p->cku_err.re_why);
			}
		}
	} else {
		/* reply didn't decode properly. */
		p->cku_err.re_status = RPC_CANTDECODERES;
		p->cku_err.re_errno = EIO;
		RPCLOG0(1, "clnt_cots_kcallit: decode failure\n");
	}

	(void) xdr_rpc_free_verifier(xdrs, &reply_msg);

	if (p->cku_flags & CKU_ONQUEUE) {
		call_table_remove(call);
		p->cku_flags &= ~CKU_ONQUEUE;
	}

	RPCLOG(64, "clnt_cots_kcallit: xid 0x%x taken off dispatch list",
	    p->cku_xid);
	RPCLOG(64, " status is %s\n", clnt_sperrno(p->cku_err.re_status));
cots_done:
	if (cm_entry)
		connmgr_release(cm_entry);

	if (mp != NULL)
		freemsg(mp);
	if ((p->cku_flags & CKU_ONQUEUE) == 0 && call->call_reply) {
		freemsg(call->call_reply);
		call->call_reply = NULL;
	}
	if (p->cku_err.re_status != RPC_SUCCESS) {
		RPCLOG0(1, "clnt_cots_kcallit: tail-end failure\n");
		COTSRCSTAT_INCR(p->cku_stats, rcbadcalls);
	}

	/*
	 * No point in delaying if the zone is going away.
	 */
	if (delay_first == TRUE &&
	    !(zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)) {
		if (clnt_delay(ticks, h->cl_nosignal) == EINTR) {
			p->cku_err.re_errno = EINTR;
			p->cku_err.re_status = RPC_INTR;
		}
	}
	return (p->cku_err.re_status);
}

/*
 * Kinit routine for cots.  This sets up the correct operations in
 * the client handle, as the handle may have previously been a clts
 * handle, and clears the xid field so there is no way a new call
 * could be mistaken for a retry.  It also sets in the handle the
 * information that is passed at create/kinit time but needed at
 * call time, as cots creates the transport at call time - device,
 * address of the server, protocol family.
 */
void
clnt_cots_kinit(CLIENT *h, dev_t dev, int family, struct netbuf *addr,
    int max_msgsize, cred_t *cred)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);
	calllist_t *call = &p->cku_call;

	h->cl_ops = &tcp_ops;
	if (p->cku_flags & CKU_ONQUEUE) {
		call_table_remove(call);
		p->cku_flags &= ~CKU_ONQUEUE;
		RPCLOG(64, "clnt_cots_kinit: removing call for xid 0x%x from"
		    " dispatch list\n", p->cku_xid);
	}

	if (call->call_reply != NULL) {
		freemsg(call->call_reply);
		call->call_reply = NULL;
	}

	call->call_bucket = NULL;
	call->call_hash = 0;

	/*
	 * We don't clear cku_flags here, because clnt_cots_kcallit()
	 * takes care of handling the cku_flags reset.
	 */
	p->cku_xid = 0;
	p->cku_device = dev;
	p->cku_addrfmly = family;
	p->cku_cred = cred;

	if (p->cku_addr.maxlen < addr->len) {
		if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
			kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
		p->cku_addr.buf = kmem_zalloc(addr->maxlen, KM_SLEEP);
		p->cku_addr.maxlen = addr->maxlen;
	}

	p->cku_addr.len = addr->len;
	bcopy(addr->buf, p->cku_addr.buf, addr->len);

	/*
	 * If the current sanity check size in rpcmod is smaller
	 * than the size needed, then increase the sanity check.
	 */
	if (max_msgsize != 0 && clnt_max_msg_sizep != NULL &&
	    max_msgsize > *clnt_max_msg_sizep) {
		mutex_enter(&clnt_max_msg_lock);
		if (max_msgsize > *clnt_max_msg_sizep)
			*clnt_max_msg_sizep = max_msgsize;
		mutex_exit(&clnt_max_msg_lock);
	}
}
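/*
 * Added note (not in the original source): callers that cache client
 * handles (the NFS client, for example) re-target an existing handle with
 * clnt_cots_kinit() rather than paying for a destroy/create cycle.
 * Clearing cku_xid above is what guarantees that the first call on the
 * re-used handle cannot be mistaken for a retry of an old one.
 */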
/*
 * ksettimers is a no-op for cots, with the exception of setting the xid.
 */
/* ARGSUSED */
static int
clnt_cots_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
    int minimum, void (*feedback)(int, int, caddr_t), caddr_t arg,
    uint32_t xid)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);

	if (xid)
		p->cku_xid = xid;
	COTSRCSTAT_INCR(p->cku_stats, rctimers);
	return (0);
}

extern void rpc_poptimod(struct vnode *);
extern int kstr_push(struct vnode *, char *);

int
conn_kstat_update(kstat_t *ksp, int rw)
{
	struct cm_xprt *cm_entry;
	struct cm_kstat_xprt *cm_ksp_data;
	uchar_t *b;
	char *fbuf;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	if (ksp == NULL || ksp->ks_private == NULL)
		return (EIO);
	cm_entry = (struct cm_xprt *)ksp->ks_private;
	cm_ksp_data = (struct cm_kstat_xprt *)ksp->ks_data;

	cm_ksp_data->x_wq.value.ui32 = (uint32_t)(uintptr_t)cm_entry->x_wq;
	cm_ksp_data->x_family.value.ui32 = cm_entry->x_family;
	cm_ksp_data->x_rdev.value.ui32 = (uint32_t)cm_entry->x_rdev;
	cm_ksp_data->x_time.value.ui32 = cm_entry->x_time;
	cm_ksp_data->x_ref.value.ui32 = cm_entry->x_ref;
	cm_ksp_data->x_state.value.ui32 = cm_entry->x_state_flags;

	if (cm_entry->x_server.buf) {
		fbuf = cm_ksp_data->x_server.value.str.addr.ptr;
		if (cm_entry->x_family == AF_INET &&
		    cm_entry->x_server.len ==
		    sizeof (struct sockaddr_in)) {
			struct sockaddr_in *sa;
			sa = (struct sockaddr_in *)
			    cm_entry->x_server.buf;
			b = (uchar_t *)&sa->sin_addr;
			(void) sprintf(fbuf,
			    "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF,
			    b[2] & 0xFF, b[3] & 0xFF);
			cm_ksp_data->x_port.value.ui32 =
			    (uint32_t)sa->sin_port;
		} else if (cm_entry->x_family == AF_INET6 &&
		    cm_entry->x_server.len >=
		    sizeof (struct sockaddr_in6)) {
			/* extract server IP address & port */
			struct sockaddr_in6 *sin6;
			sin6 = (struct sockaddr_in6 *)cm_entry->x_server.buf;
			(void) kinet_ntop6((uchar_t *)&sin6->sin6_addr, fbuf,
			    INET6_ADDRSTRLEN);
			cm_ksp_data->x_port.value.ui32 = sin6->sin6_port;
		} else {
			struct sockaddr_in *sa;

			sa = (struct sockaddr_in *)cm_entry->x_server.buf;
			b = (uchar_t *)&sa->sin_addr;
			(void) sprintf(fbuf,
			    "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF,
			    b[2] & 0xFF, b[3] & 0xFF);
		}
		KSTAT_NAMED_STR_BUFLEN(&cm_ksp_data->x_server) =
		    strlen(fbuf) + 1;
	}

	return (0);
}


/*
 * We want a version of delay which is interruptible by a UNIX signal.
 * Return EINTR if an interrupt occurred.
 */
static int
clnt_delay(clock_t ticks, bool_t nosignal)
{
	if (nosignal == TRUE) {
		delay(ticks);
		return (0);
	}
	return (delay_sig(ticks));
}
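/*
 * Added note (not in the original source): the per-connection kstats
 * filled in by conn_kstat_update() can be read from userland with
 * kstat(1M); the "server" and "status" fields are handy for spotting a
 * transport stuck in one of the X_BADSTATES.
 */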
/*
 * Wait for a connection until a timeout, or until we are
 * signalled that there has been a connection state change.
 */
static enum clnt_stat
connmgr_cwait(struct cm_xprt *cm_entry, const struct timeval *waitp,
    bool_t nosignal)
{
	bool_t interrupted;
	clock_t timout, cv_stat;
	enum clnt_stat clstat;
	unsigned int old_state;

	ASSERT(MUTEX_HELD(&connmgr_lock));
	/*
	 * We wait for the transport connection to be made, or an
	 * indication that it could not be made.
	 */
	clstat = RPC_TIMEDOUT;
	interrupted = FALSE;

	old_state = cm_entry->x_state_flags;
	/*
	 * Now loop until cv_timedwait{_sig} returns because of
	 * a signal(0) or timeout(-1) or cv_signal(>0). But it may be
	 * cv_signalled for various other reasons too. So loop
	 * until there is a state change on the connection.
	 */

	timout = waitp->tv_sec * drv_usectohz(1000000) +
	    drv_usectohz(waitp->tv_usec) + lbolt;

	if (nosignal) {
		while ((cv_stat = cv_timedwait(&cm_entry->x_conn_cv,
		    &connmgr_lock, timout)) > 0 &&
		    cm_entry->x_state_flags == old_state)
			;
	} else {
		while ((cv_stat = cv_timedwait_sig(&cm_entry->x_conn_cv,
		    &connmgr_lock, timout)) > 0 &&
		    cm_entry->x_state_flags == old_state)
			;

		if (cv_stat == 0) /* got intr signal? */
			interrupted = TRUE;
	}

	if ((cm_entry->x_state_flags & (X_BADSTATES|X_CONNECTED)) ==
	    X_CONNECTED) {
		clstat = RPC_SUCCESS;
	} else {
		if (interrupted == TRUE)
			clstat = RPC_INTR;
		RPCLOG(1, "connmgr_cwait: can't connect, error: %s\n",
		    clnt_sperrno(clstat));
	}

	return (clstat);
}

/*
 * Primary interface for how RPC grabs a connection.
 */
static struct cm_xprt *
connmgr_wrapget(
	struct netbuf *retryaddr,
	const struct timeval *waitp,
	cku_private_t *p)
{
	struct cm_xprt *cm_entry;

	cm_entry = connmgr_get(retryaddr, waitp, &p->cku_addr, p->cku_addrfmly,
	    &p->cku_srcaddr, &p->cku_err, p->cku_device,
	    p->cku_client.cl_nosignal, p->cku_useresvport);

	if (cm_entry == NULL) {
		/*
		 * Re-map the call status to RPC_INTR if the err code is
		 * EINTR. This can happen if the call status is RPC_TLIERROR.
		 * However, don't re-map if signalling has been turned off.
		 * XXX Really need to create a separate thread whenever
		 * there isn't an existing connection.
		 */
		if (p->cku_err.re_errno == EINTR) {
			if (p->cku_client.cl_nosignal == TRUE)
				p->cku_err.re_errno = EIO;
			else
				p->cku_err.re_status = RPC_INTR;
		}
	}

	return (cm_entry);
}
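/*
 * Added note (not in the original source): connmgr_cwait() compares
 * x_state_flags against the old_state snapshot so that stray
 * cv_signal()s don't end the wait early; the loop only exits on a
 * timeout, a signal, or a genuine connection state change, and the
 * (X_BADSTATES|X_CONNECTED) test then decides whether that change was a
 * usable connect.
 */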
/*
 * Obtains a transport to the server specified in addr.  If a suitable transport
 * does not already exist in the list of cached transports, a new connection
 * is created, connected, and added to the list. The connection is for sending
 * only - the reply message may come back on another transport connection.
 */
static struct cm_xprt *
connmgr_get(
	struct netbuf	*retryaddr,
	const struct timeval	*waitp,	/* changed to a ptr to conserve stack */
	struct netbuf	*destaddr,
	int		addrfmly,
	struct netbuf	*srcaddr,
	struct rpc_err	*rpcerr,
	dev_t		device,
	bool_t		nosignal,
	int		useresvport)
{
	struct cm_xprt *cm_entry;
	struct cm_xprt *lru_entry;
	struct cm_xprt **cmp;
	queue_t *wq;
	TIUSER *tiptr;
	int i;
	int retval;
	clock_t prev_time;
	int tidu_size;
	bool_t	connected;
	zoneid_t zoneid = getzoneid();

	/*
	 * If the call is not a retry, look for a transport entry that
	 * goes to the server of interest.
	 */
	mutex_enter(&connmgr_lock);

	if (retryaddr == NULL) {
use_new_conn:
		i = 0;
		cm_entry = lru_entry = NULL;
		prev_time = lbolt;

		cmp = &cm_hd;
		while ((cm_entry = *cmp) != NULL) {
			ASSERT(cm_entry != cm_entry->x_next);
			/*
			 * Garbage collect connections that are marked
			 * as needing a disconnect.
			 */
			if (cm_entry->x_needdis) {
				CONN_HOLD(cm_entry);
				connmgr_dis_and_wait(cm_entry);
				connmgr_release(cm_entry);
				/*
				 * connmgr_lock could have been
				 * dropped for the disconnect
				 * processing so start over.
				 */
				goto use_new_conn;
			}

			/*
			 * Garbage collect the dead connections that have
			 * no threads working on them.
			 */
			if ((cm_entry->x_state_flags & (X_DEAD|X_THREAD)) ==
			    X_DEAD) {
				*cmp = cm_entry->x_next;
				mutex_exit(&connmgr_lock);
				connmgr_close(cm_entry);
				mutex_enter(&connmgr_lock);
				goto use_new_conn;
			}


			if ((cm_entry->x_state_flags & X_BADSTATES) == 0 &&
			    cm_entry->x_zoneid == zoneid &&
			    cm_entry->x_rdev == device &&
			    destaddr->len == cm_entry->x_server.len &&
			    bcmp(destaddr->buf, cm_entry->x_server.buf,
			    destaddr->len) == 0) {
				/*
				 * If the matching entry isn't connected,
				 * attempt to reconnect it.
				 */
				if (cm_entry->x_connected == FALSE) {
					/*
					 * We don't go through trying
					 * to find the least recently
					 * used connected entry because
					 * connmgr_reconnect() briefly
					 * dropped the connmgr_lock,
					 * allowing a window for our
					 * accounting to be messed up.
					 * In any case, a re-connected
					 * connection is as good as
					 * a LRU connection.
					 */
					return (connmgr_wrapconnect(cm_entry,
					    waitp, destaddr, addrfmly, srcaddr,
					    rpcerr, TRUE, nosignal));
				}
				i++;
				if (cm_entry->x_time - prev_time <= 0 ||
				    lru_entry == NULL) {
					prev_time = cm_entry->x_time;
					lru_entry = cm_entry;
				}
			}
			cmp = &cm_entry->x_next;
		}

		if (i > clnt_max_conns) {
			RPCLOG(8, "connmgr_get: too many conns, dooming entry"
			    " %p\n", (void *)lru_entry->x_tiptr);
			lru_entry->x_doomed = TRUE;
			goto use_new_conn;
		}

		/*
		 * If we are at the maximum number of connections to
		 * the server, hand back the least recently used one.
		 */
		if (i == clnt_max_conns) {
			/*
			 * Copy into the handle the source address of
			 * the connection, which we will use in case of
			 * a later retry.
			 */
			if (srcaddr->len != lru_entry->x_src.len) {
				if (srcaddr->len > 0)
					kmem_free(srcaddr->buf,
					    srcaddr->maxlen);
				srcaddr->buf = kmem_zalloc(
				    lru_entry->x_src.len, KM_SLEEP);
				srcaddr->maxlen = srcaddr->len =
				    lru_entry->x_src.len;
			}
			bcopy(lru_entry->x_src.buf, srcaddr->buf, srcaddr->len);
			RPCLOG(2, "connmgr_get: call going out on %p\n",
			    (void *)lru_entry);
			lru_entry->x_time = lbolt;
			CONN_HOLD(lru_entry);
			mutex_exit(&connmgr_lock);
			return (lru_entry);
		}

	} else {
		/*
		 * This is the retry case (retryaddr != NULL).  Retries must
		 * be sent on the same source port as the original call.
		 */

		/*
		 * Walk the list looking for a connection with a source address
		 * that matches the retry address.
		 */
1868 		 */
1869 		cmp = &cm_hd;
1870 		while ((cm_entry = *cmp) != NULL) {
1871 			ASSERT(cm_entry != cm_entry->x_next);
1872 			if (zoneid != cm_entry->x_zoneid ||
1873 			    device != cm_entry->x_rdev ||
1874 			    retryaddr->len != cm_entry->x_src.len ||
1875 			    bcmp(retryaddr->buf, cm_entry->x_src.buf,
1876 			    retryaddr->len) != 0) {
1877 				cmp = &cm_entry->x_next;
1878 				continue;
1879 			}
1880 
1881 			/*
1882 			 * Sanity check: if the connection with our source
1883 			 * port is going to some other server, something went
1884 			 * wrong, as we never delete connections (i.e. release
1885 			 * ports) unless they have been idle. In this case,
1886 			 * it is probably better to send the call out using
1887 			 * a new source address than to fail it altogether,
1888 			 * since that port may never be released.
1889 			 */
1890 			if (destaddr->len != cm_entry->x_server.len ||
1891 			    bcmp(destaddr->buf, cm_entry->x_server.buf,
1892 			    destaddr->len) != 0) {
1893 				RPCLOG(1, "connmgr_get: tiptr %p"
1894 				    " is going to a different server"
1895 				    " with the port that belongs"
1896 				    " to us!\n", (void *)cm_entry->x_tiptr);
1897 				retryaddr = NULL;
1898 				goto use_new_conn;
1899 			}
1900 
1901 			/*
1902 			 * If the connection of interest is not connected and we
1903 			 * can't reconnect it, then the server is probably
1904 			 * still down. Return NULL to the caller and let it
1905 			 * retry later if it wants to. We have a delay so the
1906 			 * machine doesn't go into a tight retry loop. If the
1907 			 * entry was already connected, or the reconnect was
1908 			 * successful, return this entry.
1909 			 */
1910 			if (cm_entry->x_connected == FALSE) {
1911 				return (connmgr_wrapconnect(cm_entry,
1912 				    waitp, destaddr, addrfmly, NULL,
1913 				    rpcerr, TRUE, nosignal));
1914 			} else {
1915 				CONN_HOLD(cm_entry);
1916 
1917 				cm_entry->x_time = lbolt;
1918 				mutex_exit(&connmgr_lock);
1919 				RPCLOG(2, "connmgr_get: found old "
1920 				    "transport %p for retry\n",
1921 				    (void *)cm_entry);
1922 				return (cm_entry);
1923 			}
1924 		}
1925 
1926 		/*
1927 		 * We cannot find an entry in the list for this retry.
1928 		 * Either the entry has been removed temporarily to be
1929 		 * reconnected by another thread, or the original call
1930 		 * got a port but never got connected,
1931 		 * and hence the transport never got put in the
1932 		 * list. Fall through to the "create new connection" code -
1933 		 * the former case will fail there trying to rebind the port,
1934 		 * and the latter case (and any other pathological cases) will
1935 		 * rebind and reconnect and not hang the client machine.
1936 		 */
1937 		RPCLOG0(8, "connmgr_get: no entry in list for retry\n");
1938 	}
1939 	/*
1940 	 * Set up a transport entry in the connection manager's list.
1941 	 */
1942 	cm_entry = (struct cm_xprt *)
1943 	    kmem_zalloc(sizeof (struct cm_xprt), KM_SLEEP);
1944 
1945 	cm_entry->x_server.buf = kmem_zalloc(destaddr->len, KM_SLEEP);
1946 	bcopy(destaddr->buf, cm_entry->x_server.buf, destaddr->len);
1947 	cm_entry->x_server.len = cm_entry->x_server.maxlen = destaddr->len;
1948 
1949 	cm_entry->x_state_flags = X_THREAD;
1950 	cm_entry->x_ref = 1;
1951 	cm_entry->x_family = addrfmly;
1952 	cm_entry->x_rdev = device;
1953 	cm_entry->x_zoneid = zoneid;
1954 	mutex_init(&cm_entry->x_lock, NULL, MUTEX_DEFAULT, NULL);
1955 	cv_init(&cm_entry->x_cv, NULL, CV_DEFAULT, NULL);
1956 	cv_init(&cm_entry->x_conn_cv, NULL, CV_DEFAULT, NULL);
1957 	cv_init(&cm_entry->x_dis_cv, NULL, CV_DEFAULT, NULL);
1958 
1959 	/*
1960 	 * Note that we add this partially initialized entry to the
1961 	 * connection list. This is so that we don't end up with multiple
1962 	 * connections to the same server.
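	 *
	 * For illustration, the intended interleaving with a second,
	 * non-retry thread looks like this (a sketch only):
	 *
	 *	thread 1: add entry, set X_THREAD, drop connmgr_lock
	 *	thread 2: find entry, see x_thread set, block in
	 *	    connmgr_cwait() via connmgr_wrapconnect()
	 *	thread 1: finish connect, clear x_thread,
	 *	    cv_broadcast(&x_conn_cv)
	 *	thread 2: wake up and use (or give up on) the connection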
1963 	 *
1964 	 * Note that x_src is not initialized at this point. This is because
1965 	 * retryaddr might be NULL in which case x_src is whatever
1966 	 * t_kbind/bindresvport gives us. If another thread wants a
1967 	 * connection to the same server, seemingly we have an issue, but we
1968 	 * don't. If the other thread comes in with retryaddr == NULL, then it
1969 	 * will never look at x_src, and it will end up waiting in
1970 	 * connmgr_cwait() for the first thread to finish the connection
1971 	 * attempt. If the other thread comes in with retryaddr != NULL, then
1972 	 * that means there was a request sent on a connection, in which case
1973 	 * the connection should already exist. Thus the other thread
1974 	 * never gets here ... it finds the connection to its server in the
1975 	 * connection list.
1976 	 *
1977 	 * But even if this theory is wrong, in the retryaddr != NULL case,
1978 	 * the 2nd thread will skip us because x_src.len == 0.
1979 	 */
1980 	cm_entry->x_next = cm_hd;
1981 	cm_hd = cm_entry;
1982 	mutex_exit(&connmgr_lock);
1983 
1984 	/*
1985 	 * Either we didn't find an entry to the server of interest, or we
1986 	 * don't have the maximum number of connections to that server -
1987 	 * create a new connection.
1988 	 */
1989 	RPCLOG0(8, "connmgr_get: creating new connection\n");
1990 	rpcerr->re_status = RPC_TLIERROR;
1991 
1992 	i = t_kopen(NULL, device, FREAD|FWRITE|FNDELAY, &tiptr, kcred);
1993 	if (i) {
1994 		RPCLOG(1, "connmgr_get: can't open cots device, error %d\n", i);
1995 		rpcerr->re_errno = i;
1996 		connmgr_cancelconn(cm_entry);
1997 		return (NULL);
1998 	}
1999 	rpc_poptimod(tiptr->fp->f_vnode);
2000 
2001 	if (i = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"rpcmod", 0,
2002 	    K_TO_K, kcred, &retval)) {
2003 		RPCLOG(1, "connmgr_get: can't push cots module, %d\n", i);
2004 		(void) t_kclose(tiptr, 1);
2005 		rpcerr->re_errno = i;
2006 		connmgr_cancelconn(cm_entry);
2007 		return (NULL);
2008 	}
2009 
2010 	if (i = strioctl(tiptr->fp->f_vnode, RPC_CLIENT, 0, 0, K_TO_K,
2011 	    kcred, &retval)) {
2012 		RPCLOG(1, "connmgr_get: can't set client status with cots "
2013 		    "module, %d\n", i);
2014 		(void) t_kclose(tiptr, 1);
2015 		rpcerr->re_errno = i;
2016 		connmgr_cancelconn(cm_entry);
2017 		return (NULL);
2018 	}
2019 
2020 	mutex_enter(&connmgr_lock);
2021 
2022 	wq = tiptr->fp->f_vnode->v_stream->sd_wrq->q_next;
2023 	cm_entry->x_wq = wq;
2024 
2025 	mutex_exit(&connmgr_lock);
2026 
2027 	if (i = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"timod", 0,
2028 	    K_TO_K, kcred, &retval)) {
2029 		RPCLOG(1, "connmgr_get: can't push timod, %d\n", i);
2030 		(void) t_kclose(tiptr, 1);
2031 		rpcerr->re_errno = i;
2032 		connmgr_cancelconn(cm_entry);
2033 		return (NULL);
2034 	}
2035 
2036 	/*
2037 	 * If the caller has not specified reserved port usage then
2038 	 * take the system default.
2039 	 */
2040 	if (useresvport == -1)
2041 		useresvport = clnt_cots_do_bindresvport;
2042 
2043 	if ((useresvport || retryaddr != NULL) &&
2044 	    (addrfmly == AF_INET || addrfmly == AF_INET6)) {
2045 		bool_t alloc_src = FALSE;
2046 
2047 		if (srcaddr->len != destaddr->len) {
2048 			kmem_free(srcaddr->buf, srcaddr->maxlen);
2049 			srcaddr->buf = kmem_zalloc(destaddr->len, KM_SLEEP);
2050 			srcaddr->maxlen = destaddr->len;
2051 			srcaddr->len = destaddr->len;
2052 			alloc_src = TRUE;
2053 		}
2054 
2055 		if ((i = bindresvport(tiptr, retryaddr, srcaddr, TRUE)) != 0) {
2056 			(void) t_kclose(tiptr, 1);
2057 			RPCLOG(1, "connmgr_get: couldn't bind, retryaddr: "
2058 			    "%p\n", (void *)retryaddr);
2059 
2060 			/*
2061 			 * 1225408: If we allocated a source address, then it
2062 			 * is either garbage or all zeroes.
In that case
2063 			 * we need to clear srcaddr.
2064 			 */
2065 			if (alloc_src == TRUE) {
2066 				kmem_free(srcaddr->buf, srcaddr->maxlen);
2067 				srcaddr->maxlen = srcaddr->len = 0;
2068 				srcaddr->buf = NULL;
2069 			}
2070 			rpcerr->re_errno = i;
2071 			connmgr_cancelconn(cm_entry);
2072 			return (NULL);
2073 		}
2074 	} else {
2075 		if ((i = t_kbind(tiptr, NULL, NULL)) != 0) {
2076 			RPCLOG(1, "clnt_cots_kcreate: t_kbind: %d\n", i);
2077 			(void) t_kclose(tiptr, 1);
2078 			rpcerr->re_errno = i;
2079 			connmgr_cancelconn(cm_entry);
2080 			return (NULL);
2081 		}
2082 	}
2083 
2084 	{
2085 		/*
2086 		 * Keep the kernel stack lean. Don't move this call
2087 		 * declaration to the top of this function because a
2088 		 * call is declared in connmgr_wrapconnect()
2089 		 */
2090 		calllist_t call;
2091 
2092 		bzero(&call, sizeof (call));
2093 		cv_init(&call.call_cv, NULL, CV_DEFAULT, NULL);
2094 
2095 		/*
2096 		 * This is a bound end-point so don't close its stream.
2097 		 */
2098 		connected = connmgr_connect(cm_entry, wq, destaddr, addrfmly,
2099 		    &call, &tidu_size, FALSE, waitp,
2100 		    nosignal);
2101 		*rpcerr = call.call_err;
2102 		cv_destroy(&call.call_cv);
2103 
2104 	}
2105 
2106 	mutex_enter(&connmgr_lock);
2107 
2108 	/*
2109 	 * Set up a transport entry in the connection manager's list.
2110 	 */
2111 	cm_entry->x_src.buf = kmem_zalloc(srcaddr->len, KM_SLEEP);
2112 	bcopy(srcaddr->buf, cm_entry->x_src.buf, srcaddr->len);
2113 	cm_entry->x_src.len = cm_entry->x_src.maxlen = srcaddr->len;
2114 
2115 	cm_entry->x_tiptr = tiptr;
2116 	cm_entry->x_time = lbolt;
2117 
2118 	if (tiptr->tp_info.servtype == T_COTS_ORD)
2119 		cm_entry->x_ordrel = TRUE;
2120 	else
2121 		cm_entry->x_ordrel = FALSE;
2122 
2123 	cm_entry->x_tidu_size = tidu_size;
2124 
2125 	if (cm_entry->x_early_disc)
2126 		cm_entry->x_connected = FALSE;
2127 	else
2128 		cm_entry->x_connected = connected;
2129 
2130 	/*
2131 	 * There could be a discrepancy here such that
2132 	 * x_early_disc is TRUE yet connected is TRUE as well
2133 	 * and the connection is actually connected. In that case
2134 	 * let's be conservative and declare the connection as not
2135 	 * connected.
2136 	 */
2137 	cm_entry->x_early_disc = FALSE;
2138 	cm_entry->x_needdis = (cm_entry->x_connected == FALSE);
2139 	cm_entry->x_ctime = lbolt;
2140 
2141 	/*
2142 	 * Notify any threads waiting that the connection attempt is done.
2143 	 */
2144 	cm_entry->x_thread = FALSE;
2145 	cv_broadcast(&cm_entry->x_conn_cv);
2146 
2147 	mutex_exit(&connmgr_lock);
2148 
2149 	if (cm_entry->x_connected == FALSE) {
2150 		connmgr_release(cm_entry);
2151 		return (NULL);
2152 	}
2153 	return (cm_entry);
2154 }
2155 
2156 /*
2157  * Keep the cm_xprt entry on the connection list when making a connection. This
2158  * is to prevent multiple connections to a slow server from appearing.
2159  * We use the bit field x_thread to tell if a thread is doing a connection
2160  * which keeps other interested threads from messing with the connection.
2161  * Those other threads just wait if x_thread is set.
2162  *
2163  * If x_thread is not set, then we do the actual work of connecting via
2164  * connmgr_connect().
2165  *
2166  * mutex convention: called with connmgr_lock held, returns with it released.
2167  */
2168 static struct cm_xprt *
2169 connmgr_wrapconnect(
2170 	struct cm_xprt *cm_entry,
2171 	const struct timeval *waitp,
2172 	struct netbuf *destaddr,
2173 	int addrfmly,
2174 	struct netbuf *srcaddr,
2175 	struct rpc_err *rpcerr,
2176 	bool_t reconnect,
2177 	bool_t nosignal)
2178 {
2179 	ASSERT(MUTEX_HELD(&connmgr_lock));
2180 	/*
2181 	 * Hold this entry as we are about to drop connmgr_lock.
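	 *
	 * (CONN_HOLD() is conceptually just a locked reference bump; a
	 * sketch of what the macro presumably expands to:
	 *
	 *	mutex_enter(&(Cm_entry)->x_lock);
	 *	(Cm_entry)->x_ref++;
	 *	mutex_exit(&(Cm_entry)->x_lock);
	 *
	 * with connmgr_release() doing the matching decrement.)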
2182 	 */
2183 	CONN_HOLD(cm_entry);
2184 
2185 	/*
2186 	 * If there is a thread already making a connection for us, then
2187 	 * wait for it to complete the connection.
2188 	 */
2189 	if (cm_entry->x_thread == TRUE) {
2190 		rpcerr->re_status = connmgr_cwait(cm_entry, waitp, nosignal);
2191 
2192 		if (rpcerr->re_status != RPC_SUCCESS) {
2193 			mutex_exit(&connmgr_lock);
2194 			connmgr_release(cm_entry);
2195 			return (NULL);
2196 		}
2197 	} else {
2198 		bool_t connected;
2199 		calllist_t call;
2200 
2201 		cm_entry->x_thread = TRUE;
2202 
2203 		while (cm_entry->x_needrel == TRUE) {
2204 			cm_entry->x_needrel = FALSE;
2205 
2206 			connmgr_sndrel(cm_entry);
2207 			delay(drv_usectohz(1000000));
2208 
2209 			mutex_enter(&connmgr_lock);
2210 		}
2211 
2212 		/*
2213 		 * If we need to send a T_DISCON_REQ, send one.
2214 		 */
2215 		connmgr_dis_and_wait(cm_entry);
2216 
2217 		mutex_exit(&connmgr_lock);
2218 
2219 		bzero(&call, sizeof (call));
2220 		cv_init(&call.call_cv, NULL, CV_DEFAULT, NULL);
2221 
2222 		connected = connmgr_connect(cm_entry, cm_entry->x_wq,
2223 		    destaddr, addrfmly, &call,
2224 		    &cm_entry->x_tidu_size,
2225 		    reconnect, waitp, nosignal);
2226 
2227 		*rpcerr = call.call_err;
2228 		cv_destroy(&call.call_cv);
2229 
2230 		mutex_enter(&connmgr_lock);
2231 
2232 
2233 		if (cm_entry->x_early_disc)
2234 			cm_entry->x_connected = FALSE;
2235 		else
2236 			cm_entry->x_connected = connected;
2237 
2238 		/*
2239 		 * There could be a discrepancy here such that
2240 		 * x_early_disc is TRUE yet connected is TRUE as well
2241 		 * and the connection is actually connected. In that case
2242 		 * let's be conservative and declare the connection as not
2243 		 * connected.
2244 		 */
2245 
2246 		cm_entry->x_early_disc = FALSE;
2247 		cm_entry->x_needdis = (cm_entry->x_connected == FALSE);
2248 
2249 
2250 		/*
2251 		 * connmgr_connect() may have given up before the connection
2252 		 * actually timed out. So ensure that before the next
2253 		 * connection attempt we do a disconnect.
2254 		 */
2255 		cm_entry->x_ctime = lbolt;
2256 		cm_entry->x_thread = FALSE;
2257 
2258 		cv_broadcast(&cm_entry->x_conn_cv);
2259 
2260 		if (cm_entry->x_connected == FALSE) {
2261 			mutex_exit(&connmgr_lock);
2262 			connmgr_release(cm_entry);
2263 			return (NULL);
2264 		}
2265 	}
2266 
2267 	if (srcaddr != NULL) {
2268 		/*
2269 		 * Copy into the handle the
2270 		 * source address of the
2271 		 * connection, which we will use
2272 		 * in case of a later retry.
2273 		 */
2274 		if (srcaddr->len != cm_entry->x_src.len) {
2275 			if (srcaddr->maxlen > 0)
2276 				kmem_free(srcaddr->buf, srcaddr->maxlen);
2277 			srcaddr->buf = kmem_zalloc(cm_entry->x_src.len,
2278 			    KM_SLEEP);
2279 			srcaddr->maxlen = srcaddr->len =
2280 			    cm_entry->x_src.len;
2281 		}
2282 		bcopy(cm_entry->x_src.buf, srcaddr->buf, srcaddr->len);
2283 	}
2284 	cm_entry->x_time = lbolt;
2285 	mutex_exit(&connmgr_lock);
2286 	return (cm_entry);
2287 }
2288 
2289 /*
2290  * If we need to send a T_DISCON_REQ, send one.
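 *
 * The disconnect handshake driven here can be summarized as follows
 * (an illustrative sketch; clnt_dispatch_notifyall() supplies the ack):
 *
 *	x_needdis set -> connmgr_snddis() sends T_DISCON_REQ and sets
 *	    x_waitdis -> bounded wait on x_dis_cv -> T_OK_ACK or
 *	    T_ERROR_ACK arrives -> x_waitdis cleared, x_dis_cv signalled;
 *	    on timeout, set x_needdis again and resend.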
2291  */
2292 static void
2293 connmgr_dis_and_wait(struct cm_xprt *cm_entry)
2294 {
2295 	ASSERT(MUTEX_HELD(&connmgr_lock));
2296 	for (;;) {
2297 		while (cm_entry->x_needdis == TRUE) {
2298 			RPCLOG(8, "connmgr_dis_and_wait: need "
2299 			    "T_DISCON_REQ for connection 0x%p\n",
2300 			    (void *)cm_entry);
2301 			cm_entry->x_needdis = FALSE;
2302 			cm_entry->x_waitdis = TRUE;
2303 
2304 			connmgr_snddis(cm_entry);
2305 
2306 			mutex_enter(&connmgr_lock);
2307 		}
2308 
2309 		if (cm_entry->x_waitdis == TRUE) {
2310 			clock_t curlbolt;
2311 			clock_t timout;
2312 
2313 			RPCLOG(8, "connmgr_dis_and_wait waiting for "
2314 			    "T_DISCON_REQ's ACK for connection %p\n",
2315 			    (void *)cm_entry);
2316 			curlbolt = ddi_get_lbolt();
2317 
2318 			timout = clnt_cots_min_conntout *
2319 			    drv_usectohz(1000000) + curlbolt;
2320 
2321 			/*
2322 			 * The TPI spec says that the T_DISCON_REQ
2323 			 * will get acknowledged, but in practice
2324 			 * the ACK may never get sent. So don't
2325 			 * block forever.
2326 			 */
2327 			(void) cv_timedwait(&cm_entry->x_dis_cv,
2328 			    &connmgr_lock, timout);
2329 		}
2330 		/*
2331 		 * If we got the ACK, break. If we didn't,
2332 		 * then send another T_DISCON_REQ.
2333 		 */
2334 		if (cm_entry->x_waitdis == FALSE) {
2335 			break;
2336 		} else {
2337 			RPCLOG(8, "connmgr_dis_and_wait: did "
2338 			    "not get T_DISCON_REQ's ACK for "
2339 			    "connection %p\n", (void *)cm_entry);
2340 			cm_entry->x_needdis = TRUE;
2341 		}
2342 	}
2343 }
2344 
2345 static void
2346 connmgr_cancelconn(struct cm_xprt *cm_entry)
2347 {
2348 	/*
2349 	 * Mark the connection table entry as dead; the next thread that
2350 	 * goes through connmgr_release() will notice this and deal with it.
2351 	 */
2352 	mutex_enter(&connmgr_lock);
2353 	cm_entry->x_dead = TRUE;
2354 
2355 	/*
2356 	 * Notify any threads waiting for the connection that it isn't
2357 	 * going to happen.
2358 	 */
2359 	cm_entry->x_thread = FALSE;
2360 	cv_broadcast(&cm_entry->x_conn_cv);
2361 	mutex_exit(&connmgr_lock);
2362 
2363 	connmgr_release(cm_entry);
2364 }
2365 
2366 static void
2367 connmgr_close(struct cm_xprt *cm_entry)
2368 {
2369 	mutex_enter(&cm_entry->x_lock);
2370 	while (cm_entry->x_ref != 0) {
2371 		/*
2372 		 * Must be a noninterruptible wait.
2373 		 */
2374 		cv_wait(&cm_entry->x_cv, &cm_entry->x_lock);
2375 	}
2376 
2377 	if (cm_entry->x_tiptr != NULL)
2378 		(void) t_kclose(cm_entry->x_tiptr, 1);
2379 
2380 	mutex_exit(&cm_entry->x_lock);
2381 	if (cm_entry->x_ksp != NULL) {
2382 		mutex_enter(&connmgr_lock);
2383 		cm_entry->x_ksp->ks_private = NULL;
2384 		mutex_exit(&connmgr_lock);
2385 
2386 		/*
2387 		 * Must free the buffer we allocated for the
2388 		 * server address in the update function.
2389 		 */
2390 		if (((struct cm_kstat_xprt *)(cm_entry->x_ksp->ks_data))->
2391 		    x_server.value.str.addr.ptr != NULL)
2392 			kmem_free(((struct cm_kstat_xprt *)(cm_entry->x_ksp->
2393 			    ks_data))->x_server.value.str.addr.ptr,
2394 			    INET6_ADDRSTRLEN);
2395 		kmem_free(cm_entry->x_ksp->ks_data,
2396 		    cm_entry->x_ksp->ks_data_size);
2397 		kstat_delete(cm_entry->x_ksp);
2398 	}
2399 
2400 	mutex_destroy(&cm_entry->x_lock);
2401 	cv_destroy(&cm_entry->x_cv);
2402 	cv_destroy(&cm_entry->x_conn_cv);
2403 	cv_destroy(&cm_entry->x_dis_cv);
2404 
2405 	if (cm_entry->x_server.buf != NULL)
2406 		kmem_free(cm_entry->x_server.buf, cm_entry->x_server.maxlen);
2407 	if (cm_entry->x_src.buf != NULL)
2408 		kmem_free(cm_entry->x_src.buf, cm_entry->x_src.maxlen);
2409 	kmem_free(cm_entry, sizeof (struct cm_xprt));
2410 }
2411 
2412 /*
2413  * Called by KRPC after sending the call message to release the connection
2414  * it was using.
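 *
 * A sketch of the typical hold/release pairing (for illustration):
 *
 *	cm_entry = connmgr_wrapget(...);	hold taken (x_ref++)
 *	... transmit the call on cm_entry->x_wq ...
 *	connmgr_release(cm_entry);		hold dropped (x_ref--)
 *
 * When x_ref drops to zero, x_cv is signalled so that connmgr_close()
 * can finish tearing the endpoint down.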
2415 */ 2416 static void 2417 connmgr_release(struct cm_xprt *cm_entry) 2418 { 2419 mutex_enter(&cm_entry->x_lock); 2420 cm_entry->x_ref--; 2421 if (cm_entry->x_ref == 0) 2422 cv_signal(&cm_entry->x_cv); 2423 mutex_exit(&cm_entry->x_lock); 2424 } 2425 2426 /* 2427 * Given an open stream, connect to the remote. Returns true if connected, 2428 * false otherwise. 2429 */ 2430 static bool_t 2431 connmgr_connect( 2432 struct cm_xprt *cm_entry, 2433 queue_t *wq, 2434 struct netbuf *addr, 2435 int addrfmly, 2436 calllist_t *e, 2437 int *tidu_ptr, 2438 bool_t reconnect, 2439 const struct timeval *waitp, 2440 bool_t nosignal) 2441 { 2442 mblk_t *mp; 2443 struct T_conn_req *tcr; 2444 struct T_info_ack *tinfo; 2445 int interrupted, error; 2446 int tidu_size, kstat_instance; 2447 2448 /* if it's a reconnect, flush any lingering data messages */ 2449 if (reconnect) 2450 (void) putctl1(wq, M_FLUSH, FLUSHRW); 2451 2452 mp = allocb(sizeof (*tcr) + addr->len, BPRI_LO); 2453 if (mp == NULL) { 2454 /* 2455 * This is unfortunate, but we need to look up the stats for 2456 * this zone to increment the "memory allocation failed" 2457 * counter. curproc->p_zone is safe since we're initiating a 2458 * connection and not in some strange streams context. 2459 */ 2460 struct rpcstat *rpcstat; 2461 2462 rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone); 2463 ASSERT(rpcstat != NULL); 2464 2465 RPCLOG0(1, "connmgr_connect: cannot alloc mp for " 2466 "sending conn request\n"); 2467 COTSRCSTAT_INCR(rpcstat->rpc_cots_client, rcnomem); 2468 e->call_status = RPC_SYSTEMERROR; 2469 e->call_reason = ENOSR; 2470 return (FALSE); 2471 } 2472 2473 mp->b_datap->db_type = M_PROTO; 2474 tcr = (struct T_conn_req *)mp->b_rptr; 2475 bzero(tcr, sizeof (*tcr)); 2476 tcr->PRIM_type = T_CONN_REQ; 2477 tcr->DEST_length = addr->len; 2478 tcr->DEST_offset = sizeof (struct T_conn_req); 2479 mp->b_wptr = mp->b_rptr + sizeof (*tcr); 2480 2481 bcopy(addr->buf, mp->b_wptr, tcr->DEST_length); 2482 mp->b_wptr += tcr->DEST_length; 2483 2484 RPCLOG(8, "connmgr_connect: sending conn request on queue " 2485 "%p", (void *)wq); 2486 RPCLOG(8, " call %p\n", (void *)wq); 2487 /* 2488 * We use the entry in the handle that is normally used for 2489 * waiting for RPC replies to wait for the connection accept. 2490 */ 2491 clnt_dispatch_send(wq, mp, e, 0, 0); 2492 2493 mutex_enter(&clnt_pending_lock); 2494 2495 /* 2496 * We wait for the transport connection to be made, or an 2497 * indication that it could not be made. 2498 */ 2499 interrupted = 0; 2500 2501 /* 2502 * waitforack should have been called with T_OK_ACK, but the 2503 * present implementation needs to be passed T_INFO_ACK to 2504 * work correctly. 2505 */ 2506 error = waitforack(e, T_INFO_ACK, waitp, nosignal); 2507 if (error == EINTR) 2508 interrupted = 1; 2509 if (zone_status_get(curproc->p_zone) >= ZONE_IS_EMPTY) { 2510 /* 2511 * No time to lose; we essentially have been signaled to 2512 * quit. 
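		 * (Treating zone shutdown like an interrupt lets the
		 * connect attempt unwind promptly instead of waiting out
		 * the full connection timeout.)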
2513 		 */
2514 		interrupted = 1;
2515 	}
2516 #ifdef RPCDEBUG
2517 	if (error == ETIME)
2518 		RPCLOG0(8, "connmgr_connect: giving up "
2519 		    "on connection attempt; "
2520 		    "clnt_dispatch notifyconn "
2521 		    "diagnostic 'no one waiting for "
2522 		    "connection' should not be "
2523 		    "unexpected\n");
2524 #endif
2525 	if (e->call_prev)
2526 		e->call_prev->call_next = e->call_next;
2527 	else
2528 		clnt_pending = e->call_next;
2529 	if (e->call_next)
2530 		e->call_next->call_prev = e->call_prev;
2531 	mutex_exit(&clnt_pending_lock);
2532 
2533 	if (e->call_status != RPC_SUCCESS || error != 0) {
2534 		if (interrupted)
2535 			e->call_status = RPC_INTR;
2536 		else if (error == ETIME)
2537 			e->call_status = RPC_TIMEDOUT;
2538 		else if (error == EPROTO)
2539 			e->call_status = RPC_SYSTEMERROR;
2540 
2541 		RPCLOG(8, "connmgr_connect: can't connect, status: "
2542 		    "%s\n", clnt_sperrno(e->call_status));
2543 
2544 		if (e->call_reply) {
2545 			freemsg(e->call_reply);
2546 			e->call_reply = NULL;
2547 		}
2548 
2549 		return (FALSE);
2550 	}
2551 	/*
2552 	 * The result of the "connection accept" is a T_info_ack
2553 	 * in the call_reply field.
2554 	 */
2555 	ASSERT(e->call_reply != NULL);
2556 	mp = e->call_reply;
2557 	e->call_reply = NULL;
2558 	tinfo = (struct T_info_ack *)mp->b_rptr;
2559 
2560 	tidu_size = tinfo->TIDU_size;
2561 	tidu_size -= (tidu_size % BYTES_PER_XDR_UNIT);
2562 	if (tidu_size > COTS_DEFAULT_ALLOCSIZE || (tidu_size <= 0))
2563 		tidu_size = COTS_DEFAULT_ALLOCSIZE;
2564 	*tidu_ptr = tidu_size;
2565 
2566 	freemsg(mp);
2567 
2568 	/*
2569 	 * Set up the pertinent options. NODELAY is so the transport doesn't
2570 	 * buffer up RPC messages on either end. This may not be valid for
2571 	 * all transports. Failure to set this option is not cause to
2572 	 * bail out, so we return success anyway. Note that lack of NODELAY
2573 	 * or some other way to flush the message on both ends will cause
2574 	 * lots of retries and terrible performance.
2575 	 */
2576 	if (addrfmly == AF_INET || addrfmly == AF_INET6) {
2577 		(void) connmgr_setopt(wq, IPPROTO_TCP, TCP_NODELAY, e);
2578 		if (e->call_status == RPC_XPRTFAILED)
2579 			return (FALSE);
2580 	}
2581 
2582 	/*
2583 	 * Since we have a connection, we now need to figure out if
2584 	 * we need to create a kstat. If x_ksp is not NULL then we
2585 	 * are reusing a connection and so we do not need to create
2586 	 * another kstat -- let's just return.
2587 	 */
2588 	if (cm_entry->x_ksp != NULL)
2589 		return (TRUE);
2590 
2591 	/*
2592 	 * We need to increment rpc_kstat_instance atomically to prevent
2593 	 * two kstats being created with the same instance.
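	 *
	 * atomic_add_32_nv() returns the updated value, so two racing
	 * threads observe distinct instance numbers (N+1 and N+2) without
	 * needing a lock around the counter.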
2594 */ 2595 kstat_instance = atomic_add_32_nv((uint32_t *)&rpc_kstat_instance, 1); 2596 2597 if ((cm_entry->x_ksp = kstat_create_zone("unix", kstat_instance, 2598 "rpc_cots_connections", "rpc", KSTAT_TYPE_NAMED, 2599 (uint_t)(sizeof (cm_kstat_xprt_t) / sizeof (kstat_named_t)), 2600 KSTAT_FLAG_VIRTUAL, cm_entry->x_zoneid)) == NULL) { 2601 return (TRUE); 2602 } 2603 2604 cm_entry->x_ksp->ks_lock = &connmgr_lock; 2605 cm_entry->x_ksp->ks_private = cm_entry; 2606 cm_entry->x_ksp->ks_data_size = ((INET6_ADDRSTRLEN * sizeof (char)) 2607 + sizeof (cm_kstat_template)); 2608 cm_entry->x_ksp->ks_data = kmem_alloc(cm_entry->x_ksp->ks_data_size, 2609 KM_SLEEP); 2610 bcopy(&cm_kstat_template, cm_entry->x_ksp->ks_data, 2611 cm_entry->x_ksp->ks_data_size); 2612 ((struct cm_kstat_xprt *)(cm_entry->x_ksp->ks_data))-> 2613 x_server.value.str.addr.ptr = 2614 kmem_alloc(INET6_ADDRSTRLEN, KM_SLEEP); 2615 2616 cm_entry->x_ksp->ks_update = conn_kstat_update; 2617 kstat_install(cm_entry->x_ksp); 2618 return (TRUE); 2619 } 2620 2621 /* 2622 * Called by connmgr_connect to set an option on the new stream. 2623 */ 2624 static bool_t 2625 connmgr_setopt(queue_t *wq, int level, int name, calllist_t *e) 2626 { 2627 mblk_t *mp; 2628 struct opthdr *opt; 2629 struct T_optmgmt_req *tor; 2630 struct timeval waitp; 2631 int error; 2632 2633 mp = allocb(sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) + 2634 sizeof (int), BPRI_LO); 2635 if (mp == NULL) { 2636 RPCLOG0(1, "connmgr_setopt: cannot alloc mp for option " 2637 "request\n"); 2638 return (FALSE); 2639 } 2640 2641 mp->b_datap->db_type = M_PROTO; 2642 tor = (struct T_optmgmt_req *)(mp->b_rptr); 2643 tor->PRIM_type = T_SVR4_OPTMGMT_REQ; 2644 tor->MGMT_flags = T_NEGOTIATE; 2645 tor->OPT_length = sizeof (struct opthdr) + sizeof (int); 2646 tor->OPT_offset = sizeof (struct T_optmgmt_req); 2647 2648 opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req)); 2649 opt->level = level; 2650 opt->name = name; 2651 opt->len = sizeof (int); 2652 *(int *)((char *)opt + sizeof (*opt)) = 1; 2653 mp->b_wptr += sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) + 2654 sizeof (int); 2655 2656 /* 2657 * We will use this connection regardless 2658 * of whether or not the option is settable. 2659 */ 2660 clnt_dispatch_send(wq, mp, e, 0, 0); 2661 mutex_enter(&clnt_pending_lock); 2662 2663 waitp.tv_sec = clnt_cots_min_conntout; 2664 waitp.tv_usec = 0; 2665 error = waitforack(e, T_OPTMGMT_ACK, &waitp, 1); 2666 2667 if (e->call_prev) 2668 e->call_prev->call_next = e->call_next; 2669 else 2670 clnt_pending = e->call_next; 2671 if (e->call_next) 2672 e->call_next->call_prev = e->call_prev; 2673 mutex_exit(&clnt_pending_lock); 2674 2675 if (e->call_reply != NULL) { 2676 freemsg(e->call_reply); 2677 e->call_reply = NULL; 2678 } 2679 2680 if (e->call_status != RPC_SUCCESS || error != 0) { 2681 RPCLOG(1, "connmgr_setopt: can't set option: %d\n", name); 2682 return (FALSE); 2683 } 2684 RPCLOG(8, "connmgr_setopt: successfully set option: %d\n", name); 2685 return (TRUE); 2686 } 2687 2688 #ifdef DEBUG 2689 2690 /* 2691 * This is a knob to let us force code coverage in allocation failure 2692 * case. 2693 */ 2694 static int connmgr_failsnd; 2695 #define CONN_SND_ALLOC(Size, Pri) \ 2696 ((connmgr_failsnd-- > 0) ? NULL : allocb(Size, Pri)) 2697 2698 #else 2699 2700 #define CONN_SND_ALLOC(Size, Pri) allocb(Size, Pri) 2701 2702 #endif 2703 2704 /* 2705 * Sends an orderly release on the specified queue. 2706 * Entered with connmgr_lock. 
Exited without connmgr_lock.
2707  */
2708 static void
2709 connmgr_sndrel(struct cm_xprt *cm_entry)
2710 {
2711 	struct T_ordrel_req *torr;
2712 	mblk_t *mp;
2713 	queue_t *q = cm_entry->x_wq;
2714 	ASSERT(MUTEX_HELD(&connmgr_lock));
2715 	mp = CONN_SND_ALLOC(sizeof (struct T_ordrel_req), BPRI_LO);
2716 	if (mp == NULL) {
2717 		cm_entry->x_needrel = TRUE;
2718 		mutex_exit(&connmgr_lock);
2719 		RPCLOG(1, "connmgr_sndrel: cannot alloc mp for sending ordrel "
2720 		    "to queue %p\n", (void *)q);
2721 		return;
2722 	}
2723 	mutex_exit(&connmgr_lock);
2724 
2725 	mp->b_datap->db_type = M_PROTO;
2726 	torr = (struct T_ordrel_req *)(mp->b_rptr);
2727 	torr->PRIM_type = T_ORDREL_REQ;
2728 	mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_req);
2729 
2730 	RPCLOG(8, "connmgr_sndrel: sending ordrel to queue %p\n", (void *)q);
2731 	put(q, mp);
2732 }
2733 
2734 /*
2735  * Sends a disconnect on the specified queue.
2736  * Entered with connmgr_lock. Exited without connmgr_lock.
2737  */
2738 static void
2739 connmgr_snddis(struct cm_xprt *cm_entry)
2740 {
2741 	struct T_discon_req *tdis;
2742 	mblk_t *mp;
2743 	queue_t *q = cm_entry->x_wq;
2744 
2745 	ASSERT(MUTEX_HELD(&connmgr_lock));
2746 	mp = CONN_SND_ALLOC(sizeof (*tdis), BPRI_LO);
2747 	if (mp == NULL) {
2748 		cm_entry->x_needdis = TRUE;
2749 		mutex_exit(&connmgr_lock);
2750 		RPCLOG(1, "connmgr_snddis: cannot alloc mp for sending discon "
2751 		    "to queue %p\n", (void *)q);
2752 		return;
2753 	}
2754 	mutex_exit(&connmgr_lock);
2755 
2756 	mp->b_datap->db_type = M_PROTO;
2757 	tdis = (struct T_discon_req *)mp->b_rptr;
2758 	tdis->PRIM_type = T_DISCON_REQ;
2759 	mp->b_wptr = mp->b_rptr + sizeof (*tdis);
2760 
2761 	RPCLOG(8, "connmgr_snddis: sending discon to queue %p\n", (void *)q);
2762 	put(q, mp);
2763 }
2764 
2765 /*
2766  * Sets up the entry for receiving replies, and calls rpcmod's write put proc
2767  * (through put) to send the call.
2768  */
2769 static void
2770 clnt_dispatch_send(queue_t *q, mblk_t *mp, calllist_t *e, uint_t xid,
2771 	uint_t queue_flag)
2772 {
2773 	ASSERT(e != NULL);
2774 
2775 	e->call_status = RPC_TIMEDOUT;	/* optimistic, eh? */
2776 	e->call_reason = 0;
2777 	e->call_wq = q;
2778 	e->call_xid = xid;
2779 	e->call_notified = FALSE;
2780 
2781 	/*
2782 	 * If queue_flag is set then the calllist_t is already on the hash
2783 	 * queue. In this case just send the message and return.
2784 	 */
2785 	if (queue_flag) {
2786 		put(q, mp);
2787 		return;
2788 	}
2789 
2790 	/*
2791 	 * Set up calls for RPC requests (with XID != 0) on the hash
2792 	 * queue for fast lookups and place other calls (i.e.
2793 	 * connection management) on the linked list.
2794 	 */
2795 	if (xid != 0) {
2796 		RPCLOG(64, "clnt_dispatch_send: putting xid 0x%x on "
2797 		    "dispatch list\n", xid);
2798 		e->call_hash = call_hash(xid, clnt_cots_hash_size);
2799 		e->call_bucket = &cots_call_ht[e->call_hash];
2800 		call_table_enter(e);
2801 	} else {
2802 		mutex_enter(&clnt_pending_lock);
2803 		if (clnt_pending)
2804 			clnt_pending->call_prev = e;
2805 		e->call_next = clnt_pending;
2806 		e->call_prev = NULL;
2807 		clnt_pending = e;
2808 		mutex_exit(&clnt_pending_lock);
2809 	}
2810 
2811 	put(q, mp);
2812 }
2813 
2814 /*
2815  * Called by rpcmod to notify a client with a clnt_pending call that its reply
2816  * has arrived. If we can't find a client waiting for this reply, we log
2817  * the error and return.
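 *
 * For illustration, the fast path below is simply (a sketch; the
 * byte-by-byte fallback handles an xid that straddles mblks):
 *
 *	xid = *(uint32_t *)mp->b_rptr;
 *	hash = call_hash(xid, clnt_cots_hash_size);
 *	call_table_find(&cots_call_ht[hash], xid, e);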
2818 */ 2819 bool_t 2820 clnt_dispatch_notify(mblk_t *mp, zoneid_t zoneid) 2821 { 2822 calllist_t *e = NULL; 2823 call_table_t *chtp; 2824 uint32_t xid; 2825 uint_t hash; 2826 2827 if ((IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) && 2828 (mp->b_wptr - mp->b_rptr) >= sizeof (xid)) 2829 xid = *((uint32_t *)mp->b_rptr); 2830 else { 2831 int i = 0; 2832 unsigned char *p = (unsigned char *)&xid; 2833 unsigned char *rptr; 2834 mblk_t *tmp = mp; 2835 2836 /* 2837 * Copy the xid, byte-by-byte into xid. 2838 */ 2839 while (tmp) { 2840 rptr = tmp->b_rptr; 2841 while (rptr < tmp->b_wptr) { 2842 *p++ = *rptr++; 2843 if (++i >= sizeof (xid)) 2844 goto done_xid_copy; 2845 } 2846 tmp = tmp->b_cont; 2847 } 2848 2849 /* 2850 * If we got here, we ran out of mblk space before the 2851 * xid could be copied. 2852 */ 2853 ASSERT(tmp == NULL && i < sizeof (xid)); 2854 2855 RPCLOG0(1, 2856 "clnt_dispatch_notify: message less than size of xid\n"); 2857 return (FALSE); 2858 2859 } 2860 done_xid_copy: 2861 2862 hash = call_hash(xid, clnt_cots_hash_size); 2863 chtp = &cots_call_ht[hash]; 2864 /* call_table_find returns with the hash bucket locked */ 2865 call_table_find(chtp, xid, e); 2866 2867 if (e != NULL) { 2868 /* 2869 * Found thread waiting for this reply 2870 */ 2871 mutex_enter(&e->call_lock); 2872 if (e->call_reply) 2873 /* 2874 * This can happen under the following scenario: 2875 * clnt_cots_kcallit() times out on the response, 2876 * rfscall() repeats the CLNT_CALL() with 2877 * the same xid, clnt_cots_kcallit() sends the retry, 2878 * thereby putting the clnt handle on the pending list, 2879 * the first response arrives, signalling the thread 2880 * in clnt_cots_kcallit(). Before that thread is 2881 * dispatched, the second response arrives as well, 2882 * and clnt_dispatch_notify still finds the handle on 2883 * the pending list, with call_reply set. So free the 2884 * old reply now. 2885 * 2886 * It is also possible for a response intended for 2887 * an RPC call with a different xid to reside here. 2888 * This can happen if the thread that owned this 2889 * client handle prior to the current owner bailed 2890 * out and left its call record on the dispatch 2891 * queue. A window exists where the response can 2892 * arrive before the current owner dispatches its 2893 * RPC call. 2894 * 2895 * In any case, this is the very last point where we 2896 * can safely check the call_reply field before 2897 * placing the new response there. 2898 */ 2899 freemsg(e->call_reply); 2900 e->call_reply = mp; 2901 e->call_status = RPC_SUCCESS; 2902 e->call_notified = TRUE; 2903 cv_signal(&e->call_cv); 2904 mutex_exit(&e->call_lock); 2905 mutex_exit(&chtp->ct_lock); 2906 return (TRUE); 2907 } else { 2908 zone_t *zone; 2909 struct rpcstat *rpcstat; 2910 2911 mutex_exit(&chtp->ct_lock); 2912 RPCLOG(65, "clnt_dispatch_notify: no caller for reply 0x%x\n", 2913 xid); 2914 /* 2915 * This is unfortunate, but we need to lookup the zone so we 2916 * can increment its "rcbadxids" counter. 2917 */ 2918 zone = zone_find_by_id(zoneid); 2919 if (zone == NULL) { 2920 /* 2921 * The zone went away... 2922 */ 2923 return (FALSE); 2924 } 2925 rpcstat = zone_getspecific(rpcstat_zone_key, zone); 2926 if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN) { 2927 /* 2928 * Not interested 2929 */ 2930 zone_rele(zone); 2931 return (FALSE); 2932 } 2933 COTSRCSTAT_INCR(rpcstat->rpc_cots_client, rcbadxids); 2934 zone_rele(zone); 2935 } 2936 return (FALSE); 2937 } 2938 2939 /* 2940 * Called by rpcmod when a non-data indication arrives. 
The ones in which we
2941  * are interested are connection indications and option acks. We dispatch
2942  * based on the queue the indication came in on. If we are not interested in
2943  * what came in, we return false to rpcmod, who will then pass it upstream.
2944  */
2945 bool_t
2946 clnt_dispatch_notifyconn(queue_t *q, mblk_t *mp)
2947 {
2948 	calllist_t *e;
2949 	int type;
2950 
2951 	ASSERT((q->q_flag & QREADR) == 0);
2952 
2953 	type = ((union T_primitives *)mp->b_rptr)->type;
2954 	RPCLOG(8, "clnt_dispatch_notifyconn: prim type: [%s]\n",
2955 	    rpc_tpiprim2name(type));
2956 	mutex_enter(&clnt_pending_lock);
2957 	for (e = clnt_pending; /* NO CONDITION */; e = e->call_next) {
2958 		if (e == NULL) {
2959 			mutex_exit(&clnt_pending_lock);
2960 			RPCLOG(1, "clnt_dispatch_notifyconn: no one waiting "
2961 			    "for connection on queue 0x%p\n", (void *)q);
2962 			return (FALSE);
2963 		}
2964 		if (e->call_wq == q)
2965 			break;
2966 	}
2967 
2968 	switch (type) {
2969 	case T_CONN_CON:
2970 		/*
2971 		 * The transport is now connected, send a T_INFO_REQ to get
2972 		 * the tidu size.
2973 		 */
2974 		mutex_exit(&clnt_pending_lock);
2975 		ASSERT(mp->b_datap->db_lim - mp->b_datap->db_base >=
2976 		    sizeof (struct T_info_req));
2977 		mp->b_rptr = mp->b_datap->db_base;
2978 		((union T_primitives *)mp->b_rptr)->type = T_INFO_REQ;
2979 		mp->b_wptr = mp->b_rptr + sizeof (struct T_info_req);
2980 		mp->b_datap->db_type = M_PCPROTO;
2981 		put(q, mp);
2982 		return (TRUE);
2983 	case T_INFO_ACK:
2984 	case T_OPTMGMT_ACK:
2985 		e->call_status = RPC_SUCCESS;
2986 		e->call_reply = mp;
2987 		e->call_notified = TRUE;
2988 		cv_signal(&e->call_cv);
2989 		break;
2990 	case T_ERROR_ACK:
2991 		e->call_status = RPC_CANTCONNECT;
2992 		e->call_reply = mp;
2993 		e->call_notified = TRUE;
2994 		cv_signal(&e->call_cv);
2995 		break;
2996 	case T_OK_ACK:
2997 		/*
2998 		 * Great, but we are really waiting for a T_CONN_CON.
2999 		 */
3000 		freemsg(mp);
3001 		break;
3002 	default:
3003 		mutex_exit(&clnt_pending_lock);
3004 		RPCLOG(1, "clnt_dispatch_notifyconn: bad type %d\n", type);
3005 		return (FALSE);
3006 	}
3007 
3008 	mutex_exit(&clnt_pending_lock);
3009 	return (TRUE);
3010 }
3011 
3012 /*
3013  * Called by rpcmod when the transport is (or should be) going away. Informs
3014  * all callers waiting for replies and marks the entry in the connection
3015  * manager's list as unconnected, and either closing (close handshake in
3016  * progress) or dead.
3017  */
3018 void
3019 clnt_dispatch_notifyall(queue_t *q, int32_t msg_type, int32_t reason)
3020 {
3021 	calllist_t *e;
3022 	call_table_t *ctp;
3023 	struct cm_xprt *cm_entry;
3024 	int have_connmgr_lock;
3025 	int i;
3026 
3027 	ASSERT((q->q_flag & QREADR) == 0);
3028 
3029 	RPCLOG(1, "clnt_dispatch_notifyall on queue %p", (void *)q);
3030 	RPCLOG(1, " received a notification prim type [%s]",
3031 	    rpc_tpiprim2name(msg_type));
3032 	RPCLOG(1, " and reason %d\n", reason);
3033 
3034 	/*
3035 	 * Find the transport entry in the connection manager's list, close
3036 	 * the transport and delete the entry. In the case where rpcmod's
3037 	 * idle timer goes off, it sends us a T_ORDREL_REQ, indicating we
3038 	 * should gracefully close the connection.
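	 *
	 * A rough map of how the primitives are handled below (a summary
	 * only; the switch statement is the authority):
	 *
	 *	T_ORDREL_REQ	idle timer fired: orderly release if the
	 *			connection is up, otherwise just stop the
	 *			idle timer
	 *	T_ORDREL_IND	peer is closing: complete the close
	 *			handshake, or answer with an abortive
	 *			T_DISCON_REQ
	 *	T_ERROR_ACK,	disconnect acks: wake the thread blocked
	 *	T_OK_ACK	in connmgr_dis_and_wait() and return
	 *	T_DISCON_REQ,	connection is gone: mark it unconnected
	 *	T_DISCON_IND	(or x_early_disc if a connect is in flight)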
3039 	 */
3040 	have_connmgr_lock = 1;
3041 	mutex_enter(&connmgr_lock);
3042 	for (cm_entry = cm_hd; cm_entry; cm_entry = cm_entry->x_next) {
3043 		ASSERT(cm_entry != cm_entry->x_next);
3044 		if (cm_entry->x_wq == q) {
3045 			ASSERT(MUTEX_HELD(&connmgr_lock));
3046 			ASSERT(have_connmgr_lock == 1);
3047 			switch (msg_type) {
3048 			case T_ORDREL_REQ:
3049 
3050 				if (cm_entry->x_dead) {
3051 					RPCLOG(1, "idle timeout on dead "
3052 					    "connection: %p\n",
3053 					    (void *)cm_entry);
3054 					if (clnt_stop_idle != NULL)
3055 						(*clnt_stop_idle)(q);
3056 					break;
3057 				}
3058 
3059 				/*
3060 				 * Only mark the connection as dead if it is
3061 				 * connected and idle.
3062 				 * An unconnected connection has probably
3063 				 * gone idle because the server is down,
3064 				 * and when it comes back up there will be
3065 				 * retries that need to use that connection.
3066 				 */
3067 				if (cm_entry->x_connected ||
3068 				    cm_entry->x_doomed) {
3069 					if (cm_entry->x_ordrel) {
3070 						if (cm_entry->x_closing == TRUE) {
3071 							/*
3072 							 * The connection is obviously
3073 							 * wedged due to a bug or problem
3074 							 * with the transport. Mark it
3075 							 * as dead. Otherwise we can leak
3076 							 * connections.
3077 							 */
3078 							cm_entry->x_dead = TRUE;
3079 							mutex_exit(&connmgr_lock);
3080 							have_connmgr_lock = 0;
3081 							if (clnt_stop_idle != NULL)
3082 								(*clnt_stop_idle)(q);
3083 							break;
3084 						}
3085 						cm_entry->x_closing = TRUE;
3086 						connmgr_sndrel(cm_entry);
3087 						have_connmgr_lock = 0;
3088 					} else {
3089 						cm_entry->x_dead = TRUE;
3090 						mutex_exit(&connmgr_lock);
3091 						have_connmgr_lock = 0;
3092 						if (clnt_stop_idle != NULL)
3093 							(*clnt_stop_idle)(q);
3094 					}
3095 				} else {
3096 					/*
3097 					 * We don't mark the connection
3098 					 * as dead, but we turn off the
3099 					 * idle timer.
3100 					 */
3101 					mutex_exit(&connmgr_lock);
3102 					have_connmgr_lock = 0;
3103 					if (clnt_stop_idle != NULL)
3104 						(*clnt_stop_idle)(q);
3105 					RPCLOG(1, "clnt_dispatch_notifyall:"
3106 					    " ignoring timeout from rpcmod"
3107 					    " (q %p) because we are not"
3108 					    " connected\n", (void *)q);
3109 				}
3110 				break;
3111 			case T_ORDREL_IND:
3112 				/*
3113 				 * If this entry is marked closing, then we are
3114 				 * completing a close handshake, and the
3115 				 * connection is dead. Otherwise, the server is
3116 				 * trying to close. Since the server will not
3117 				 * be sending any more RPC replies, we abort
3118 				 * the connection, including flushing
3119 				 * any RPC requests that are in-transit.
3120 				 */
3121 				if (cm_entry->x_closing) {
3122 					cm_entry->x_dead = TRUE;
3123 					mutex_exit(&connmgr_lock);
3124 					have_connmgr_lock = 0;
3125 					if (clnt_stop_idle != NULL)
3126 						(*clnt_stop_idle)(q);
3127 				} else {
3128 					/*
3129 					 * if we're getting a disconnect
3130 					 * before we've finished our
3131 					 * connect attempt, mark it for
3132 					 * later processing
3133 					 */
3134 					if (cm_entry->x_thread)
3135 						cm_entry->x_early_disc = TRUE;
3136 					else
3137 						cm_entry->x_connected = FALSE;
3138 					cm_entry->x_waitdis = TRUE;
3139 					connmgr_snddis(cm_entry);
3140 					have_connmgr_lock = 0;
3141 				}
3142 				break;
3143 
3144 			case T_ERROR_ACK:
3145 			case T_OK_ACK:
3146 				cm_entry->x_waitdis = FALSE;
3147 				cv_signal(&cm_entry->x_dis_cv);
3148 				mutex_exit(&connmgr_lock);
3149 				return;
3150 
3151 			case T_DISCON_REQ:
3152 				if (cm_entry->x_thread)
3153 					cm_entry->x_early_disc = TRUE;
3154 				else
3155 					cm_entry->x_connected = FALSE;
3156 				cm_entry->x_waitdis = TRUE;
3157 
3158 				connmgr_snddis(cm_entry);
3159 				have_connmgr_lock = 0;
3160 				break;
3161 
3162 			case T_DISCON_IND:
3163 			default:
3164 				/*
3165 				 * if we're getting a disconnect before
3166 				 * we've finished our connect attempt,
3167 				 * mark it for later processing
3168 				 */
3169 				if (cm_entry->x_closing) {
3170 					cm_entry->x_dead = TRUE;
3171 					mutex_exit(&connmgr_lock);
3172 					have_connmgr_lock = 0;
3173 					if (clnt_stop_idle != NULL)
3174 						(*clnt_stop_idle)(q);
3175 				} else {
3176 					if (cm_entry->x_thread) {
3177 						cm_entry->x_early_disc = TRUE;
3178 					} else {
3179 						cm_entry->x_dead = TRUE;
3180 						cm_entry->x_connected = FALSE;
3181 					}
3182 				}
3183 				break;
3184 			}
3185 			break;
3186 		}
3187 	}
3188 
3189 	if (have_connmgr_lock)
3190 		mutex_exit(&connmgr_lock);
3191 
3192 	if (msg_type == T_ERROR_ACK || msg_type == T_OK_ACK) {
3193 		RPCLOG(1, "clnt_dispatch_notifyall: (wq %p) could not find "
3194 		    "connmgr entry for discon ack\n", (void *)q);
3195 		return;
3196 	}
3197 
3198 	/*
3199 	 * Then kick all the clnt_pending calls out of their wait. There
3200 	 * should be no clnt_pending calls in the case of rpcmod's idle
3201 	 * timer firing.
3202 	 */
3203 	for (i = 0; i < clnt_cots_hash_size; i++) {
3204 		ctp = &cots_call_ht[i];
3205 		mutex_enter(&ctp->ct_lock);
3206 		for (e = ctp->ct_call_next;
3207 		    e != (calllist_t *)ctp;
3208 		    e = e->call_next) {
3209 			if (e->call_wq == q && e->call_notified == FALSE) {
3210 				RPCLOG(1,
3211 				    "clnt_dispatch_notifyall for queue %p ",
3212 				    (void *)q);
3213 				RPCLOG(1, "aborting clnt_pending call %p\n",
3214 				    (void *)e);
3215 
3216 				if (msg_type == T_DISCON_IND)
3217 					e->call_reason = reason;
3218 				e->call_notified = TRUE;
3219 				e->call_status = RPC_XPRTFAILED;
3220 				cv_signal(&e->call_cv);
3221 			}
3222 		}
3223 		mutex_exit(&ctp->ct_lock);
3224 	}
3225 
3226 	mutex_enter(&clnt_pending_lock);
3227 	for (e = clnt_pending; e; e = e->call_next) {
3228 		/*
3229 		 * Only signal those RPC handles that haven't been
3230 		 * signalled yet. Otherwise we can get a bogus call_reason.
3231 		 * This can happen if thread A is making a call over a
3232 		 * connection. If the server is killed, it will cause a
3233 		 * reset, and reason will default to EIO as a result of
3234 		 * a T_ORDREL_IND. Thread B then attempts to recreate
3235 		 * the connection but gets a T_DISCON_IND. If we set the
3236 		 * call_reason code for all threads, then if thread A
3237 		 * hasn't been dispatched yet, it will get the wrong
3238 		 * reason. The bogus call_reason can make it harder to
3239 		 * discriminate between calls that fail because the
3240 		 * connection attempt failed versus those where the call
3241 		 * may have been executed on the server.
3242 		 */
3243 		if (e->call_wq == q && e->call_notified == FALSE) {
3244 			RPCLOG(1, "clnt_dispatch_notifyall for queue %p ",
3245 			    (void *)q);
3246 			RPCLOG(1, " aborting clnt_pending call %p\n",
3247 			    (void *)e);
3248 
3249 			if (msg_type == T_DISCON_IND)
3250 				e->call_reason = reason;
3251 			e->call_notified = TRUE;
3252 			/*
3253 			 * Let the caller time out, else it will retry
3254 			 * immediately.
3255 			 */
3256 			e->call_status = RPC_XPRTFAILED;
3257 
3258 			/*
3259 			 * We used to just signal those threads
3260 			 * waiting for a connection (call_xid = 0).
3261 			 * That meant that threads waiting for a response
3262 			 * waited till their timeout expired. This
3263 			 * could be a long time if they've specified a
3264 			 * maximum timeout (2^31 - 1). So we
3265 			 * signal all threads now.
3266 			 */
3267 			cv_signal(&e->call_cv);
3268 		}
3269 	}
3270 	mutex_exit(&clnt_pending_lock);
3271 }
3272 
3273 
3274 /*ARGSUSED*/
3275 /*
3276  * after resuming a system that's been suspended for longer than the
3277  * NFS server's idle timeout (svc_idle_timeout for Solaris 2), rfscall()
3278  * generates "NFS server X not responding" and "NFS server X ok" messages;
3279  * here we reset inet connections to cause a re-connect and avoid those
3280  * NFS messages. see 4045054
3281  */
3282 boolean_t
3283 connmgr_cpr_reset(void *arg, int code)
3284 {
3285 	struct cm_xprt *cxp;
3286 
3287 	if (code == CB_CODE_CPR_CHKPT)
3288 		return (B_TRUE);
3289 
3290 	if (mutex_tryenter(&connmgr_lock) == 0)
3291 		return (B_FALSE);
3292 	for (cxp = cm_hd; cxp; cxp = cxp->x_next) {
3293 		if ((cxp->x_family == AF_INET || cxp->x_family == AF_INET6) &&
3294 		    cxp->x_connected == TRUE) {
3295 			if (cxp->x_thread)
3296 				cxp->x_early_disc = TRUE;
3297 			else
3298 				cxp->x_connected = FALSE;
3299 			cxp->x_needdis = TRUE;
3300 		}
3301 	}
3302 	mutex_exit(&connmgr_lock);
3303 	return (B_TRUE);
3304 }
3305 
3306 void
3307 clnt_cots_stats_init(zoneid_t zoneid, struct rpc_cots_client **statsp)
3308 {
3309 
3310 	*statsp = (struct rpc_cots_client *)rpcstat_zone_init_common(zoneid,
3311 	    "unix", "rpc_cots_client", (const kstat_named_t *)&cots_rcstat_tmpl,
3312 	    sizeof (cots_rcstat_tmpl));
3313 }
3314 
3315 void
3316 clnt_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_client **statsp)
3317 {
3318 	rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_client");
3319 	kmem_free(*statsp, sizeof (cots_rcstat_tmpl));
3320 }
3321 
3322 void
3323 clnt_cots_init(void)
3324 {
3325 	mutex_init(&connmgr_lock, NULL, MUTEX_DEFAULT, NULL);
3326 	mutex_init(&clnt_pending_lock, NULL, MUTEX_DEFAULT, NULL);
3327 
3328 	if (clnt_cots_hash_size < DEFAULT_MIN_HASH_SIZE)
3329 		clnt_cots_hash_size = DEFAULT_MIN_HASH_SIZE;
3330 
3331 	cots_call_ht = call_table_init(clnt_cots_hash_size);
3332 	zone_key_create(&zone_cots_key, NULL, NULL, clnt_zone_destroy);
3333 }
3334 
3335 void
3336 clnt_cots_fini(void)
3337 {
3338 	(void) zone_key_delete(zone_cots_key);
3339 }
3340 
3341 /*
3342  * Wait for TPI ack; returns success only if the expected ack is received
3343  * within the timeout period.
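 *
 * A sketch of the expected calling pattern (illustrative; see
 * connmgr_connect() and connmgr_setopt() for the real callers):
 *
 *	clnt_dispatch_send(wq, mp, e, 0, 0);
 *	mutex_enter(&clnt_pending_lock);
 *	error = waitforack(e, T_INFO_ACK, waitp, nosignal);
 *	... unlink e from clnt_pending, then drop clnt_pending_lock ...
 *
 * The caller must hold clnt_pending_lock across the call, and any reply
 * mblk is left in e->call_reply for the caller to consume or free.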
3344 */ 3345 3346 static int 3347 waitforack(calllist_t *e, t_scalar_t ack_prim, const struct timeval *waitp, 3348 bool_t nosignal) 3349 { 3350 union T_primitives *tpr; 3351 clock_t timout; 3352 int cv_stat = 1; 3353 3354 ASSERT(MUTEX_HELD(&clnt_pending_lock)); 3355 while (e->call_reply == NULL) { 3356 if (waitp != NULL) { 3357 timout = waitp->tv_sec * drv_usectohz(MICROSEC) + 3358 drv_usectohz(waitp->tv_usec) + lbolt; 3359 if (nosignal) 3360 cv_stat = cv_timedwait(&e->call_cv, 3361 &clnt_pending_lock, timout); 3362 else 3363 cv_stat = cv_timedwait_sig(&e->call_cv, 3364 &clnt_pending_lock, timout); 3365 } else { 3366 if (nosignal) 3367 cv_wait(&e->call_cv, &clnt_pending_lock); 3368 else 3369 cv_stat = cv_wait_sig(&e->call_cv, 3370 &clnt_pending_lock); 3371 } 3372 if (cv_stat == -1) 3373 return (ETIME); 3374 if (cv_stat == 0) 3375 return (EINTR); 3376 } 3377 tpr = (union T_primitives *)e->call_reply->b_rptr; 3378 if (tpr->type == ack_prim) 3379 return (0); /* Success */ 3380 3381 if (tpr->type == T_ERROR_ACK) { 3382 if (tpr->error_ack.TLI_error == TSYSERR) 3383 return (tpr->error_ack.UNIX_error); 3384 else 3385 return (t_tlitosyserr(tpr->error_ack.TLI_error)); 3386 } 3387 3388 return (EPROTO); /* unknown or unexpected primitive */ 3389 } 3390