/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
 * All Rights Reserved
 */

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Implements a kernel based, client side RPC over Connection Oriented
 * Transports (COTS).
 */

/*
 * Much of this file has been re-written to let NFS work better over slow
 * transports. A description follows.
 *
 * One of the annoying things about kRPC/COTS is that it will temporarily
 * create more than one connection between a client and server. This
 * happens because when a connection is made, the end-point's entry in the
 * linked list of connections (headed by cm_hd) is removed so that other
 * threads don't mess with it. We went ahead and bit the bullet by keeping
 * the end-point on the connection list and introducing state bits,
 * condition variables, etc. to the connection entry data structure (struct
 * cm_xprt).
 *
 * Here is a summary of the changes to cm_xprt:
 *
 *	x_ctime is the timestamp of when the end-point was last
 *	connected or disconnected. If an end-point is ever disconnected
 *	or re-connected, then any outstanding RPC request is presumed
 *	lost, telling clnt_cots_kcallit that it needs to re-send the
 *	request, not just wait for the original request's reply to
 *	arrive.
 *
 *	x_thread flag which tells us if a thread is doing a connection attempt.
 *
 *	x_waitdis flag which tells us we are waiting for a disconnect ACK.
 *
 *	x_needdis flag which tells us we need to send a T_DISCONN_REQ
 *	to kill the connection.
 *
 *	x_needrel flag which tells us we need to send a T_ORDREL_REQ to
 *	gracefully close the connection.
 *
 *	#defined bitmasks for all the b_* bits so that more
 *	efficient (and at times less clumsy) masks can be used to
 *	manipulate state in cases where multiple bits have to be
 *	set/cleared/checked in the same critical section.
 *
 *	x_conn_cv and x_dis_cv are new condition variables to let
 *	threads know when the connection attempt is done, and to let
 *	the connecting thread know when the disconnect handshake is
 *	done.
 *
 * Added the CONN_HOLD() macro so that all reference holds have the same
 * look and feel.
 *
 * In the private (cku_private) portion of the client handle,
 *
 *	cku_flags replaces cku_sent, a boolean.
 *	cku_flags keeps track of whether a request has been sent, and
 *	whether the client handle's call record is on the dispatch list
 *	(so that the reply can be matched by XID to the right client
 *	handle). The idea of CKU_ONQUEUE is that we can exit
 *	clnt_cots_kcallit() and still have the response find the right
 *	client handle so that the retry of CLNT_CALL() gets the result.
 *	Testing found situations where if the timeout was increased,
 *	performance degraded. This was due to us hitting a window where
 *	the thread was back in rfscall() (probably printing "server not
 *	responding") while the response came back but had no place to go.
 *
 *	cku_ctime is just a cache of x_ctime. If they match,
 *	clnt_cots_kcallit() won't send a retry (unless the maximum
 *	receive count limit has been reached). If they don't match, then
 *	we assume the request has been lost, and a retry of the request
 *	is needed.
 *
 *	cku_recv_attempts counts the number of receive attempts made
 *	after one try is sent on the wire.
 *
 * Added the clnt_delay() routine so that interruptible and
 * noninterruptible delays are possible.
 *
 * CLNT_MIN_TIMEOUT has been bumped to 10 seconds from 3. This is used to
 * control how long the client delays before returning after getting
 * ECONNREFUSED. At 3 seconds, 8 client threads per mount really do bash
 * a server that may be booting and not yet have started nfsd.
 *
 * CLNT_MAXRECV_WITHOUT_RETRY is a new macro (value of 3) (with a tunable).
 * Why don't we just wait forever (receive an infinite # of times)?
 * Because the server may have rebooted. More insidious is that some
 * servers (ours) will drop NFS/TCP requests in some cases. This is bad,
 * but it is a reality.
 *
 * The case of a server doing an orderly release really messes up the
 * client's recovery, especially if the server's TCP implementation is
 * buggy. It was found that the kRPC/COTS client was breaking some
 * TPI rules, such as not waiting for the acknowledgement of a
 * T_DISCON_REQ (hence the added case statements T_ERROR_ACK, T_OK_ACK and
 * T_DISCON_REQ in clnt_dispatch_notifyall()).
 *
 * One of the things that we've seen is that a kRPC TCP endpoint goes into
 * TIMEWAIT and thus a reconnect takes a long time to satisfy because
 * the TIMEWAIT state takes a while to finish. If a server sends a
 * T_ORDREL_IND, there is little point in an RPC client doing a
 * T_ORDREL_REQ, because the RPC request isn't going to make it (the
 * server is saying that it won't accept any more data). So kRPC was
 * changed to send a T_DISCON_REQ when we get a T_ORDREL_IND. Now the
 * connection skips the TIMEWAIT state and goes straight to a bound state
 * that kRPC can quickly switch to connected.
 *
 * Code that issues TPI requests must use waitforack() to wait for the
 * corresponding ack (assuming there is one) in any future modifications.
 * This works around problems that may be introduced by breaking TPI rules
 * (by submitting new calls before earlier requests have been acked) in the
 * case of a signal or other early return. waitforack() depends on
 * clnt_dispatch_notifyconn() to issue the wakeup when the ack
 * arrives, so adding new TPI calls may require corresponding changes
 * to clnt_dispatch_notifyconn(). Presently, the timeout period is based on
 * CLNT_MIN_TIMEOUT, which is 10 seconds. If you modify this value, be sure
 * not to set it too low or TPI ACKs will be lost.
 */
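
/*
 * Illustrative sketch (comment only, not compiled): the resend decision
 * described above reduces, inside clnt_cots_kcallit(), to roughly the
 * following, where cm_entry is the connection handed back by the
 * connection manager:
 *
 *	if ((p->cku_flags & CKU_SENT) == 0 ||
 *	    p->cku_ctime != cm_entry->x_ctime) {
 *		p->cku_ctime = cm_entry->x_ctime;	(re)send the request
 *	} else if (p->cku_recv_attempts < clnt_cots_maxrecv) {
 *		p->cku_recv_attempts++;			just wait for a reply
 *	}
 *
 * See the real logic in clnt_cots_kcallit() below; this is only a summary.
 */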

#include <sys/param.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/file.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/timod.h>
#include <sys/tiuser.h>
#include <sys/tihdr.h>
#include <sys/t_kuser.h>
#include <sys/fcntl.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/kstat.h>
#include <sys/t_lock.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/isa_defs.h>
#include <sys/callb.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <rpc/types.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>

#define	COTS_DEFAULT_ALLOCSIZE	2048

#define	WIRE_HDR_SIZE	20	/* serialized call header, sans proc number */
#define	MSG_OFFSET	128	/* offset of call into the mblk */

const char *kinet_ntop6(uchar_t *, char *, size_t);

static int	clnt_cots_ksettimers(CLIENT *, struct rpc_timers *,
    struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t);
static enum clnt_stat	clnt_cots_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
    caddr_t, xdrproc_t, caddr_t, struct timeval);
static void	clnt_cots_kabort(CLIENT *);
static void	clnt_cots_kerror(CLIENT *, struct rpc_err *);
static bool_t	clnt_cots_kfreeres(CLIENT *, xdrproc_t, caddr_t);
static void	clnt_cots_kdestroy(CLIENT *);
static bool_t	clnt_cots_kcontrol(CLIENT *, int, char *);


/* List of transports managed by the connection manager. */
struct cm_xprt {
	TIUSER		*x_tiptr;	/* transport handle */
	queue_t		*x_wq;		/* send queue */
	clock_t		x_time;		/* last time we handed this xprt out */
	clock_t		x_ctime;	/* time we went to CONNECTED */
	int		x_tidu_size;	/* TIDU size of this transport */
	union {
	    struct {
		unsigned int
#ifdef	_BIT_FIELDS_HTOL
		b_closing:	1,	/* we've sent an ord rel on this conn */
		b_dead:		1,	/* transport is closed or disconn */
		b_doomed:	1,	/* too many conns, let this go idle */
		b_connected:	1,	/* this connection is connected */

		b_ordrel:	1,	/* do an orderly release? */
		b_thread:	1,	/* thread doing connect */
		b_waitdis:	1,	/* waiting for disconnect ACK */
		b_needdis:	1,	/* need T_DISCON_REQ */

		b_needrel:	1,	/* need T_ORDREL_REQ */
		b_early_disc:	1,	/* got a T_ORDREL_IND or T_DISCON_IND */
					/* disconnect during connect */

		b_pad:		22;

#endif

#ifdef	_BIT_FIELDS_LTOH
		b_pad:		22,

		b_early_disc:	1,	/* got a T_ORDREL_IND or T_DISCON_IND */
					/* disconnect during connect */
		b_needrel:	1,	/* need T_ORDREL_REQ */

		b_needdis:	1,	/* need T_DISCON_REQ */
		b_waitdis:	1,	/* waiting for disconnect ACK */
		b_thread:	1,	/* thread doing connect */
		b_ordrel:	1,	/* do an orderly release? */

		b_connected:	1,	/* this connection is connected */
		b_doomed:	1,	/* too many conns, let this go idle */
		b_dead:		1,	/* transport is closed or disconn */
		b_closing:	1;	/* we've sent an ord rel on this conn */
#endif
	    } bit;	unsigned int word;

#define	x_closing	x_state.bit.b_closing
#define	x_dead		x_state.bit.b_dead
#define	x_doomed	x_state.bit.b_doomed
#define	x_connected	x_state.bit.b_connected

#define	x_ordrel	x_state.bit.b_ordrel
#define	x_thread	x_state.bit.b_thread
#define	x_waitdis	x_state.bit.b_waitdis
#define	x_needdis	x_state.bit.b_needdis

#define	x_needrel	x_state.bit.b_needrel
#define	x_early_disc	x_state.bit.b_early_disc

#define	x_state_flags	x_state.word

#define	X_CLOSING	0x80000000
#define	X_DEAD		0x40000000
#define	X_DOOMED	0x20000000
#define	X_CONNECTED	0x10000000

#define	X_ORDREL	0x08000000
#define	X_THREAD	0x04000000
#define	X_WAITDIS	0x02000000
#define	X_NEEDDIS	0x01000000

#define	X_NEEDREL	0x00800000
#define	X_EARLYDISC	0x00400000

#define	X_BADSTATES	(X_CLOSING | X_DEAD | X_DOOMED)

	} x_state;
	int		x_ref;		/* number of users of this xprt */
	int		x_family;	/* address family of transport */
	dev_t		x_rdev;		/* device number of transport */
	struct cm_xprt	*x_next;

	struct netbuf	x_server;	/* destination address */
	struct netbuf	x_src;		/* src address (for retries) */
	kmutex_t	x_lock;		/* lock on this entry */
	kcondvar_t	x_cv;		/* to signal when can be closed */
	kcondvar_t	x_conn_cv;	/* to signal when connection attempt */
					/* is complete */
	kstat_t		*x_ksp;

	kcondvar_t	x_dis_cv;	/* to signal when disconnect attempt */
					/* is complete */
	zoneid_t	x_zoneid;	/* zone this xprt belongs to */
};

typedef struct cm_kstat_xprt {
	kstat_named_t	x_wq;
	kstat_named_t	x_server;
	kstat_named_t	x_family;
	kstat_named_t	x_rdev;
	kstat_named_t	x_time;
	kstat_named_t	x_state;
	kstat_named_t	x_ref;
	kstat_named_t	x_port;
} cm_kstat_xprt_t;

static cm_kstat_xprt_t cm_kstat_template = {
	{ "write_queue", KSTAT_DATA_UINT32 },
	{ "server", KSTAT_DATA_STRING },
	{ "addr_family", KSTAT_DATA_UINT32 },
	{ "device", KSTAT_DATA_UINT32 },
	{ "time_stamp", KSTAT_DATA_UINT32 },
	{ "status", KSTAT_DATA_UINT32 },
	{ "ref_count", KSTAT_DATA_INT32 },
	{ "port", KSTAT_DATA_UINT32 },
};

/*
 * The inverse of this is connmgr_release().
 */
#define	CONN_HOLD(Cm_entry)	{\
	mutex_enter(&(Cm_entry)->x_lock);	\
	(Cm_entry)->x_ref++;	\
	mutex_exit(&(Cm_entry)->x_lock);	\
}
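
/*
 * Usage sketch (comment only, not compiled): every CONN_HOLD() must be
 * balanced by a connmgr_release(), and the X_* masks above let several
 * state bits be tested in one shot, e.g. the idiom used by
 * connmgr_cwait() below:
 *
 *	CONN_HOLD(cm_entry);
 *	if ((cm_entry->x_state_flags & (X_BADSTATES | X_CONNECTED)) ==
 *	    X_CONNECTED)
 *		... the transport is usable ...
 *	connmgr_release(cm_entry);
 *
 * In the code below, composite x_state_flags tests of this kind are made
 * with connmgr_lock held.
 */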


/*
 * Private data per rpc handle.  This structure is allocated by
 * clnt_cots_kcreate, and freed by clnt_cots_kdestroy.
 */
typedef struct cku_private_s {
	CLIENT			cku_client;	/* client handle */
	calllist_t		cku_call;	/* for dispatching calls */
	struct rpc_err		cku_err;	/* error status */

	struct netbuf		cku_srcaddr;	/* source address for retries */
	int			cku_addrfmly;	/* for binding port */
	struct netbuf		cku_addr;	/* remote address */
	dev_t			cku_device;	/* device to use */
	uint_t			cku_flags;
#define	CKU_ONQUEUE		0x1
#define	CKU_SENT		0x2

	bool_t			cku_progress;	/* for CLSET_PROGRESS */
	uint32_t		cku_xid;	/* current XID */
	clock_t			cku_ctime;	/* time stamp of when */
						/* connection was created */
	uint_t			cku_recv_attempts;
	XDR			cku_outxdr;	/* xdr routine for output */
	XDR			cku_inxdr;	/* xdr routine for input */
	char			cku_rpchdr[WIRE_HDR_SIZE + 4];
						/* pre-serialized rpc header */

	uint_t			cku_outbuflen;	/* default output mblk length */
	struct cred		*cku_cred;	/* credentials */
	bool_t			cku_nodelayonerr;
						/* for CLSET_NODELAYONERR */
	int			cku_useresvport; /* Use reserved port */
	struct rpc_cots_client	*cku_stats;	/* stats for zone */
} cku_private_t;

static struct cm_xprt *connmgr_wrapconnect(struct cm_xprt *,
	const struct timeval *, struct netbuf *, int, struct netbuf *,
	struct rpc_err *, bool_t, bool_t);

static bool_t	connmgr_connect(struct cm_xprt *, queue_t *, struct netbuf *,
				int, calllist_t *, int *, bool_t reconnect,
				const struct timeval *, bool_t);

static bool_t	connmgr_setopt(queue_t *, int, int, calllist_t *);
static void	connmgr_sndrel(struct cm_xprt *);
static void	connmgr_snddis(struct cm_xprt *);
static void	connmgr_close(struct cm_xprt *);
static void	connmgr_release(struct cm_xprt *);
static struct cm_xprt *connmgr_wrapget(struct netbuf *, const struct timeval *,
	cku_private_t *);

static struct cm_xprt *connmgr_get(struct netbuf *, const struct timeval *,
	struct netbuf *, int, struct netbuf *, struct rpc_err *, dev_t,
	bool_t, int);

static void connmgr_cancelconn(struct cm_xprt *);
static enum clnt_stat connmgr_cwait(struct cm_xprt *, const struct timeval *,
	bool_t);
static void connmgr_dis_and_wait(struct cm_xprt *);

static void	clnt_dispatch_send(queue_t *, mblk_t *, calllist_t *, uint_t,
					uint_t);

static int clnt_delay(clock_t, bool_t);

static int waitforack(calllist_t *, t_scalar_t, const struct timeval *, bool_t);

/*
 * Operations vector for TCP/IP based RPC
 */
static struct clnt_ops tcp_ops = {
	clnt_cots_kcallit,	/* do rpc call */
	clnt_cots_kabort,	/* abort call */
	clnt_cots_kerror,	/* return error status */
	clnt_cots_kfreeres,	/* free results */
	clnt_cots_kdestroy,	/* destroy rpc handle */
	clnt_cots_kcontrol,	/* the ioctl() of rpc */
	clnt_cots_ksettimers,	/* set retry timers */
};

static int rpc_kstat_instance = 0;	/* keeps the current instance */
					/* number for the next kstat_create */

static struct cm_xprt *cm_hd = NULL;
static kmutex_t connmgr_lock;	/* for connection mngr's list of transports */

extern kmutex_t clnt_max_msg_lock;

static calllist_t *clnt_pending = NULL;
extern kmutex_t clnt_pending_lock;

static int clnt_cots_hash_size = DEFAULT_HASH_SIZE;

static call_table_t *cots_call_ht;

static const struct rpc_cots_client {
	kstat_named_t	rccalls;
	kstat_named_t	rcbadcalls;
	kstat_named_t	rcbadxids;
	kstat_named_t	rctimeouts;
	kstat_named_t	rcnewcreds;
	kstat_named_t	rcbadverfs;
	kstat_named_t	rctimers;
	kstat_named_t	rccantconn;
	kstat_named_t	rcnomem;
	kstat_named_t	rcintrs;
} cots_rcstat_tmpl = {
	{ "calls", KSTAT_DATA_UINT64 },
	{ "badcalls", KSTAT_DATA_UINT64 },
	{ "badxids", KSTAT_DATA_UINT64 },
	{ "timeouts", KSTAT_DATA_UINT64 },
	{ "newcreds", KSTAT_DATA_UINT64 },
	{ "badverfs", KSTAT_DATA_UINT64 },
	{ "timers", KSTAT_DATA_UINT64 },
	{ "cantconn", KSTAT_DATA_UINT64 },
	{ "nomem", KSTAT_DATA_UINT64 },
	{ "interrupts", KSTAT_DATA_UINT64 }
};

#define	COTSRCSTAT_INCR(p, x)	\
	atomic_add_64(&(p)->x.value.ui64, 1)

#define	CLNT_MAX_CONNS	1	/* concurrent connections between clnt/srvr */
static int clnt_max_conns = CLNT_MAX_CONNS;

#define	CLNT_MIN_TIMEOUT	10	/* seconds to wait after we get a */
					/* connection reset */
#define	CLNT_MIN_CONNTIMEOUT	5	/* seconds to wait for a connection */


static int clnt_cots_min_tout = CLNT_MIN_TIMEOUT;
static int clnt_cots_min_conntout = CLNT_MIN_CONNTIMEOUT;

/*
 * Limit the number of times we will attempt to receive a reply without
 * re-sending a request.
 */
#define	CLNT_MAXRECV_WITHOUT_RETRY	3
static uint_t clnt_cots_maxrecv = CLNT_MAXRECV_WITHOUT_RETRY;

uint_t *clnt_max_msg_sizep;
void (*clnt_stop_idle)(queue_t *wq);

#define	ptoh(p)		(&((p)->cku_client))
#define	htop(h)		((cku_private_t *)((h)->cl_private))

/*
 * Times to retry
 */
#define	REFRESHES	2	/* authentication refreshes */

static int clnt_cots_do_bindresvport = 0; /* bind to a non-reserved port */

static zone_key_t zone_cots_key;

/*
 * We need to do this after all kernel threads in the zone have exited.
 */
/* ARGSUSED */
static void
clnt_zone_destroy(zoneid_t zoneid, void *unused)
{
	struct cm_xprt **cmp;
	struct cm_xprt *cm_entry;
	struct cm_xprt *freelist = NULL;

	mutex_enter(&connmgr_lock);
	cmp = &cm_hd;
	while ((cm_entry = *cmp) != NULL) {
		if (cm_entry->x_zoneid == zoneid) {
			*cmp = cm_entry->x_next;
			cm_entry->x_next = freelist;
			freelist = cm_entry;
		} else {
			cmp = &cm_entry->x_next;
		}
	}
	mutex_exit(&connmgr_lock);
	while ((cm_entry = freelist) != NULL) {
		freelist = cm_entry->x_next;
		connmgr_close(cm_entry);
	}
}

int
clnt_cots_kcreate(dev_t dev, struct netbuf *addr, int family, rpcprog_t prog,
	rpcvers_t vers, uint_t max_msgsize, cred_t *cred, CLIENT **ncl)
{
	CLIENT *h;
	cku_private_t *p;
	struct rpc_msg call_msg;
	struct rpcstat *rpcstat;

	RPCLOG(8, "clnt_cots_kcreate: prog %u\n", prog);

	rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone);
	ASSERT(rpcstat != NULL);

	/* Allocate and initialize the client handle. */
	p = kmem_zalloc(sizeof (*p), KM_SLEEP);

	h = ptoh(p);

	h->cl_private = (caddr_t)p;
	h->cl_auth = authkern_create();
	h->cl_ops = &tcp_ops;

	cv_init(&p->cku_call.call_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&p->cku_call.call_lock, NULL, MUTEX_DEFAULT, NULL);
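
	/*
	 * A note on the pattern used just below (and again in
	 * clnt_cots_kinit()): *clnt_max_msg_sizep is tested once without
	 * clnt_max_msg_lock as a cheap first filter, then re-tested under
	 * the lock before being updated, so a concurrent creator with a
	 * smaller size can never shrink the sanity check. Sketch
	 * (comment only, new_size is illustrative):
	 *
	 *	if (new_size > *clnt_max_msg_sizep) {		unlocked peek
	 *		mutex_enter(&clnt_max_msg_lock);
	 *		if (new_size > *clnt_max_msg_sizep)	re-check
	 *			*clnt_max_msg_sizep = new_size;
	 *		mutex_exit(&clnt_max_msg_lock);
	 *	}
	 */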

	/*
	 * If the current sanity check size in rpcmod is smaller
	 * than the size needed, then increase the sanity check.
	 */
	if (max_msgsize != 0 && clnt_max_msg_sizep != NULL &&
	    max_msgsize > *clnt_max_msg_sizep) {
		mutex_enter(&clnt_max_msg_lock);
		if (max_msgsize > *clnt_max_msg_sizep)
			*clnt_max_msg_sizep = max_msgsize;
		mutex_exit(&clnt_max_msg_lock);
	}

	p->cku_outbuflen = COTS_DEFAULT_ALLOCSIZE;

	/* Preserialize the call message header */

	call_msg.rm_xid = 0;
	call_msg.rm_direction = CALL;
	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
	call_msg.rm_call.cb_prog = prog;
	call_msg.rm_call.cb_vers = vers;

	xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, WIRE_HDR_SIZE, XDR_ENCODE);

	if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
		RPCLOG0(1, "clnt_cots_kcreate - Fatal header serialization "
		    "error\n");
		auth_destroy(h->cl_auth);
		kmem_free(p, sizeof (cku_private_t));
		RPCLOG0(1, "clnt_cots_kcreate: create failed error EINVAL\n");
		return (EINVAL);		/* XXX */
	}

	/*
	 * The zalloc initialized the fields below.
	 * p->cku_xid = 0;
	 * p->cku_flags = 0;
	 * p->cku_srcaddr.len = 0;
	 * p->cku_srcaddr.maxlen = 0;
	 */

	p->cku_cred = cred;
	p->cku_device = dev;
	p->cku_addrfmly = family;
	p->cku_addr.buf = kmem_zalloc(addr->maxlen, KM_SLEEP);
	p->cku_addr.maxlen = addr->maxlen;
	p->cku_addr.len = addr->len;
	bcopy(addr->buf, p->cku_addr.buf, addr->len);
	p->cku_stats = rpcstat->rpc_cots_client;
	p->cku_useresvport = -1; /* value has not been set */

	*ncl = h;
	return (0);
}

/*ARGSUSED*/
static void
clnt_cots_kabort(CLIENT *h)
{
}

/*
 * Return error info on this handle.
 */
static void
clnt_cots_kerror(CLIENT *h, struct rpc_err *err)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);

	*err = p->cku_err;
}

static bool_t
clnt_cots_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);
	XDR *xdrs;

	xdrs = &(p->cku_outxdr);
	xdrs->x_op = XDR_FREE;
	return ((*xdr_res)(xdrs, res_ptr));
}

static bool_t
clnt_cots_kcontrol(CLIENT *h, int cmd, char *arg)
{
	cku_private_t *p = htop(h);

	switch (cmd) {
	case CLSET_PROGRESS:
		p->cku_progress = TRUE;
		return (TRUE);

	case CLSET_XID:
		if (arg == NULL)
			return (FALSE);

		p->cku_xid = *((uint32_t *)arg);
		return (TRUE);

	case CLGET_XID:
		if (arg == NULL)
			return (FALSE);

		*((uint32_t *)arg) = p->cku_xid;
		return (TRUE);

	case CLSET_NODELAYONERR:
		if (arg == NULL)
			return (FALSE);

		if (*((bool_t *)arg) == TRUE) {
			p->cku_nodelayonerr = TRUE;
			return (TRUE);
		}
		if (*((bool_t *)arg) == FALSE) {
			p->cku_nodelayonerr = FALSE;
			return (TRUE);
		}
		return (FALSE);

	case CLGET_NODELAYONERR:
		if (arg == NULL)
			return (FALSE);

		*((bool_t *)arg) = p->cku_nodelayonerr;
		return (TRUE);

	case CLSET_BINDRESVPORT:
		if (arg == NULL)
			return (FALSE);

		if (*(int *)arg != 1 && *(int *)arg != 0)
			return (FALSE);

		p->cku_useresvport = *(int *)arg;

		return (TRUE);

	case CLGET_BINDRESVPORT:
		if (arg == NULL)
			return (FALSE);

		*(int *)arg = p->cku_useresvport;

		return (TRUE);

	default:
		return (FALSE);
	}
}
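
/*
 * Usage sketch (comment only, not compiled): callers reach
 * clnt_cots_kcontrol() through the generic CLNT_CONTROL() macro, e.g. to
 * ask for RPC_INPROGRESS semantics and to skip the error-path delay:
 *
 *	bool_t on = TRUE;
 *	(void) CLNT_CONTROL(h, CLSET_PROGRESS, NULL);
 *	(void) CLNT_CONTROL(h, CLSET_NODELAYONERR, (char *)&on);
 */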

/*
 * Destroy rpc handle.  Frees the space used for output buffer,
 * private data, and handle structure.
 */
static void
clnt_cots_kdestroy(CLIENT *h)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);
	calllist_t *call = &p->cku_call;

	RPCLOG(8, "clnt_cots_kdestroy h: %p\n", (void *)h);
	RPCLOG(8, "clnt_cots_kdestroy h: xid=0x%x\n", p->cku_xid);

	if (p->cku_flags & CKU_ONQUEUE) {
		RPCLOG(64, "clnt_cots_kdestroy h: removing call for xid 0x%x "
		    "from dispatch list\n", p->cku_xid);
		call_table_remove(call);
	}

	if (call->call_reply)
		freemsg(call->call_reply);
	cv_destroy(&call->call_cv);
	mutex_destroy(&call->call_lock);

	kmem_free(p->cku_srcaddr.buf, p->cku_srcaddr.maxlen);
	kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
	kmem_free(p, sizeof (*p));
}

static int clnt_cots_pulls;
#define	RM_HDR_SIZE	4	/* record mark header size */

/*
 * Call remote procedure.
 */
static enum clnt_stat
clnt_cots_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
    caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, struct timeval wait)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);
	calllist_t *call = &p->cku_call;
	XDR *xdrs;
	struct rpc_msg reply_msg;
	mblk_t *mp;
#ifdef	RPCDEBUG
	clock_t time_sent;
#endif
	struct netbuf *retryaddr;
	struct cm_xprt *cm_entry = NULL;
	queue_t *wq;
	int len;
	int mpsize;
	int refreshes = REFRESHES;
	int interrupted;
	int tidu_size;
	enum clnt_stat status;
	struct timeval cwait;
	bool_t delay_first = FALSE;
	clock_t ticks;

	RPCLOG(2, "clnt_cots_kcallit, procnum %u\n", procnum);
	COTSRCSTAT_INCR(p->cku_stats, rccalls);

	RPCLOG(2, "clnt_cots_kcallit: wait.tv_sec: %ld\n", wait.tv_sec);
	RPCLOG(2, "clnt_cots_kcallit: wait.tv_usec: %ld\n", wait.tv_usec);

	/*
	 * Bug ID 1240234:
	 * Look out for zero length timeouts. We don't want to
	 * wait zero seconds for a connection to be established.
	 */
	if (wait.tv_sec < clnt_cots_min_conntout) {
		cwait.tv_sec = clnt_cots_min_conntout;
		cwait.tv_usec = 0;
		RPCLOG(8, "clnt_cots_kcallit: wait.tv_sec (%ld) too low,",
		    wait.tv_sec);
		RPCLOG(8, " setting to: %d\n", clnt_cots_min_conntout);
	} else {
		cwait = wait;
	}

call_again:
	if (cm_entry) {
		connmgr_release(cm_entry);
		cm_entry = NULL;
	}

	mp = NULL;

	/*
	 * If the call is not a retry, allocate a new xid and cache it
	 * for future retries.
	 * Bug ID 1246045:
	 * Treat call as a retry for purposes of binding the source
	 * port only if we actually attempted to send anything on
	 * the previous call.
	 */
	if (p->cku_xid == 0) {
		p->cku_xid = alloc_xid();
		/*
		 * We need to ASSERT here that our xid != 0 because this
		 * determines whether or not our call record gets placed on
		 * the hash table or the linked list.  By design, we mandate
		 * that RPC calls over cots must have xid's != 0, so we can
		 * ensure proper management of the hash table.
		 */
		ASSERT(p->cku_xid != 0);

		retryaddr = NULL;
		p->cku_flags &= ~CKU_SENT;

		if (p->cku_flags & CKU_ONQUEUE) {
			RPCLOG(8, "clnt_cots_kcallit: new call, dequeuing old"
			    " one (%p)\n", (void *)call);
			call_table_remove(call);
			p->cku_flags &= ~CKU_ONQUEUE;
			RPCLOG(64, "clnt_cots_kcallit: removing call from "
			    "dispatch list because xid was zero (now 0x%x)\n",
			    p->cku_xid);
		}

		if (call->call_reply != NULL) {
			freemsg(call->call_reply);
			call->call_reply = NULL;
		}
	} else if (p->cku_srcaddr.buf == NULL || p->cku_srcaddr.len == 0) {
		retryaddr = NULL;

	} else if (p->cku_flags & CKU_SENT) {
		retryaddr = &p->cku_srcaddr;

	} else {
		/*
		 * Bug ID 1246045: Nothing was sent, so set retryaddr to
		 * NULL and let connmgr_get() bind to any source port it
		 * can get.
		 */
		retryaddr = NULL;
	}

	RPCLOG(64, "clnt_cots_kcallit: xid = 0x%x", p->cku_xid);
	RPCLOG(64, " flags = 0x%x\n", p->cku_flags);

	p->cku_err.re_status = RPC_TIMEDOUT;
	p->cku_err.re_errno = p->cku_err.re_terrno = 0;

	cm_entry = connmgr_wrapget(retryaddr, &cwait, p);

	if (cm_entry == NULL) {
		RPCLOG(1, "clnt_cots_kcallit: can't connect status %s\n",
		    clnt_sperrno(p->cku_err.re_status));

		/*
		 * The reasons why we fail to create a connection are
		 * varied. In most cases we don't want the caller to
		 * immediately retry. This could have one or more
		 * bad effects. This includes flooding the net with
		 * connect requests to ports with no listener; a hard
		 * kernel loop due to all the "reserved" TCP ports being
		 * in use.
		 */
		delay_first = TRUE;

		/*
		 * Even if we end up returning EINTR, we still count a
		 * "can't connect", because the connection manager
		 * might have been committed to waiting for or timing out on
		 * a connection.
		 */
		COTSRCSTAT_INCR(p->cku_stats, rccantconn);
		switch (p->cku_err.re_status) {
		case RPC_INTR:
			p->cku_err.re_errno = EINTR;

			/*
			 * No need to delay because a UNIX signal(2)
			 * interrupted us. The caller likely won't
			 * retry the CLNT_CALL() and even if it does,
			 * we assume the caller knows what it is doing.
			 */
			delay_first = FALSE;
			break;

		case RPC_TIMEDOUT:
			p->cku_err.re_errno = ETIMEDOUT;

			/*
			 * No need to delay because we timed out already
			 * on the connection request and assume that the
			 * transport timeout is longer than our minimum
			 * timeout, or at least not too much smaller.
			 */
			delay_first = FALSE;
			break;

		case RPC_SYSTEMERROR:
		case RPC_TLIERROR:
			/*
			 * We want to delay here because a transient
			 * system error has a better chance of going away
			 * if we delay a bit. If it's not transient, then
			 * we don't want to end up in a hard kernel loop
			 * due to retries.
			 */
			ASSERT(p->cku_err.re_errno != 0);
			break;


		case RPC_CANTCONNECT:
			/*
			 * RPC_CANTCONNECT is set on T_ERROR_ACK which
			 * implies some error down in the TCP layer or
			 * below. If cku_nodelayonerr is set then we
			 * assume the caller knows not to try too hard.
			 */
			RPCLOG0(8, "clnt_cots_kcallit: connection failed,");
			RPCLOG0(8, " re_status=RPC_CANTCONNECT,");
			RPCLOG(8, " re_errno=%d,", p->cku_err.re_errno);
			RPCLOG(8, " cku_nodelayonerr=%d", p->cku_nodelayonerr);
			if (p->cku_nodelayonerr == TRUE)
				delay_first = FALSE;

			p->cku_err.re_errno = EIO;

			break;

		case RPC_XPRTFAILED:
			/*
			 * We want to delay here because we likely
			 * got a refused connection.
			 */
			if (p->cku_err.re_errno != 0)
				break;

			/* fall thru */

		default:
			/*
			 * We delay here because it is better to err
			 * on the side of caution. If we got here then
			 * status could have been RPC_SUCCESS, but we
			 * know that we did not get a connection, so
			 * force the rpc status to RPC_CANTCONNECT.
			 */
			p->cku_err.re_status = RPC_CANTCONNECT;
			p->cku_err.re_errno = EIO;
			break;
		}
		if (delay_first == TRUE)
			ticks = clnt_cots_min_tout * drv_usectohz(1000000);
		goto cots_done;
	}

	/*
	 * If we've never sent any request on this connection (send count
	 * is zero, or the connection has been reset), cache the
	 * connection's create time and send a request (possibly a retry).
	 */
	if ((p->cku_flags & CKU_SENT) == 0 ||
	    p->cku_ctime != cm_entry->x_ctime) {
		p->cku_ctime = cm_entry->x_ctime;

	} else if ((p->cku_flags & CKU_SENT) && (p->cku_flags & CKU_ONQUEUE) &&
	    (call->call_reply != NULL ||
	    p->cku_recv_attempts < clnt_cots_maxrecv)) {

		/*
		 * If we've sent a request and our call is on the dispatch
		 * queue and we haven't made too many receive attempts, then
		 * don't re-send, just receive.
		 */
		p->cku_recv_attempts++;
		goto read_again;
	}

	/*
	 * Now we create the RPC request in a STREAMS message. We have to do
	 * this after the call to connmgr_get so that we have the correct
	 * TIDU size for the transport.
	 */
	tidu_size = cm_entry->x_tidu_size;
	len = MSG_OFFSET + MAX(tidu_size, RM_HDR_SIZE + WIRE_HDR_SIZE);

	while ((mp = allocb(len, BPRI_MED)) == NULL) {
		if (strwaitbuf(len, BPRI_MED)) {
			p->cku_err.re_status = RPC_SYSTEMERROR;
			p->cku_err.re_errno = ENOSR;
			COTSRCSTAT_INCR(p->cku_stats, rcnomem);
			goto cots_done;
		}
	}
	xdrs = &p->cku_outxdr;
	xdrmblk_init(xdrs, mp, XDR_ENCODE, tidu_size);
	mpsize = MBLKSIZE(mp);
	ASSERT(mpsize >= len);
	ASSERT(mp->b_rptr == mp->b_datap->db_base);

	/*
	 * If the size of mblk is not appreciably larger than what we
	 * asked, then resize the mblk to exactly len bytes. The reason for
	 * this: suppose len is 1600 bytes, the tidu is 1460 bytes
	 * (from TCP over ethernet), and the arguments to the RPC require
	 * 2800 bytes. Ideally we want the protocol to render two
	 * ~1400 byte segments over the wire. However if allocb() gives us a
	 * 2k mblk, and we allocate a second mblk for the remainder, the
	 * protocol module may generate 3 segments over the wire:
	 * 1460 bytes for the first, 448 (2048 - 1600) for the second, and
	 * 892 for the third. If we "waste" 448 bytes in the first mblk,
	 * the XDR encoding will generate two ~1400 byte mblks, and the
	 * protocol module is more likely to produce properly sized segments.
	 */
	if ((mpsize >> 1) <= len)
		mp->b_rptr += (mpsize - len);
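
	/*
	 * Worked example of the resize above (comment only): with
	 * len = 1600 and a 2048-byte mblk, mpsize >> 1 == 1024 <= 1600,
	 * so b_rptr advances by 2048 - 1600 = 448 bytes and exactly len
	 * bytes remain. A 4096-byte mblk (4096 >> 1 == 2048 > 1600) is
	 * left alone, since wasting 2496 bytes would buy nothing.
	 */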

	/*
	 * Adjust b_rptr to reserve space for the non-data protocol headers
	 * any downstream modules might like to add, and for the
	 * record marking header.
	 */
	mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE);

	if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
		/* Copy in the preserialized RPC header information. */
		bcopy(p->cku_rpchdr, mp->b_rptr, WIRE_HDR_SIZE);

		/* Use XDR_SETPOS() to set the b_wptr to past the RPC header. */
		XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base +
		    WIRE_HDR_SIZE));

		ASSERT((mp->b_wptr - mp->b_rptr) == WIRE_HDR_SIZE);

		/* Serialize the procedure number and the arguments. */
		if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
		    (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
		    (!(*xdr_args)(xdrs, argsp))) {
			p->cku_err.re_status = RPC_CANTENCODEARGS;
			p->cku_err.re_errno = EIO;
			goto cots_done;
		}

		(*(uint32_t *)(mp->b_rptr)) = p->cku_xid;
	} else {
		uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[WIRE_HDR_SIZE];
		IXDR_PUT_U_INT32(uproc, procnum);

		(*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;

		/* Use XDR_SETPOS() to set the b_wptr. */
		XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base));

		/* Serialize the procedure number and the arguments. */
		if (!AUTH_WRAP(h->cl_auth, p->cku_rpchdr, WIRE_HDR_SIZE+4,
		    xdrs, xdr_args, argsp)) {
			p->cku_err.re_status = RPC_CANTENCODEARGS;
			p->cku_err.re_errno = EIO;
			goto cots_done;
		}
	}

	RPCLOG(2, "clnt_cots_kcallit: connected, sending call, tidu_size %d\n",
	    tidu_size);

	wq = cm_entry->x_wq;
	clnt_dispatch_send(wq, mp, call, p->cku_xid,
	    (p->cku_flags & CKU_ONQUEUE));

	RPCLOG(64, "clnt_cots_kcallit: sent call for xid 0x%x\n",
	    (uint_t)p->cku_xid);
	p->cku_flags = (CKU_ONQUEUE|CKU_SENT);
	p->cku_recv_attempts = 1;

#ifdef	RPCDEBUG
	time_sent = lbolt;
#endif

	/*
	 * Wait for a reply or a timeout.  If there is no error or timeout,
	 * (both indicated by call_status), call->call_reply will contain
	 * the RPC reply message.
	 */
read_again:
	mutex_enter(&call->call_lock);
	interrupted = 0;
	if (call->call_status == RPC_TIMEDOUT) {
		/*
		 * Indicate that the lwp is not to be stopped while waiting
		 * for this network traffic.  This is to avoid deadlock while
		 * debugging a process via /proc and also to avoid recursive
		 * mutex_enter()s due to NFS page faults while stopping
		 * (NFS holds locks when it calls here).
		 */
		clock_t cv_wait_ret;
		clock_t timout;
		clock_t oldlbolt;

		klwp_t *lwp = ttolwp(curthread);

		if (lwp != NULL)
			lwp->lwp_nostop++;

		oldlbolt = lbolt;
		timout = wait.tv_sec * drv_usectohz(1000000) +
		    drv_usectohz(wait.tv_usec) + oldlbolt;
		/*
		 * Iterate until the call_status is changed to something
		 * other than RPC_TIMEDOUT, or until cv_timedwait_sig()
		 * returns something <= 0. The latter means that we timed
		 * out.
		 */
		if (h->cl_nosignal)
			while ((cv_wait_ret = cv_timedwait(&call->call_cv,
			    &call->call_lock, timout)) > 0 &&
			    call->call_status == RPC_TIMEDOUT);
		else
			while ((cv_wait_ret = cv_timedwait_sig(
			    &call->call_cv,
			    &call->call_lock, timout)) > 0 &&
			    call->call_status == RPC_TIMEDOUT);

		switch (cv_wait_ret) {
		case 0:
			/*
			 * If we got out of the above loop with
			 * cv_timedwait_sig() returning 0, then we were
			 * interrupted regardless of what call_status is.
			 */
			interrupted = 1;
			break;
		case -1:
			/* cv_timedwait_sig() timed out */
			break;
		default:

			/*
			 * We were cv_signalled(). If we didn't
			 * get a successful call_status and returned
			 * before time expired, delay up to clnt_cots_min_tout
			 * seconds so that the caller doesn't immediately
			 * try to call us again and thus force the
			 * same condition that got us here (such
			 * as a RPC_XPRTFAILED due to the server not
			 * listening on the end-point).
			 */
			if (call->call_status != RPC_SUCCESS) {
				clock_t curlbolt;
				clock_t diff;

				curlbolt = ddi_get_lbolt();
				ticks = clnt_cots_min_tout *
				    drv_usectohz(1000000);
				diff = curlbolt - oldlbolt;
				if (diff < ticks) {
					delay_first = TRUE;
					if (diff > 0)
						ticks -= diff;
				}
			}
			break;
		}

		if (lwp != NULL)
			lwp->lwp_nostop--;
	}
	/*
	 * Get the reply message, if any.  This will be freed at the end
	 * whether or not an error occurred.
	 */
	mp = call->call_reply;
	call->call_reply = NULL;

	/*
	 * call_err is the error info when the call is on dispatch queue.
	 * cku_err is the error info returned to the caller.
	 * Sync cku_err with call_err for local message processing.
	 */

	status = call->call_status;
	p->cku_err = call->call_err;
	mutex_exit(&call->call_lock);

	if (status != RPC_SUCCESS) {
		switch (status) {
		case RPC_TIMEDOUT:
			if (interrupted) {
				COTSRCSTAT_INCR(p->cku_stats, rcintrs);
				p->cku_err.re_status = RPC_INTR;
				p->cku_err.re_errno = EINTR;
				RPCLOG(1, "clnt_cots_kcallit: xid 0x%x",
				    p->cku_xid);
				RPCLOG(1, "signal interrupted at %ld", lbolt);
				RPCLOG(1, ", was sent at %ld\n", time_sent);
			} else {
				COTSRCSTAT_INCR(p->cku_stats, rctimeouts);
				p->cku_err.re_errno = ETIMEDOUT;
				RPCLOG(1, "clnt_cots_kcallit: timed out at %ld",
				    lbolt);
				RPCLOG(1, ", was sent at %ld\n", time_sent);
			}
			break;

		case RPC_XPRTFAILED:
			if (p->cku_err.re_errno == 0)
				p->cku_err.re_errno = EIO;

			RPCLOG(1, "clnt_cots_kcallit: transport failed: %d\n",
			    p->cku_err.re_errno);
			break;

		case RPC_SYSTEMERROR:
			ASSERT(p->cku_err.re_errno);
			RPCLOG(1, "clnt_cots_kcallit: system error: %d\n",
			    p->cku_err.re_errno);
			break;

		default:
			p->cku_err.re_status = RPC_SYSTEMERROR;
			p->cku_err.re_errno = EIO;
			RPCLOG(1, "clnt_cots_kcallit: error: %s\n",
			    clnt_sperrno(status));
			break;
		}
		if (p->cku_err.re_status != RPC_TIMEDOUT) {

			if (p->cku_flags & CKU_ONQUEUE) {
				call_table_remove(call);
				p->cku_flags &= ~CKU_ONQUEUE;
			}

			RPCLOG(64, "clnt_cots_kcallit: non TIMEOUT so xid 0x%x "
			    "taken off dispatch list\n", p->cku_xid);
			if (call->call_reply) {
				freemsg(call->call_reply);
				call->call_reply = NULL;
			}
		} else if (wait.tv_sec != 0) {
			/*
			 * We've sent the request over TCP and so we have
			 * every reason to believe it will get
			 * delivered. In which case returning a timeout is not
			 * appropriate.
			 */
			if (p->cku_progress == TRUE &&
			    p->cku_recv_attempts < clnt_cots_maxrecv) {
				p->cku_err.re_status = RPC_INPROGRESS;
			}
		}
		goto cots_done;
	}

	xdrs = &p->cku_inxdr;
	xdrmblk_init(xdrs, mp, XDR_DECODE, 0);

	reply_msg.rm_direction = REPLY;
	reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
	reply_msg.acpted_rply.ar_stat = SUCCESS;

	reply_msg.acpted_rply.ar_verf = _null_auth;
	/*
	 *  xdr_results will be done in AUTH_UNWRAP.
	 */
	reply_msg.acpted_rply.ar_results.where = NULL;
	reply_msg.acpted_rply.ar_results.proc = xdr_void;

	if (xdr_replymsg(xdrs, &reply_msg)) {
		enum clnt_stat re_status;

		_seterr_reply(&reply_msg, &p->cku_err);

		re_status = p->cku_err.re_status;
		if (re_status == RPC_SUCCESS) {
			/*
			 * Reply is good, check auth.
			 */
			if (!AUTH_VALIDATE(h->cl_auth,
			    &reply_msg.acpted_rply.ar_verf)) {
				COTSRCSTAT_INCR(p->cku_stats, rcbadverfs);
				RPCLOG0(1, "clnt_cots_kcallit: validation "
				    "failure\n");
				freemsg(mp);
				(void) xdr_rpc_free_verifier(xdrs, &reply_msg);
				mutex_enter(&call->call_lock);
				if (call->call_reply == NULL)
					call->call_status = RPC_TIMEDOUT;
				mutex_exit(&call->call_lock);
				goto read_again;
			} else if (!AUTH_UNWRAP(h->cl_auth, xdrs,
			    xdr_results, resultsp)) {
				RPCLOG0(1, "clnt_cots_kcallit: validation "
				    "failure (unwrap)\n");
				p->cku_err.re_status = RPC_CANTDECODERES;
				p->cku_err.re_errno = EIO;
			}
		} else {
			/* set errno in case we can't recover */
			if (re_status != RPC_VERSMISMATCH &&
			    re_status != RPC_AUTHERROR &&
			    re_status != RPC_PROGVERSMISMATCH)
				p->cku_err.re_errno = EIO;

			if (re_status == RPC_AUTHERROR) {
				/*
				 * Maybe our credentials need to be refreshed.
				 */
				if (cm_entry) {
					/*
					 * There is the potential that the
					 * cm_entry has/will be marked dead,
					 * so drop the connection altogether,
					 * force REFRESH to establish new
					 * connection.
					 */
					connmgr_cancelconn(cm_entry);
					cm_entry = NULL;
				}

				if ((refreshes > 0) &&
				    AUTH_REFRESH(h->cl_auth, &reply_msg,
				    p->cku_cred)) {
					refreshes--;
					(void) xdr_rpc_free_verifier(xdrs,
					    &reply_msg);
					freemsg(mp);
					mp = NULL;

					if (p->cku_flags & CKU_ONQUEUE) {
						call_table_remove(call);
						p->cku_flags &= ~CKU_ONQUEUE;
					}

					RPCLOG(64,
					    "clnt_cots_kcallit: AUTH_ERROR, xid"
					    " 0x%x removed off dispatch list\n",
					    p->cku_xid);
					if (call->call_reply) {
						freemsg(call->call_reply);
						call->call_reply = NULL;
					}

					COTSRCSTAT_INCR(p->cku_stats,
					    rcbadcalls);
					COTSRCSTAT_INCR(p->cku_stats,
					    rcnewcreds);
					goto call_again;
				}

				/*
				 * We have used the client handle to
				 * do an AUTH_REFRESH and the RPC status may
				 * be set to RPC_SUCCESS; Let's make sure to
				 * set it to RPC_AUTHERROR.
				 */
				p->cku_err.re_status = RPC_AUTHERROR;

				/*
				 * Map recoverable and unrecoverable
				 * authentication errors to appropriate errno
				 */
				switch (p->cku_err.re_why) {
				case AUTH_TOOWEAK:
					/*
					 * This could be a failure where the
					 * server requires use of a reserved
					 * port, check and optionally set the
					 * client handle useresvport trying
					 * one more time. Next go round we
					 * fall out with the tooweak error.
					 */
					if (p->cku_useresvport != 1) {
						p->cku_useresvport = 1;
						p->cku_xid = 0;
						(void) xdr_rpc_free_verifier
						    (xdrs, &reply_msg);
						freemsg(mp);
						goto call_again;
					}
					/* FALLTHRU */
				case AUTH_BADCRED:
				case AUTH_BADVERF:
				case AUTH_INVALIDRESP:
				case AUTH_FAILED:
				case RPCSEC_GSS_NOCRED:
				case RPCSEC_GSS_FAILED:
					p->cku_err.re_errno = EACCES;
					break;
				case AUTH_REJECTEDCRED:
				case AUTH_REJECTEDVERF:
				default:
					p->cku_err.re_errno = EIO;
					break;
				}
				RPCLOG(1, "clnt_cots_kcallit : authentication"
				    " failed with RPC_AUTHERROR of type %d\n",
				    (int)p->cku_err.re_why);
			}
		}
	} else {
		/* reply didn't decode properly. */
		p->cku_err.re_status = RPC_CANTDECODERES;
		p->cku_err.re_errno = EIO;
		RPCLOG0(1, "clnt_cots_kcallit: decode failure\n");
	}

	(void) xdr_rpc_free_verifier(xdrs, &reply_msg);

	if (p->cku_flags & CKU_ONQUEUE) {
		call_table_remove(call);
		p->cku_flags &= ~CKU_ONQUEUE;
	}

	RPCLOG(64, "clnt_cots_kcallit: xid 0x%x taken off dispatch list",
	    p->cku_xid);
	RPCLOG(64, " status is %s\n", clnt_sperrno(p->cku_err.re_status));
cots_done:
	if (cm_entry)
		connmgr_release(cm_entry);

	if (mp != NULL)
		freemsg(mp);
	if ((p->cku_flags & CKU_ONQUEUE) == 0 && call->call_reply) {
		freemsg(call->call_reply);
		call->call_reply = NULL;
	}
	if (p->cku_err.re_status != RPC_SUCCESS) {
		RPCLOG0(1, "clnt_cots_kcallit: tail-end failure\n");
		COTSRCSTAT_INCR(p->cku_stats, rcbadcalls);
	}

	/*
	 * No point in delaying if the zone is going away.
	 */
	if (delay_first == TRUE &&
	    !(zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)) {
		if (clnt_delay(ticks, h->cl_nosignal) == EINTR) {
			p->cku_err.re_errno = EINTR;
			p->cku_err.re_status = RPC_INTR;
		}
	}
	return (p->cku_err.re_status);
}
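
/*
 * Caller-side sketch (comment only, simplified): consumers such as NFS's
 * rfscall() typically drive clnt_cots_kcallit() through CLNT_CALL() in a
 * retry loop along these lines:
 *
 *	do {
 *		status = CLNT_CALL(h, proc, xargs, argsp,
 *		    xres, resp, timeout);
 *	} while (status == RPC_TIMEDOUT || status == RPC_INPROGRESS);
 *
 * which is why the code above goes to such lengths to keep the call
 * record on the dispatch queue (CKU_ONQUEUE) across calls: a late reply
 * can still be matched by XID on the next CLNT_CALL().
 */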

/*
 * Kinit routine for cots.  This sets up the correct operations in
 * the client handle, as the handle may have previously been a clts
 * handle, and clears the xid field so there is no way a new call
 * could be mistaken for a retry.  It also sets in the handle the
 * information that is passed at create/kinit time but needed at
 * call time, as cots creates the transport at call time - device,
 * address of the server, protocol family.
 */
void
clnt_cots_kinit(CLIENT *h, dev_t dev, int family, struct netbuf *addr,
	int max_msgsize, cred_t *cred)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);
	calllist_t *call = &p->cku_call;

	h->cl_ops = &tcp_ops;
	if (p->cku_flags & CKU_ONQUEUE) {
		call_table_remove(call);
		p->cku_flags &= ~CKU_ONQUEUE;
		RPCLOG(64, "clnt_cots_kinit: removing call for xid 0x%x from"
		    " dispatch list\n", p->cku_xid);
	}

	if (call->call_reply != NULL) {
		freemsg(call->call_reply);
		call->call_reply = NULL;
	}

	call->call_bucket = NULL;
	call->call_hash = 0;

	/*
	 * We don't clear cku_flags here, because clnt_cots_kcallit()
	 * takes care of handling the cku_flags reset.
	 */
	p->cku_xid = 0;
	p->cku_device = dev;
	p->cku_addrfmly = family;
	p->cku_cred = cred;

	if (p->cku_addr.maxlen < addr->len) {
		if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
			kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
		p->cku_addr.buf = kmem_zalloc(addr->maxlen, KM_SLEEP);
		p->cku_addr.maxlen = addr->maxlen;
	}

	p->cku_addr.len = addr->len;
	bcopy(addr->buf, p->cku_addr.buf, addr->len);

	/*
	 * If the current sanity check size in rpcmod is smaller
	 * than the size needed, then increase the sanity check.
	 */
	if (max_msgsize != 0 && clnt_max_msg_sizep != NULL &&
	    max_msgsize > *clnt_max_msg_sizep) {
		mutex_enter(&clnt_max_msg_lock);
		if (max_msgsize > *clnt_max_msg_sizep)
			*clnt_max_msg_sizep = max_msgsize;
		mutex_exit(&clnt_max_msg_lock);
	}
}

/*
 * ksettimers is a no-op for cots, with the exception of setting the xid.
 */
/* ARGSUSED */
static int
clnt_cots_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
	int minimum, void (*feedback)(int, int, caddr_t), caddr_t arg,
	uint32_t xid)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);

	if (xid)
		p->cku_xid = xid;
	COTSRCSTAT_INCR(p->cku_stats, rctimers);
	return (0);
}

extern void rpc_poptimod(struct vnode *);
extern int kstr_push(struct vnode *, char *);

int
conn_kstat_update(kstat_t *ksp, int rw)
{
	struct cm_xprt *cm_entry;
	struct cm_kstat_xprt *cm_ksp_data;
	uchar_t *b;
	char *fbuf;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	if (ksp == NULL || ksp->ks_private == NULL)
		return (EIO);
	cm_entry = (struct cm_xprt *)ksp->ks_private;
	cm_ksp_data = (struct cm_kstat_xprt *)ksp->ks_data;

	cm_ksp_data->x_wq.value.ui32 = (uint32_t)(uintptr_t)cm_entry->x_wq;
	cm_ksp_data->x_family.value.ui32 = cm_entry->x_family;
	cm_ksp_data->x_rdev.value.ui32 = (uint32_t)cm_entry->x_rdev;
	cm_ksp_data->x_time.value.ui32 = cm_entry->x_time;
	cm_ksp_data->x_ref.value.ui32 = cm_entry->x_ref;
	cm_ksp_data->x_state.value.ui32 = cm_entry->x_state_flags;

	if (cm_entry->x_server.buf) {
		fbuf = cm_ksp_data->x_server.value.str.addr.ptr;
		if (cm_entry->x_family == AF_INET &&
		    cm_entry->x_server.len ==
		    sizeof (struct sockaddr_in)) {
			struct sockaddr_in  *sa;
			sa = (struct sockaddr_in *)
			    cm_entry->x_server.buf;
			b = (uchar_t *)&sa->sin_addr;
			(void) sprintf(fbuf,
			    "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF,
			    b[2] & 0xFF, b[3] & 0xFF);
			cm_ksp_data->x_port.value.ui32 =
			    (uint32_t)sa->sin_port;
		} else if (cm_entry->x_family == AF_INET6 &&
		    cm_entry->x_server.len >=
		    sizeof (struct sockaddr_in6)) {
			/* extract server IP address & port */
			struct sockaddr_in6 *sin6;
			sin6 = (struct sockaddr_in6 *)cm_entry->x_server.buf;
			(void) kinet_ntop6((uchar_t *)&sin6->sin6_addr, fbuf,
			    INET6_ADDRSTRLEN);
			cm_ksp_data->x_port.value.ui32 = sin6->sin6_port;
		} else {
			struct sockaddr_in *sa;

			sa = (struct sockaddr_in *)cm_entry->x_server.buf;
			b = (uchar_t *)&sa->sin_addr;
			(void) sprintf(fbuf,
			    "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF,
			    b[2] & 0xFF, b[3] & 0xFF);
		}
		KSTAT_NAMED_STR_BUFLEN(&cm_ksp_data->x_server) =
		    strlen(fbuf) + 1;
	}

	return (0);
}


/*
 * We want a version of delay which is interruptible by a UNIX signal.
 * Return EINTR if an interrupt occurred.
 */
static int
clnt_delay(clock_t ticks, bool_t nosignal)
{
	if (nosignal == TRUE) {
		delay(ticks);
		return (0);
	}
	return (delay_sig(ticks));
}

/*
 * Wait for a connection until a timeout, or until we are
 * signalled that there has been a connection state change.
 */
static enum clnt_stat
connmgr_cwait(struct cm_xprt *cm_entry, const struct timeval *waitp,
	bool_t nosignal)
{
	bool_t interrupted;
	clock_t timout, cv_stat;
	enum clnt_stat clstat;
	unsigned int old_state;

	ASSERT(MUTEX_HELD(&connmgr_lock));
	/*
	 * We wait for the transport connection to be made, or an
	 * indication that it could not be made.
	 */
	clstat = RPC_TIMEDOUT;
	interrupted = FALSE;

	old_state = cm_entry->x_state_flags;
	/*
	 * Now loop until cv_timedwait{_sig} returns because of
	 * a signal(0) or timeout(-1) or cv_signal(>0). But it may be
	 * cv_signalled for various other reasons too. So loop
	 * until there is a state change on the connection.
	 */

	timout = waitp->tv_sec * drv_usectohz(1000000) +
	    drv_usectohz(waitp->tv_usec) + lbolt;

	if (nosignal) {
		while ((cv_stat = cv_timedwait(&cm_entry->x_conn_cv,
		    &connmgr_lock, timout)) > 0 &&
		    cm_entry->x_state_flags == old_state)
			;
	} else {
		while ((cv_stat = cv_timedwait_sig(&cm_entry->x_conn_cv,
		    &connmgr_lock, timout)) > 0 &&
		    cm_entry->x_state_flags == old_state)
			;

		if (cv_stat == 0) /* got intr signal? */
			interrupted = TRUE;
	}

	if ((cm_entry->x_state_flags & (X_BADSTATES|X_CONNECTED)) ==
	    X_CONNECTED) {
		clstat = RPC_SUCCESS;
	} else {
		if (interrupted == TRUE)
			clstat = RPC_INTR;
		RPCLOG(1, "connmgr_cwait: can't connect, error: %s\n",
		    clnt_sperrno(clstat));
	}

	return (clstat);
}
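
/*
 * Worked example for the timeout arithmetic above (comment only): the
 * deadline passed to cv_timedwait{_sig}() is an absolute lbolt value.
 * With hz = 100 (so drv_usectohz(1000000) == 100) and a waitp of
 * { 5, 500000 }, the deadline is 5 * 100 + 50 + lbolt, i.e. 550 ticks
 * from now.
 */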

/*
 * Primary interface for how RPC grabs a connection.
 */
static struct cm_xprt *
connmgr_wrapget(
	struct netbuf *retryaddr,
	const struct timeval *waitp,
	cku_private_t *p)
{
	struct cm_xprt *cm_entry;

	cm_entry = connmgr_get(retryaddr, waitp, &p->cku_addr, p->cku_addrfmly,
	    &p->cku_srcaddr, &p->cku_err, p->cku_device,
	    p->cku_client.cl_nosignal, p->cku_useresvport);

	if (cm_entry == NULL) {
		/*
		 * Re-map the call status to RPC_INTR if the err code is
		 * EINTR. This can happen if the call status is RPC_TLIERROR.
		 * However, don't re-map if signalling has been turned off.
		 * XXX Really need to create a separate thread whenever
		 * there isn't an existing connection.
		 */
		if (p->cku_err.re_errno == EINTR) {
			if (p->cku_client.cl_nosignal == TRUE)
				p->cku_err.re_errno = EIO;
			else
				p->cku_err.re_status = RPC_INTR;
		}
	}

	return (cm_entry);
}

/*
 * Obtains a transport to the server specified in addr.  If a suitable
 * transport does not already exist in the list of cached transports, a new
 * connection is created, connected, and added to the list. The connection
 * is for sending only - the reply message may come back on another
 * transport connection.
 */
static struct cm_xprt *
connmgr_get(
	struct netbuf	*retryaddr,
	const struct timeval	*waitp,	/* changed to a ptr to conserve */
					/* stack */
	struct netbuf	*destaddr,
	int		addrfmly,
	struct netbuf	*srcaddr,
	struct rpc_err	*rpcerr,
	dev_t		device,
	bool_t		nosignal,
	int		useresvport)
{
	struct cm_xprt *cm_entry;
	struct cm_xprt *lru_entry;
	struct cm_xprt **cmp;
	queue_t *wq;
	TIUSER *tiptr;
	int i;
	int retval;
	clock_t prev_time;
	int tidu_size;
	bool_t	connected;
	zoneid_t zoneid = getzoneid();

	/*
	 * If the call is not a retry, look for a transport entry that
	 * goes to the server of interest.
	 */
	mutex_enter(&connmgr_lock);

	if (retryaddr == NULL) {
use_new_conn:
		i = 0;
		cm_entry = lru_entry = NULL;
		prev_time = lbolt;

		cmp = &cm_hd;
		while ((cm_entry = *cmp) != NULL) {
			ASSERT(cm_entry != cm_entry->x_next);
			/*
			 * Garbage collect connections that are marked
			 * as needing a disconnect.
			 */
			if (cm_entry->x_needdis) {
				CONN_HOLD(cm_entry);
				connmgr_dis_and_wait(cm_entry);
				connmgr_release(cm_entry);
				/*
				 * connmgr_lock could have been
				 * dropped for the disconnect
				 * processing so start over.
				 */
				goto use_new_conn;
			}

			/*
			 * Garbage collect the dead connections that have
			 * no threads working on them.
			 */
			if ((cm_entry->x_state_flags & (X_DEAD|X_THREAD)) ==
			    X_DEAD) {
				*cmp = cm_entry->x_next;
				mutex_exit(&connmgr_lock);
				connmgr_close(cm_entry);
				mutex_enter(&connmgr_lock);
				goto use_new_conn;
			}


			if ((cm_entry->x_state_flags & X_BADSTATES) == 0 &&
			    cm_entry->x_zoneid == zoneid &&
			    cm_entry->x_rdev == device &&
			    destaddr->len == cm_entry->x_server.len &&
			    bcmp(destaddr->buf, cm_entry->x_server.buf,
			    destaddr->len) == 0) {
				/*
				 * If the matching entry isn't connected,
				 * attempt to reconnect it.
				 */
				if (cm_entry->x_connected == FALSE) {
					/*
					 * We don't go through trying
					 * to find the least recently
					 * used connected entry because
					 * connmgr_reconnect() briefly
					 * dropped the connmgr_lock,
					 * allowing a window for our
					 * accounting to be messed up.
					 * In any case, a re-connected
					 * connection is as good as
					 * a LRU connection.
					 */
					return (connmgr_wrapconnect(cm_entry,
					    waitp, destaddr, addrfmly, srcaddr,
					    rpcerr, TRUE, nosignal));
				}
				i++;
				if (cm_entry->x_time - prev_time <= 0 ||
				    lru_entry == NULL) {
					prev_time = cm_entry->x_time;
					lru_entry = cm_entry;
				}
			}
			cmp = &cm_entry->x_next;
		}

		if (i > clnt_max_conns) {
			RPCLOG(8, "connmgr_get: too many conns, dooming entry"
			    " %p\n", (void *)lru_entry->x_tiptr);
			lru_entry->x_doomed = TRUE;
			goto use_new_conn;
		}

		/*
		 * If we are at the maximum number of connections to
		 * the server, hand back the least recently used one.
		 */
		if (i == clnt_max_conns) {
			/*
			 * Copy into the handle the source address of
			 * the connection, which we will use in case of
			 * a later retry.
			 */
			if (srcaddr->len != lru_entry->x_src.len) {
				if (srcaddr->len > 0)
					kmem_free(srcaddr->buf,
					    srcaddr->maxlen);
				srcaddr->buf = kmem_zalloc(
				    lru_entry->x_src.len, KM_SLEEP);
				srcaddr->maxlen = srcaddr->len =
				    lru_entry->x_src.len;
			}
			bcopy(lru_entry->x_src.buf, srcaddr->buf, srcaddr->len);
			RPCLOG(2, "connmgr_get: call going out on %p\n",
			    (void *)lru_entry);
			lru_entry->x_time = lbolt;
			CONN_HOLD(lru_entry);
			mutex_exit(&connmgr_lock);
			return (lru_entry);
		}

	} else {
		/*
		 * This is the retry case (retryaddr != NULL). Retries must
		 * be sent on the same source port as the original call.
		 */

		/*
		 * Walk the list looking for a connection with a source address
		 * that matches the retry address.
		 */
		cmp = &cm_hd;
		while ((cm_entry = *cmp) != NULL) {
			ASSERT(cm_entry != cm_entry->x_next);
			if (zoneid != cm_entry->x_zoneid ||
			    device != cm_entry->x_rdev ||
			    retryaddr->len != cm_entry->x_src.len ||
			    bcmp(retryaddr->buf, cm_entry->x_src.buf,
			    retryaddr->len) != 0) {
				cmp = &cm_entry->x_next;
				continue;
			}

			/*
			 * Sanity check: if the connection with our source
			 * port is going to some other server, something went
			 * wrong, as we never delete connections (i.e. release
			 * ports) unless they have been idle. In this case,
			 * it is probably better to send the call out using
			 * a new source address than to fail it altogether,
			 * since that port may never be released.
			 */
			if (destaddr->len != cm_entry->x_server.len ||
			    bcmp(destaddr->buf, cm_entry->x_server.buf,
			    destaddr->len) != 0) {
				RPCLOG(1, "connmgr_get: tiptr %p"
				    " is going to a different server"
				    " with the port that belongs"
				    " to us!\n", (void *)cm_entry->x_tiptr);
				retryaddr = NULL;
				goto use_new_conn;
			}

			/*
			 * If the connection of interest is not connected and we
			 * can't reconnect it, then the server is probably
			 * still down. Return NULL to the caller and let it
			 * retry later if it wants to. We have a delay so the
			 * machine doesn't go into a tight retry loop. If the
			 * entry was already connected, or the reconnect was
			 * successful, return this entry.
			 */
			if (cm_entry->x_connected == FALSE) {
				return (connmgr_wrapconnect(cm_entry,
				    waitp, destaddr, addrfmly, NULL,
				    rpcerr, TRUE, nosignal));
			} else {
				CONN_HOLD(cm_entry);

				cm_entry->x_time = lbolt;
				mutex_exit(&connmgr_lock);
				RPCLOG(2, "connmgr_get: found old "
				    "transport %p for retry\n",
				    (void *)cm_entry);
				return (cm_entry);
			}
		}

		/*
		 * We cannot find an entry in the list for this retry.
		 * Either the entry has been removed temporarily to be
		 * reconnected by another thread, or the original call
		 * got a port but never got connected,
		 * and hence the transport never got put in the
		 * list. Fall through to the "create new connection" code -
		 * the former case will fail there trying to rebind the port,
		 * and the latter case (and any other pathological cases) will
		 * rebind and reconnect and not hang the client machine.
		 */
		RPCLOG0(8, "connmgr_get: no entry in list for retry\n");
	}
	/*
	 * Set up a transport entry in the connection manager's list.
	 */
	cm_entry = (struct cm_xprt *)
	    kmem_zalloc(sizeof (struct cm_xprt), KM_SLEEP);

	cm_entry->x_server.buf = kmem_zalloc(destaddr->len, KM_SLEEP);
	bcopy(destaddr->buf, cm_entry->x_server.buf, destaddr->len);
	cm_entry->x_server.len = cm_entry->x_server.maxlen = destaddr->len;

	cm_entry->x_state_flags = X_THREAD;
	cm_entry->x_ref = 1;
	cm_entry->x_family = addrfmly;
	cm_entry->x_rdev = device;
	cm_entry->x_zoneid = zoneid;
	mutex_init(&cm_entry->x_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&cm_entry->x_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&cm_entry->x_conn_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&cm_entry->x_dis_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Note that we add this partially initialized entry to the
	 * connection list. This is so that we don't open duplicate
	 * connections to the same server.
	 *
	 * Note that x_src is not initialized at this point. This is because
	 * retryaddr might be NULL in which case x_src is whatever
	 * t_kbind/bindresvport gives us. If another thread wants a
	 * connection to the same server, seemingly we have an issue, but we
	 * don't. If the other thread comes in with retryaddr == NULL, then it
	 * will never look at x_src, and it will end up waiting in
	 * connmgr_cwait() for the first thread to finish the connection
	 * attempt. If the other thread comes in with retryaddr != NULL, then
	 * that means there was a request sent on a connection, in which case
	 * the connection should already exist. Thus the first thread
	 * never gets here ... it finds the connection to its server in the
	 * connection list.
	 *
	 * But even if this theory is wrong, in the retryaddr != NULL case, the
	 * 2nd thread will skip us because x_src.len == 0.
	 */
	cm_entry->x_next = cm_hd;
	cm_hd = cm_entry;
	mutex_exit(&connmgr_lock);

	/*
	 * Either we didn't find an entry to the server of interest, or we
	 * don't have the maximum number of connections to that server -
	 * create a new connection.
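	 *
	 * The stream for the new connection is assembled in the steps
	 * below (summarized here for reference): t_kopen() the COTS
	 * device, strip timod via rpc_poptimod(), push rpcmod and mark
	 * the stream as a client with the RPC_CLIENT ioctl, push timod
	 * back on, bind the local address (through bindresvport() when
	 * a reserved port is wanted or a retry must reuse its old port),
	 * and finally connect via connmgr_connect().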
	 */
	RPCLOG0(8, "connmgr_get: creating new connection\n");
	rpcerr->re_status = RPC_TLIERROR;

	i = t_kopen(NULL, device, FREAD|FWRITE|FNDELAY, &tiptr, kcred);
	if (i) {
		RPCLOG(1, "connmgr_get: can't open cots device, error %d\n", i);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}
	rpc_poptimod(tiptr->fp->f_vnode);

	if (i = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"rpcmod", 0,
	    K_TO_K, kcred, &retval)) {
		RPCLOG(1, "connmgr_get: can't push cots module, %d\n", i);
		(void) t_kclose(tiptr, 1);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}

	if (i = strioctl(tiptr->fp->f_vnode, RPC_CLIENT, 0, 0, K_TO_K,
	    kcred, &retval)) {
		RPCLOG(1, "connmgr_get: can't set client status with cots "
		    "module, %d\n", i);
		(void) t_kclose(tiptr, 1);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}

	mutex_enter(&connmgr_lock);

	wq = tiptr->fp->f_vnode->v_stream->sd_wrq->q_next;
	cm_entry->x_wq = wq;

	mutex_exit(&connmgr_lock);

	if (i = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"timod", 0,
	    K_TO_K, kcred, &retval)) {
		RPCLOG(1, "connmgr_get: can't push timod, %d\n", i);
		(void) t_kclose(tiptr, 1);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}

	/*
	 * If the caller has not specified reserved port usage then
	 * take the system default.
	 */
	if (useresvport == -1)
		useresvport = clnt_cots_do_bindresvport;

	if ((useresvport || retryaddr != NULL) &&
	    (addrfmly == AF_INET || addrfmly == AF_INET6)) {
		bool_t alloc_src = FALSE;

		if (srcaddr->len != destaddr->len) {
			kmem_free(srcaddr->buf, srcaddr->maxlen);
			srcaddr->buf = kmem_zalloc(destaddr->len, KM_SLEEP);
			srcaddr->maxlen = destaddr->len;
			srcaddr->len = destaddr->len;
			alloc_src = TRUE;
		}

		if ((i = bindresvport(tiptr, retryaddr, srcaddr, TRUE)) != 0) {
			(void) t_kclose(tiptr, 1);
			RPCLOG(1, "connmgr_get: couldn't bind, retryaddr: "
			    "%p\n", (void *)retryaddr);

			/*
			 * 1225408: If we allocated a source address, then it
			 * is either garbage or all zeroes. In that case
			 * we need to clear srcaddr.
			 */
			if (alloc_src == TRUE) {
				kmem_free(srcaddr->buf, srcaddr->maxlen);
				srcaddr->maxlen = srcaddr->len = 0;
				srcaddr->buf = NULL;
			}
			rpcerr->re_errno = i;
			connmgr_cancelconn(cm_entry);
			return (NULL);
		}
	} else {
		if ((i = t_kbind(tiptr, NULL, NULL)) != 0) {
			RPCLOG(1, "clnt_cots_kcreate: t_kbind: %d\n", i);
			(void) t_kclose(tiptr, 1);
			rpcerr->re_errno = i;
			connmgr_cancelconn(cm_entry);
			return (NULL);
		}
	}

	{
		/*
		 * Keep the kernel stack lean. Don't move this `call'
		 * declaration to the top of this function because a
		 * calllist_t is also declared in connmgr_wrapconnect().
		 */
		calllist_t call;

		bzero(&call, sizeof (call));
		cv_init(&call.call_cv, NULL, CV_DEFAULT, NULL);

		/*
		 * This is a bound end-point so don't close its stream.
		 */
		connected = connmgr_connect(cm_entry, wq, destaddr, addrfmly,
		    &call, &tidu_size, FALSE, waitp,
		    nosignal);
		*rpcerr = call.call_err;
		cv_destroy(&call.call_cv);

	}

	mutex_enter(&connmgr_lock);

	/*
	 * Set up a transport entry in the connection manager's list.
	 */
	cm_entry->x_src.buf = kmem_zalloc(srcaddr->len, KM_SLEEP);
	bcopy(srcaddr->buf, cm_entry->x_src.buf, srcaddr->len);
	cm_entry->x_src.len = cm_entry->x_src.maxlen = srcaddr->len;

	cm_entry->x_tiptr = tiptr;
	cm_entry->x_time = lbolt;

	if (tiptr->tp_info.servtype == T_COTS_ORD)
		cm_entry->x_ordrel = TRUE;
	else
		cm_entry->x_ordrel = FALSE;

	cm_entry->x_tidu_size = tidu_size;

	if (cm_entry->x_early_disc)
		cm_entry->x_connected = FALSE;
	else
		cm_entry->x_connected = connected;

	/*
	 * There could be a discrepancy here such that
	 * x_early_disc is TRUE yet connected is TRUE as well
	 * and the connection is actually connected. In that case
	 * let's be conservative and declare the connection as not
	 * connected.
	 */
	cm_entry->x_early_disc = FALSE;
	cm_entry->x_needdis = (cm_entry->x_connected == FALSE);
	cm_entry->x_ctime = lbolt;

	/*
	 * Notify any threads waiting that the connection attempt is done.
	 */
	cm_entry->x_thread = FALSE;
	cv_broadcast(&cm_entry->x_conn_cv);

	mutex_exit(&connmgr_lock);

	if (cm_entry->x_connected == FALSE) {
		connmgr_release(cm_entry);
		return (NULL);
	}
	return (cm_entry);
}

/*
 * Keep the cm_xprt entry on the connection list when making a connection. This
 * is to prevent multiple connections to a slow server from appearing.
 * We use the bit field x_thread to tell if a thread is doing a connection,
 * which keeps other interested threads from messing with the connection.
 * Those other threads just wait if x_thread is set.
 *
 * If x_thread is not set, then we do the actual work of connecting via
 * connmgr_connect().
 *
 * mutex convention: called with connmgr_lock held, returns with it released.
 */
static struct cm_xprt *
connmgr_wrapconnect(
	struct cm_xprt *cm_entry,
	const struct timeval *waitp,
	struct netbuf *destaddr,
	int addrfmly,
	struct netbuf *srcaddr,
	struct rpc_err *rpcerr,
	bool_t reconnect,
	bool_t nosignal)
{
	ASSERT(MUTEX_HELD(&connmgr_lock));
	/*
	 * Hold this entry as we are about to drop connmgr_lock.
	 */
	CONN_HOLD(cm_entry);

	/*
	 * If there is a thread already making a connection for us, then
	 * wait for it to complete the connection.
	 */
	if (cm_entry->x_thread == TRUE) {
		rpcerr->re_status = connmgr_cwait(cm_entry, waitp, nosignal);

		if (rpcerr->re_status != RPC_SUCCESS) {
			mutex_exit(&connmgr_lock);
			connmgr_release(cm_entry);
			return (NULL);
		}
	} else {
		bool_t connected;
		calllist_t call;

		cm_entry->x_thread = TRUE;

		while (cm_entry->x_needrel == TRUE) {
			cm_entry->x_needrel = FALSE;

			connmgr_sndrel(cm_entry);
			delay(drv_usectohz(1000000));

			mutex_enter(&connmgr_lock);
		}

		/*
		 * If we need to send a T_DISCON_REQ, send one.
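		 * connmgr_dis_and_wait() blocks until the disconnect
		 * handshake completes (or we give up waiting for the
		 * ack), so the connect below starts from a clean state.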
		 */
		connmgr_dis_and_wait(cm_entry);

		mutex_exit(&connmgr_lock);

		bzero(&call, sizeof (call));
		cv_init(&call.call_cv, NULL, CV_DEFAULT, NULL);

		connected = connmgr_connect(cm_entry, cm_entry->x_wq,
		    destaddr, addrfmly, &call,
		    &cm_entry->x_tidu_size,
		    reconnect, waitp, nosignal);

		*rpcerr = call.call_err;
		cv_destroy(&call.call_cv);

		mutex_enter(&connmgr_lock);

		if (cm_entry->x_early_disc)
			cm_entry->x_connected = FALSE;
		else
			cm_entry->x_connected = connected;

		/*
		 * There could be a discrepancy here such that
		 * x_early_disc is TRUE yet connected is TRUE as well
		 * and the connection is actually connected. In that case
		 * let's be conservative and declare the connection as not
		 * connected.
		 */
		cm_entry->x_early_disc = FALSE;
		cm_entry->x_needdis = (cm_entry->x_connected == FALSE);

		/*
		 * connmgr_connect() may have given up before the connection
		 * actually timed out. So ensure that before the next
		 * connection attempt we do a disconnect.
		 */
		cm_entry->x_ctime = lbolt;
		cm_entry->x_thread = FALSE;

		cv_broadcast(&cm_entry->x_conn_cv);

		if (cm_entry->x_connected == FALSE) {
			mutex_exit(&connmgr_lock);
			connmgr_release(cm_entry);
			return (NULL);
		}
	}

	if (srcaddr != NULL) {
		/*
		 * Copy into the handle the source address of the
		 * connection, which we will use in case of a later retry.
		 */
		if (srcaddr->len != cm_entry->x_src.len) {
			if (srcaddr->maxlen > 0)
				kmem_free(srcaddr->buf, srcaddr->maxlen);
			srcaddr->buf = kmem_zalloc(cm_entry->x_src.len,
			    KM_SLEEP);
			srcaddr->maxlen = srcaddr->len =
			    cm_entry->x_src.len;
		}
		bcopy(cm_entry->x_src.buf, srcaddr->buf, srcaddr->len);
	}
	cm_entry->x_time = lbolt;
	mutex_exit(&connmgr_lock);
	return (cm_entry);
}

/*
 * If we need to send a T_DISCON_REQ, send one.
 */
static void
connmgr_dis_and_wait(struct cm_xprt *cm_entry)
{
	ASSERT(MUTEX_HELD(&connmgr_lock));
	for (;;) {
		while (cm_entry->x_needdis == TRUE) {
			RPCLOG(8, "connmgr_dis_and_wait: need "
			    "T_DISCON_REQ for connection 0x%p\n",
			    (void *)cm_entry);
			cm_entry->x_needdis = FALSE;
			cm_entry->x_waitdis = TRUE;

			connmgr_snddis(cm_entry);

			mutex_enter(&connmgr_lock);
		}

		if (cm_entry->x_waitdis == TRUE) {
			clock_t curlbolt;
			clock_t timout;

			RPCLOG(8, "connmgr_dis_and_wait: waiting for "
			    "T_DISCON_REQ's ACK for connection %p\n",
			    (void *)cm_entry);
			curlbolt = ddi_get_lbolt();

			timout = clnt_cots_min_conntout *
			    drv_usectohz(1000000) + curlbolt;

			/*
			 * The TPI spec says that the T_DISCON_REQ
			 * will get acknowledged, but in practice
			 * the ACK may never get sent. So don't
			 * block forever.
			 */
			(void) cv_timedwait(&cm_entry->x_dis_cv,
			    &connmgr_lock, timout);
		}
		/*
		 * If we got the ACK, break. If we didn't,
		 * then send another T_DISCON_REQ.
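		 * (x_waitdis is cleared, and x_dis_cv signalled, by
		 * clnt_dispatch_notifyall() when the T_OK_ACK or
		 * T_ERROR_ACK for our T_DISCON_REQ arrives.)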
		 */
		if (cm_entry->x_waitdis == FALSE) {
			break;
		} else {
			RPCLOG(8, "connmgr_dis_and_wait: did "
			    "not get T_DISCON_REQ's ACK for "
			    "connection %p\n", (void *)cm_entry);
			cm_entry->x_needdis = TRUE;
		}
	}
}

static void
connmgr_cancelconn(struct cm_xprt *cm_entry)
{
	/*
	 * Mark the connection table entry as dead; the next thread that
	 * goes through connmgr_release() will notice this and deal with it.
	 */
	mutex_enter(&connmgr_lock);
	cm_entry->x_dead = TRUE;

	/*
	 * Notify any threads waiting for the connection that it isn't
	 * going to happen.
	 */
	cm_entry->x_thread = FALSE;
	cv_broadcast(&cm_entry->x_conn_cv);
	mutex_exit(&connmgr_lock);

	connmgr_release(cm_entry);
}

static void
connmgr_close(struct cm_xprt *cm_entry)
{
	mutex_enter(&cm_entry->x_lock);
	while (cm_entry->x_ref != 0) {
		/*
		 * Must be a noninterruptible wait.
		 */
		cv_wait(&cm_entry->x_cv, &cm_entry->x_lock);
	}

	if (cm_entry->x_tiptr != NULL)
		(void) t_kclose(cm_entry->x_tiptr, 1);

	mutex_exit(&cm_entry->x_lock);
	if (cm_entry->x_ksp != NULL) {
		mutex_enter(&connmgr_lock);
		cm_entry->x_ksp->ks_private = NULL;
		mutex_exit(&connmgr_lock);

		/*
		 * Must free the buffer we allocated for the
		 * server address in the update function.
		 */
		if (((struct cm_kstat_xprt *)(cm_entry->x_ksp->ks_data))->
		    x_server.value.str.addr.ptr != NULL)
			kmem_free(((struct cm_kstat_xprt *)(cm_entry->x_ksp->
			    ks_data))->x_server.value.str.addr.ptr,
			    INET6_ADDRSTRLEN);
		kmem_free(cm_entry->x_ksp->ks_data,
		    cm_entry->x_ksp->ks_data_size);
		kstat_delete(cm_entry->x_ksp);
	}

	mutex_destroy(&cm_entry->x_lock);
	cv_destroy(&cm_entry->x_cv);
	cv_destroy(&cm_entry->x_conn_cv);
	cv_destroy(&cm_entry->x_dis_cv);

	if (cm_entry->x_server.buf != NULL)
		kmem_free(cm_entry->x_server.buf, cm_entry->x_server.maxlen);
	if (cm_entry->x_src.buf != NULL)
		kmem_free(cm_entry->x_src.buf, cm_entry->x_src.maxlen);
	kmem_free(cm_entry, sizeof (struct cm_xprt));
}

/*
 * Called by kRPC after sending the call message, to release the connection
 * it was using.
 */
static void
connmgr_release(struct cm_xprt *cm_entry)
{
	mutex_enter(&cm_entry->x_lock);
	cm_entry->x_ref--;
	if (cm_entry->x_ref == 0)
		cv_signal(&cm_entry->x_cv);
	mutex_exit(&cm_entry->x_lock);
}

/*
 * Given an open stream, connect to the remote. Returns true if connected,
 * false otherwise.
 */
static bool_t
connmgr_connect(
	struct cm_xprt *cm_entry,
	queue_t *wq,
	struct netbuf *addr,
	int addrfmly,
	calllist_t *e,
	int *tidu_ptr,
	bool_t reconnect,
	const struct timeval *waitp,
	bool_t nosignal)
{
	mblk_t *mp;
	struct T_conn_req *tcr;
	struct T_info_ack *tinfo;
	int interrupted, error;
	int tidu_size, kstat_instance;

	/* if it's a reconnect, flush any lingering data messages */
	if (reconnect)
		(void) putctl1(wq, M_FLUSH, FLUSHRW);

	mp = allocb(sizeof (*tcr) + addr->len, BPRI_LO);
	if (mp == NULL) {
		/*
		 * This is unfortunate, but we need to look up the stats for
		 * this zone to increment the "memory allocation failed"
		 * counter.
		 * curproc->p_zone is safe since we're initiating a
		 * connection and not in some strange streams context.
		 */
		struct rpcstat *rpcstat;

		rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone);
		ASSERT(rpcstat != NULL);

		RPCLOG0(1, "connmgr_connect: cannot alloc mp for "
		    "sending conn request\n");
		COTSRCSTAT_INCR(rpcstat->rpc_cots_client, rcnomem);
		e->call_status = RPC_SYSTEMERROR;
		e->call_reason = ENOSR;
		return (FALSE);
	}

	mp->b_datap->db_type = M_PROTO;
	tcr = (struct T_conn_req *)mp->b_rptr;
	bzero(tcr, sizeof (*tcr));
	tcr->PRIM_type = T_CONN_REQ;
	tcr->DEST_length = addr->len;
	tcr->DEST_offset = sizeof (struct T_conn_req);
	mp->b_wptr = mp->b_rptr + sizeof (*tcr);

	bcopy(addr->buf, mp->b_wptr, tcr->DEST_length);
	mp->b_wptr += tcr->DEST_length;

	RPCLOG(8, "connmgr_connect: sending conn request on queue "
	    "%p", (void *)wq);
	RPCLOG(8, " call %p\n", (void *)wq);
	/*
	 * We use the entry in the handle that is normally used for
	 * waiting for RPC replies to wait for the connection accept.
	 */
	clnt_dispatch_send(wq, mp, e, 0, 0);

	mutex_enter(&clnt_pending_lock);

	/*
	 * We wait for the transport connection to be made, or an
	 * indication that it could not be made.
	 */
	interrupted = 0;

	/*
	 * waitforack should have been called with T_OK_ACK, but the
	 * present implementation needs to be passed T_INFO_ACK to
	 * work correctly.
	 */
	error = waitforack(e, T_INFO_ACK, waitp, nosignal);
	if (error == EINTR)
		interrupted = 1;
	if (zone_status_get(curproc->p_zone) >= ZONE_IS_EMPTY) {
		/*
		 * No time to lose; we essentially have been signaled to
		 * quit.
		 */
		interrupted = 1;
	}
#ifdef RPCDEBUG
	if (error == ETIME)
		RPCLOG0(8, "connmgr_connect: giving up "
		    "on connection attempt; "
		    "clnt_dispatch notifyconn "
		    "diagnostic 'no one waiting for "
		    "connection' should not be "
		    "unexpected\n");
#endif
	if (e->call_prev)
		e->call_prev->call_next = e->call_next;
	else
		clnt_pending = e->call_next;
	if (e->call_next)
		e->call_next->call_prev = e->call_prev;
	mutex_exit(&clnt_pending_lock);

	if (e->call_status != RPC_SUCCESS || error != 0) {
		if (interrupted)
			e->call_status = RPC_INTR;
		else if (error == ETIME)
			e->call_status = RPC_TIMEDOUT;
		else if (error == EPROTO)
			e->call_status = RPC_SYSTEMERROR;

		RPCLOG(8, "connmgr_connect: can't connect, status: "
		    "%s\n", clnt_sperrno(e->call_status));

		if (e->call_reply) {
			freemsg(e->call_reply);
			e->call_reply = NULL;
		}

		return (FALSE);
	}
	/*
	 * The result of the "connection accept" is a T_info_ack
	 * in the call_reply field.
	 */
	ASSERT(e->call_reply != NULL);
	mp = e->call_reply;
	e->call_reply = NULL;
	tinfo = (struct T_info_ack *)mp->b_rptr;

	tidu_size = tinfo->TIDU_size;
	tidu_size -= (tidu_size % BYTES_PER_XDR_UNIT);
	if (tidu_size > COTS_DEFAULT_ALLOCSIZE || (tidu_size <= 0))
		tidu_size = COTS_DEFAULT_ALLOCSIZE;
	*tidu_ptr = tidu_size;

	freemsg(mp);

	/*
	 * Set up the pertinent options. NODELAY is so the transport doesn't
	 * buffer up RPC messages on either end. This may not be valid for
	 * all transports.
	 * Failure to set this option is not cause to
	 * bail out, so we return success anyway. Note that lack of NODELAY
	 * or some other way to flush the message on both ends will cause
	 * lots of retries and terrible performance.
	 */
	if (addrfmly == AF_INET || addrfmly == AF_INET6) {
		(void) connmgr_setopt(wq, IPPROTO_TCP, TCP_NODELAY, e);
		if (e->call_status == RPC_XPRTFAILED)
			return (FALSE);
	}

	/*
	 * Since we have a connection, we now need to figure out if
	 * we need to create a kstat. If x_ksp is not NULL then we
	 * are reusing a connection and so we do not need to create
	 * another kstat -- let's just return.
	 */
	if (cm_entry->x_ksp != NULL)
		return (TRUE);

	/*
	 * We need to increment rpc_kstat_instance atomically to prevent
	 * two kstats being created with the same instance.
	 */
	kstat_instance = atomic_add_32_nv((uint32_t *)&rpc_kstat_instance, 1);

	if ((cm_entry->x_ksp = kstat_create_zone("unix", kstat_instance,
	    "rpc_cots_connections", "rpc", KSTAT_TYPE_NAMED,
	    (uint_t)(sizeof (cm_kstat_xprt_t) / sizeof (kstat_named_t)),
	    KSTAT_FLAG_VIRTUAL, cm_entry->x_zoneid)) == NULL) {
		return (TRUE);
	}

	cm_entry->x_ksp->ks_lock = &connmgr_lock;
	cm_entry->x_ksp->ks_private = cm_entry;
	cm_entry->x_ksp->ks_data_size = ((INET6_ADDRSTRLEN * sizeof (char))
	    + sizeof (cm_kstat_template));
	cm_entry->x_ksp->ks_data = kmem_alloc(cm_entry->x_ksp->ks_data_size,
	    KM_SLEEP);
	bcopy(&cm_kstat_template, cm_entry->x_ksp->ks_data,
	    cm_entry->x_ksp->ks_data_size);
	((struct cm_kstat_xprt *)(cm_entry->x_ksp->ks_data))->
	    x_server.value.str.addr.ptr =
	    kmem_alloc(INET6_ADDRSTRLEN, KM_SLEEP);

	cm_entry->x_ksp->ks_update = conn_kstat_update;
	kstat_install(cm_entry->x_ksp);
	return (TRUE);
}

/*
 * Called by connmgr_connect to set an option on the new stream.
 */
static bool_t
connmgr_setopt(queue_t *wq, int level, int name, calllist_t *e)
{
	mblk_t *mp;
	struct opthdr *opt;
	struct T_optmgmt_req *tor;
	struct timeval waitp;
	int error;

	mp = allocb(sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) +
	    sizeof (int), BPRI_LO);
	if (mp == NULL) {
		RPCLOG0(1, "connmgr_setopt: cannot alloc mp for option "
		    "request\n");
		return (FALSE);
	}

	mp->b_datap->db_type = M_PROTO;
	tor = (struct T_optmgmt_req *)(mp->b_rptr);
	tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
	tor->MGMT_flags = T_NEGOTIATE;
	tor->OPT_length = sizeof (struct opthdr) + sizeof (int);
	tor->OPT_offset = sizeof (struct T_optmgmt_req);

	opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req));
	opt->level = level;
	opt->name = name;
	opt->len = sizeof (int);
	*(int *)((char *)opt + sizeof (*opt)) = 1;
	mp->b_wptr += sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) +
	    sizeof (int);

	/*
	 * We will use this connection regardless
	 * of whether or not the option is settable.
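	 * (The caller ignores our return value and treats only
	 * RPC_XPRTFAILED in e->call_status as fatal; a mere refusal
	 * to set the option is not; see connmgr_connect() above.)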
	 */
	clnt_dispatch_send(wq, mp, e, 0, 0);
	mutex_enter(&clnt_pending_lock);

	waitp.tv_sec = clnt_cots_min_conntout;
	waitp.tv_usec = 0;
	error = waitforack(e, T_OPTMGMT_ACK, &waitp, 1);

	if (e->call_prev)
		e->call_prev->call_next = e->call_next;
	else
		clnt_pending = e->call_next;
	if (e->call_next)
		e->call_next->call_prev = e->call_prev;
	mutex_exit(&clnt_pending_lock);

	if (e->call_reply != NULL) {
		freemsg(e->call_reply);
		e->call_reply = NULL;
	}

	if (e->call_status != RPC_SUCCESS || error != 0) {
		RPCLOG(1, "connmgr_setopt: can't set option: %d\n", name);
		return (FALSE);
	}
	RPCLOG(8, "connmgr_setopt: successfully set option: %d\n", name);
	return (TRUE);
}

#ifdef DEBUG

/*
 * This is a knob to let us force code coverage in the allocation
 * failure case.
 */
static int connmgr_failsnd;
#define	CONN_SND_ALLOC(Size, Pri)	\
	((connmgr_failsnd-- > 0) ? NULL : allocb(Size, Pri))

#else

#define	CONN_SND_ALLOC(Size, Pri)	allocb(Size, Pri)

#endif

/*
 * Sends an orderly release on the specified queue.
 * Entered with connmgr_lock held. Exited without connmgr_lock.
 */
static void
connmgr_sndrel(struct cm_xprt *cm_entry)
{
	struct T_ordrel_req *torr;
	mblk_t *mp;
	queue_t *q = cm_entry->x_wq;

	ASSERT(MUTEX_HELD(&connmgr_lock));
	mp = CONN_SND_ALLOC(sizeof (struct T_ordrel_req), BPRI_LO);
	if (mp == NULL) {
		cm_entry->x_needrel = TRUE;
		mutex_exit(&connmgr_lock);
		RPCLOG(1, "connmgr_sndrel: cannot alloc mp for sending ordrel "
		    "to queue %p\n", (void *)q);
		return;
	}
	mutex_exit(&connmgr_lock);

	mp->b_datap->db_type = M_PROTO;
	torr = (struct T_ordrel_req *)(mp->b_rptr);
	torr->PRIM_type = T_ORDREL_REQ;
	mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_req);

	RPCLOG(8, "connmgr_sndrel: sending ordrel to queue %p\n", (void *)q);
	put(q, mp);
}

/*
 * Sends a disconnect on the specified queue.
 * Entered with connmgr_lock held. Exited without connmgr_lock.
 */
static void
connmgr_snddis(struct cm_xprt *cm_entry)
{
	struct T_discon_req *tdis;
	mblk_t *mp;
	queue_t *q = cm_entry->x_wq;

	ASSERT(MUTEX_HELD(&connmgr_lock));
	mp = CONN_SND_ALLOC(sizeof (*tdis), BPRI_LO);
	if (mp == NULL) {
		cm_entry->x_needdis = TRUE;
		mutex_exit(&connmgr_lock);
		RPCLOG(1, "connmgr_snddis: cannot alloc mp for sending discon "
		    "to queue %p\n", (void *)q);
		return;
	}
	mutex_exit(&connmgr_lock);

	mp->b_datap->db_type = M_PROTO;
	tdis = (struct T_discon_req *)mp->b_rptr;
	tdis->PRIM_type = T_DISCON_REQ;
	mp->b_wptr = mp->b_rptr + sizeof (*tdis);

	RPCLOG(8, "connmgr_snddis: sending discon to queue %p\n", (void *)q);
	put(q, mp);
}

/*
 * Sets up the entry for receiving replies, and calls rpcmod's write put proc
 * (through put) to send the call.
 */
static void
clnt_dispatch_send(queue_t *q, mblk_t *mp, calllist_t *e, uint_t xid,
	uint_t queue_flag)
{
	ASSERT(e != NULL);

	e->call_status = RPC_TIMEDOUT;	/* optimistic, eh? */
	e->call_reason = 0;
	e->call_wq = q;
	e->call_xid = xid;
	e->call_notified = FALSE;

	/*
	 * If queue_flag is set then the calllist_t is already on the hash
	 * queue.
	 * In this case just send the message and return.
	 */
	if (queue_flag) {
		put(q, mp);
		return;
	}

	/*
	 * Set up calls for RPC requests (with XID != 0) on the hash
	 * queue for fast lookups and place other calls (i.e.
	 * connection management) on the linked list.
	 */
	if (xid != 0) {
		RPCLOG(64, "clnt_dispatch_send: putting xid 0x%x on "
		    "dispatch list\n", xid);
		e->call_hash = call_hash(xid, clnt_cots_hash_size);
		e->call_bucket = &cots_call_ht[e->call_hash];
		call_table_enter(e);
	} else {
		mutex_enter(&clnt_pending_lock);
		if (clnt_pending)
			clnt_pending->call_prev = e;
		e->call_next = clnt_pending;
		e->call_prev = NULL;
		clnt_pending = e;
		mutex_exit(&clnt_pending_lock);
	}

	put(q, mp);
}

/*
 * Called by rpcmod to notify a client with a clnt_pending call that its reply
 * has arrived. If we can't find a client waiting for this reply, we log
 * the error and return.
 */
bool_t
clnt_dispatch_notify(mblk_t *mp, zoneid_t zoneid)
{
	calllist_t *e = NULL;
	call_table_t *chtp;
	uint32_t xid;
	uint_t hash;

	if ((IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) &&
	    (mp->b_wptr - mp->b_rptr) >= sizeof (xid))
		xid = *((uint32_t *)mp->b_rptr);
	else {
		int i = 0;
		unsigned char *p = (unsigned char *)&xid;
		unsigned char *rptr;
		mblk_t *tmp = mp;

		/*
		 * Copy the xid byte-by-byte out of the mblk chain.
		 */
		while (tmp) {
			rptr = tmp->b_rptr;
			while (rptr < tmp->b_wptr) {
				*p++ = *rptr++;
				if (++i >= sizeof (xid))
					goto done_xid_copy;
			}
			tmp = tmp->b_cont;
		}

		/*
		 * If we got here, we ran out of mblk space before the
		 * xid could be copied.
		 */
		ASSERT(tmp == NULL && i < sizeof (xid));

		RPCLOG0(1,
		    "clnt_dispatch_notify: message less than size of xid\n");
		return (FALSE);
	}
done_xid_copy:

	hash = call_hash(xid, clnt_cots_hash_size);
	chtp = &cots_call_ht[hash];
	/* call_table_find returns with the hash bucket locked */
	call_table_find(chtp, xid, e);

	if (e != NULL) {
		/*
		 * Found thread waiting for this reply.
		 */
		mutex_enter(&e->call_lock);
		if (e->call_reply)
			/*
			 * This can happen under the following scenario:
			 * clnt_cots_kcallit() times out on the response,
			 * rfscall() repeats the CLNT_CALL() with
			 * the same xid, clnt_cots_kcallit() sends the retry,
			 * thereby putting the clnt handle on the pending list,
			 * the first response arrives, signalling the thread
			 * in clnt_cots_kcallit(). Before that thread is
			 * dispatched, the second response arrives as well,
			 * and clnt_dispatch_notify still finds the handle on
			 * the pending list, with call_reply set. So free the
			 * old reply now.
			 *
			 * It is also possible for a response intended for
			 * an RPC call with a different xid to reside here.
			 * This can happen if the thread that owned this
			 * client handle prior to the current owner bailed
			 * out and left its call record on the dispatch
			 * queue. A window exists where the response can
			 * arrive before the current owner dispatches its
			 * RPC call.
			 *
			 * In any case, this is the very last point where we
			 * can safely check the call_reply field before
			 * placing the new response there.
			 */
			freemsg(e->call_reply);
		e->call_reply = mp;
		e->call_status = RPC_SUCCESS;
		e->call_notified = TRUE;
		cv_signal(&e->call_cv);
		mutex_exit(&e->call_lock);
		mutex_exit(&chtp->ct_lock);
		return (TRUE);
	} else {
		zone_t *zone;
		struct rpcstat *rpcstat;

		mutex_exit(&chtp->ct_lock);
		RPCLOG(65, "clnt_dispatch_notify: no caller for reply 0x%x\n",
		    xid);
		/*
		 * This is unfortunate, but we need to look up the zone so we
		 * can increment its "rcbadxids" counter.
		 */
		zone = zone_find_by_id(zoneid);
		if (zone == NULL) {
			/*
			 * The zone went away...
			 */
			return (FALSE);
		}
		rpcstat = zone_getspecific(rpcstat_zone_key, zone);
		if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN) {
			/*
			 * Not interested.
			 */
			zone_rele(zone);
			return (FALSE);
		}
		COTSRCSTAT_INCR(rpcstat->rpc_cots_client, rcbadxids);
		zone_rele(zone);
	}
	return (FALSE);
}

/*
 * Called by rpcmod when a non-data indication arrives. The ones in which we
 * are interested are connection indications and options acks. We dispatch
 * based on the queue the indication came in on. If we are not interested in
 * what came in, we return false to rpcmod, who will then pass it upstream.
 */
bool_t
clnt_dispatch_notifyconn(queue_t *q, mblk_t *mp)
{
	calllist_t *e;
	int type;

	ASSERT((q->q_flag & QREADR) == 0);

	type = ((union T_primitives *)mp->b_rptr)->type;
	RPCLOG(8, "clnt_dispatch_notifyconn: prim type: [%s]\n",
	    rpc_tpiprim2name(type));

	mutex_enter(&clnt_pending_lock);
	for (e = clnt_pending; /* NO CONDITION */; e = e->call_next) {
		if (e == NULL) {
			mutex_exit(&clnt_pending_lock);
			RPCLOG(1, "clnt_dispatch_notifyconn: no one waiting "
			    "for connection on queue 0x%p\n", (void *)q);
			return (FALSE);
		}
		if (e->call_wq == q)
			break;
	}

	switch (type) {
	case T_CONN_CON:
		/*
		 * The transport is now connected, send a T_INFO_REQ to get
		 * the tidu size.
		 */
		mutex_exit(&clnt_pending_lock);
		ASSERT(mp->b_datap->db_lim - mp->b_datap->db_base >=
		    sizeof (struct T_info_req));
		mp->b_rptr = mp->b_datap->db_base;
		((union T_primitives *)mp->b_rptr)->type = T_INFO_REQ;
		mp->b_wptr = mp->b_rptr + sizeof (struct T_info_req);
		mp->b_datap->db_type = M_PCPROTO;
		put(q, mp);
		return (TRUE);
	case T_INFO_ACK:
	case T_OPTMGMT_ACK:
		e->call_status = RPC_SUCCESS;
		e->call_reply = mp;
		e->call_notified = TRUE;
		cv_signal(&e->call_cv);
		break;
	case T_ERROR_ACK:
		e->call_status = RPC_CANTCONNECT;
		e->call_reply = mp;
		e->call_notified = TRUE;
		cv_signal(&e->call_cv);
		break;
	case T_OK_ACK:
		/*
		 * Great, but we are really waiting for a T_CONN_CON.
		 */
		freemsg(mp);
		break;
	default:
		mutex_exit(&clnt_pending_lock);
		RPCLOG(1, "clnt_dispatch_notifyconn: bad type %d\n", type);
		return (FALSE);
	}

	mutex_exit(&clnt_pending_lock);
	return (TRUE);
}

/*
 * Called by rpcmod when the transport is (or should be) going away. Informs
 * all callers waiting for replies and marks the entry in the connection
 * manager's list as unconnected, and either closing (close handshake in
 * progress) or dead.
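 *
 * For reference, the switch below takes roughly the following actions,
 * summarized from the code:
 *
 *	T_ORDREL_REQ	rpcmod's idle timer fired; start an orderly
 *			close, or just mark the entry dead
 *	T_ORDREL_IND	complete a close handshake, or answer the
 *			server's release with a T_DISCON_REQ
 *	T_ERROR_ACK,
 *	T_OK_ACK	the ack for our T_DISCON_REQ; wake the thread
 *			waiting in connmgr_dis_and_wait()
 *	T_DISCON_REQ	force a disconnect of the transport
 *	T_DISCON_IND	(and default) the connection is gone; mark the
 *			entry unconnected or dead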
 */
void
clnt_dispatch_notifyall(queue_t *q, int32_t msg_type, int32_t reason)
{
	calllist_t *e;
	call_table_t *ctp;
	struct cm_xprt *cm_entry;
	int have_connmgr_lock;
	int i;

	ASSERT((q->q_flag & QREADR) == 0);

	RPCLOG(1, "clnt_dispatch_notifyall on queue %p", (void *)q);
	RPCLOG(1, " received a notification prim type [%s]",
	    rpc_tpiprim2name(msg_type));
	RPCLOG(1, " and reason %d\n", reason);

	/*
	 * Find the transport entry in the connection manager's list, close
	 * the transport and delete the entry. In the case where rpcmod's
	 * idle timer goes off, it sends us a T_ORDREL_REQ, indicating we
	 * should gracefully close the connection.
	 */
	have_connmgr_lock = 1;
	mutex_enter(&connmgr_lock);
	for (cm_entry = cm_hd; cm_entry; cm_entry = cm_entry->x_next) {
		ASSERT(cm_entry != cm_entry->x_next);
		if (cm_entry->x_wq == q) {
			ASSERT(MUTEX_HELD(&connmgr_lock));
			ASSERT(have_connmgr_lock == 1);
			switch (msg_type) {
			case T_ORDREL_REQ:

				if (cm_entry->x_dead) {
					RPCLOG(1, "idle timeout on dead "
					    "connection: %p\n",
					    (void *)cm_entry);
					if (clnt_stop_idle != NULL)
						(*clnt_stop_idle)(q);
					break;
				}

				/*
				 * Only mark the connection as dead if it is
				 * connected and idle.
				 * An unconnected connection has probably
				 * gone idle because the server is down,
				 * and when it comes back up there will be
				 * retries that need to use that connection.
				 */
				if (cm_entry->x_connected ||
				    cm_entry->x_doomed) {
					if (cm_entry->x_ordrel) {
						if (cm_entry->x_closing == TRUE) {
							/*
							 * The connection is
							 * obviously wedged due
							 * to a bug or problem
							 * with the transport.
							 * Mark it as dead.
							 * Otherwise we can
							 * leak connections.
							 */
							cm_entry->x_dead = TRUE;
							mutex_exit(
							    &connmgr_lock);
							have_connmgr_lock = 0;
							if (clnt_stop_idle !=
							    NULL)
								(*clnt_stop_idle)(q);
							break;
						}
						cm_entry->x_closing = TRUE;
						connmgr_sndrel(cm_entry);
						have_connmgr_lock = 0;
					} else {
						cm_entry->x_dead = TRUE;
						mutex_exit(&connmgr_lock);
						have_connmgr_lock = 0;
						if (clnt_stop_idle != NULL)
							(*clnt_stop_idle)(q);
					}
				} else {
					/*
					 * We don't mark the connection
					 * as dead, but we turn off the
					 * idle timer.
					 */
					mutex_exit(&connmgr_lock);
					have_connmgr_lock = 0;
					if (clnt_stop_idle != NULL)
						(*clnt_stop_idle)(q);
					RPCLOG(1, "clnt_dispatch_notifyall:"
					    " ignoring timeout from rpcmod"
					    " (q %p) because we are not"
					    " connected\n", (void *)q);
				}
				break;
			case T_ORDREL_IND:
				/*
				 * If this entry is marked closing, then we are
				 * completing a close handshake, and the
				 * connection is dead. Otherwise, the server is
				 * trying to close. Since the server will not
				 * be sending any more RPC replies, we abort
				 * the connection, including flushing
				 * any RPC requests that are in-transit.
				 */
				if (cm_entry->x_closing) {
					cm_entry->x_dead = TRUE;
					mutex_exit(&connmgr_lock);
					have_connmgr_lock = 0;
					if (clnt_stop_idle != NULL)
						(*clnt_stop_idle)(q);
				} else {
					/*
					 * If we're getting a disconnect
					 * before we've finished our
					 * connect attempt, mark it for
					 * later processing.
					 */
					if (cm_entry->x_thread)
						cm_entry->x_early_disc = TRUE;
					else
						cm_entry->x_connected = FALSE;
					cm_entry->x_waitdis = TRUE;
					connmgr_snddis(cm_entry);
					have_connmgr_lock = 0;
				}
				break;

			case T_ERROR_ACK:
			case T_OK_ACK:
				cm_entry->x_waitdis = FALSE;
				cv_signal(&cm_entry->x_dis_cv);
				mutex_exit(&connmgr_lock);
				return;

			case T_DISCON_REQ:
				if (cm_entry->x_thread)
					cm_entry->x_early_disc = TRUE;
				else
					cm_entry->x_connected = FALSE;
				cm_entry->x_waitdis = TRUE;

				connmgr_snddis(cm_entry);
				have_connmgr_lock = 0;
				break;

			case T_DISCON_IND:
			default:
				if (cm_entry->x_closing) {
					cm_entry->x_dead = TRUE;
					mutex_exit(&connmgr_lock);
					have_connmgr_lock = 0;
					if (clnt_stop_idle != NULL)
						(*clnt_stop_idle)(q);
				} else {
					/*
					 * If we're getting a disconnect
					 * before we've finished our
					 * connect attempt, mark it for
					 * later processing.
					 */
					if (cm_entry->x_thread) {
						cm_entry->x_early_disc = TRUE;
					} else {
						cm_entry->x_dead = TRUE;
						cm_entry->x_connected = FALSE;
					}
				}
				break;
			}
			break;
		}
	}

	if (have_connmgr_lock)
		mutex_exit(&connmgr_lock);

	if (msg_type == T_ERROR_ACK || msg_type == T_OK_ACK) {
		RPCLOG(1, "clnt_dispatch_notifyall: (wq %p) could not find "
		    "connmgr entry for discon ack\n", (void *)q);
		return;
	}

	/*
	 * Then kick all the clnt_pending calls out of their wait. There
	 * should be no clnt_pending calls in the case of rpcmod's idle
	 * timer firing.
	 */
	for (i = 0; i < clnt_cots_hash_size; i++) {
		ctp = &cots_call_ht[i];
		mutex_enter(&ctp->ct_lock);
		for (e = ctp->ct_call_next;
		    e != (calllist_t *)ctp;
		    e = e->call_next) {
			if (e->call_wq == q && e->call_notified == FALSE) {
				RPCLOG(1,
				    "clnt_dispatch_notifyall for queue %p ",
				    (void *)q);
				RPCLOG(1, "aborting clnt_pending call %p\n",
				    (void *)e);

				if (msg_type == T_DISCON_IND)
					e->call_reason = reason;
				e->call_notified = TRUE;
				e->call_status = RPC_XPRTFAILED;
				cv_signal(&e->call_cv);
			}
		}
		mutex_exit(&ctp->ct_lock);
	}

	mutex_enter(&clnt_pending_lock);
	for (e = clnt_pending; e; e = e->call_next) {
		/*
		 * Only signal those RPC handles that haven't been
		 * signalled yet. Otherwise we can get a bogus call_reason.
		 * This can happen if thread A is making a call over a
		 * connection. If the server is killed, it will cause a
		 * reset, and reason will default to EIO as a result of
		 * a T_ORDREL_IND. Thread B then attempts to recreate
		 * the connection but gets a T_DISCON_IND. If we set the
		 * call_reason code for all threads, then if thread A
		 * hasn't been dispatched yet, it will get the wrong
		 * reason. The bogus call_reason can make it harder to
		 * discriminate between calls that fail because the
		 * connection attempt failed versus those where the call
		 * may have been executed on the server.
		 */
		if (e->call_wq == q && e->call_notified == FALSE) {
			RPCLOG(1, "clnt_dispatch_notifyall for queue %p ",
			    (void *)q);
			RPCLOG(1, "aborting clnt_pending call %p\n",
			    (void *)e);

			if (msg_type == T_DISCON_IND)
				e->call_reason = reason;
			e->call_notified = TRUE;
			/*
			 * Let the caller timeout, else it will retry
			 * immediately.
			 */
			e->call_status = RPC_XPRTFAILED;

			/*
			 * We used to just signal those threads waiting
			 * for a connection (call_xid = 0). That meant
			 * that threads waiting for a response waited
			 * until their timeout expired. This could be a
			 * long time if they've specified a maximum
			 * timeout (2^31 - 1 ticks). So we signal all
			 * threads now.
			 */
			cv_signal(&e->call_cv);
		}
	}
	mutex_exit(&clnt_pending_lock);
}

/*ARGSUSED*/
/*
 * After resuming a system that's been suspended for longer than the
 * NFS server's idle timeout (svc_idle_timeout for Solaris 2), rfscall()
 * generates "NFS server X not responding" and "NFS server X ok" messages;
 * here we reset inet connections to cause a re-connect and avoid those
 * NFS messages. See 4045054.
 */
boolean_t
connmgr_cpr_reset(void *arg, int code)
{
	struct cm_xprt *cxp;

	if (code == CB_CODE_CPR_CHKPT)
		return (B_TRUE);

	if (mutex_tryenter(&connmgr_lock) == 0)
		return (B_FALSE);
	for (cxp = cm_hd; cxp; cxp = cxp->x_next) {
		if ((cxp->x_family == AF_INET || cxp->x_family == AF_INET6) &&
		    cxp->x_connected == TRUE) {
			if (cxp->x_thread)
				cxp->x_early_disc = TRUE;
			else
				cxp->x_connected = FALSE;
			cxp->x_needdis = TRUE;
		}
	}
	mutex_exit(&connmgr_lock);
	return (B_TRUE);
}

void
clnt_cots_stats_init(zoneid_t zoneid, struct rpc_cots_client **statsp)
{
	*statsp = (struct rpc_cots_client *)rpcstat_zone_init_common(zoneid,
	    "unix", "rpc_cots_client", (const kstat_named_t *)&cots_rcstat_tmpl,
	    sizeof (cots_rcstat_tmpl));
}

void
clnt_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_client **statsp)
{
	rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_client");
	kmem_free(*statsp, sizeof (cots_rcstat_tmpl));
}

void
clnt_cots_init(void)
{
	mutex_init(&connmgr_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&clnt_pending_lock, NULL, MUTEX_DEFAULT, NULL);

	if (clnt_cots_hash_size < DEFAULT_MIN_HASH_SIZE)
		clnt_cots_hash_size = DEFAULT_MIN_HASH_SIZE;

	cots_call_ht = call_table_init(clnt_cots_hash_size);
	zone_key_create(&zone_cots_key, NULL, NULL, clnt_zone_destroy);
}

void
clnt_cots_fini(void)
{
	(void) zone_key_delete(zone_cots_key);
}

/*
 * Wait for a TPI ack; returns success only if the expected ack is received
 * within the timeout period.
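 *
 * A sketch of the calling convention (illustrative; it mirrors the
 * pattern used by connmgr_setopt() above):
 *
 *	clnt_dispatch_send(wq, mp, e, 0, 0);
 *	mutex_enter(&clnt_pending_lock);
 *	error = waitforack(e, T_OPTMGMT_ACK, &waitp, 1);
 *	... unlink e from the clnt_pending list ...
 *	mutex_exit(&clnt_pending_lock);
 *
 * clnt_pending_lock must be held on entry; it is dropped and re-taken
 * inside the cv_timedwait{_sig} calls and is held again on return.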
 */

static int
waitforack(calllist_t *e, t_scalar_t ack_prim, const struct timeval *waitp,
	bool_t nosignal)
{
	union T_primitives *tpr;
	clock_t timout;
	int cv_stat = 1;

	ASSERT(MUTEX_HELD(&clnt_pending_lock));
	while (e->call_reply == NULL) {
		if (waitp != NULL) {
			timout = waitp->tv_sec * drv_usectohz(MICROSEC) +
			    drv_usectohz(waitp->tv_usec) + lbolt;
			if (nosignal)
				cv_stat = cv_timedwait(&e->call_cv,
				    &clnt_pending_lock, timout);
			else
				cv_stat = cv_timedwait_sig(&e->call_cv,
				    &clnt_pending_lock, timout);
		} else {
			if (nosignal)
				cv_wait(&e->call_cv, &clnt_pending_lock);
			else
				cv_stat = cv_wait_sig(&e->call_cv,
				    &clnt_pending_lock);
		}
		if (cv_stat == -1)
			return (ETIME);
		if (cv_stat == 0)
			return (EINTR);
	}
	tpr = (union T_primitives *)e->call_reply->b_rptr;
	if (tpr->type == ack_prim)
		return (0);	/* Success */

	if (tpr->type == T_ERROR_ACK) {
		if (tpr->error_ack.TLI_error == TSYSERR)
			return (tpr->error_ack.UNIX_error);
		else
			return (t_tlitosyserr(tpr->error_ack.TLI_error));
	}

	return (EPROTO);	/* unknown or unexpected primitive */
}