/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 1993 OpenVision Technologies, Inc., All Rights Reserved.
 */

/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Server-side remote procedure call interface.
 *
 * Master transport handle (SVCMASTERXPRT).
 *	The master transport handle structure is shared among service
 *	threads processing events on the transport.  Some fields in the
 *	master structure are protected by locks:
 *	- xp_req_lock protects the request queue:
 *		xp_req_head, xp_req_tail
 *	- xp_thread_lock protects the thread (clone) counts:
 *		xp_threads, xp_detached_threads, xp_wq
 *	Each master transport is registered to exactly one thread pool.
 *
 * Clone transport handle (SVCXPRT).
 *	The clone transport handle structure is a per-service-thread handle
 *	to the transport.  The structure carries all the fields/buffers used
 *	for request processing.  A service thread, or, in other words, a clone
 *	structure, can be linked to an arbitrary master structure to process
 *	requests on this transport.  The master handle keeps track of reference
 *	counts of threads (clones) linked to it.  A service thread can switch
 *	to another transport by unlinking its clone handle from the current
 *	transport and linking to a new one.  Switching is relatively
 *	inexpensive but it involves locking (the master's xprt->xp_thread_lock).
 *
 * Pools.
 *	A pool represents a kernel RPC service (NFS, Lock Manager, etc.).
 *	Transports related to the service are registered to the service pool.
 *	Service threads can switch between different transports in the pool.
 *	Thus, each service has its own pool of service threads.  The maximum
 *	number of threads in a pool is pool->p_maxthreads.  This limit makes
 *	it possible to restrict resource usage by the service.  Some fields
 *	are protected by locks:
 *	- p_req_lock protects several counts and flags:
 *		p_reqs, p_walkers, p_asleep, p_drowsy, p_req_cv
 *	- p_thread_lock governs other thread counts:
 *		p_threads, p_detached_threads, p_reserved_threads, p_closing
 *
 *	In addition, each pool contains a doubly-linked list of transports,
 *	an `xprt-ready' queue, and a creator thread (see below).  Threads in
 *	the pool share some other parameters such as stack size and
 *	polling timeout.
 *
 * Pools are initialized through the svc_pool_create() function called from
 * the nfssys() system call.  However, thread creation must be done by
 * the userland agent.  This is done by using the SVCPOOL_WAIT and
 * SVCPOOL_RUN arguments to nfssys(), which call svc_wait() and
 * svc_do_run(), respectively.  Once the pool has been initialized,
 * the userland process must set up a `creator' thread.  This thread
 * should park itself in the kernel by calling svc_wait().  If
 * svc_wait() returns successfully, it should fork off a new worker
 * thread, which then calls svc_do_run() in order to get work.  When
 * that thread is complete, svc_do_run() will return, and the user
 * program should call thr_exit().  (A sketch of this userland protocol
 * appears at the end of this block comment.)
 *
 * When we try to register a new pool and there is an old pool with
 * the same id in the doubly linked pool list (this happens when we kill
 * and restart nfsd or lockd), then we unlink the old pool from the list
 * and mark its state as `closing'.  After that the transports can still
 * process requests but new transports won't be registered.  When all the
 * transports and service threads associated with the pool are gone the
 * creator thread (see below) will clean up the pool structure and exit.
 *
 * svc_queuereq() and svc_run().
 * The kernel RPC server is interrupt driven.  The svc_queuereq()
 * interrupt routine is called to deliver an RPC request.  The service
 * threads loop in svc_run().  The interrupt function queues a request on
 * the transport's queue and makes sure that the request is serviced.
 * It may either wake up one of the sleeping threads, or ask for a new
 * thread to be created, or, if the previous request is just being picked
 * up, do nothing.  In the last case the service thread that is picking up
 * the previous request will wake up or create the next thread.  After a
 * service thread processes a request and sends a reply it returns to
 * svc_run() and svc_run() calls svc_poll() to find new input.
 *
 * There is an "inconsistent" but "safe" optimization in the
 * svc_queuereq() code.  The request is queued under the transport's
 * request lock, while the `pending-requests' count is incremented
 * independently under the pool request lock.  Thus, a request can be picked
 * up by a service thread before the counter is incremented.  It may also
 * happen that the service thread will win the race condition on the pool
 * lock and decrement the count even before the interrupt thread
 * increments it (so the count can be temporarily negative).
 *
 * svc_poll().
 * In order to avoid unnecessary locking, which causes performance
 * problems, we always look for a pending request on the current transport.
 * If there is none we take a hint from the pool's `xprt-ready' queue.
 * If the queue had an overflow we switch to the `drain' mode, checking
 * each transport in the pool's transport list.  Once we find a
 * master transport handle with a pending request we latch the request
 * lock on this transport and return to svc_run().  If the request
 * belongs to a transport different from the one the service thread is
 * linked to we need to unlink and link again.
 *
 * A service thread goes to sleep when there are no pending
 * requests on the transports registered with the pool.
 * All the pool's threads sleep on the same condition variable.
 * If a thread has been sleeping for too long a period of time
 * (by default 5 seconds) it wakes up and exits.  Also, when a transport
 * is closing, sleeping threads wake up to unlink from this transport.
 *
 * The `xprt-ready' queue.
 * If a service thread finds no request on the transport it is currently
 * linked to, it will look for another transport with a pending request.
 * To make this search more efficient each pool has an `xprt-ready' queue.
 * The queue is a FIFO.  When the interrupt routine queues a request it also
 * inserts a pointer to the transport into the `xprt-ready' queue.  A
 * thread looking for a transport with a pending request can pop a
 * transport and check for a request.  The request may already be gone
 * since it could have been taken by a thread linked to that transport.
 * In such a case we try the next hint.  The `xprt-ready' queue has a
 * fixed size (by default 256 nodes).  If it overflows svc_poll() has to
 * switch to the less efficient but safe `drain' mode and walk through
 * the pool's transport list.
 *
 * Both the svc_poll() loop and the `xprt-ready' queue are optimized
 * for the peak load case, that is, for the situation when the queue is not
 * empty, there are always a few pending requests, and a service
 * thread which has just processed a request does not go to sleep but
 * immediately picks up the next request.
 *
 * Thread creator.
 * Each pool has a thread creator associated with it.  The creator thread
 * sleeps on a condition variable and waits for a signal to create a
 * service thread.  The actual thread creation is done in userland by
 * the method described in "Pools" above.
 *
 * Signaling threads should turn on the `creator signaled' flag, and
 * can avoid sending signals when the flag is on.  The flag is cleared
 * when the thread is created.
 *
 * When the pool is in closing state (i.e. it has already been unregistered
 * from the pool list) the last thread on the last transport in the pool
 * should turn the p_creator_exit flag on.  The creator thread will
 * clean up the pool structure and exit.
 *
 * Thread reservation; Detaching service threads.
 * A service thread can detach itself to block for an extended amount
 * of time.  However, to keep the service active we need to guarantee
 * at least pool->p_redline non-detached threads that can process incoming
 * requests.  Thus, the maximum number of detached and reserved threads is
 * p->p_maxthreads - p->p_redline.  A service thread should first acquire
 * a reservation, and if the reservation was granted it can detach itself.
 * If a reservation was granted but the thread does not detach itself
 * it should cancel the reservation before it returns to svc_run().
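 *
 * Userland creator protocol, sketched.
 * The following is a simplified, hypothetical sketch of the userland
 * side of the protocol described under "Pools" above (the names
 * _nfssys(), worker() and poolid are illustrative, not a verbatim
 * excerpt from nfsd or lockd):
 *
 *	static void *
 *	worker(void *arg)
 *	{
 *		(void) _nfssys(SVCPOOL_RUN, &poolid);	// calls svc_do_run()
 *		thr_exit(NULL);				// as described above
 *		return (NULL);
 *	}
 *
 *	// `creator' thread: park in the kernel; each successful return
 *	// means "fork off one more worker to enter svc_run()".
 *	while (_nfssys(SVCPOOL_WAIT, &poolid) == 0)	// calls svc_wait()
 *		(void) thr_create(NULL, 0, worker, NULL, THR_DETACHED, NULL);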
 */

#include <sys/param.h>
#include <sys/types.h>
#include <rpc/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/tiuser.h>
#include <sys/t_kuser.h>
#include <netinet/in.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>
#include <rpc/svc.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/tihdr.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/file.h>
#include <sys/systm.h>
#include <sys/callb.h>
#include <sys/vtrace.h>
#include <sys/zone.h>
#include <nfs/nfs.h>

#define	RQCRED_SIZE	400	/* this size is excessive */

/*
 * Defines for svc_poll()
 */
#define	SVC_EXPRTGONE	((SVCMASTERXPRT *)1)	/* Transport is closing */
#define	SVC_ETIMEDOUT	((SVCMASTERXPRT *)2)	/* Timeout */
#define	SVC_EINTR	((SVCMASTERXPRT *)3)	/* Interrupted by signal */

/*
 * Default stack size for service threads.
 */
#define	DEFAULT_SVC_RUN_STKSIZE		(0)	/* default kernel stack */

int	svc_default_stksize = DEFAULT_SVC_RUN_STKSIZE;

/*
 * Default polling timeout for service threads.
 * Multiplied by hz when used.
 */
#define	DEFAULT_SVC_POLL_TIMEOUT	(5)	/* seconds */

clock_t	svc_default_timeout = DEFAULT_SVC_POLL_TIMEOUT;

/*
 * Size of the `xprt-ready' queue.
 */
#define	DEFAULT_SVC_QSIZE		(256)	/* qnodes */

size_t	svc_default_qsize = DEFAULT_SVC_QSIZE;

/*
 * Default limit for the number of service threads.
 */
#define	DEFAULT_SVC_MAXTHREADS		(INT16_MAX)

int	svc_default_maxthreads = DEFAULT_SVC_MAXTHREADS;

/*
 * Maximum number of requests from the same transport (in `drain' mode).
 */
#define	DEFAULT_SVC_MAX_SAME_XPRT	(8)

int	svc_default_max_same_xprt = DEFAULT_SVC_MAX_SAME_XPRT;


/*
 * Default `redline' of non-detached threads.
 * The total number of detached and reserved threads in an RPC server
 * thread pool is limited to pool->p_maxthreads - svc_redline.
 */
#define	DEFAULT_SVC_REDLINE		(1)

int	svc_default_redline = DEFAULT_SVC_REDLINE;

/*
 * A node for the `xprt-ready' queue.
 * See below.
 */
struct __svcxprt_qnode {
	__SVCXPRT_QNODE	*q_next;
	SVCMASTERXPRT	*q_xprt;
};

/*
 * Global SVC variables (private).
 */
struct svc_globals {
	SVCPOOL		*svc_pools;
	kmutex_t	svc_plock;
};

/*
 * Debug variable to check for rdma based
 * transport startup and cleanup.  Controlled
 * through /etc/system.  Off by default.
 */
int rdma_check = 0;

/*
 * Authentication parameters list.
 */
static caddr_t rqcred_head;
static kmutex_t rqcred_lock;

/*
 * Pointers to transport-specific `rele' routines in rpcmod (set from rpcmod).
 */
void	(*rpc_rele)(queue_t *, mblk_t *) = NULL;
void	(*mir_rele)(queue_t *, mblk_t *) = NULL;

/* ARGSUSED */
void
rpc_rdma_rele(queue_t *q, mblk_t *mp)
{
}
void	(*rdma_rele)(queue_t *, mblk_t *) = rpc_rdma_rele;


/*
 * This macro picks which `rele' routine to use, based on the transport type.
 */
#define	RELE_PROC(xprt) \
	((xprt)->xp_type == T_RDMA ? rdma_rele : \
	(((xprt)->xp_type == T_CLTS) ? rpc_rele : mir_rele))

/*
 * If true, then keep quiet about version mismatch.
 * This macro is for broadcast RPC only.  We have no broadcast RPC in
 * the kernel now but one may define a flag in the transport structure
 * and redefine this macro.
 */
#define	version_keepquiet(xprt)	(FALSE)

/*
 * ZSD key used to retrieve zone-specific svc globals.
 */
static zone_key_t svc_zone_key;

static void svc_callout_free(SVCMASTERXPRT *);
static void svc_xprt_qinit(SVCPOOL *, size_t);
static void svc_xprt_qdestroy(SVCPOOL *);
static void svc_thread_creator(SVCPOOL *);
static void svc_creator_signal(SVCPOOL *);
static void svc_creator_signalexit(SVCPOOL *);
static void svc_pool_unregister(struct svc_globals *, SVCPOOL *);
static int svc_run(SVCPOOL *);

/* ARGSUSED */
static void *
svc_zoneinit(zoneid_t zoneid)
{
	struct svc_globals *svc;

	svc = kmem_alloc(sizeof (*svc), KM_SLEEP);
	mutex_init(&svc->svc_plock, NULL, MUTEX_DEFAULT, NULL);
	svc->svc_pools = NULL;
	return (svc);
}

/* ARGSUSED */
static void
svc_zoneshutdown(zoneid_t zoneid, void *arg)
{
	struct svc_globals *svc = arg;
	SVCPOOL *pool;

	mutex_enter(&svc->svc_plock);
	while ((pool = svc->svc_pools) != NULL) {
		svc_pool_unregister(svc, pool);
	}
	mutex_exit(&svc->svc_plock);
}

/* ARGSUSED */
static void
svc_zonefini(zoneid_t zoneid, void *arg)
{
	struct svc_globals *svc = arg;

	ASSERT(svc->svc_pools == NULL);
	mutex_destroy(&svc->svc_plock);
	kmem_free(svc, sizeof (*svc));
}

/*
 * Global SVC init routine.
 * Initialize global generic and transport type specific structures
 * used by the kernel RPC server side.  This routine is called only
 * once when the module is being loaded.
 */
void
svc_init()
{
	zone_key_create(&svc_zone_key, svc_zoneinit, svc_zoneshutdown,
	    svc_zonefini);
	svc_cots_init();
	svc_clts_init();
}

/*
 * Destroy the SVCPOOL structure.
 */
static void
svc_pool_cleanup(SVCPOOL *pool)
{
	ASSERT(pool->p_threads + pool->p_detached_threads == 0);
	ASSERT(pool->p_lcount == 0);
	ASSERT(pool->p_closing);

	/*
	 * Call the user supplied shutdown function.  This is done
	 * here so the user of the pool will be able to clean up
	 * service-related resources.
	 */
	if (pool->p_shutdown != NULL)
		(pool->p_shutdown)();

	/* Destroy `xprt-ready' queue */
	svc_xprt_qdestroy(pool);

	/* Destroy transport list */
	rw_destroy(&pool->p_lrwlock);

	/* Destroy locks and condition variables */
	mutex_destroy(&pool->p_thread_lock);
	mutex_destroy(&pool->p_req_lock);
	cv_destroy(&pool->p_req_cv);

	/* Destroy creator's locks and condition variables */
	mutex_destroy(&pool->p_creator_lock);
	cv_destroy(&pool->p_creator_cv);
	mutex_destroy(&pool->p_user_lock);
	cv_destroy(&pool->p_user_cv);

	/* Free pool structure */
	kmem_free(pool, sizeof (SVCPOOL));
}

/*
 * If all the transports and service threads are already gone
 * signal the creator thread to clean up and exit.
 */
static bool_t
svc_pool_tryexit(SVCPOOL *pool)
{
	ASSERT(MUTEX_HELD(&pool->p_thread_lock));
	ASSERT(pool->p_closing);

	if (pool->p_threads + pool->p_detached_threads == 0) {
		rw_enter(&pool->p_lrwlock, RW_READER);
		if (pool->p_lcount == 0) {
			/*
			 * Release the locks before sending a signal.
			 */
			rw_exit(&pool->p_lrwlock);
			mutex_exit(&pool->p_thread_lock);

			/*
			 * Notify the creator thread to clean up and exit.
			 *
			 * NOTICE: No references to the pool beyond this point!
			 * The pool is being destroyed.
			 */
			ASSERT(!MUTEX_HELD(&pool->p_thread_lock));
			svc_creator_signalexit(pool);

			return (TRUE);
		}
		rw_exit(&pool->p_lrwlock);
	}

	ASSERT(MUTEX_HELD(&pool->p_thread_lock));
	return (FALSE);
}

/*
 * Find a pool with a given id.
 */
static SVCPOOL *
svc_pool_find(struct svc_globals *svc, int id)
{
	SVCPOOL *pool;

	ASSERT(MUTEX_HELD(&svc->svc_plock));

	/*
	 * Search the list for a pool with a matching id.
	 */
	for (pool = svc->svc_pools; pool; pool = pool->p_next)
		if (pool->p_id == id)
			return (pool);

	return (NULL);
}

/*
 * PSARC 2003/523 Contract Private Interface
 * svc_do_run
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
int
svc_do_run(int id)
{
	SVCPOOL *pool;
	int err = 0;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	mutex_enter(&svc->svc_plock);

	pool = svc_pool_find(svc, id);

	mutex_exit(&svc->svc_plock);

	if (pool == NULL)
		return (ENOENT);

	/*
	 * Increment the counter of pool threads now
	 * that a thread has been created.
	 */
	mutex_enter(&pool->p_thread_lock);
	pool->p_threads++;
	mutex_exit(&pool->p_thread_lock);

	/* Give work to the new thread. */
	err = svc_run(pool);

	return (err);
}

/*
 * Unregister a pool from the pool list.
 * Set the closing state.  If all the transports and service threads
 * are already gone, signal the creator thread to clean up and exit.
 */
static void
svc_pool_unregister(struct svc_globals *svc, SVCPOOL *pool)
{
	SVCPOOL *next = pool->p_next;
	SVCPOOL *prev = pool->p_prev;

	ASSERT(MUTEX_HELD(&svc->svc_plock));

	/* Remove from the list */
	if (pool == svc->svc_pools)
		svc->svc_pools = next;
	if (next)
		next->p_prev = prev;
	if (prev)
		prev->p_next = next;
	pool->p_next = pool->p_prev = NULL;

	/*
	 * Offline the pool.  Mark the pool as closing.
	 * If there are no transports in this pool notify
	 * the creator thread to clean it up and exit.
	 */
	mutex_enter(&pool->p_thread_lock);
	if (pool->p_offline != NULL)
		(pool->p_offline)();
	pool->p_closing = TRUE;
	if (svc_pool_tryexit(pool))
		return;
	mutex_exit(&pool->p_thread_lock);
}

/*
 * Register a pool with a given id in the global doubly linked pool list.
 * - if there is a pool with the same id in the list then unregister it
 * - insert the new pool into the list.
 */
static void
svc_pool_register(struct svc_globals *svc, SVCPOOL *pool, int id)
{
	SVCPOOL *old_pool;

	/*
	 * If there is a pool with the same id then remove it from
	 * the list and mark the pool as closing.
	 */
	mutex_enter(&svc->svc_plock);

	if (old_pool = svc_pool_find(svc, id))
		svc_pool_unregister(svc, old_pool);

	/* Insert into the doubly linked list */
	pool->p_id = id;
	pool->p_next = svc->svc_pools;
	pool->p_prev = NULL;
	if (svc->svc_pools)
		svc->svc_pools->p_prev = pool;
	svc->svc_pools = pool;

	mutex_exit(&svc->svc_plock);
}

/*
 * Initialize a newly created pool structure.
 */
static int
svc_pool_init(SVCPOOL *pool, uint_t maxthreads, uint_t redline,
	uint_t qsize, uint_t timeout, uint_t stksize, uint_t max_same_xprt)
{
	klwp_t *lwp = ttolwp(curthread);

	ASSERT(pool);

	if (maxthreads == 0)
		maxthreads = svc_default_maxthreads;
	if (redline == 0)
		redline = svc_default_redline;
	if (qsize == 0)
		qsize = svc_default_qsize;
	if (timeout == 0)
		timeout = svc_default_timeout;
	if (stksize == 0)
		stksize = svc_default_stksize;
	if (max_same_xprt == 0)
		max_same_xprt = svc_default_max_same_xprt;

	if (maxthreads < redline)
		return (EINVAL);

	/* Allocate and initialize the `xprt-ready' queue */
	svc_xprt_qinit(pool, qsize);

	/* Initialize doubly-linked xprt list */
	rw_init(&pool->p_lrwlock, NULL, RW_DEFAULT, NULL);

	/*
	 * Set lwp_childstksz on the current lwp so that
	 * descendants of this lwp get the modified stack size, if
	 * it is defined.  It is important that either this lwp or
	 * one of its descendants do the actual service pool thread
	 * creation to maintain the stack size inheritance.
	 */
	if (lwp != NULL)
		lwp->lwp_childstksz = stksize;

	/* Initialize thread limits, locks and condition variables */
	pool->p_maxthreads = maxthreads;
	pool->p_redline = redline;
	pool->p_timeout = timeout * hz;
	pool->p_stksize = stksize;
	pool->p_max_same_xprt = max_same_xprt;
	mutex_init(&pool->p_thread_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&pool->p_req_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pool->p_req_cv, NULL, CV_DEFAULT, NULL);

	/* Initialize userland creator */
	pool->p_user_exit = FALSE;
	pool->p_signal_create_thread = FALSE;
	pool->p_user_waiting = FALSE;
	mutex_init(&pool->p_user_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pool->p_user_cv, NULL, CV_DEFAULT, NULL);

	/* Initialize the creator and start the creator thread */
	pool->p_creator_exit = FALSE;
	mutex_init(&pool->p_creator_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pool->p_creator_cv, NULL, CV_DEFAULT, NULL);

	(void) zthread_create(NULL, pool->p_stksize, svc_thread_creator,
	    pool, 0, minclsyspri);

	return (0);
}

/*
 * PSARC 2003/523 Contract Private Interface
 * svc_pool_create
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 *
 * Create a kernel RPC server-side thread/transport pool.
 *
 * This is the public interface for creation of a server RPC thread pool
 * for a given service provider.  Transports registered with the pool's id
 * will be served by the pool's threads.  This function is called from the
 * nfssys() system call.
 */
int
svc_pool_create(struct svcpool_args *args)
{
	SVCPOOL *pool;
	int error;
	struct svc_globals *svc;

	/*
	 * Caller should check credentials in a way appropriate
	 * in the context of the call.
	 */

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	/* Allocate a new pool */
	pool = kmem_zalloc(sizeof (SVCPOOL), KM_SLEEP);

	/*
	 * Initialize the pool structure and create a creator thread.
	 */
	error = svc_pool_init(pool, args->maxthreads, args->redline,
	    args->qsize, args->timeout, args->stksize, args->max_same_xprt);

	if (error) {
		kmem_free(pool, sizeof (SVCPOOL));
		return (error);
	}

	/* Register the pool with the global pool list */
	svc_pool_register(svc, pool, args->id);

	return (0);
}

int
svc_pool_control(int id, int cmd, void *arg)
{
	SVCPOOL *pool;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);

	switch (cmd) {
	case SVCPSET_SHUTDOWN_PROC:
		/*
		 * Search the list for a pool with a matching id
		 * and register the shutdown procedure with that pool.
		 */
		mutex_enter(&svc->svc_plock);

		if ((pool = svc_pool_find(svc, id)) == NULL) {
			mutex_exit(&svc->svc_plock);
			return (ENOENT);
		}
		/*
		 * Grab the transport list lock before releasing the
		 * pool list lock.
		 */
		rw_enter(&pool->p_lrwlock, RW_WRITER);
		mutex_exit(&svc->svc_plock);

		pool->p_shutdown = *((void (*)())arg);

		rw_exit(&pool->p_lrwlock);

		return (0);
	case SVCPSET_UNREGISTER_PROC:
		/*
		 * Search the list for a pool with a matching id
		 * and register the offline (unregister) callback with
		 * that pool.
		 */
		mutex_enter(&svc->svc_plock);

		if ((pool = svc_pool_find(svc, id)) == NULL) {
			mutex_exit(&svc->svc_plock);
			return (ENOENT);
		}
		/*
		 * Grab the transport list lock before releasing the
		 * pool list lock.
		 */
		rw_enter(&pool->p_lrwlock, RW_WRITER);
		mutex_exit(&svc->svc_plock);

		pool->p_offline = *((void (*)())arg);

		rw_exit(&pool->p_lrwlock);

		return (0);
	default:
		return (EINVAL);
	}
}

/*
 * Pool's transport list manipulation routines.
 * - svc_xprt_register()
 * - svc_xprt_unregister()
 *
 * svc_xprt_register() is called from svc_tli_kcreate() to
 * insert a new master transport handle into the doubly linked
 * list of server transport handles (one list per pool).
 *
 * The list is used by svc_poll(), when it operates in `drain'
 * mode, to search for the next transport with a pending request.
 */

int
svc_xprt_register(SVCMASTERXPRT *xprt, int id)
{
	SVCMASTERXPRT *prev, *next;
	SVCPOOL *pool;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	/*
	 * Search the list for a pool with a matching id
	 * and register the transport handle with that pool.
	 */
	mutex_enter(&svc->svc_plock);

	if ((pool = svc_pool_find(svc, id)) == NULL) {
		mutex_exit(&svc->svc_plock);
		return (ENOENT);
	}

	/* Grab the transport list lock before releasing the pool list lock */
	rw_enter(&pool->p_lrwlock, RW_WRITER);
	mutex_exit(&svc->svc_plock);

	/* Don't register new transports when the pool is in closing state */
	if (pool->p_closing) {
		rw_exit(&pool->p_lrwlock);
		return (EBUSY);
	}

	/*
	 * Initialize xp_pool to point to the pool.
	 * We don't want to go through the pool list every time.
	 */
	xprt->xp_pool = pool;

	/*
	 * Insert a transport handle into the list.
	 * The list head points to the most recently inserted transport.
	 */
	if (pool->p_lhead == NULL)
		pool->p_lhead = xprt->xp_prev = xprt->xp_next = xprt;
	else {
		next = pool->p_lhead;
		prev = pool->p_lhead->xp_prev;

		xprt->xp_next = next;
		xprt->xp_prev = prev;

		pool->p_lhead = prev->xp_next = next->xp_prev = xprt;
	}

	/* Increment the transports count */
	pool->p_lcount++;

	rw_exit(&pool->p_lrwlock);
	return (0);
}

/*
 * Called from svc_xprt_cleanup() to remove a master transport handle
 * from the pool's list of server transports (when a transport is
 * being destroyed).
 */
void
svc_xprt_unregister(SVCMASTERXPRT *xprt)
{
	SVCPOOL *pool = xprt->xp_pool;

	/*
	 * Unlink xprt from the list.
	 * If the list head points to this xprt then move it
	 * to the next xprt or reset to NULL if this is the last
	 * xprt in the list.
	 */
	rw_enter(&pool->p_lrwlock, RW_WRITER);

	if (xprt == xprt->xp_next)
		pool->p_lhead = NULL;
	else {
		SVCMASTERXPRT *next = xprt->xp_next;
		SVCMASTERXPRT *prev = xprt->xp_prev;

		next->xp_prev = prev;
		prev->xp_next = next;

		if (pool->p_lhead == xprt)
			pool->p_lhead = next;
	}

	xprt->xp_next = xprt->xp_prev = NULL;

	/* Decrement list count */
	pool->p_lcount--;

	rw_exit(&pool->p_lrwlock);
}

/*
 * Destroy the `xprt-ready' queue (counterpart of svc_xprt_qinit()).
 */
static void
svc_xprt_qdestroy(SVCPOOL *pool)
{
	mutex_destroy(&pool->p_qend_lock);
	kmem_free(pool->p_qbody, pool->p_qsize * sizeof (__SVCXPRT_QNODE));
}

/*
 * Initialize an `xprt-ready' queue for a given pool.
 */
static void
svc_xprt_qinit(SVCPOOL *pool, size_t qsize)
{
	int i;

	pool->p_qsize = qsize;
	pool->p_qbody = kmem_zalloc(pool->p_qsize * sizeof (__SVCXPRT_QNODE),
	    KM_SLEEP);

	for (i = 0; i < pool->p_qsize - 1; i++)
		pool->p_qbody[i].q_next = &(pool->p_qbody[i+1]);

	pool->p_qbody[pool->p_qsize-1].q_next = &(pool->p_qbody[0]);
	pool->p_qtop = &(pool->p_qbody[0]);
	pool->p_qend = &(pool->p_qbody[0]);

	mutex_init(&pool->p_qend_lock, NULL, MUTEX_DEFAULT, NULL);
}

/*
 * Called from the svc_queuereq() interrupt routine to queue
 * a hint for svc_poll() which transport has a pending request.
 * - insert a pointer to xprt into the xprt-ready queue (FIFO)
 * - if the xprt-ready queue is full turn the overflow flag on.
 *
 * NOTICE: pool->p_qtop is protected by the pool's request lock
 * and the caller (svc_queuereq()) must hold the lock.
 */
static void
svc_xprt_qput(SVCPOOL *pool, SVCMASTERXPRT *xprt)
{
	ASSERT(MUTEX_HELD(&pool->p_req_lock));

	/* If the overflow flag is set there is nothing we can do */
	if (pool->p_qoverflow)
		return;

	/* If the queue is full turn the overflow flag on and exit */
	if (pool->p_qtop->q_next == pool->p_qend) {
		mutex_enter(&pool->p_qend_lock);
		if (pool->p_qtop->q_next == pool->p_qend) {
			pool->p_qoverflow = TRUE;
			mutex_exit(&pool->p_qend_lock);
			return;
		}
		mutex_exit(&pool->p_qend_lock);
	}

	/* Insert a hint and move pool->p_qtop */
	pool->p_qtop->q_xprt = xprt;
	pool->p_qtop = pool->p_qtop->q_next;
}

/*
 * Called from svc_poll() to get a hint of which transport has a
 * pending request.  Returns a pointer to a transport or NULL if the
 * `xprt-ready' queue is empty.
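 *
 * (Orientation note, summarizing the code above rather than adding new
 * behavior: the qnodes form a circular buffer; svc_xprt_qput() inserts
 * hints at p_qtop while this routine consumes them at p_qend.  The queue
 * is empty when p_qend == p_qtop and treated as full when
 * p_qtop->q_next == p_qend.)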
 *
 * Since we do not acquire the pool's request lock while checking if
 * the queue is empty we may miss a request that is just being delivered.
 * However this is ok since svc_poll() will retry again until the
 * count indicates that there are pending requests for this pool.
 */
static SVCMASTERXPRT *
svc_xprt_qget(SVCPOOL *pool)
{
	SVCMASTERXPRT *xprt;

	mutex_enter(&pool->p_qend_lock);
	do {
		/*
		 * If the queue is empty return NULL.
		 * Since we do not acquire the pool's request lock which
		 * protects pool->p_qtop this is not an exact check.
		 * However, this is safe - if we miss a request here
		 * svc_poll() will retry again.
		 */
		if (pool->p_qend == pool->p_qtop) {
			mutex_exit(&pool->p_qend_lock);
			return (NULL);
		}

		/* Get a hint and move pool->p_qend */
		xprt = pool->p_qend->q_xprt;
		pool->p_qend = pool->p_qend->q_next;

		/* Skip nodes deleted by svc_xprt_qdelete() */
	} while (xprt == NULL);
	mutex_exit(&pool->p_qend_lock);

	return (xprt);
}

/*
 * Reset an overflow in the xprt-ready queue after
 * all the pending requests have been drained.
 * This switches svc_poll back to getting hints from the
 * xprt-ready queue.
 *
 * NOTICE: pool->p_qtop is protected by the pool's request lock
 * and the caller (svc_poll()) must hold the lock.
 */
static void
svc_xprt_qreset(SVCPOOL *pool)
{
	ASSERT(MUTEX_HELD(&pool->p_req_lock));

	pool->p_qend = pool->p_qtop;
	pool->p_qoverflow = FALSE;
}

/*
 * Delete all the references to a transport handle that
 * is being destroyed from the xprt-ready queue.
 * Deleted pointers are replaced with NULLs.
 */
static void
svc_xprt_qdelete(SVCPOOL *pool, SVCMASTERXPRT *xprt)
{
	__SVCXPRT_QNODE *q = pool->p_qend;
	__SVCXPRT_QNODE *qtop = pool->p_qtop;

	/*
	 * Delete all the references to xprt between the current
	 * position of pool->p_qend and current pool->p_qtop.
	 */
	for (;;) {
		if (q->q_xprt == xprt)
			q->q_xprt = NULL;
		if (q == qtop)
			return;
		q = q->q_next;
	}
}

/*
 * Destructor for a master server transport handle.
 * - if there are no more non-detached threads linked to this transport
 *   then, if requested, call xp_closeproc (we don't wait for detached
 *   threads linked to this transport to complete).
 * - if there are no more threads linked to this
 *   transport then
 *   a) remove references to this transport from the xprt-ready queue
 *   b) remove a reference to this transport from the pool's transport list
 *   c) call a transport-specific `destroy' function
 *   d) cancel remaining thread reservations.
 *
 * NOTICE: Caller must hold the transport's thread lock.
 */
static void
svc_xprt_cleanup(SVCMASTERXPRT *xprt, bool_t detached)
{
	ASSERT(MUTEX_HELD(&xprt->xp_thread_lock));
	ASSERT(xprt->xp_wq == NULL);

	/*
	 * If called from the last non-detached thread
	 * it should call the closeproc on this transport.
	 */
	if (!detached && xprt->xp_threads == 0 && xprt->xp_closeproc) {
		(*(xprt->xp_closeproc)) (xprt);
	}

	if (xprt->xp_threads + xprt->xp_detached_threads > 0)
		mutex_exit(&xprt->xp_thread_lock);
	else {
		/* Remove references to xprt from the `xprt-ready' queue */
		svc_xprt_qdelete(xprt->xp_pool, xprt);

		/* Unregister xprt from the pool's transport list */
		svc_xprt_unregister(xprt);
		svc_callout_free(xprt);
		SVC_DESTROY(xprt);
	}
}

/*
 * Find a dispatch routine for a given prog/vers pair.
 * This function is called from svc_getreq() to search the callout
 * table for an entry with a matching RPC program number `prog'
 * and a version range that covers `vers'.
 * - if it finds a matching entry it returns a pointer to the dispatch
 *   routine
 * - otherwise it returns NULL and fills `vers_min' and `vers_max'
 *   with, respectively, the lowest version and the highest version
 *   supported for the program `prog'.
 */
static SVC_DISPATCH *
svc_callout_find(SVCXPRT *xprt, rpcprog_t prog, rpcvers_t vers,
    rpcvers_t *vers_min, rpcvers_t *vers_max)
{
	SVC_CALLOUT_TABLE *sct = xprt->xp_sct;
	int i;

	*vers_min = ~(rpcvers_t)0;
	*vers_max = 0;

	for (i = 0; i < sct->sct_size; i++) {
		SVC_CALLOUT *sc = &sct->sct_sc[i];

		if (prog == sc->sc_prog) {
			if (vers >= sc->sc_versmin && vers <= sc->sc_versmax)
				return (sc->sc_dispatch);

			if (*vers_max < sc->sc_versmax)
				*vers_max = sc->sc_versmax;
			if (*vers_min > sc->sc_versmin)
				*vers_min = sc->sc_versmin;
		}
	}

	return (NULL);
}

/*
 * Optionally free the callout table allocated for this transport by
 * the service provider.
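 *
 * For orientation, a service provider might hand the transport a
 * statically allocated table like the hypothetical one below (the
 * program/version numbers and names are made up; the fields are
 * sc_prog, sc_versmin, sc_versmax, sc_dispatch and sct_size, sct_free,
 * sct_sc as used by svc_callout_find() above):
 *
 *	static SVC_CALLOUT example_sc[] = {
 *		{ EXAMPLE_PROG, EXAMPLE_VERSMIN, EXAMPLE_VERSMAX,
 *		    example_dispatch }
 *	};
 *
 *	static SVC_CALLOUT_TABLE example_sct = {
 *		sizeof (example_sc) / sizeof (example_sc[0]), FALSE,
 *		example_sc
 *	};
 *
 * With sct_free set to FALSE, as here, this routine would leave the
 * static table alone.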
 */
static void
svc_callout_free(SVCMASTERXPRT *xprt)
{
	SVC_CALLOUT_TABLE *sct = xprt->xp_sct;

	if (sct->sct_free) {
		kmem_free(sct->sct_sc, sct->sct_size * sizeof (SVC_CALLOUT));
		kmem_free(sct, sizeof (SVC_CALLOUT_TABLE));
	}
}

/*
 * Send a reply to an RPC request
 *
 * PSARC 2003/523 Contract Private Interface
 * svc_sendreply
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
bool_t
svc_sendreply(const SVCXPRT *clone_xprt, const xdrproc_t xdr_results,
    const caddr_t xdr_location)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = SUCCESS;
	rply.acpted_rply.ar_results.where = xdr_location;
	rply.acpted_rply.ar_results.proc = xdr_results;

	return (SVC_REPLY((SVCXPRT *)clone_xprt, &rply));
}

/*
 * No procedure error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_noproc
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_noproc(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = PROC_UNAVAIL;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Can't decode arguments error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_decode
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_decode(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = GARBAGE_ARGS;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Some system error
 */
void
svcerr_systemerr(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = SYSTEM_ERR;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Authentication error reply
 */
void
svcerr_auth(const SVCXPRT *clone_xprt, const enum auth_stat why)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_DENIED;
	rply.rjcted_rply.rj_stat = AUTH_ERROR;
	rply.rjcted_rply.rj_why = why;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Authentication too weak error reply
 */
void
svcerr_weakauth(const SVCXPRT *clone_xprt)
{
	svcerr_auth((SVCXPRT *)clone_xprt, AUTH_TOOWEAK);
}

/*
 * Program unavailable error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_noprog
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_noprog(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = PROG_UNAVAIL;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Program version mismatch error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_progvers
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_progvers(const SVCXPRT *clone_xprt,
    const rpcvers_t low_vers, const rpcvers_t high_vers)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = PROG_MISMATCH;
	rply.acpted_rply.ar_vers.low = low_vers;
	rply.acpted_rply.ar_vers.high = high_vers;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Get server side input from some transport.
 *
 * Statement of authentication parameters management:
 * This function owns and manages all authentication parameters,
 * specifically the "raw" parameters (msg.rm_call.cb_cred and
 * msg.rm_call.cb_verf) and the "cooked" credentials (rqst->rq_clntcred).
 * However, this function does not know the structure of the cooked
 * credentials, so it makes the following assumptions:
 *   a) the structure is contiguous (no pointers), and
 *   b) the cred structure size does not exceed RQCRED_SIZE bytes.
 * In all events, all three parameters are freed upon exit from this routine.
 * The storage is trivially managed on the call stack in user land, but
 * is malloced in kernel land.
 *
 * Note: the xprt's xp_svc_lock is not held while the service's dispatch
 * routine is running.  If we decide to implement svc_unregister(), we'll
 * need to decide whether it's okay for a thread to unregister a service
 * while a request is being processed.  If we decide that this is a
 * problem, we can probably use some sort of reference counting scheme to
 * keep the callout entry from going away until the request has completed.
 */
static void
svc_getreq(
	SVCXPRT *clone_xprt,	/* clone transport handle */
	mblk_t *mp)
{
	struct rpc_msg msg;
	struct svc_req r;
	char *cred_area;	/* too big to allocate on call stack */

	TRACE_0(TR_FAC_KRPC, TR_SVC_GETREQ_START,
	    "svc_getreq_start:");

	ASSERT(clone_xprt->xp_master != NULL);

	/*
	 * Firstly, allocate the authentication parameters' storage
	 */
	mutex_enter(&rqcred_lock);
	if (rqcred_head) {
		cred_area = rqcred_head;

		/* LINTED pointer alignment */
		rqcred_head = *(caddr_t *)rqcred_head;
		mutex_exit(&rqcred_lock);
	} else {
		mutex_exit(&rqcred_lock);
		cred_area = kmem_alloc(2 * MAX_AUTH_BYTES + RQCRED_SIZE,
		    KM_SLEEP);
	}
	msg.rm_call.cb_cred.oa_base = cred_area;
	msg.rm_call.cb_verf.oa_base = &(cred_area[MAX_AUTH_BYTES]);
	r.rq_clntcred = &(cred_area[2 * MAX_AUTH_BYTES]);

	/*
	 * Now receive a message from the transport.
	 */
	if (SVC_RECV(clone_xprt, mp, &msg)) {
		void (*dispatchroutine) (struct svc_req *, SVCXPRT *);
		rpcvers_t vers_min;
		rpcvers_t vers_max;
		bool_t no_dispatch;
		enum auth_stat why;

		/*
		 * Find the registered program and call its
		 * dispatch routine.
		 */
		r.rq_xprt = clone_xprt;
		r.rq_prog = msg.rm_call.cb_prog;
		r.rq_vers = msg.rm_call.cb_vers;
		r.rq_proc = msg.rm_call.cb_proc;
		r.rq_cred = msg.rm_call.cb_cred;

		/*
		 * First authenticate the message.
		 */
		TRACE_0(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_START,
		    "svc_getreq_auth_start:");
		if ((why = sec_svc_msg(&r, &msg, &no_dispatch)) != AUTH_OK) {
			TRACE_1(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_END,
			    "svc_getreq_auth_end:(%S)", "failed");
			svcerr_auth(clone_xprt, why);
			/*
			 * Free the arguments.
			 */
			(void) SVC_FREEARGS(clone_xprt, NULL, NULL);
		} else if (no_dispatch) {
			/*
			 * XXX - when bug id 4053736 is done, remove
			 * the SVC_FREEARGS() call.
			 */
			(void) SVC_FREEARGS(clone_xprt, NULL, NULL);
		} else {
			TRACE_1(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_END,
			    "svc_getreq_auth_end:(%S)", "good");

			dispatchroutine = svc_callout_find(clone_xprt,
			    r.rq_prog, r.rq_vers, &vers_min, &vers_max);

			if (dispatchroutine) {
				(*dispatchroutine) (&r, clone_xprt);
			} else {
				/*
				 * If we got here, the program or version
				 * is not served ...
				 */
				if (vers_max == 0 ||
				    version_keepquiet(clone_xprt))
					svcerr_noprog(clone_xprt);
				else
					svcerr_progvers(clone_xprt, vers_min,
					    vers_max);

				/*
				 * Free the arguments.  For successful calls
				 * this is done by the dispatch routine.
				 */
				(void) SVC_FREEARGS(clone_xprt, NULL, NULL);
				/* Fall through to ... */
			}
			/*
			 * Call the cleanup procedure for RPCSEC_GSS.
			 * This is a hack since there is currently no
			 * op, such as SVC_CLEANAUTH.  rpc_gss_cleanup
			 * should only be called for a non-null proc.
			 * Null procs in RPC GSS are overloaded to
			 * provide context setup and control.  The main
			 * purpose of rpc_gss_cleanup is to decrement the
			 * reference count associated with the cached
			 * GSS security context.  We should never get here
			 * for an RPCSEC_GSS null proc since *no_dispatch
			 * would have been set to true from sec_svc_msg above.
			 */
			if (r.rq_cred.oa_flavor == RPCSEC_GSS)
				rpc_gss_cleanup(clone_xprt);
		}
	}

	/*
	 * Free authentication parameters' storage
	 */
	mutex_enter(&rqcred_lock);
	/* LINTED pointer alignment */
	*(caddr_t *)cred_area = rqcred_head;
	rqcred_head = cred_area;
	mutex_exit(&rqcred_lock);
}

/*
 * Allocate a new clone transport handle.
 */
static SVCXPRT *
svc_clone_init(void)
{
	SVCXPRT *clone_xprt;

	clone_xprt = kmem_zalloc(sizeof (SVCXPRT), KM_SLEEP);
	clone_xprt->xp_cred = crget();
	return (clone_xprt);
}

/*
 * Free memory allocated by svc_clone_init.
 */
static void
svc_clone_free(SVCXPRT *clone_xprt)
{
	/* Free credentials from crget() */
	if (clone_xprt->xp_cred)
		crfree(clone_xprt->xp_cred);

	kmem_free(clone_xprt, sizeof (SVCXPRT));
}

/*
 * Link a per-thread clone transport handle to a master
 * - increment a thread reference count on the master
 * - copy some of the master's fields to the clone
 * - call a transport specific clone routine.
 */
static void
svc_clone_link(SVCMASTERXPRT *xprt, SVCXPRT *clone_xprt)
{
	cred_t *cred = clone_xprt->xp_cred;

	ASSERT(cred);

	/*
	 * Bump up master's thread count.
	 * Linking a per-thread clone transport handle to a master
	 * associates a service thread with the master.
	 */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_threads++;
	mutex_exit(&xprt->xp_thread_lock);

	/* Clear everything */
	bzero(clone_xprt, sizeof (SVCXPRT));

	/* Set pointer to the master transport structure */
	clone_xprt->xp_master = xprt;

	/* Structure copy of all the common fields */
	clone_xprt->xp_xpc = xprt->xp_xpc;

	/* Restore per-thread fields (xp_cred) */
	clone_xprt->xp_cred = cred;

	/*
	 * NOTICE: There is no transport-type specific code now.
	 * If you want to add transport-type specific cloning code
	 * add one more operation (e.g. xp_clone()) to svc_ops,
	 * implement it for each transport type, and call it here
	 * through an appropriate macro (e.g. SVC_CLONE()).
	 */
}

/*
 * Unlink a non-detached clone transport handle from a master
 * - decrement a thread reference count on the master
 * - if the transport is closing (xp_wq is NULL) call svc_xprt_cleanup();
 *   if this is the last non-detached/absolute thread on this transport
 *   then it will close/destroy the transport
 * - call a transport specific function to destroy the clone handle
 * - clear xp_master to avoid recursion.
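 *
 * For orientation, a service thread's use of a clone handle follows
 * roughly this sequence (a simplified sketch, not a verbatim excerpt
 * from svc_run(); master_xprt stands for whatever master handle the
 * thread attaches to):
 *
 *	SVCXPRT *clone_xprt = svc_clone_init();
 *	svc_clone_link(master_xprt, clone_xprt);  // attach to a master
 *	...process requests on this transport...
 *	svc_clone_unlink(clone_xprt);	// detach; may destroy the master
 *	svc_clone_free(clone_xprt);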
 */
static void
svc_clone_unlink(SVCXPRT *clone_xprt)
{
	SVCMASTERXPRT *xprt = clone_xprt->xp_master;

	/* This cannot be a detached thread */
	ASSERT(!clone_xprt->xp_detached);
	ASSERT(xprt->xp_threads > 0);

	/* Decrement a reference count on the transport */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_threads--;

	/* svc_xprt_cleanup() unlocks xp_thread_lock or destroys xprt */
	if (xprt->xp_wq)
		mutex_exit(&xprt->xp_thread_lock);
	else
		svc_xprt_cleanup(xprt, FALSE);

	/* Call a transport specific clone `destroy' function */
	SVC_CLONE_DESTROY(clone_xprt);

	/* Clear xp_master */
	clone_xprt->xp_master = NULL;
}

/*
 * Unlink a detached clone transport handle from a master
 * - decrement the thread count on the master
 * - if the transport is closing (xp_wq is NULL) call svc_xprt_cleanup();
 *   if this is the last thread on this transport then it will destroy
 *   the transport.
 * - call a transport specific function to destroy the clone handle
 * - clear xp_master to avoid recursion.
 */
static void
svc_clone_unlinkdetached(SVCXPRT *clone_xprt)
{
	SVCMASTERXPRT *xprt = clone_xprt->xp_master;

	/* This must be a detached thread */
	ASSERT(clone_xprt->xp_detached);
	ASSERT(xprt->xp_detached_threads > 0);
	ASSERT(xprt->xp_threads + xprt->xp_detached_threads > 0);

	/* Grab xprt->xp_thread_lock and decrement link counts */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_detached_threads--;

	/* svc_xprt_cleanup() unlocks xp_thread_lock or destroys xprt */
	if (xprt->xp_wq)
		mutex_exit(&xprt->xp_thread_lock);
	else
		svc_xprt_cleanup(xprt, TRUE);

	/* Call transport specific clone `destroy' function */
	SVC_CLONE_DESTROY(clone_xprt);

	/* Clear xp_master */
	clone_xprt->xp_master = NULL;
}

/*
 * Try to exit a non-detached service thread
 * - check if there are enough threads left
 * - if this thread (i.e. its clone transport handle) is linked
 *   to a master transport then unlink it
 * - free the clone structure
 * - return to userland for thread exit
 *
 * If this is the last non-detached or the last thread on this
 * transport then the call to svc_clone_unlink() will, respectively,
 * close and/or destroy the transport.
 */
static void
svc_thread_exit(SVCPOOL *pool, SVCXPRT *clone_xprt)
{
	if (clone_xprt->xp_master)
		svc_clone_unlink(clone_xprt);
	svc_clone_free(clone_xprt);

	mutex_enter(&pool->p_thread_lock);
	pool->p_threads--;
	if (pool->p_closing && svc_pool_tryexit(pool))
		/* return - thread exit will be handled at user level */
		return;
	mutex_exit(&pool->p_thread_lock);

	/* return - thread exit will be handled at user level */
}

/*
 * Exit a detached service thread that returned to svc_run
 * - decrement the `detached thread' count for the pool
 * - unlink the detached clone transport handle from the master
 * - free the clone structure
 * - return to userland for thread exit
 *
 * If this is the last thread on this transport then the call
 * to svc_clone_unlinkdetached() will destroy the transport.
 */
static void
svc_thread_exitdetached(SVCPOOL *pool, SVCXPRT *clone_xprt)
{
	/* This must be a detached thread */
	ASSERT(clone_xprt->xp_master);
	ASSERT(clone_xprt->xp_detached);
	ASSERT(!MUTEX_HELD(&pool->p_thread_lock));

	svc_clone_unlinkdetached(clone_xprt);
	svc_clone_free(clone_xprt);

	mutex_enter(&pool->p_thread_lock);

	ASSERT(pool->p_reserved_threads >= 0);
	ASSERT(pool->p_detached_threads > 0);

	pool->p_detached_threads--;
	if (pool->p_closing && svc_pool_tryexit(pool))
		/* return - thread exit will be handled at user level */
		return;
	mutex_exit(&pool->p_thread_lock);

	/* return - thread exit will be handled at user level */
}

/*
 * PSARC 2003/523 Contract Private Interface
 * svc_wait
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
int
svc_wait(int id)
{
	SVCPOOL *pool;
	int err = 0;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	mutex_enter(&svc->svc_plock);
	pool = svc_pool_find(svc, id);
	mutex_exit(&svc->svc_plock);

	if (pool == NULL)
		return (ENOENT);

	mutex_enter(&pool->p_user_lock);

	/* Check if there's already a user thread waiting on this pool */
	if (pool->p_user_waiting) {
		mutex_exit(&pool->p_user_lock);
		return (EBUSY);
	}

	pool->p_user_waiting = TRUE;

	/* Go to sleep, waiting for the signaled flag. */
	while (!pool->p_signal_create_thread && !pool->p_user_exit) {
		if (cv_wait_sig(&pool->p_user_cv, &pool->p_user_lock) == 0) {
			/* Interrupted, return to handle exit or signal */
			pool->p_user_waiting = FALSE;
			pool->p_signal_create_thread = FALSE;
			mutex_exit(&pool->p_user_lock);

			/*
			 * The thread has been interrupted and therefore
			 * the service daemon is leaving as well, so
			 * let's go ahead and remove the service
			 * pool at this time.
			 */
			mutex_enter(&svc->svc_plock);
			svc_pool_unregister(svc, pool);
			mutex_exit(&svc->svc_plock);

			return (EINTR);
		}
	}

	pool->p_signal_create_thread = FALSE;
	pool->p_user_waiting = FALSE;

	/*
	 * About to exit the service pool.  Set return value
	 * to let the userland code know our intent.  Signal
	 * svc_thread_creator() so that it can clean up the
	 * pool structure.
	 */
	if (pool->p_user_exit) {
		err = ECANCELED;
		cv_signal(&pool->p_user_cv);
	}

	mutex_exit(&pool->p_user_lock);

	/* Return to userland with error code, for possible thread creation. */
	return (err);
}

/*
 * `Service threads' creator thread.
 * The creator thread waits for a signal to create a new thread.
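 *
 * Putting the pieces together, the thread-creation handshake runs
 * roughly as follows (a summary of the logic in this file, not new
 * behavior):
 *
 *	interrupt routine (svc_queuereq())
 *	    -> svc_creator_signal():  set p_creator_signaled,
 *	       signal p_creator_cv
 *	svc_thread_creator() wakes up
 *	    -> set p_signal_create_thread, signal p_user_cv
 *	svc_wait() returns 0 to userland
 *	    -> userland creates a worker thread, which enters
 *	       svc_do_run() and loops in svc_run()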
 */
static void
svc_thread_creator(SVCPOOL *pool)
{
	callb_cpr_t cpr_info;	/* CPR info for the creator thread */

	CALLB_CPR_INIT(&cpr_info, &pool->p_creator_lock, callb_generic_cpr,
	    "svc_thread_creator");

	for (;;) {
		mutex_enter(&pool->p_creator_lock);

		/* Check if someone set the exit flag */
		if (pool->p_creator_exit)
			break;

		/* Clear the `signaled' flag and go to sleep */
		pool->p_creator_signaled = FALSE;

		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		cv_wait(&pool->p_creator_cv, &pool->p_creator_lock);
		CALLB_CPR_SAFE_END(&cpr_info, &pool->p_creator_lock);

		/* Check if someone signaled to exit */
		if (pool->p_creator_exit)
			break;

		mutex_exit(&pool->p_creator_lock);

		mutex_enter(&pool->p_thread_lock);

		/*
		 * When the pool is in closing state and all the transports
		 * are gone the creator should not create any new threads.
		 */
		if (pool->p_closing) {
			rw_enter(&pool->p_lrwlock, RW_READER);
			if (pool->p_lcount == 0) {
				rw_exit(&pool->p_lrwlock);
				mutex_exit(&pool->p_thread_lock);
				continue;
			}
			rw_exit(&pool->p_lrwlock);
		}

		/*
		 * Create a new service thread now.
		 */
		ASSERT(pool->p_reserved_threads >= 0);
		ASSERT(pool->p_detached_threads >= 0);

		if (pool->p_threads + pool->p_detached_threads <
		    pool->p_maxthreads) {
			/*
			 * Signal the service pool wait thread
			 * only if it hasn't already been signaled.
			 */
			mutex_enter(&pool->p_user_lock);
			if (pool->p_signal_create_thread == FALSE) {
				pool->p_signal_create_thread = TRUE;
				cv_signal(&pool->p_user_cv);
			}
			mutex_exit(&pool->p_user_lock);

		}

		mutex_exit(&pool->p_thread_lock);
	}

	/*
	 * Pool is closed.  Cleanup and exit.
	 */

	/* Signal userland creator thread that it can stop now. */
	mutex_enter(&pool->p_user_lock);
	pool->p_user_exit = TRUE;
	cv_broadcast(&pool->p_user_cv);
	mutex_exit(&pool->p_user_lock);

	/* Wait for svc_wait() to be done with the pool */
	mutex_enter(&pool->p_user_lock);
	while (pool->p_user_waiting) {
		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		cv_wait(&pool->p_user_cv, &pool->p_user_lock);
		CALLB_CPR_SAFE_END(&cpr_info, &pool->p_creator_lock);
	}
	mutex_exit(&pool->p_user_lock);

	CALLB_CPR_EXIT(&cpr_info);
	svc_pool_cleanup(pool);
	zthread_exit();
}

/*
 * If the creator thread is idle signal it to create
 * a new service thread.
 */
static void
svc_creator_signal(SVCPOOL *pool)
{
	mutex_enter(&pool->p_creator_lock);
	if (pool->p_creator_signaled == FALSE) {
		pool->p_creator_signaled = TRUE;
		cv_signal(&pool->p_creator_cv);
	}
	mutex_exit(&pool->p_creator_lock);
}

/*
 * Notify the creator thread to clean up and exit.
 */
static void
svc_creator_signalexit(SVCPOOL *pool)
{
	mutex_enter(&pool->p_creator_lock);
	pool->p_creator_exit = TRUE;
	cv_signal(&pool->p_creator_cv);
	mutex_exit(&pool->p_creator_lock);
}

/*
 * Polling part of the svc_run().
/*
 * Polling part of svc_run().
 * - search for a transport with a pending request
 * - when one is found then latch the request lock and return to svc_run()
 * - if there is no request go asleep and wait for a signal
 * - handle two exceptions:
 *   a) current transport is closing
 *   b) timeout waiting for a new request
 *   in both cases return to svc_run()
 */
static SVCMASTERXPRT *
svc_poll(SVCPOOL *pool, SVCMASTERXPRT *xprt, SVCXPRT *clone_xprt)
{
	/*
	 * Main loop iterates until
	 * a) we find a pending request,
	 * b) we detect that the current transport is closing, or
	 * c) we time out waiting for a new request.
	 */
	for (;;) {
		SVCMASTERXPRT *next;
		clock_t timeleft;

		/*
		 * Step 1.
		 * Check if there is a pending request on the current
		 * transport handle so that we can avoid cloning.
		 * If so then decrement the `pending-request' count for
		 * the pool and return to svc_run().
		 *
		 * We need to prevent potential starvation. If a single
		 * transport always has pending requests coming in then
		 * the service threads will never switch to another
		 * transport. With a limited number of service threads
		 * some transports may never be serviced.
		 * To prevent such a scenario we pick up at most
		 * pool->p_max_same_xprt requests from the same transport
		 * and then take a hint from the xprt-ready queue or walk
		 * the transport list.
		 */
		if (xprt && xprt->xp_req_head && (!pool->p_qoverflow ||
		    clone_xprt->xp_same_xprt++ < pool->p_max_same_xprt)) {
			mutex_enter(&xprt->xp_req_lock);
			if (xprt->xp_req_head) {
				mutex_enter(&pool->p_req_lock);
				pool->p_reqs--;
				mutex_exit(&pool->p_req_lock);

				return (xprt);
			}
			mutex_exit(&xprt->xp_req_lock);
		}
		clone_xprt->xp_same_xprt = 0;

		/*
		 * Step 2.
		 * If there is no request on the current transport try to
		 * find another transport with a pending request.
		 */
		mutex_enter(&pool->p_req_lock);
		pool->p_walkers++;
		mutex_exit(&pool->p_req_lock);

		/*
		 * Make sure that transports will not be destroyed just
		 * while we are checking them.
		 */
		rw_enter(&pool->p_lrwlock, RW_READER);

		for (;;) {
			SVCMASTERXPRT *hint;

			/*
			 * Get the next transport from the xprt-ready queue.
			 * This is a hint. There is no guarantee that the
			 * transport still has a pending request since it
			 * could be picked up by another thread in step 1.
			 *
			 * If the transport has a pending request then keep
			 * it locked. Decrement the `pending-requests' and
			 * `walking-threads' counts for the pool, and return
			 * to svc_run().
			 */
			hint = svc_xprt_qget(pool);

			if (hint && hint->xp_req_head) {
				mutex_enter(&hint->xp_req_lock);
				if (hint->xp_req_head) {
					rw_exit(&pool->p_lrwlock);

					mutex_enter(&pool->p_req_lock);
					pool->p_reqs--;
					pool->p_walkers--;
					mutex_exit(&pool->p_req_lock);

					return (hint);
				}
				mutex_exit(&hint->xp_req_lock);
			}

			/*
			 * If there was no hint in the xprt-ready queue then
			 * - if there are fewer pending requests than polling
			 *   threads go asleep
			 * - otherwise check if there was an overflow in the
			 *   xprt-ready queue; if so, then we need to break
			 *   the `drain' mode
			 */
			if (hint == NULL) {
				if (pool->p_reqs < pool->p_walkers) {
					mutex_enter(&pool->p_req_lock);
					if (pool->p_reqs < pool->p_walkers)
						goto sleep;
					mutex_exit(&pool->p_req_lock);
				}
				if (pool->p_qoverflow) {
					break;
				}
			}
		}

		/*
		 * If there was an overflow in the xprt-ready queue then we
		 * need to switch to the `drain' mode, i.e. walk through the
		 * pool's transport list and search for a transport with a
		 * pending request. If we manage to drain all the pending
		 * requests then we can clear the overflow flag. This will
		 * switch svc_poll() back to taking hints from the xprt-ready
		 * queue (which is generally more efficient).
		 *
		 * If there are no registered transports simply go asleep.
		 */
		if (xprt == NULL && pool->p_lhead == NULL) {
			mutex_enter(&pool->p_req_lock);
			goto sleep;
		}

		/*
		 * `Walk' through the pool's list of master server
		 * transport handles. Continue to loop until there are
		 * fewer pending requests than walking threads.
		 */
		next = xprt ? xprt->xp_next : pool->p_lhead;

		for (;;) {
			/*
			 * Check if there is a request on this transport.
			 *
			 * Since blocking on a locked mutex is very expensive
			 * check for a request without a lock first. We may
			 * miss a request that is just being delivered but
			 * this will cost at most one full walk through the
			 * list.
			 */
			if (next->xp_req_head) {
				/*
				 * Check again, now with a lock.
				 */
				mutex_enter(&next->xp_req_lock);
				if (next->xp_req_head) {
					rw_exit(&pool->p_lrwlock);

					mutex_enter(&pool->p_req_lock);
					pool->p_reqs--;
					pool->p_walkers--;
					mutex_exit(&pool->p_req_lock);

					return (next);
				}
				mutex_exit(&next->xp_req_lock);
			}

			/*
			 * Continue to `walk' through the pool's
			 * transport list until there are fewer requests
			 * than walkers. Check this condition without
			 * a lock first to avoid contention on a mutex.
			 */
			if (pool->p_reqs < pool->p_walkers) {
				/*
				 * Check again, now with the lock.
				 * If all the pending requests have been
				 * picked up then clear the overflow flag.
				 */
				mutex_enter(&pool->p_req_lock);
				if (pool->p_reqs <= 0)
					svc_xprt_qreset(pool);
				if (pool->p_reqs < pool->p_walkers)
					break;	/* goto sleep */
				mutex_exit(&pool->p_req_lock);
			}

			next = next->xp_next;
		}

	sleep:
		/*
		 * No work to do. Stop the `walk' and go asleep.
		 * Decrement the `walking-threads' count for the pool.
		 */
		pool->p_walkers--;
		rw_exit(&pool->p_lrwlock);

		/*
		 * Count us as asleep, mark this thread as safe
		 * for suspend and wait for a request.
		 */
		pool->p_asleep++;
		timeleft = cv_timedwait_sig(&pool->p_req_cv,
		    &pool->p_req_lock, pool->p_timeout + lbolt);

		/*
		 * If the drowsy flag is on this means that
		 * someone has signaled a wakeup. In such a case
		 * the `asleep-threads' count has already been updated
		 * so just clear the flag.
		 *
		 * If the drowsy flag is off then we need to update
		 * the `asleep-threads' count.
		 */
		if (pool->p_drowsy) {
			pool->p_drowsy = FALSE;
			/*
			 * If the thread is here because it timed out,
			 * instead of returning SVC_ETIMEDOUT, it is
			 * time to do some more work.
			 */
			if (timeleft == -1)
				timeleft = 1;
		} else {
			pool->p_asleep--;
		}
		mutex_exit(&pool->p_req_lock);

		/*
		 * If we received a signal while waiting for a
		 * request, inform svc_run(), so that we can return
		 * to user level and restart the call.
		 */
		if (timeleft == 0)
			return (SVC_EINTR);

		/*
		 * If the current transport is gone then notify
		 * svc_run() to unlink from it.
		 */
		if (xprt && xprt->xp_wq == NULL)
			return (SVC_EXPRTGONE);

		/*
		 * If we have timed out waiting for a request inform
		 * svc_run() that we probably don't need this thread.
		 */
		if (timeleft == -1)
			return (SVC_ETIMEDOUT);
	}
}

/*
 * Main loop of the kernel RPC server
 * - wait for input (find a transport with a pending request)
 * - dequeue the request
 * - call a registered server routine to process the request
 *
 * There can be many threads running concurrently in this loop
 * on the same or on different transports.
 */
static int
svc_run(SVCPOOL *pool)
{
	SVCMASTERXPRT *xprt = NULL;	/* master transport handle */
	SVCXPRT *clone_xprt;	/* clone for this thread */
	struct svc_globals *svc;
	proc_t *p = ttoproc(curthread);

	/* Allocate a clone transport handle for this thread */
	clone_xprt = svc_clone_init();

	/*
	 * The loop iterates until the thread has been idle
	 * too long or the transport is gone.
	 */
	for (;;) {
		SVCMASTERXPRT *next;
		mblk_t *mp;

		TRACE_0(TR_FAC_KRPC, TR_SVC_RUN, "svc_run");

		/*
		 * If the process is exiting/killed, return
		 * immediately without processing any more
		 * requests.
		 */
		if (p->p_flag & (SEXITING | SKILLED)) {
			svc_thread_exit(pool, clone_xprt);

			/*
			 * Thread has been interrupted and therefore
			 * the service daemon is leaving as well so
			 * let's go ahead and remove the service
			 * pool at this time.
			 */
			svc = zone_getspecific(svc_zone_key, curproc->p_zone);
			mutex_enter(&svc->svc_plock);
			svc_pool_unregister(svc, pool);
			mutex_exit(&svc->svc_plock);

			return (0);
		}

		/* Find a transport with a pending request */
		next = svc_poll(pool, xprt, clone_xprt);

		/*
		 * If svc_poll() finds a transport with a request
		 * it latches xp_req_lock on it. Therefore we need
		 * to dequeue the request and release the lock as
		 * soon as possible.
		 */
		ASSERT(next != NULL &&
		    (next == SVC_EXPRTGONE ||
		    next == SVC_ETIMEDOUT ||
		    next == SVC_EINTR ||
		    MUTEX_HELD(&next->xp_req_lock)));

		/* Oops! Current transport is closing. Unlink now */
		if (next == SVC_EXPRTGONE) {
			svc_clone_unlink(clone_xprt);
			xprt = NULL;
			continue;
		}
		/* Oops! Timeout while waiting for a request. Exit */
		if (next == SVC_ETIMEDOUT) {
			svc_thread_exit(pool, clone_xprt);
			return (0);
		}

		/*
		 * Interrupted by a signal while waiting for a
		 * request. Return to userspace and restart.
		 */
		if (next == SVC_EINTR) {
			svc_thread_exit(pool, clone_xprt);

			/*
			 * Thread has been interrupted and therefore
			 * the service daemon is leaving as well so
			 * let's go ahead and remove the service
			 * pool at this time.
			 */
			svc = zone_getspecific(svc_zone_key, curproc->p_zone);
			mutex_enter(&svc->svc_plock);
			svc_pool_unregister(svc, pool);
			mutex_exit(&svc->svc_plock);

			return (EINTR);
		}

		/*
		 * De-queue the request and release the request lock
		 * on this transport (latched by svc_poll()).
		 */
		mp = next->xp_req_head;
		next->xp_req_head = mp->b_next;
		mp->b_next = (mblk_t *)0;

		TRACE_2(TR_FAC_KRPC, TR_NFSFP_QUE_REQ_DEQ,
		    "rpc_que_req_deq:pool %p mp %p", pool, mp);
		mutex_exit(&next->xp_req_lock);

		/*
		 * If this is a new request on the current transport then
		 * the clone structure is already properly initialized.
		 * Otherwise, if the request is on a different transport,
		 * unlink from the current master and link to
		 * the one we got a request on.
		 */
		if (next != xprt) {
			if (xprt)
				svc_clone_unlink(clone_xprt);
			svc_clone_link(next, clone_xprt);
			xprt = next;
		}

		/*
		 * If there are more requests and req_cv hasn't
		 * been signaled yet then wake up one more thread now.
		 *
		 * We avoid signaling req_cv until the most recently
		 * signaled thread wakes up and gets CPU to clear
		 * the `drowsy' flag.
		 */
		if (!(pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
		    pool->p_asleep == 0)) {
			mutex_enter(&pool->p_req_lock);

			if (pool->p_drowsy ||
			    pool->p_reqs <= pool->p_walkers ||
			    pool->p_asleep == 0)
				mutex_exit(&pool->p_req_lock);
			else {
				pool->p_asleep--;
				pool->p_drowsy = TRUE;

				cv_signal(&pool->p_req_cv);
				mutex_exit(&pool->p_req_lock);
			}
		}

		/*
		 * If there are no asleep/signaled threads, we are
		 * still below the pool->p_maxthreads limit, and no thread
		 * is currently being created then signal the creator
		 * for one more service thread.
		 *
		 * The asleep and drowsy checks are not protected
		 * by a lock since it hurts performance and a wrong
		 * decision is not essential.
		 */
		if (pool->p_asleep == 0 && !pool->p_drowsy &&
		    pool->p_threads + pool->p_detached_threads <
		    pool->p_maxthreads)
			svc_creator_signal(pool);

		/*
		 * Process the request.
		 */
		svc_getreq(clone_xprt, mp);

		/* If thread had a reservation it should have been canceled */
		ASSERT(!clone_xprt->xp_reserved);

		/*
		 * If the clone is marked detached then exit.
		 * The rpcmod slot has already been released
		 * when we detached this thread.
		 */
		if (clone_xprt->xp_detached) {
			svc_thread_exitdetached(pool, clone_xprt);
			return (0);
		}

		/*
		 * Release our reference on the rpcmod
		 * slot attached to xp_wq->q_ptr.
		 */
		(*RELE_PROC(xprt)) (clone_xprt->xp_wq, NULL);
	}
	/* NOTREACHED */
}
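/*
 * For reference: the per-transport request queue manipulated above is a
 * plain singly-linked mblk FIFO chained through b_next.  svc_queuereq()
 * (below) appends at xp_req_tail under xp_req_lock, while svc_run() and
 * svc_queueclean() pop from xp_req_head.  A minimal sketch of the idiom
 * with hypothetical local names (not part of this file):
 *
 *	mutex_enter(&lock);		// enqueue at the tail
 *	if (head == NULL)		// empty queue: tail is stale and
 *		head = mp;		// is not examined in this case
 *	else
 *		tail->b_next = mp;
 *	tail = mp;			// mp becomes the new tail
 *	mutex_exit(&lock);
 *
 *	mutex_enter(&lock);		// dequeue from the head
 *	mp = head;
 *	head = mp->b_next;
 *	mp->b_next = NULL;
 *	mutex_exit(&lock);
 *
 * Note that xp_req_tail is only meaningful while xp_req_head != NULL.
 */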
/*
 * Flush any pending requests for the queue and
 * free the associated mblks.
 */
void
svc_queueclean(queue_t *q)
{
	SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0];
	mblk_t *mp;

	/*
	 * clean up the requests
	 */
	mutex_enter(&xprt->xp_req_lock);
	while ((mp = xprt->xp_req_head) != NULL) {
		xprt->xp_req_head = mp->b_next;
		mp->b_next = (mblk_t *)0;
		(*RELE_PROC(xprt)) (xprt->xp_wq, mp);
	}
	mutex_exit(&xprt->xp_req_lock);
}

/*
 * This routine is called by rpcmod to inform kernel RPC that a
 * queue is closing. It is called after all the requests have been
 * picked up (that is after all the slots on the queue have
 * been released by kernel RPC). It is also guaranteed that no more
 * requests will be delivered on this transport.
 *
 * - clear xp_wq to mark the master server transport handle as closing
 * - if there are no more threads on this transport close/destroy it
 * - otherwise, broadcast threads sleeping in svc_poll(); the last
 *   thread will close/destroy the transport.
 */
void
svc_queueclose(queue_t *q)
{
	SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0];

	if (xprt == NULL) {
		/*
		 * If there is no master xprt associated with this stream,
		 * then there is nothing to do. This happens regularly
		 * with connection-oriented listening streams created by
		 * nfsd.
		 */
		return;
	}

	mutex_enter(&xprt->xp_thread_lock);

	ASSERT(xprt->xp_req_head == NULL);
	ASSERT(xprt->xp_wq != NULL);

	xprt->xp_wq = NULL;

	if (xprt->xp_threads == 0) {
		SVCPOOL *pool = xprt->xp_pool;

		/*
		 * svc_xprt_cleanup() destroys the transport
		 * or releases the transport thread lock
		 */
		svc_xprt_cleanup(xprt, FALSE);

		mutex_enter(&pool->p_thread_lock);

		/*
		 * If the pool is in closing state and this was
		 * the last transport in the pool then signal the creator
		 * thread to clean up and exit.
		 */
		if (pool->p_closing && svc_pool_tryexit(pool)) {
			return;
		}
		mutex_exit(&pool->p_thread_lock);
	} else {
		/*
		 * Wake up threads sleeping in svc_poll() so that they
		 * unlink from the transport
		 */
		mutex_enter(&xprt->xp_pool->p_req_lock);
		cv_broadcast(&xprt->xp_pool->p_req_cv);
		mutex_exit(&xprt->xp_pool->p_req_lock);

		/*
		 * NOTICE: No references to the master transport structure
		 * beyond this point!
		 */
		mutex_exit(&xprt->xp_thread_lock);
	}
}

/*
 * Interrupt `request delivery' routine called from rpcmod
 * - put a request at the tail of the transport request queue
 * - insert a hint for svc_poll() into the xprt-ready queue
 * - increment the `pending-requests' count for the pool
 * - wake up a thread sleeping in svc_poll() if necessary
 * - if all the threads are running ask the creator for a new one.
 */
void
svc_queuereq(queue_t *q, mblk_t *mp)
{
	SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0];
	SVCPOOL *pool = xprt->xp_pool;

	TRACE_0(TR_FAC_KRPC, TR_SVC_QUEUEREQ_START, "svc_queuereq_start");

	/*
	 * Step 1.
	 * Grab the transport's request lock and put
	 * the request at the tail of the transport's
	 * request queue.
	 */
	mutex_enter(&xprt->xp_req_lock);
	if (xprt->xp_req_head == NULL)
		xprt->xp_req_head = mp;
	else
		xprt->xp_req_tail->b_next = mp;
	xprt->xp_req_tail = mp;

	mutex_exit(&xprt->xp_req_lock);

	/*
	 * Step 2.
	 * Grab the pool request lock, insert a hint into
	 * the xprt-ready queue, increment the `pending-requests'
	 * count for the pool, and wake up a thread sleeping
	 * in svc_poll() if necessary.
	 */
	mutex_enter(&pool->p_req_lock);

	/* Insert pointer to this transport into the xprt-ready queue */
	svc_xprt_qput(pool, xprt);

	/* Increment the `pending-requests' count for the pool */
	pool->p_reqs++;

	TRACE_2(TR_FAC_KRPC, TR_NFSFP_QUE_REQ_ENQ,
	    "rpc_que_req_enq:pool %p mp %p", pool, mp);

	/*
	 * If there are more requests and req_cv hasn't
	 * been signaled yet then wake up one more thread now.
	 *
	 * We avoid signaling req_cv until the most recently
	 * signaled thread wakes up and gets CPU to clear
	 * the `drowsy' flag.
	 */
	if (pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
	    pool->p_asleep == 0) {
		mutex_exit(&pool->p_req_lock);
	} else {
		pool->p_drowsy = TRUE;
		pool->p_asleep--;

		/*
		 * Signal wakeup and drop the request lock.
		 */
		cv_signal(&pool->p_req_cv);
		mutex_exit(&pool->p_req_lock);
	}

	/*
	 * Step 3.
	 * If there are no asleep/signaled threads, we are
	 * still below the pool->p_maxthreads limit, and no thread is
	 * currently being created then signal the creator
	 * for one more service thread.
	 *
	 * The asleep and drowsy checks are not protected
	 * by a lock since it hurts performance and a wrong
	 * decision is not essential.
	 */
	if (pool->p_asleep == 0 && !pool->p_drowsy &&
	    pool->p_threads + pool->p_detached_threads < pool->p_maxthreads)
		svc_creator_signal(pool);

	TRACE_1(TR_FAC_KRPC, TR_SVC_QUEUEREQ_END,
	    "svc_queuereq_end:(%S)", "end");
}
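/*
 * Both svc_run() and svc_queuereq() use the same unlocked-then-locked
 * test around the `drowsy' wakeup throttle.  A condensed sketch of the
 * pattern (hypothetical helper name, not part of this file):
 *
 *	static void
 *	maybe_wake_one(SVCPOOL *pool)
 *	{
 *		// Racy pre-check: cheap, and a wrong answer only costs
 *		// a missed or redundant wakeup, never correctness.
 *		if (pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
 *		    pool->p_asleep == 0)
 *			return;
 *
 *		mutex_enter(&pool->p_req_lock);
 *		// Re-check under the lock before committing.
 *		if (!(pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
 *		    pool->p_asleep == 0)) {
 *			pool->p_asleep--;
 *			pool->p_drowsy = TRUE;
 *			cv_signal(&pool->p_req_cv);
 *		}
 *		mutex_exit(&pool->p_req_lock);
 *	}
 */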
/*
 * Reserve a service thread so that it can be detached later.
 * This reservation is required to make sure that when it tries to
 * detach itself the total number of detached threads does not exceed
 * pool->p_maxthreads - pool->p_redline (i.e. that at least
 * pool->p_redline threads can remain non-detached).
 *
 * If the thread does not detach itself later, it should cancel the
 * reservation before returning to svc_run().
 *
 * - check if there is room for more reserved/detached threads
 * - if so, then increment the `reserved threads' count for the pool
 * - mark the thread as reserved (setting the flag in the clone transport
 *   handle for this thread)
 * - return 1 if the reservation succeeded, 0 if it failed.
 */
int
svc_reserve_thread(SVCXPRT *clone_xprt)
{
	SVCPOOL *pool = clone_xprt->xp_master->xp_pool;

	/* Recursive reservations are not allowed */
	ASSERT(!clone_xprt->xp_reserved);
	ASSERT(!clone_xprt->xp_detached);

	/* Check pool counts to see if there is room for a reservation */
	mutex_enter(&pool->p_thread_lock);
	if (pool->p_reserved_threads + pool->p_detached_threads >=
	    pool->p_maxthreads - pool->p_redline) {
		mutex_exit(&pool->p_thread_lock);
		return (0);
	}
	pool->p_reserved_threads++;
	mutex_exit(&pool->p_thread_lock);

	/* Mark the thread (clone handle) as reserved */
	clone_xprt->xp_reserved = TRUE;

	return (1);
}

/*
 * Cancel a reservation for a thread.
 * - decrement the `reserved threads' count for the pool
 * - clear the flag in the clone transport handle for this thread.
 */
void
svc_unreserve_thread(SVCXPRT *clone_xprt)
{
	SVCPOOL *pool = clone_xprt->xp_master->xp_pool;

	/* Thread must have a reservation */
	ASSERT(clone_xprt->xp_reserved);
	ASSERT(!clone_xprt->xp_detached);

	/* Decrement global count */
	mutex_enter(&pool->p_thread_lock);
	pool->p_reserved_threads--;
	mutex_exit(&pool->p_thread_lock);

	/* Clear reservation flag */
	clone_xprt->xp_reserved = FALSE;
}

/*
 * Detach a thread from its transport, so that it can block for an
 * extended time. Because the transport can be closed after the thread is
 * detached, the thread should have already sent off a reply if it was
 * going to send one.
 *
 * - decrement the `non-detached threads' count and increment the
 *   `detached threads' count for the transport
 * - decrement the `non-detached threads' and `reserved threads'
 *   counts and increment the `detached threads' count for the pool
 * - release the rpcmod slot
 * - mark the clone (thread) as detached.
 *
 * No need to return a pointer to the thread's CPR information, since
 * the thread has a userland identity.
 *
 * NOTICE: a thread must not detach itself without making a prior
 * reservation through svc_reserve_thread().
 */
callb_cpr_t *
svc_detach_thread(SVCXPRT *clone_xprt)
{
	SVCMASTERXPRT *xprt = clone_xprt->xp_master;
	SVCPOOL *pool = xprt->xp_pool;

	/* Thread must have a reservation */
	ASSERT(clone_xprt->xp_reserved);
	ASSERT(!clone_xprt->xp_detached);

	/* Bookkeeping for this transport */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_threads--;
	xprt->xp_detached_threads++;
	mutex_exit(&xprt->xp_thread_lock);

	/* Bookkeeping for the pool */
	mutex_enter(&pool->p_thread_lock);
	pool->p_threads--;
	pool->p_reserved_threads--;
	pool->p_detached_threads++;
	mutex_exit(&pool->p_thread_lock);

	/* Release an rpcmod slot for this request */
	(*RELE_PROC(xprt)) (clone_xprt->xp_wq, NULL);

	/* Mark the clone (thread) as detached */
	clone_xprt->xp_reserved = FALSE;
	clone_xprt->xp_detached = TRUE;

	return (NULL);
}
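/*
 * Illustrative only: the reservation/detach lifecycle as seen from a
 * service routine.  The request-handling details are hypothetical; the
 * three calls and the rule that an unused reservation must be canceled
 * come from the functions above.
 *
 *	if (svc_reserve_thread(clone_xprt) == 0) {
 *		// No room below p_maxthreads - p_redline; must not
 *		// block for an extended time.
 *	} else if (must_block_for_long(req)) {	// hypothetical predicate
 *		// Any reply must already have been sent; the transport
 *		// can be closed once we are detached.
 *		(void) svc_detach_thread(clone_xprt);
 *		// ... long blocking operation; svc_run() later sees
 *		// xp_detached set and exits through
 *		// svc_thread_exitdetached().
 *	} else {
 *		// Did not detach after all: cancel the reservation
 *		// before returning to svc_run().
 *		svc_unreserve_thread(clone_xprt);
 *	}
 */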
/*
 * This routine is responsible for extracting the RDMA plugin master
 * XPRTs, unregistering them from the SVCPOOL, and initiating
 * plugin-specific cleanup. It is passed a list/group of rdma transport
 * records which are active in a given registered or unregistered kRPC
 * thread pool. It shuts down all active rdma transports in that pool.
 * If the thread active on the transport happens to be the last thread
 * for that pool, it will signal the creator thread to clean up the pool
 * and destroy the xprt in svc_queueclose().
 */
void
rdma_stop(rdma_xprt_group_t rdma_xprts)
{
	SVCMASTERXPRT *xprt;
	rdma_xprt_record_t *curr_rec;
	queue_t *q;
	mblk_t *mp;
	int i;

	if (rdma_xprts.rtg_count == 0)
		return;

	for (i = 0; i < rdma_xprts.rtg_count; i++) {
		curr_rec = rdma_xprts.rtg_listhead;
		rdma_xprts.rtg_listhead = curr_rec->rtr_next;
		curr_rec->rtr_next = NULL;
		xprt = curr_rec->rtr_xprt_ptr;
		q = xprt->xp_wq;
		svc_rdma_kstop(xprt);

		/* Flush and free any requests still queued on the transport */
		mutex_enter(&xprt->xp_req_lock);
		while ((mp = xprt->xp_req_head) != NULL) {
			xprt->xp_req_head = mp->b_next;
			mp->b_next = (mblk_t *)0;
			freemsg(mp);
		}
		mutex_exit(&xprt->xp_req_lock);
		svc_queueclose(q);
#ifdef DEBUG
		if (rdma_check)
			cmn_err(CE_NOTE, "rdma_stop: Exited svc_queueclose\n");
#endif
		/*
		 * Free the rdma transport record for the expunged rdma
		 * based master transport handle.
		 */
		kmem_free(curr_rec, sizeof (rdma_xprt_record_t));
		if (!rdma_xprts.rtg_listhead)
			break;
	}
}
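/*
 * Illustrative only: how a caller might hand a transport group to
 * rdma_stop().  The list-building loop is hypothetical; the field names
 * (rtg_count, rtg_listhead, rtr_next, rtr_xprt_ptr) match the records
 * consumed above, and rdma_stop() frees each record itself.
 *
 *	rdma_xprt_group_t grp;
 *	rdma_xprt_record_t *rec;
 *
 *	grp.rtg_count = 0;
 *	grp.rtg_listhead = NULL;
 *	// for each master xprt to shut down (hypothetical loop):
 *	rec = kmem_zalloc(sizeof (rdma_xprt_record_t), KM_SLEEP);
 *	rec->rtr_xprt_ptr = xprt;
 *	rec->rtr_next = grp.rtg_listhead;
 *	grp.rtg_listhead = rec;
 *	grp.rtg_count++;
 *
 *	rdma_stop(grp);
 */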