/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 1993 OpenVision Technologies, Inc., All Rights Reserved.
 */

/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Server-side remote procedure call interface.
 *
 * Master transport handle (SVCMASTERXPRT).
 * The master transport handle structure is shared among service
 * threads processing events on the transport. Some fields in the
 * master structure are protected by locks:
 * - xp_req_lock protects the request queue:
 *	xp_req_head, xp_req_tail
 * - xp_thread_lock protects the thread (clone) counts:
 *	xp_threads, xp_detached_threads, xp_wq
 * Each master transport is registered to exactly one thread pool.
 *
 * Clone transport handle (SVCXPRT).
 * The clone transport handle structure is a per-service-thread handle
 * to the transport. The structure carries all the fields/buffers used
 * for request processing. A service thread, or, in other words, a clone
 * structure, can be linked to an arbitrary master structure to process
 * requests on this transport. The master handle keeps track of reference
 * counts of threads (clones) linked to it. A service thread can switch
 * to another transport by unlinking its clone handle from the current
 * transport and linking to a new one. Switching is relatively inexpensive
 * but it involves locking (the master's xprt->xp_thread_lock).
 *
 * Pools.
 * A pool represents a kernel RPC service (NFS, Lock Manager, etc.).
 * Transports related to the service are registered to the service pool.
 * Service threads can switch between different transports in the pool.
 * Thus, each service has its own pool of service threads. The maximum
 * number of threads in a pool is pool->p_maxthreads. This limit makes
 * it possible to restrict resource usage by the service. Some fields
 * are protected by locks:
 * - p_req_lock protects several counts and flags:
 *	p_reqs, p_walkers, p_asleep, p_drowsy, p_req_cv
 * - p_thread_lock governs other thread counts:
 *	p_threads, p_detached_threads, p_reserved_threads, p_closing
 *
 * In addition, each pool contains a doubly-linked list of transports,
 * an `xprt-ready' queue, and a creator thread (see below). Threads in
 * the pool share some other parameters such as stack size and
 * polling timeout.
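 *
 * An illustrative picture of these relationships (a sketch only):
 *
 *	SVCPOOL (one per service, e.g. NFS)
 *	  +-- circular doubly-linked list of SVCMASTERXPRT's,
 *	  |	each with its own request queue (xp_req_head/xp_req_tail)
 *	  +-- service threads looping in svc_run(), each holding one
 *		SVCXPRT clone handle linked to some master transport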
 *
 * Pools are initialized through the svc_pool_create() function called from
 * the nfssys() system call. However, thread creation must be done by
 * the userland agent. This is done by using the SVCPOOL_WAIT and
 * SVCPOOL_RUN arguments to nfssys(), which call svc_wait() and
 * svc_do_run(), respectively. Once the pool has been initialized,
 * the userland process must set up a `creator' thread. This thread
 * should park itself in the kernel by calling svc_wait(). If
 * svc_wait() returns successfully, it should fork off a new worker
 * thread, which then calls svc_do_run() in order to get work. When
 * that thread is complete, svc_do_run() will return, and the user
 * program should call thr_exit(). (A sketch of this userland logic
 * appears at the end of this comment.)
 *
 * When we try to register a new pool and there is an old pool with
 * the same id in the doubly-linked pool list (this happens when we kill
 * and restart nfsd or lockd), then we unlink the old pool from the list
 * and mark its state as `closing'. After that the transports can still
 * process requests but new transports won't be registered. When all the
 * transports and service threads associated with the pool are gone the
 * creator thread (see below) will clean up the pool structure and exit.
 *
 * svc_queuereq() and svc_run().
 * The kernel RPC server is interrupt driven. The svc_queuereq() interrupt
 * routine is called to deliver an RPC request. The service threads
 * loop in svc_run(). The interrupt function queues a request on the
 * transport's queue and makes sure that the request is serviced.
 * It may either wake up one of the sleeping threads, or ask for a new
 * thread to be created, or, if the previous request is just being picked
 * up, do nothing. In the last case the service thread that is picking up
 * the previous request will wake up or create the next thread. After a
 * service thread processes a request and sends a reply it returns to
 * svc_run() and svc_run() calls svc_poll() to find new input.
 *
 * There is an "inconsistent" but "safe" optimization in the
 * svc_queuereq() code. The request is queued under the transport's
 * request lock, while the `pending-requests' count is incremented
 * independently under the pool's request lock. Thus, a request can be
 * picked up by a service thread before the counter is incremented. It may
 * also happen that the service thread will win the race condition on the
 * pool lock and will decrement the count even before the interrupt thread
 * increments it (so the count can be temporarily negative).
 *
 * svc_poll().
 * In order to avoid unnecessary locking, which causes performance
 * problems, we always look for a pending request on the current transport.
 * If there is none we take a hint from the pool's `xprt-ready' queue.
 * If the queue had an overflow we switch to the `drain' mode, checking
 * each transport in the pool's transport list. Once we find a
 * master transport handle with a pending request we latch the request
 * lock on this transport and return to svc_run(). If the request
 * belongs to a transport different from the one the service thread is
 * linked to we need to unlink and link again.
 *
 * A service thread goes to sleep when there are no pending
 * requests on the transports registered with the pool.
 * All the pool's threads sleep on the same condition variable.
 * If a thread has been sleeping for too long a period of time
 * (by default 5 seconds) it wakes up and exits. Also, when a transport
 * is closing, sleeping threads wake up to unlink from this transport.
 *
 * The `xprt-ready' queue.
 * If a service thread finds no request on the transport it is currently
 * linked to, it will find another transport with a pending request. To
 * make this search more efficient each pool has an `xprt-ready' queue.
 * The queue is a FIFO. When the interrupt routine queues a request it also
 * inserts a pointer to the transport into the `xprt-ready' queue. A
 * thread looking for a transport with a pending request can pop a
 * transport off the queue and check for a request. The request may already
 * be gone, since it could have been taken by a thread linked to that
 * transport. In such a case we try the next hint. The `xprt-ready' queue
 * has a fixed size (by default 256 nodes). If it overflows svc_poll() has
 * to switch to the less efficient but safe `drain' mode and walk through
 * the pool's transport list.
 *
 * Both the svc_poll() loop and the `xprt-ready' queue are optimized
 * for the peak load case, that is, for the situation when the queue is
 * not empty, there are always a few pending requests, and a service
 * thread which has just processed a request does not go to sleep but
 * immediately picks up the next request.
 *
 * Thread creator.
 * Each pool has a thread creator associated with it. The creator thread
 * sleeps on a condition variable and waits for a signal to create a
 * service thread. The actual thread creation is done in userland by
 * the method described in "Pools" above.
 *
 * Signaling threads should turn on the `creator signaled' flag, and
 * can avoid sending signals when the flag is on. The flag is cleared
 * when the thread is created.
 *
 * When the pool is in the closing state (i.e. it has already been
 * unregistered from the pool list) the last thread on the last transport
 * in the pool should turn the p_creator_exit flag on. The creator thread
 * will clean up the pool structure and exit.
 *
 * Thread reservation; Detaching service threads.
 * A service thread can detach itself to block for an extended amount
 * of time. However, to keep the service active we need to guarantee
 * at least pool->p_redline non-detached threads that can process incoming
 * requests. Thus, the maximum number of detached and reserved threads is
 * p->p_maxthreads - p->p_redline. A service thread should first acquire
 * a reservation, and if the reservation was granted it can detach itself.
 * If a reservation was granted but the thread does not detach itself
 * it should cancel the reservation before it returns to svc_run().
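 *
 * Example (userland side).
 * To illustrate the userland protocol described under "Pools" above, a
 * daemon's creator logic might look roughly like the sketch below. This
 * is hedged pseudo-code: error handling, the exact nfssys() argument
 * structures, and the helper name `do_one_rpc_thread' are illustrative
 * assumptions, not the actual daemon source.
 *
 *	while (nfssys(SVCPOOL_WAIT, &id) == 0)		// parks in svc_wait()
 *		thr_create(NULL, 0, do_one_rpc_thread, &id, THR_DETACHED,
 *		    NULL);
 *
 *	void *
 *	do_one_rpc_thread(void *arg)
 *	{
 *		(void) nfssys(SVCPOOL_RUN, arg);	// loops in svc_do_run()
 *		thr_exit(NULL);
 *		return (NULL);
 *	}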
 */

#include <sys/param.h>
#include <sys/types.h>
#include <rpc/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/tiuser.h>
#include <sys/t_kuser.h>
#include <netinet/in.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>
#include <rpc/svc.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/tihdr.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/file.h>
#include <sys/systm.h>
#include <sys/callb.h>
#include <sys/vtrace.h>
#include <sys/zone.h>
#include <nfs/nfs.h>

#define	RQCRED_SIZE	400	/* this size is excessive */

/*
 * Defines for svc_poll()
 */
#define	SVC_EXPRTGONE	((SVCMASTERXPRT *)1)	/* Transport is closing */
#define	SVC_ETIMEDOUT	((SVCMASTERXPRT *)2)	/* Timeout */
#define	SVC_EINTR	((SVCMASTERXPRT *)3)	/* Interrupted by signal */

/*
 * Default stack size for service threads.
 */
#define	DEFAULT_SVC_RUN_STKSIZE		(0)	/* default kernel stack */

int	svc_default_stksize = DEFAULT_SVC_RUN_STKSIZE;

/*
 * Default polling timeout for service threads.
 * Multiplied by hz when used.
 */
#define	DEFAULT_SVC_POLL_TIMEOUT	(5)	/* seconds */

clock_t	svc_default_timeout = DEFAULT_SVC_POLL_TIMEOUT;

/*
 * Size of the `xprt-ready' queue.
 */
#define	DEFAULT_SVC_QSIZE		(256)	/* qnodes */

size_t	svc_default_qsize = DEFAULT_SVC_QSIZE;

/*
 * Default limit for the number of service threads.
 */
#define	DEFAULT_SVC_MAXTHREADS		(INT16_MAX)

int	svc_default_maxthreads = DEFAULT_SVC_MAXTHREADS;

/*
 * Maximum number of requests taken from the same transport (in `drain' mode).
 */
#define	DEFAULT_SVC_MAX_SAME_XPRT	(8)

int	svc_default_max_same_xprt = DEFAULT_SVC_MAX_SAME_XPRT;


/*
 * Default `redline' of non-detached threads.
 * The total number of detached and reserved threads in an RPC server
 * thread pool is limited to pool->p_maxthreads - svc_redline.
 */
#define	DEFAULT_SVC_REDLINE		(1)

int	svc_default_redline = DEFAULT_SVC_REDLINE;

/*
 * A node for the `xprt-ready' queue.
 * See below.
 */
struct __svcxprt_qnode {
	__SVCXPRT_QNODE	*q_next;
	SVCMASTERXPRT	*q_xprt;
};

/*
 * Global SVC variables (private).
 */
struct svc_globals {
	SVCPOOL		*svc_pools;
	kmutex_t	svc_plock;
};

/*
 * Debug variable to check for RDMA-based
 * transport startup and cleanup. Controlled
 * through /etc/system. Off by default.
 */
int rdma_check = 0;

/*
 * Authentication parameters list.
 */
static caddr_t rqcred_head;
static kmutex_t rqcred_lock;

/*
 * Pointers to transport-specific `rele' routines in rpcmod (set from rpcmod).
 */
void	(*rpc_rele)(queue_t *, mblk_t *) = NULL;
void	(*mir_rele)(queue_t *, mblk_t *) = NULL;

/* ARGSUSED */
void
rpc_rdma_rele(queue_t *q, mblk_t *mp)
{
}
void	(*rdma_rele)(queue_t *, mblk_t *) = rpc_rdma_rele;


/*
 * This macro picks which `rele' routine to use, based on the transport type.
 */
#define	RELE_PROC(xprt) \
	((xprt)->xp_type == T_RDMA ? rdma_rele : \
	(((xprt)->xp_type == T_CLTS) ? rpc_rele : mir_rele))

/*
 * If true, then keep quiet about version mismatch.
 * This macro is for broadcast RPC only. We have no broadcast RPC in
 * the kernel now but one may define a flag in the transport structure
 * and redefine this macro.
 */
#define	version_keepquiet(xprt)	(FALSE)

/*
 * ZSD key used to retrieve zone-specific svc globals
 */
static zone_key_t svc_zone_key;

static void svc_callout_free(SVCMASTERXPRT *);
static void svc_xprt_qinit(SVCPOOL *, size_t);
static void svc_xprt_qdestroy(SVCPOOL *);
static void svc_thread_creator(SVCPOOL *);
static void svc_creator_signal(SVCPOOL *);
static void svc_creator_signalexit(SVCPOOL *);
static void svc_pool_unregister(struct svc_globals *, SVCPOOL *);
static int svc_run(SVCPOOL *);

/* ARGSUSED */
static void *
svc_zoneinit(zoneid_t zoneid)
{
	struct svc_globals *svc;

	svc = kmem_alloc(sizeof (*svc), KM_SLEEP);
	mutex_init(&svc->svc_plock, NULL, MUTEX_DEFAULT, NULL);
	svc->svc_pools = NULL;
	return (svc);
}

/* ARGSUSED */
static void
svc_zoneshutdown(zoneid_t zoneid, void *arg)
{
	struct svc_globals *svc = arg;
	SVCPOOL *pool;

	mutex_enter(&svc->svc_plock);
	while ((pool = svc->svc_pools) != NULL) {
		svc_pool_unregister(svc, pool);
	}
	mutex_exit(&svc->svc_plock);
}

/* ARGSUSED */
static void
svc_zonefini(zoneid_t zoneid, void *arg)
{
	struct svc_globals *svc = arg;

	ASSERT(svc->svc_pools == NULL);
	mutex_destroy(&svc->svc_plock);
	kmem_free(svc, sizeof (*svc));
}

/*
 * Global SVC init routine.
 * Initialize global generic and transport-type specific structures
 * used by the kernel RPC server side. This routine is called only
 * once when the module is being loaded.
 */
void
svc_init()
{
	zone_key_create(&svc_zone_key, svc_zoneinit, svc_zoneshutdown,
	    svc_zonefini);
	svc_cots_init();
	svc_clts_init();
}

/*
 * Destroy the SVCPOOL structure.
 */
static void
svc_pool_cleanup(SVCPOOL *pool)
{
	ASSERT(pool->p_threads + pool->p_detached_threads == 0);
	ASSERT(pool->p_lcount == 0);
	ASSERT(pool->p_closing);

	/*
	 * Call the user-supplied shutdown function. This is done
	 * here so the user of the pool will be able to clean up
	 * service-related resources.
	 */
	if (pool->p_shutdown != NULL)
		(pool->p_shutdown)();

	/* Destroy `xprt-ready' queue */
	svc_xprt_qdestroy(pool);

	/* Destroy transport list */
	rw_destroy(&pool->p_lrwlock);

	/* Destroy locks and condition variables */
	mutex_destroy(&pool->p_thread_lock);
	mutex_destroy(&pool->p_req_lock);
	cv_destroy(&pool->p_req_cv);

	/* Destroy creator's locks and condition variables */
	mutex_destroy(&pool->p_creator_lock);
	cv_destroy(&pool->p_creator_cv);
	mutex_destroy(&pool->p_user_lock);
	cv_destroy(&pool->p_user_cv);

	/* Free pool structure */
	kmem_free(pool, sizeof (SVCPOOL));
}

/*
 * If all the transports and service threads are already gone
 * signal the creator thread to clean up and exit.
 */
static bool_t
svc_pool_tryexit(SVCPOOL *pool)
{
	ASSERT(MUTEX_HELD(&pool->p_thread_lock));
	ASSERT(pool->p_closing);

	if (pool->p_threads + pool->p_detached_threads == 0) {
		rw_enter(&pool->p_lrwlock, RW_READER);
		if (pool->p_lcount == 0) {
			/*
			 * Release the locks before sending a signal.
			 */
			rw_exit(&pool->p_lrwlock);
			mutex_exit(&pool->p_thread_lock);

			/*
			 * Notify the creator thread to clean up and exit
			 *
			 * NOTICE: No references to the pool beyond this point!
			 * The pool is being destroyed.
			 */
			ASSERT(!MUTEX_HELD(&pool->p_thread_lock));
			svc_creator_signalexit(pool);

			return (TRUE);
		}
		rw_exit(&pool->p_lrwlock);
	}

	ASSERT(MUTEX_HELD(&pool->p_thread_lock));
	return (FALSE);
}

/*
 * Find a pool with a given id.
 */
static SVCPOOL *
svc_pool_find(struct svc_globals *svc, int id)
{
	SVCPOOL *pool;

	ASSERT(MUTEX_HELD(&svc->svc_plock));

	/*
	 * Search the list for a pool with a matching id.
	 */
	for (pool = svc->svc_pools; pool; pool = pool->p_next)
		if (pool->p_id == id)
			return (pool);

	return (NULL);
}

/*
 * PSARC 2003/523 Contract Private Interface
 * svc_do_run
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
int
svc_do_run(int id)
{
	SVCPOOL *pool;
	int err = 0;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	mutex_enter(&svc->svc_plock);

	pool = svc_pool_find(svc, id);

	mutex_exit(&svc->svc_plock);

	if (pool == NULL)
		return (ENOENT);

	/*
	 * Increment the counter of pool threads now
	 * that a thread has been created.
	 */
	mutex_enter(&pool->p_thread_lock);
	pool->p_threads++;
	mutex_exit(&pool->p_thread_lock);

	/* Give work to the new thread. */
	err = svc_run(pool);

	return (err);
}

/*
 * Unregister a pool from the pool list.
 * Set the closing state. If all the transports and service threads
 * are already gone signal the creator thread to clean up and exit.
 */
static void
svc_pool_unregister(struct svc_globals *svc, SVCPOOL *pool)
{
	SVCPOOL *next = pool->p_next;
	SVCPOOL *prev = pool->p_prev;

	ASSERT(MUTEX_HELD(&svc->svc_plock));

	/* Remove from the list */
	if (pool == svc->svc_pools)
		svc->svc_pools = next;
	if (next)
		next->p_prev = prev;
	if (prev)
		prev->p_next = next;
	pool->p_next = pool->p_prev = NULL;

	/*
	 * Offline the pool. Mark the pool as closing.
	 * If there are no transports in this pool notify
	 * the creator thread to clean it up and exit.
	 */
	mutex_enter(&pool->p_thread_lock);
	if (pool->p_offline != NULL)
		(pool->p_offline)();
	pool->p_closing = TRUE;
	if (svc_pool_tryexit(pool))
		return;
	mutex_exit(&pool->p_thread_lock);
}

/*
 * Register a pool with a given id in the global doubly-linked pool list.
 * - if there is a pool with the same id in the list then unregister it
 * - insert the new pool into the list.
 */
static void
svc_pool_register(struct svc_globals *svc, SVCPOOL *pool, int id)
{
	SVCPOOL *old_pool;

	/*
	 * If there is a pool with the same id then remove it from
	 * the list and mark the pool as closing.
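	 * (As noted in the file-header comment, this happens when a
	 * daemon such as nfsd or lockd is killed and restarted.)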
	 */
	mutex_enter(&svc->svc_plock);

	if ((old_pool = svc_pool_find(svc, id)) != NULL)
		svc_pool_unregister(svc, old_pool);

	/* Insert into the doubly-linked list */
	pool->p_id = id;
	pool->p_next = svc->svc_pools;
	pool->p_prev = NULL;
	if (svc->svc_pools)
		svc->svc_pools->p_prev = pool;
	svc->svc_pools = pool;

	mutex_exit(&svc->svc_plock);
}

/*
 * Initialize a newly created pool structure
 */
static int
svc_pool_init(SVCPOOL *pool, uint_t maxthreads, uint_t redline,
	uint_t qsize, uint_t timeout, uint_t stksize, uint_t max_same_xprt)
{
	klwp_t *lwp = ttolwp(curthread);

	ASSERT(pool);

	if (maxthreads == 0)
		maxthreads = svc_default_maxthreads;
	if (redline == 0)
		redline = svc_default_redline;
	if (qsize == 0)
		qsize = svc_default_qsize;
	if (timeout == 0)
		timeout = svc_default_timeout;
	if (stksize == 0)
		stksize = svc_default_stksize;
	if (max_same_xprt == 0)
		max_same_xprt = svc_default_max_same_xprt;

	if (maxthreads < redline)
		return (EINVAL);

	/* Allocate and initialize the `xprt-ready' queue */
	svc_xprt_qinit(pool, qsize);

	/* Initialize doubly-linked xprt list */
	rw_init(&pool->p_lrwlock, NULL, RW_DEFAULT, NULL);

	/*
	 * Set lwp_childstksz on the current lwp so that
	 * descendants of this lwp get the modified stack size, if
	 * it is defined. It is important that either this lwp or
	 * one of its descendants do the actual service-pool thread
	 * creation to maintain the stack size inheritance.
	 */
	if (lwp != NULL)
		lwp->lwp_childstksz = stksize;

	/* Initialize thread limits, locks and condition variables */
	pool->p_maxthreads = maxthreads;
	pool->p_redline = redline;
	pool->p_timeout = timeout * hz;
	pool->p_stksize = stksize;
	pool->p_max_same_xprt = max_same_xprt;
	mutex_init(&pool->p_thread_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&pool->p_req_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pool->p_req_cv, NULL, CV_DEFAULT, NULL);

	/* Initialize userland creator */
	pool->p_user_exit = FALSE;
	pool->p_signal_create_thread = FALSE;
	pool->p_user_waiting = FALSE;
	mutex_init(&pool->p_user_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pool->p_user_cv, NULL, CV_DEFAULT, NULL);

	/* Initialize the creator and start the creator thread */
	pool->p_creator_exit = FALSE;
	mutex_init(&pool->p_creator_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pool->p_creator_cv, NULL, CV_DEFAULT, NULL);

	(void) zthread_create(NULL, pool->p_stksize, svc_thread_creator,
	    pool, 0, minclsyspri);

	return (0);
}

/*
 * PSARC 2003/523 Contract Private Interface
 * svc_pool_create
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 *
 * Create a kernel RPC server-side thread/transport pool.
 *
 * This is the public interface for creation of a server RPC thread pool
 * for a given service provider. Transports registered with the pool's id
 * will be served by the pool's threads. This function is called from the
 * nfssys() system call.
 */
int
svc_pool_create(struct svcpool_args *args)
{
	SVCPOOL *pool;
	int error;
	struct svc_globals *svc;

	/*
	 * The caller should check credentials in a way appropriate
	 * in the context of the call.
	 */

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	/* Allocate a new pool */
	pool = kmem_zalloc(sizeof (SVCPOOL), KM_SLEEP);

	/*
	 * Initialize the pool structure and create a creator thread.
	 */
	error = svc_pool_init(pool, args->maxthreads, args->redline,
	    args->qsize, args->timeout, args->stksize, args->max_same_xprt);

	if (error) {
		kmem_free(pool, sizeof (SVCPOOL));
		return (error);
	}

	/* Register the pool with the global pool list */
	svc_pool_register(svc, pool, args->id);

	return (0);
}

int
svc_pool_control(int id, int cmd, void *arg)
{
	SVCPOOL *pool;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);

	switch (cmd) {
	case SVCPSET_SHUTDOWN_PROC:
		/*
		 * Search the list for a pool with a matching id
		 * and register the shutdown procedure with that pool.
		 */
		mutex_enter(&svc->svc_plock);

		if ((pool = svc_pool_find(svc, id)) == NULL) {
			mutex_exit(&svc->svc_plock);
			return (ENOENT);
		}
		/*
		 * Grab the transport list lock before releasing the
		 * pool list lock
		 */
		rw_enter(&pool->p_lrwlock, RW_WRITER);
		mutex_exit(&svc->svc_plock);

		pool->p_shutdown = *((void (*)())arg);

		rw_exit(&pool->p_lrwlock);

		return (0);
	case SVCPSET_UNREGISTER_PROC:
		/*
		 * Search the list for a pool with a matching id
		 * and register the unregister callback handle with that pool.
		 */
		mutex_enter(&svc->svc_plock);

		if ((pool = svc_pool_find(svc, id)) == NULL) {
			mutex_exit(&svc->svc_plock);
			return (ENOENT);
		}
		/*
		 * Grab the transport list lock before releasing the
		 * pool list lock
		 */
		rw_enter(&pool->p_lrwlock, RW_WRITER);
		mutex_exit(&svc->svc_plock);

		pool->p_offline = *((void (*)())arg);

		rw_exit(&pool->p_lrwlock);

		return (0);
	default:
		return (EINVAL);
	}
}

/*
 * Pool's transport list manipulation routines.
 * - svc_xprt_register()
 * - svc_xprt_unregister()
 *
 * svc_xprt_register() is called from svc_tli_kcreate() to
 * insert a new master transport handle into the doubly-linked
 * list of server transport handles (one list per pool).
 *
 * The list is used by svc_poll(), when it operates in `drain'
 * mode, to search for the next transport with a pending request.
 */

int
svc_xprt_register(SVCMASTERXPRT *xprt, int id)
{
	SVCMASTERXPRT *prev, *next;
	SVCPOOL *pool;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	/*
	 * Search the list for a pool with a matching id
	 * and register the transport handle with that pool.
	 */
	mutex_enter(&svc->svc_plock);

	if ((pool = svc_pool_find(svc, id)) == NULL) {
		mutex_exit(&svc->svc_plock);
		return (ENOENT);
	}

	/* Grab the transport list lock before releasing the pool list lock */
	rw_enter(&pool->p_lrwlock, RW_WRITER);
	mutex_exit(&svc->svc_plock);

	/* Don't register new transports when the pool is in closing state */
	if (pool->p_closing) {
		rw_exit(&pool->p_lrwlock);
		return (EBUSY);
	}

	/*
	 * Initialize xp_pool to point to the pool.
	 * We don't want to go through the pool list every time.
	 */
	xprt->xp_pool = pool;

	/*
	 * Insert a transport handle into the list.
	 * The list head points to the most recently inserted transport.
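	 *
	 * (For illustration: the list is circular and doubly linked, so
	 * with an existing head H whose xp_prev is the tail T, the new
	 * xprt X is spliced in as T <-> X <-> H and becomes the new head.)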
	 */
	if (pool->p_lhead == NULL)
		pool->p_lhead = xprt->xp_prev = xprt->xp_next = xprt;
	else {
		next = pool->p_lhead;
		prev = pool->p_lhead->xp_prev;

		xprt->xp_next = next;
		xprt->xp_prev = prev;

		pool->p_lhead = prev->xp_next = next->xp_prev = xprt;
	}

	/* Increment the transports count */
	pool->p_lcount++;

	rw_exit(&pool->p_lrwlock);
	return (0);
}

/*
 * Called from svc_xprt_cleanup() to remove a master transport handle
 * from the pool's list of server transports (when a transport is
 * being destroyed).
 */
void
svc_xprt_unregister(SVCMASTERXPRT *xprt)
{
	SVCPOOL *pool = xprt->xp_pool;

	/*
	 * Unlink xprt from the list.
	 * If the list head points to this xprt then move it
	 * to the next xprt or reset to NULL if this is the last
	 * xprt in the list.
	 */
	rw_enter(&pool->p_lrwlock, RW_WRITER);

	if (xprt == xprt->xp_next)
		pool->p_lhead = NULL;
	else {
		SVCMASTERXPRT *next = xprt->xp_next;
		SVCMASTERXPRT *prev = xprt->xp_prev;

		next->xp_prev = prev;
		prev->xp_next = next;

		if (pool->p_lhead == xprt)
			pool->p_lhead = next;
	}

	xprt->xp_next = xprt->xp_prev = NULL;

	/* Decrement list count */
	pool->p_lcount--;

	rw_exit(&pool->p_lrwlock);
}

static void
svc_xprt_qdestroy(SVCPOOL *pool)
{
	mutex_destroy(&pool->p_qend_lock);
	kmem_free(pool->p_qbody, pool->p_qsize * sizeof (__SVCXPRT_QNODE));
}

/*
 * Initialize an `xprt-ready' queue for a given pool.
 */
static void
svc_xprt_qinit(SVCPOOL *pool, size_t qsize)
{
	int i;

	pool->p_qsize = qsize;
	pool->p_qbody = kmem_zalloc(pool->p_qsize * sizeof (__SVCXPRT_QNODE),
	    KM_SLEEP);

	for (i = 0; i < pool->p_qsize - 1; i++)
		pool->p_qbody[i].q_next = &(pool->p_qbody[i+1]);

	pool->p_qbody[pool->p_qsize-1].q_next = &(pool->p_qbody[0]);
	pool->p_qtop = &(pool->p_qbody[0]);
	pool->p_qend = &(pool->p_qbody[0]);

	mutex_init(&pool->p_qend_lock, NULL, MUTEX_DEFAULT, NULL);
}

/*
 * Called from the svc_queuereq() interrupt routine to queue
 * a hint for svc_poll() as to which transport has a pending request.
 * - insert a pointer to xprt into the xprt-ready queue (FIFO)
 * - if the xprt-ready queue is full turn the overflow flag on.
 *
 * NOTICE: pool->p_qtop is protected by the pool's request lock
 * and the caller (svc_queuereq()) must hold the lock.
 */
static void
svc_xprt_qput(SVCPOOL *pool, SVCMASTERXPRT *xprt)
{
	ASSERT(MUTEX_HELD(&pool->p_req_lock));

	/* If the overflow flag is set there is nothing we can do */
	if (pool->p_qoverflow)
		return;

	/* If the queue is full turn the overflow flag on and exit */
	if (pool->p_qtop->q_next == pool->p_qend) {
		mutex_enter(&pool->p_qend_lock);
		if (pool->p_qtop->q_next == pool->p_qend) {
			pool->p_qoverflow = TRUE;
			mutex_exit(&pool->p_qend_lock);
			return;
		}
		mutex_exit(&pool->p_qend_lock);
	}

	/* Insert a hint and move pool->p_qtop */
	pool->p_qtop->q_xprt = xprt;
	pool->p_qtop = pool->p_qtop->q_next;
}

/*
 * Called from svc_poll() to get a hint as to which transport has a
 * pending request. Returns a pointer to a transport or NULL if the
 * `xprt-ready' queue is empty.
 *
 * Since we do not acquire the pool's request lock while checking if
 * the queue is empty we may miss a request that is just being delivered.
 * However, this is ok since svc_poll() will retry again until the
 * count indicates that there are pending requests for this pool.
 */
static SVCMASTERXPRT *
svc_xprt_qget(SVCPOOL *pool)
{
	SVCMASTERXPRT *xprt;

	mutex_enter(&pool->p_qend_lock);
	do {
		/*
		 * If the queue is empty return NULL.
		 * Since we do not acquire the pool's request lock, which
		 * protects pool->p_qtop, this is not an exact check.
		 * However, this is safe - if we miss a request here
		 * svc_poll() will retry again.
		 */
		if (pool->p_qend == pool->p_qtop) {
			mutex_exit(&pool->p_qend_lock);
			return (NULL);
		}

		/* Get a hint and move pool->p_qend */
		xprt = pool->p_qend->q_xprt;
		pool->p_qend = pool->p_qend->q_next;

		/* Skip entries deleted by svc_xprt_qdelete() */
	} while (xprt == NULL);
	mutex_exit(&pool->p_qend_lock);

	return (xprt);
}

/*
 * Reset an overflow in the xprt-ready queue after
 * all the pending requests have been drained.
 * This switches svc_poll back to getting hints from the
 * xprt-ready queue.
 *
 * NOTICE: pool->p_qtop is protected by the pool's request lock
 * and the caller (svc_poll()) must hold the lock.
 */
static void
svc_xprt_qreset(SVCPOOL *pool)
{
	ASSERT(MUTEX_HELD(&pool->p_req_lock));

	pool->p_qend = pool->p_qtop;
	pool->p_qoverflow = FALSE;
}

/*
 * Delete all the references to a transport handle that
 * is being destroyed from the xprt-ready queue.
 * Deleted pointers are replaced with NULLs.
 */
static void
svc_xprt_qdelete(SVCPOOL *pool, SVCMASTERXPRT *xprt)
{
	__SVCXPRT_QNODE *q = pool->p_qend;
	__SVCXPRT_QNODE *qtop = pool->p_qtop;

	/*
	 * Delete all the references to xprt between the current
	 * position of pool->p_qend and the current pool->p_qtop.
	 */
	for (;;) {
		if (q->q_xprt == xprt)
			q->q_xprt = NULL;
		if (q == qtop)
			return;
		q = q->q_next;
	}
}

/*
 * Destructor for a master server transport handle.
 * - if there are no more non-detached threads linked to this transport
 *   then, if requested, call xp_closeproc (we don't wait for detached
 *   threads linked to this transport to complete).
 * - if there are no more threads linked to this
 *   transport then
 *   a) remove references to this transport from the xprt-ready queue
 *   b) remove a reference to this transport from the pool's transport list
 *   c) call a transport-specific `destroy' function
 *   d) cancel remaining thread reservations.
 *
 * NOTICE: Caller must hold the transport's thread lock.
 */
static void
svc_xprt_cleanup(SVCMASTERXPRT *xprt, bool_t detached)
{
	ASSERT(MUTEX_HELD(&xprt->xp_thread_lock));
	ASSERT(xprt->xp_wq == NULL);

	/*
	 * If called from the last non-detached thread
	 * it should call the closeproc on this transport.
	 */
	if (!detached && xprt->xp_threads == 0 && xprt->xp_closeproc) {
		(*(xprt->xp_closeproc)) (xprt);
	}

	if (xprt->xp_threads + xprt->xp_detached_threads > 0)
		mutex_exit(&xprt->xp_thread_lock);
	else {
		/* Remove references to xprt from the `xprt-ready' queue */
		svc_xprt_qdelete(xprt->xp_pool, xprt);

		/* Unregister xprt from the pool's transport list */
		svc_xprt_unregister(xprt);
		svc_callout_free(xprt);
		SVC_DESTROY(xprt);
	}
}

/*
 * Find a dispatch routine for a given prog/vers pair.
 * This function is called from svc_getreq() to search the callout
 * table for an entry with a matching RPC program number `prog'
 * and a version range that covers `vers'.
 * - if it finds a matching entry it returns a pointer to the dispatch
 *   routine
 * - otherwise it returns NULL and fills `vers_min' and `vers_max'
 *   with, respectively, the lowest version and the highest version
 *   supported for the program `prog'
 */
static SVC_DISPATCH *
svc_callout_find(SVCXPRT *xprt, rpcprog_t prog, rpcvers_t vers,
    rpcvers_t *vers_min, rpcvers_t *vers_max)
{
	SVC_CALLOUT_TABLE *sct = xprt->xp_sct;
	int i;

	*vers_min = ~(rpcvers_t)0;
	*vers_max = 0;

	for (i = 0; i < sct->sct_size; i++) {
		SVC_CALLOUT *sc = &sct->sct_sc[i];

		if (prog == sc->sc_prog) {
			if (vers >= sc->sc_versmin && vers <= sc->sc_versmax)
				return (sc->sc_dispatch);

			if (*vers_max < sc->sc_versmax)
				*vers_max = sc->sc_versmax;
			if (*vers_min > sc->sc_versmin)
				*vers_min = sc->sc_versmin;
		}
	}

	return (NULL);
}

/*
 * Optionally free the callout table allocated for this transport by
 * the service provider.
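 *
 * For illustration, a service provider typically supplies a static
 * table along these lines (a hedged sketch; the program/version
 * numbers and the dispatch function name are hypothetical, and
 * sct_free is TRUE only for a kmem_alloc'd table, which is the case
 * handled below):
 *
 *	static SVC_CALLOUT sc[] = {
 *		{ MYPROG, MYVERS_MIN, MYVERS_MAX, myprog_dispatch }
 *	};
 *	static SVC_CALLOUT_TABLE sct = {
 *		sizeof (sc) / sizeof (sc[0]), FALSE, sc
 *	};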
 */
static void
svc_callout_free(SVCMASTERXPRT *xprt)
{
	SVC_CALLOUT_TABLE *sct = xprt->xp_sct;

	if (sct->sct_free) {
		kmem_free(sct->sct_sc, sct->sct_size * sizeof (SVC_CALLOUT));
		kmem_free(sct, sizeof (SVC_CALLOUT_TABLE));
	}
}

/*
 * Send a reply to an RPC request
 *
 * PSARC 2003/523 Contract Private Interface
 * svc_sendreply
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
bool_t
svc_sendreply(const SVCXPRT *clone_xprt, const xdrproc_t xdr_results,
    const caddr_t xdr_location)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = SUCCESS;
	rply.acpted_rply.ar_results.where = xdr_location;
	rply.acpted_rply.ar_results.proc = xdr_results;

	return (SVC_REPLY((SVCXPRT *)clone_xprt, &rply));
}

/*
 * No procedure error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_noproc
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_noproc(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = PROC_UNAVAIL;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Can't decode arguments error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_decode
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_decode(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = GARBAGE_ARGS;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Some system error
 */
void
svcerr_systemerr(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = SYSTEM_ERR;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Authentication error reply
 */
void
svcerr_auth(const SVCXPRT *clone_xprt, const enum auth_stat why)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_DENIED;
	rply.rjcted_rply.rj_stat = AUTH_ERROR;
	rply.rjcted_rply.rj_why = why;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Authentication too weak error reply
 */
void
svcerr_weakauth(const SVCXPRT *clone_xprt)
{
	svcerr_auth((SVCXPRT *)clone_xprt, AUTH_TOOWEAK);
}

/*
 * Program unavailable error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_noprog
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_noprog(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = PROG_UNAVAIL;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Program version mismatch error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_progvers
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_progvers(const SVCXPRT *clone_xprt,
    const rpcvers_t low_vers, const rpcvers_t high_vers)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = PROG_MISMATCH;
	rply.acpted_rply.ar_vers.low = low_vers;
	rply.acpted_rply.ar_vers.high = high_vers;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Get server side input from some transport.
 *
 * Statement of authentication parameters management:
 * This function owns and manages all authentication parameters, specifically
 * the "raw" parameters (msg.rm_call.cb_cred and msg.rm_call.cb_verf) and
 * the "cooked" credentials (rqst->rq_clntcred).
 * However, this function does not know the structure of the cooked
 * credentials, so it makes the following assumptions:
 *   a) the structure is contiguous (no pointers), and
 *   b) the cred structure size does not exceed RQCRED_SIZE bytes.
 * In all events, all three parameters are freed upon exit from this routine.
 * The storage is trivially managed on the call stack in user land, but
 * is malloced in kernel land.
 *
 * Note: the xprt's xp_svc_lock is not held while the service's dispatch
 * routine is running. If we decide to implement svc_unregister(), we'll
 * need to decide whether it's okay for a thread to unregister a service
 * while a request is being processed. If we decide that this is a
 * problem, we can probably use some sort of reference counting scheme to
 * keep the callout entry from going away until the request has completed.
 */
static void
svc_getreq(
	SVCXPRT *clone_xprt,	/* clone transport handle */
	mblk_t *mp)
{
	struct rpc_msg msg;
	struct svc_req r;
	char *cred_area;	/* too big to allocate on call stack */

	TRACE_0(TR_FAC_KRPC, TR_SVC_GETREQ_START,
	    "svc_getreq_start:");

	ASSERT(clone_xprt->xp_master != NULL);

	/*
	 * Firstly, allocate the authentication parameters' storage
	 */
	mutex_enter(&rqcred_lock);
	if (rqcred_head) {
		cred_area = rqcred_head;

		/* LINTED pointer alignment */
		rqcred_head = *(caddr_t *)rqcred_head;
		mutex_exit(&rqcred_lock);
	} else {
		mutex_exit(&rqcred_lock);
		cred_area = kmem_alloc(2 * MAX_AUTH_BYTES + RQCRED_SIZE,
		    KM_SLEEP);
	}
	msg.rm_call.cb_cred.oa_base = cred_area;
	msg.rm_call.cb_verf.oa_base = &(cred_area[MAX_AUTH_BYTES]);
	r.rq_clntcred = &(cred_area[2 * MAX_AUTH_BYTES]);

	/*
	 * Now receive a message from the transport.
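	 * SVC_RECV() invokes the transport-specific receive operation;
	 * it consumes `mp' and, on success, decodes the call header into
	 * `msg' (using the cb_cred/cb_verf storage set up above).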
	 */
	if (SVC_RECV(clone_xprt, mp, &msg)) {
		void (*dispatchroutine) (struct svc_req *, SVCXPRT *);
		rpcvers_t vers_min;
		rpcvers_t vers_max;
		bool_t no_dispatch;
		enum auth_stat why;

		/*
		 * Find the registered program and call its
		 * dispatch routine.
		 */
		r.rq_xprt = clone_xprt;
		r.rq_prog = msg.rm_call.cb_prog;
		r.rq_vers = msg.rm_call.cb_vers;
		r.rq_proc = msg.rm_call.cb_proc;
		r.rq_cred = msg.rm_call.cb_cred;

		/*
		 * First authenticate the message.
		 */
		TRACE_0(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_START,
		    "svc_getreq_auth_start:");
		if ((why = sec_svc_msg(&r, &msg, &no_dispatch)) != AUTH_OK) {
			TRACE_1(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_END,
			    "svc_getreq_auth_end:(%S)", "failed");
			svcerr_auth(clone_xprt, why);
			/*
			 * Free the arguments.
			 */
			(void) SVC_FREEARGS(clone_xprt, NULL, NULL);
		} else if (no_dispatch) {
			/*
			 * XXX - when bug id 4053736 is done, remove
			 * the SVC_FREEARGS() call.
			 */
			(void) SVC_FREEARGS(clone_xprt, NULL, NULL);
		} else {
			TRACE_1(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_END,
			    "svc_getreq_auth_end:(%S)", "good");

			dispatchroutine = svc_callout_find(clone_xprt,
			    r.rq_prog, r.rq_vers, &vers_min, &vers_max);

			if (dispatchroutine) {
				(*dispatchroutine) (&r, clone_xprt);
			} else {
				/*
				 * If we got here, the program or version
				 * is not served ...
				 */
				if (vers_max == 0 ||
				    version_keepquiet(clone_xprt))
					svcerr_noprog(clone_xprt);
				else
					svcerr_progvers(clone_xprt, vers_min,
					    vers_max);

				/*
				 * Free the arguments. For successful calls
				 * this is done by the dispatch routine.
				 */
				(void) SVC_FREEARGS(clone_xprt, NULL, NULL);
				/* Fall through to ... */
			}
			/*
			 * Call the cleanup procedure for RPCSEC_GSS.
			 * This is a hack since there is currently no
			 * op, such as SVC_CLEANAUTH. rpc_gss_cleanup
			 * should only be called for a non-null proc.
			 * Null procs in RPC GSS are overloaded to
			 * provide context setup and control. The main
			 * purpose of rpc_gss_cleanup is to decrement the
			 * reference count associated with the cached
			 * GSS security context. We should never get here
			 * for an RPCSEC_GSS null proc since *no_dispatch
			 * would have been set to true from sec_svc_msg above.
			 */
			if (r.rq_cred.oa_flavor == RPCSEC_GSS)
				rpc_gss_cleanup(clone_xprt);
		}
	}

	/*
	 * Free authentication parameters' storage
	 */
	mutex_enter(&rqcred_lock);
	/* LINTED pointer alignment */
	*(caddr_t *)cred_area = rqcred_head;
	rqcred_head = cred_area;
	mutex_exit(&rqcred_lock);
}

/*
 * Allocate a new clone transport handle.
 */
static SVCXPRT *
svc_clone_init(void)
{
	SVCXPRT *clone_xprt;

	clone_xprt = kmem_zalloc(sizeof (SVCXPRT), KM_SLEEP);
	clone_xprt->xp_cred = crget();
	return (clone_xprt);
}

/*
 * Free memory allocated by svc_clone_init.
 */
static void
svc_clone_free(SVCXPRT *clone_xprt)
{
	/* Free credentials from crget() */
	if (clone_xprt->xp_cred)
		crfree(clone_xprt->xp_cred);

	kmem_free(clone_xprt, sizeof (SVCXPRT));
}

/*
 * Link a per-thread clone transport handle to a master
 * - increment a thread reference count on the master
 * - copy some of the master's fields to the clone
 * - call a transport-specific clone routine.
 */
static void
svc_clone_link(SVCMASTERXPRT *xprt, SVCXPRT *clone_xprt)
{
	cred_t *cred = clone_xprt->xp_cred;

	ASSERT(cred);

	/*
	 * Bump up the master's thread count.
	 * Linking a per-thread clone transport handle to a master
	 * associates a service thread with the master.
	 */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_threads++;
	mutex_exit(&xprt->xp_thread_lock);

	/* Clear everything */
	bzero(clone_xprt, sizeof (SVCXPRT));

	/* Set pointer to the master transport structure */
	clone_xprt->xp_master = xprt;

	/* Structure copy of all the common fields */
	clone_xprt->xp_xpc = xprt->xp_xpc;

	/* Restore per-thread fields (xp_cred) */
	clone_xprt->xp_cred = cred;

	/*
	 * NOTICE: There is no transport-type specific code now.
	 * If you want to add transport-type specific cloning code
	 * add one more operation (e.g. xp_clone()) to svc_ops,
	 * implement it for each transport type, and call it here
	 * through an appropriate macro (e.g. SVC_CLONE()).
	 */
}

/*
 * Unlink a non-detached clone transport handle from a master
 * - decrement the thread reference count on the master
 * - if the transport is closing (xp_wq is NULL) call svc_xprt_cleanup();
 *   if this is the last non-detached/absolute thread on this transport
 *   then it will close/destroy the transport
 * - call a transport-specific function to destroy the clone handle
 * - clear xp_master to avoid recursion.
 */
static void
svc_clone_unlink(SVCXPRT *clone_xprt)
{
	SVCMASTERXPRT *xprt = clone_xprt->xp_master;

	/* This cannot be a detached thread */
	ASSERT(!clone_xprt->xp_detached);
	ASSERT(xprt->xp_threads > 0);

	/* Decrement a reference count on the transport */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_threads--;

	/* svc_xprt_cleanup() unlocks xp_thread_lock or destroys xprt */
	if (xprt->xp_wq)
		mutex_exit(&xprt->xp_thread_lock);
	else
		svc_xprt_cleanup(xprt, FALSE);

	/* Call a transport-specific clone `destroy' function */
	SVC_CLONE_DESTROY(clone_xprt);

	/* Clear xp_master */
	clone_xprt->xp_master = NULL;
}

/*
 * Unlink a detached clone transport handle from a master
 * - decrement the thread count on the master
 * - if the transport is closing (xp_wq is NULL) call svc_xprt_cleanup();
 *   if this is the last thread on this transport then it will destroy
 *   the transport.
 * - call a transport-specific function to destroy the clone handle
 * - clear xp_master to avoid recursion.
 */
static void
svc_clone_unlinkdetached(SVCXPRT *clone_xprt)
{
	SVCMASTERXPRT *xprt = clone_xprt->xp_master;

	/* This must be a detached thread */
	ASSERT(clone_xprt->xp_detached);
	ASSERT(xprt->xp_detached_threads > 0);
	ASSERT(xprt->xp_threads + xprt->xp_detached_threads > 0);

	/* Grab xprt->xp_thread_lock and decrement link counts */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_detached_threads--;

	/* svc_xprt_cleanup() unlocks xp_thread_lock or destroys xprt */
	if (xprt->xp_wq)
		mutex_exit(&xprt->xp_thread_lock);
	else
		svc_xprt_cleanup(xprt, TRUE);

	/* Call a transport-specific clone `destroy' function */
	SVC_CLONE_DESTROY(clone_xprt);

	/* Clear xp_master */
	clone_xprt->xp_master = NULL;
}

/*
 * Try to exit a non-detached service thread
 * - check if there are enough threads left
 * - if this thread (i.e. its clone transport handle) is linked
 *   to a master transport then unlink it
 * - free the clone structure
 * - return to userland for thread exit
 *
 * If this is the last non-detached thread or the last thread on this
 * transport then the call to svc_clone_unlink() will, respectively,
 * close and/or destroy the transport.
 */
static void
svc_thread_exit(SVCPOOL *pool, SVCXPRT *clone_xprt)
{
	if (clone_xprt->xp_master)
		svc_clone_unlink(clone_xprt);
	svc_clone_free(clone_xprt);

	mutex_enter(&pool->p_thread_lock);
	pool->p_threads--;
	if (pool->p_closing && svc_pool_tryexit(pool))
		/* return - thread exit will be handled at user level */
		return;
	mutex_exit(&pool->p_thread_lock);

	/* return - thread exit will be handled at user level */
}

/*
 * Exit a detached service thread that returned to svc_run
 * - decrement the `detached thread' count for the pool
 * - unlink the detached clone transport handle from the master
 * - free the clone structure
 * - return to userland for thread exit
 *
 * If this is the last thread on this transport then the call
 * to svc_clone_unlinkdetached() will destroy the transport.
 */
static void
svc_thread_exitdetached(SVCPOOL *pool, SVCXPRT *clone_xprt)
{
	/* This must be a detached thread */
	ASSERT(clone_xprt->xp_master);
	ASSERT(clone_xprt->xp_detached);
	ASSERT(!MUTEX_HELD(&pool->p_thread_lock));

	svc_clone_unlinkdetached(clone_xprt);
	svc_clone_free(clone_xprt);

	mutex_enter(&pool->p_thread_lock);

	ASSERT(pool->p_reserved_threads >= 0);
	ASSERT(pool->p_detached_threads > 0);

	pool->p_detached_threads--;
	if (pool->p_closing && svc_pool_tryexit(pool))
		/* return - thread exit will be handled at user level */
		return;
	mutex_exit(&pool->p_thread_lock);

	/* return - thread exit will be handled at user level */
}

/*
 * PSARC 2003/523 Contract Private Interface
 * svc_wait
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
int
svc_wait(int id)
{
	SVCPOOL *pool;
	int err = 0;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	mutex_enter(&svc->svc_plock);
	pool = svc_pool_find(svc, id);
	mutex_exit(&svc->svc_plock);

	if (pool == NULL)
		return (ENOENT);

	mutex_enter(&pool->p_user_lock);

	/* Check if there's already a user thread waiting on this pool */
	if (pool->p_user_waiting) {
		mutex_exit(&pool->p_user_lock);
		return (EBUSY);
	}

	pool->p_user_waiting = TRUE;

	/* Go to sleep, waiting for the signaled flag. */
	while (!pool->p_signal_create_thread && !pool->p_user_exit) {
		if (cv_wait_sig(&pool->p_user_cv, &pool->p_user_lock) == 0) {
			/* Interrupted, return to handle exit or signal */
			pool->p_user_waiting = FALSE;
			pool->p_signal_create_thread = FALSE;
			mutex_exit(&pool->p_user_lock);

			/*
			 * The thread has been interrupted and therefore
			 * the service daemon is leaving as well, so
			 * let's go ahead and remove the service
			 * pool at this time.
			 */
			mutex_enter(&svc->svc_plock);
			svc_pool_unregister(svc, pool);
			mutex_exit(&svc->svc_plock);

			return (EINTR);
		}
	}

	pool->p_signal_create_thread = FALSE;
	pool->p_user_waiting = FALSE;

	/*
	 * About to exit the service pool. Set return value
	 * to let the userland code know our intent. Signal
	 * svc_thread_creator() so that it can clean up the
	 * pool structure.
	 */
	if (pool->p_user_exit) {
		err = ECANCELED;
		cv_signal(&pool->p_user_cv);
	}

	mutex_exit(&pool->p_user_lock);

	/* Return to userland with error code, for possible thread creation. */
	return (err);
}

/*
 * `Service threads' creator thread.
 * The creator thread waits for a signal to create a new thread.
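 *
 * To summarize the signal flow implemented below: svc_creator_signal()
 * sets p_creator_signaled and wakes this thread via p_creator_cv; this
 * thread then sets p_signal_create_thread and wakes the userland
 * creator parked in svc_wait() via p_user_cv; userland finally spawns
 * a worker thread that re-enters the kernel through svc_do_run().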
 */
static void
svc_thread_creator(SVCPOOL *pool)
{
	callb_cpr_t cpr_info;	/* CPR info for the creator thread */

	CALLB_CPR_INIT(&cpr_info, &pool->p_creator_lock, callb_generic_cpr,
	    "svc_thread_creator");

	for (;;) {
		mutex_enter(&pool->p_creator_lock);

		/* Check if someone set the exit flag */
		if (pool->p_creator_exit)
			break;

		/* Clear the `signaled' flag and go to sleep */
		pool->p_creator_signaled = FALSE;

		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		cv_wait(&pool->p_creator_cv, &pool->p_creator_lock);
		CALLB_CPR_SAFE_END(&cpr_info, &pool->p_creator_lock);

		/* Check if someone signaled to exit */
		if (pool->p_creator_exit)
			break;

		mutex_exit(&pool->p_creator_lock);

		mutex_enter(&pool->p_thread_lock);

		/*
		 * When the pool is in closing state and all the transports
		 * are gone the creator should not create any new threads.
		 */
		if (pool->p_closing) {
			rw_enter(&pool->p_lrwlock, RW_READER);
			if (pool->p_lcount == 0) {
				rw_exit(&pool->p_lrwlock);
				mutex_exit(&pool->p_thread_lock);
				continue;
			}
			rw_exit(&pool->p_lrwlock);
		}

		/*
		 * Create a new service thread now.
		 */
		ASSERT(pool->p_reserved_threads >= 0);
		ASSERT(pool->p_detached_threads >= 0);

		if (pool->p_threads + pool->p_detached_threads <
		    pool->p_maxthreads) {
			/*
			 * Signal the service pool wait thread
			 * only if it hasn't already been signaled.
			 */
			mutex_enter(&pool->p_user_lock);
			if (pool->p_signal_create_thread == FALSE) {
				pool->p_signal_create_thread = TRUE;
				cv_signal(&pool->p_user_cv);
			}
			mutex_exit(&pool->p_user_lock);

		}

		mutex_exit(&pool->p_thread_lock);
	}

	/*
	 * Pool is closed. Clean up and exit.
	 */

	/* Signal the userland creator thread that it can stop now. */
	mutex_enter(&pool->p_user_lock);
	pool->p_user_exit = TRUE;
	cv_broadcast(&pool->p_user_cv);
	mutex_exit(&pool->p_user_lock);

	/* Wait for svc_wait() to be done with the pool */
	mutex_enter(&pool->p_user_lock);
	while (pool->p_user_waiting) {
		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		cv_wait(&pool->p_user_cv, &pool->p_user_lock);
		CALLB_CPR_SAFE_END(&cpr_info, &pool->p_creator_lock);
	}
	mutex_exit(&pool->p_user_lock);

	CALLB_CPR_EXIT(&cpr_info);
	svc_pool_cleanup(pool);
	zthread_exit();
}

/*
 * If the creator thread is idle signal it to create
 * a new service thread.
 */
static void
svc_creator_signal(SVCPOOL *pool)
{
	mutex_enter(&pool->p_creator_lock);
	if (pool->p_creator_signaled == FALSE) {
		pool->p_creator_signaled = TRUE;
		cv_signal(&pool->p_creator_cv);
	}
	mutex_exit(&pool->p_creator_lock);
}

/*
 * Notify the creator thread to clean up and exit.
 */
static void
svc_creator_signalexit(SVCPOOL *pool)
{
	mutex_enter(&pool->p_creator_lock);
	pool->p_creator_exit = TRUE;
	cv_signal(&pool->p_creator_cv);
	mutex_exit(&pool->p_creator_lock);
}

/*
 * Polling part of svc_run().

/*
 * Polling part of the svc_run().
 * - search for a transport with a pending request
 * - when one is found then latch the request lock and return to svc_run()
 * - if there is no request go asleep and wait for a signal
 * - handle two exceptions:
 *   a) current transport is closing
 *   b) timeout waiting for a new request
 *   in both cases return to svc_run()
 */
static SVCMASTERXPRT *
svc_poll(SVCPOOL *pool, SVCMASTERXPRT *xprt, SVCXPRT *clone_xprt)
{
	/*
	 * Main loop iterates until
	 * a) we find a pending request,
	 * b) detect that the current transport is closing
	 * c) time out waiting for a new request.
	 */
	for (;;) {
		SVCMASTERXPRT *next;
		clock_t timeleft;

		/*
		 * Step 1.
		 * Check if there is a pending request on the current
		 * transport handle so that we can avoid cloning.
		 * If so then decrement the `pending-request' count for
		 * the pool and return to svc_run().
		 *
		 * We need to prevent a potential starvation. When
		 * a selected transport has all pending requests coming in
		 * all the time then the service threads will never switch to
		 * another transport. With a limited number of service
		 * threads some transports may never be serviced.
		 * To prevent such a scenario we pick up at most
		 * pool->p_max_same_xprt requests from the same transport
		 * and then take a hint from the xprt-ready queue or walk
		 * the transport list.
		 */
		if (xprt && xprt->xp_req_head && (!pool->p_qoverflow ||
		    clone_xprt->xp_same_xprt++ < pool->p_max_same_xprt)) {
			mutex_enter(&xprt->xp_req_lock);
			if (xprt->xp_req_head) {
				mutex_enter(&pool->p_req_lock);
				pool->p_reqs--;
				mutex_exit(&pool->p_req_lock);

				return (xprt);
			}
			mutex_exit(&xprt->xp_req_lock);
		}
		clone_xprt->xp_same_xprt = 0;

		/*
		 * Step 2.
		 * If there is no request on the current transport try to
		 * find another transport with a pending request.
		 */
		mutex_enter(&pool->p_req_lock);
		pool->p_walkers++;
		mutex_exit(&pool->p_req_lock);

		/*
		 * Make sure that transports will not be destroyed just
		 * while we are checking them.
		 */
		rw_enter(&pool->p_lrwlock, RW_READER);

		for (;;) {
			SVCMASTERXPRT *hint;

			/*
			 * Get the next transport from the xprt-ready queue.
			 * This is a hint. There is no guarantee that the
			 * transport still has a pending request since it
			 * could be picked up by another thread in step 1.
			 *
			 * If the transport has a pending request then keep
			 * it locked. Decrement the pool's `pending-requests'
			 * and `walking-threads' counts, and return
			 * to svc_run().
			 */
			hint = svc_xprt_qget(pool);

			if (hint && hint->xp_req_head) {
				mutex_enter(&hint->xp_req_lock);
				if (hint->xp_req_head) {
					rw_exit(&pool->p_lrwlock);

					mutex_enter(&pool->p_req_lock);
					pool->p_reqs--;
					pool->p_walkers--;
					mutex_exit(&pool->p_req_lock);

					return (hint);
				}
				mutex_exit(&hint->xp_req_lock);
			}

			/*
			 * If there was no hint in the xprt-ready queue then
			 * - if there are fewer pending requests than polling
			 *   threads go asleep
			 * - otherwise check if there was an overflow in the
			 *   xprt-ready queue; if so, then we need to break
			 *   the `drain' mode
			 */
			if (hint == NULL) {
				if (pool->p_reqs < pool->p_walkers) {
					mutex_enter(&pool->p_req_lock);
					if (pool->p_reqs < pool->p_walkers)
						goto sleep;
					mutex_exit(&pool->p_req_lock);
				}
				if (pool->p_qoverflow) {
					break;
				}
			}
		}

		/*
		 * If there was an overflow in the xprt-ready queue then we
		 * need to switch to the `drain' mode, i.e. walk through the
		 * pool's transport list and search for a transport with a
		 * pending request. If we manage to drain all the pending
		 * requests then we can clear the overflow flag. This will
		 * switch svc_poll() back to taking hints from the xprt-ready
		 * queue (which is generally more efficient).
		 *
		 * If there are no registered transports simply go asleep.
		 */
		if (xprt == NULL && pool->p_lhead == NULL) {
			mutex_enter(&pool->p_req_lock);
			goto sleep;
		}

		/*
		 * `Walk' through the pool's list of master server
		 * transport handles. Continue to loop until there are
		 * fewer walking threads than pending requests.
		 */
		next = xprt ? xprt->xp_next : pool->p_lhead;

		for (;;) {
			/*
			 * Check if there is a request on this transport.
			 *
			 * Since blocking on a locked mutex is very expensive
			 * check for a request without a lock first. We may
			 * miss a request that is just being delivered, but
			 * this will cost at most one full walk through the
			 * list.
			 */
			if (next->xp_req_head) {
				/*
				 * Check again, now with a lock.
				 */
				mutex_enter(&next->xp_req_lock);
				if (next->xp_req_head) {
					rw_exit(&pool->p_lrwlock);

					mutex_enter(&pool->p_req_lock);
					pool->p_reqs--;
					pool->p_walkers--;
					mutex_exit(&pool->p_req_lock);

					return (next);
				}
				mutex_exit(&next->xp_req_lock);
			}

			/*
			 * Continue to `walk' through the pool's
			 * transport list until there are fewer requests
			 * than walkers. Check this condition without
			 * a lock first to avoid contention on a mutex.
			 */
			if (pool->p_reqs < pool->p_walkers) {
				/*
				 * Check again, now with the lock.
				 * If all the pending requests have been
				 * picked up then clear the overflow flag.
				 */
				mutex_enter(&pool->p_req_lock);
				if (pool->p_reqs <= 0)
					svc_xprt_qreset(pool);
				if (pool->p_reqs < pool->p_walkers)
					break;	/* goto sleep */
				mutex_exit(&pool->p_req_lock);
			}

			next = next->xp_next;
		}

	sleep:
		/*
		 * No work to do. Stop the `walk' and go asleep.
		 * Decrement the `walking-threads' count for the pool.
		 */
		pool->p_walkers--;
		rw_exit(&pool->p_lrwlock);
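
		/*
		 * For reference, the condvar return convention relied on
		 * below (per cv_timedwait_sig(9F)): 0 means the wait was
		 * interrupted by a signal, -1 means the timeout expired,
		 * and a positive value means the thread was awakened by
		 * cv_signal()/cv_broadcast() with time remaining.
		 */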

		/*
		 * Count us as asleep, mark this thread as safe
		 * for suspend and wait for a request.
		 */
		pool->p_asleep++;
		timeleft = cv_timedwait_sig(&pool->p_req_cv, &pool->p_req_lock,
		    pool->p_timeout + lbolt);

		/*
		 * If the drowsy flag is on this means that
		 * someone has signaled a wakeup. In such a case
		 * the `asleep-threads' count has already been updated
		 * so just clear the flag.
		 *
		 * If the drowsy flag is off then we need to update
		 * the `asleep-threads' count.
		 */
		if (pool->p_drowsy) {
			pool->p_drowsy = FALSE;
			/*
			 * If the thread is here because it timed out,
			 * instead of returning SVC_ETIMEDOUT, it is
			 * time to do some more work.
			 */
			if (timeleft == -1)
				timeleft = 1;
		} else {
			pool->p_asleep--;
		}
		mutex_exit(&pool->p_req_lock);

		/*
		 * If we received a signal while waiting for a
		 * request, inform svc_run(), so that we can return
		 * to user level and restart the call.
		 */
		if (timeleft == 0)
			return (SVC_EINTR);

		/*
		 * If the current transport is gone then notify
		 * svc_run() to unlink from it.
		 */
		if (xprt && xprt->xp_wq == NULL)
			return (SVC_EXPRTGONE);

		/*
		 * If we have timed out waiting for a request inform
		 * svc_run() that we probably don't need this thread.
		 */
		if (timeleft == -1)
			return (SVC_ETIMEDOUT);
	}
}

/*
 * Main loop of the kernel RPC server
 * - wait for input (find a transport with a pending request)
 * - dequeue the request
 * - call a registered server routine to process the request.
 *
 * There can be many threads running concurrently in this loop
 * on the same or on different transports.
 */
static int
svc_run(SVCPOOL *pool)
{
	SVCMASTERXPRT *xprt = NULL;	/* master transport handle */
	SVCXPRT *clone_xprt;		/* clone for this thread */
	struct svc_globals *svc;
	proc_t *p = ttoproc(curthread);

	/* Allocate a clone transport handle for this thread */
	clone_xprt = svc_clone_init();

	/*
	 * The loop iterates until the thread becomes
	 * idle too long or the transport is gone.
	 */
	for (;;) {
		SVCMASTERXPRT *next;
		mblk_t *mp;

		TRACE_0(TR_FAC_KRPC, TR_SVC_RUN, "svc_run");

		/*
		 * If the process is exiting/killed, return
		 * immediately without processing any more
		 * requests.
		 */
		if (p->p_flag & (SEXITLWPS|SKILLED)) {
			svc_thread_exit(pool, clone_xprt);

			/*
			 * Thread has been interrupted and therefore
			 * the service daemon is leaving as well so
			 * let's go ahead and remove the service
			 * pool at this time.
			 */
			svc = zone_getspecific(svc_zone_key, curproc->p_zone);
			mutex_enter(&svc->svc_plock);
			svc_pool_unregister(svc, pool);
			mutex_exit(&svc->svc_plock);

			return (0);
		}

		/* Find a transport with a pending request */
		next = svc_poll(pool, xprt, clone_xprt);

		/*
		 * If svc_poll() finds a transport with a request
		 * it latches xp_req_lock on it. Therefore we need
		 * to dequeue the request and release the lock as
		 * soon as possible.
		 */
		ASSERT(next != NULL &&
		    (next == SVC_EXPRTGONE ||
		    next == SVC_ETIMEDOUT ||
		    next == SVC_EINTR ||
		    MUTEX_HELD(&next->xp_req_lock)));

		/* Ooops! Current transport is closing. Unlink now */
		if (next == SVC_EXPRTGONE) {
			svc_clone_unlink(clone_xprt);
			xprt = NULL;
			continue;
		}
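
		/*
		 * The next two cases handle the remaining special values
		 * from svc_poll(). SVC_EXPRTGONE, SVC_ETIMEDOUT and
		 * SVC_EINTR are sentinel constants (presumably defined in
		 * the svc header), not real transport pointers, which is
		 * why they are compared against `next' before it is ever
		 * dereferenced.
		 */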

		/* Ooops! Timeout while waiting for a request. Exit */
		if (next == SVC_ETIMEDOUT) {
			svc_thread_exit(pool, clone_xprt);
			return (0);
		}

		/*
		 * Interrupted by a signal while waiting for a
		 * request. Return to userspace and restart.
		 */
		if (next == SVC_EINTR) {
			svc_thread_exit(pool, clone_xprt);

			/*
			 * Thread has been interrupted and therefore
			 * the service daemon is leaving as well so
			 * let's go ahead and remove the service
			 * pool at this time.
			 */
			svc = zone_getspecific(svc_zone_key, curproc->p_zone);
			mutex_enter(&svc->svc_plock);
			svc_pool_unregister(svc, pool);
			mutex_exit(&svc->svc_plock);

			return (EINTR);
		}

		/*
		 * De-queue the request and release the request lock
		 * on this transport (latched by svc_poll()).
		 */
		mp = next->xp_req_head;
		next->xp_req_head = mp->b_next;
		mp->b_next = (mblk_t *)0;

		TRACE_2(TR_FAC_KRPC, TR_NFSFP_QUE_REQ_DEQ,
		    "rpc_que_req_deq:pool %p mp %p", pool, mp);
		mutex_exit(&next->xp_req_lock);

		/*
		 * If this is a new request on the current transport then
		 * the clone structure is already properly initialized.
		 * Otherwise, if the request is on a different transport,
		 * unlink from the current master and link to
		 * the one we got a request on.
		 */
		if (next != xprt) {
			if (xprt)
				svc_clone_unlink(clone_xprt);
			svc_clone_link(next, clone_xprt);
			xprt = next;
		}

		/*
		 * If there are more requests and req_cv hasn't
		 * been signaled yet then wake up one more thread now.
		 *
		 * We avoid signaling req_cv until the most recently
		 * signaled thread wakes up and gets CPU to clear
		 * the `drowsy' flag.
		 */
		if (!(pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
		    pool->p_asleep == 0)) {
			mutex_enter(&pool->p_req_lock);

			if (pool->p_drowsy ||
			    pool->p_reqs <= pool->p_walkers ||
			    pool->p_asleep == 0)
				mutex_exit(&pool->p_req_lock);
			else {
				pool->p_asleep--;
				pool->p_drowsy = TRUE;

				cv_signal(&pool->p_req_cv);
				mutex_exit(&pool->p_req_lock);
			}
		}

		/*
		 * If there are no asleep/signaled threads, we are
		 * still below the pool->p_maxthreads limit, and no thread
		 * is currently being created then signal the creator
		 * for one more service thread.
		 *
		 * The asleep and drowsy checks are not protected
		 * by a lock since it hurts performance and a wrong
		 * decision is not essential.
		 */
		if (pool->p_asleep == 0 && !pool->p_drowsy &&
		    pool->p_threads + pool->p_detached_threads <
		    pool->p_maxthreads)
			svc_creator_signal(pool);

		/*
		 * Process the request.
		 */
		svc_getreq(clone_xprt, mp);

		/* If thread had a reservation it should have been canceled */
		ASSERT(!clone_xprt->xp_reserved);

		/*
		 * If the clone is marked detached then exit.
		 * The rpcmod slot has already been released
		 * when we detached this thread.
		 */
		if (clone_xprt->xp_detached) {
			svc_thread_exitdetached(pool, clone_xprt);
			return (0);
		}

		/*
		 * Release our reference on the rpcmod
		 * slot attached to xp_wq->q_ptr.
		 */
		(*RELE_PROC(xprt)) (clone_xprt->xp_wq, NULL);
	}
	/* NOTREACHED */
}
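
/*
 * A note on svc_run()'s return values: 0 tells the caller that this
 * service thread is done and should simply exit, while EINTR is
 * presumably propagated back through svc_do_run() and the nfssys()
 * call so that the interrupted call can be restarted from user level.
 */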

/*
 * Flush any pending requests for the queue and
 * free the associated mblks.
 */
void
svc_queueclean(queue_t *q)
{
	SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0];
	mblk_t *mp;

	/*
	 * clean up the requests
	 */
	mutex_enter(&xprt->xp_req_lock);
	while ((mp = xprt->xp_req_head) != NULL) {
		xprt->xp_req_head = mp->b_next;
		mp->b_next = (mblk_t *)0;
		(*RELE_PROC(xprt)) (xprt->xp_wq, mp);
	}
	mutex_exit(&xprt->xp_req_lock);
}

/*
 * This routine is called by rpcmod to inform kernel RPC that a
 * queue is closing. It is called after all the requests have been
 * picked up (that is after all the slots on the queue have
 * been released by kernel RPC). It is also guaranteed that no more
 * requests will be delivered on this transport.
 *
 * - clear xp_wq to mark the master server transport handle as closing
 * - if there are no more threads on this transport close/destroy it
 * - otherwise, broadcast threads sleeping in svc_poll(); the last
 *   thread will close/destroy the transport.
 */
void
svc_queueclose(queue_t *q)
{
	SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0];

	if (xprt == NULL) {
		/*
		 * If there is no master xprt associated with this stream,
		 * then there is nothing to do. This happens regularly
		 * with connection-oriented listening streams created by
		 * nfsd.
		 */
		return;
	}

	mutex_enter(&xprt->xp_thread_lock);

	ASSERT(xprt->xp_req_head == NULL);
	ASSERT(xprt->xp_wq != NULL);

	xprt->xp_wq = NULL;

	if (xprt->xp_threads == 0) {
		SVCPOOL *pool = xprt->xp_pool;

		/*
		 * svc_xprt_cleanup() destroys the transport
		 * or releases the transport thread lock.
		 */
		svc_xprt_cleanup(xprt, FALSE);

		mutex_enter(&pool->p_thread_lock);

		/*
		 * If the pool is in closing state and this was
		 * the last transport in the pool then signal the creator
		 * thread to clean up and exit.
		 */
		if (pool->p_closing && svc_pool_tryexit(pool)) {
			return;
		}
		mutex_exit(&pool->p_thread_lock);
	} else {
		/*
		 * Wakeup threads sleeping in svc_poll() so that they
		 * unlink from the transport.
		 */
		mutex_enter(&xprt->xp_pool->p_req_lock);
		cv_broadcast(&xprt->xp_pool->p_req_cv);
		mutex_exit(&xprt->xp_pool->p_req_lock);

		/*
		 * NOTICE: No references to the master transport structure
		 * beyond this point!
		 */
		mutex_exit(&xprt->xp_thread_lock);
	}
}
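
/*
 * For reference, the per-transport request queue manipulated by
 * svc_queuereq() below (and drained by svc_run() and svc_queueclean()
 * above) is a singly-linked list of STREAMS message blocks chained
 * through their b_next pointers:
 *
 *	xp_req_head -> mp -> mp -> ... -> mp <- xp_req_tail
 *
 * Producers append at xp_req_tail and consumers take from xp_req_head,
 * both under xp_req_lock.
 */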

/*
 * Interrupt `request delivery' routine called from rpcmod
 * - put a request at the tail of the transport request queue
 * - insert a hint for svc_poll() into the xprt-ready queue
 * - increment the `pending-requests' count for the pool
 * - wake up a thread sleeping in svc_poll() if necessary
 * - if all the threads are running ask the creator for a new one.
 */
void
svc_queuereq(queue_t *q, mblk_t *mp)
{
	SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0];
	SVCPOOL *pool = xprt->xp_pool;

	TRACE_0(TR_FAC_KRPC, TR_SVC_QUEUEREQ_START, "svc_queuereq_start");

	/*
	 * Step 1.
	 * Grab the transport's request lock and put
	 * the request at the tail of the transport's
	 * request queue.
	 */
	mutex_enter(&xprt->xp_req_lock);
	if (xprt->xp_req_head == NULL)
		xprt->xp_req_head = mp;
	else
		xprt->xp_req_tail->b_next = mp;
	xprt->xp_req_tail = mp;

	mutex_exit(&xprt->xp_req_lock);

	/*
	 * Step 2.
	 * Grab the pool request lock, insert a hint into
	 * the xprt-ready queue, increment the `pending-requests'
	 * count for the pool, and wake up a thread sleeping
	 * in svc_poll() if necessary.
	 */
	mutex_enter(&pool->p_req_lock);

	/* Insert pointer to this transport into the xprt-ready queue */
	svc_xprt_qput(pool, xprt);

	/* Increment the `pending-requests' count for the pool */
	pool->p_reqs++;

	TRACE_2(TR_FAC_KRPC, TR_NFSFP_QUE_REQ_ENQ,
	    "rpc_que_req_enq:pool %p mp %p", pool, mp);

	/*
	 * If there are more requests and req_cv hasn't
	 * been signaled yet then wake up one more thread now.
	 *
	 * We avoid signaling req_cv until the most recently
	 * signaled thread wakes up and gets CPU to clear
	 * the `drowsy' flag.
	 */
	if (pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
	    pool->p_asleep == 0) {
		mutex_exit(&pool->p_req_lock);
	} else {
		pool->p_drowsy = TRUE;
		pool->p_asleep--;

		/*
		 * Signal wakeup and drop the request lock.
		 */
		cv_signal(&pool->p_req_cv);
		mutex_exit(&pool->p_req_lock);
	}

	/*
	 * Step 3.
	 * If there are no asleep/signaled threads, we are
	 * still below the pool->p_maxthreads limit, and no thread
	 * is currently being created then signal the creator
	 * for one more service thread.
	 *
	 * The asleep and drowsy checks are not protected
	 * by a lock since it hurts performance and a wrong
	 * decision is not essential.
	 */
	if (pool->p_asleep == 0 && !pool->p_drowsy &&
	    pool->p_threads + pool->p_detached_threads < pool->p_maxthreads)
		svc_creator_signal(pool);

	TRACE_1(TR_FAC_KRPC, TR_SVC_QUEUEREQ_END,
	    "svc_queuereq_end:(%S)", "end");
}

/*
 * Reserve a service thread so that it can be detached later.
 * This reservation is required to make sure that when it tries to
 * detach itself the total number of detached threads does not exceed
 * pool->p_maxthreads - pool->p_redline (i.e. that at least
 * pool->p_redline thread slots are kept for non-detached threads).
 *
 * If the thread does not detach itself later, it should cancel the
 * reservation before returning to svc_run().
 *
 * - check if there is room for more reserved/detached threads
 * - if so, then increment the `reserved threads' count for the pool
 * - mark the thread as reserved (setting the flag in the clone transport
 *   handle for this thread)
 * - returns 1 if the reservation succeeded, 0 if it failed.
 */
int
svc_reserve_thread(SVCXPRT *clone_xprt)
{
	SVCPOOL *pool = clone_xprt->xp_master->xp_pool;

	/* Recursive reservations are not allowed */
	ASSERT(!clone_xprt->xp_reserved);
	ASSERT(!clone_xprt->xp_detached);

	/* Check the pool counts for room to make a reservation */
	mutex_enter(&pool->p_thread_lock);
	if (pool->p_reserved_threads + pool->p_detached_threads >=
	    pool->p_maxthreads - pool->p_redline) {
		mutex_exit(&pool->p_thread_lock);
		return (0);
	}
	pool->p_reserved_threads++;
	mutex_exit(&pool->p_thread_lock);

	/* Mark the thread (clone handle) as reserved */
	clone_xprt->xp_reserved = TRUE;

	return (1);
}

/*
 * Cancel a reservation for a thread.
 * - decrement the `reserved threads' count for the pool
 * - clear the flag in the clone transport handle for this thread.
 */
void
svc_unreserve_thread(SVCXPRT *clone_xprt)
{
	SVCPOOL *pool = clone_xprt->xp_master->xp_pool;

	/* Thread must have a reservation */
	ASSERT(clone_xprt->xp_reserved);
	ASSERT(!clone_xprt->xp_detached);

	/* Decrement global count */
	mutex_enter(&pool->p_thread_lock);
	pool->p_reserved_threads--;
	mutex_exit(&pool->p_thread_lock);

	/* Clear reservation flag */
	clone_xprt->xp_reserved = FALSE;
}

/*
 * Detach a thread from its transport, so that it can block for an
 * extended time. Because the transport can be closed after the thread
 * is detached, the thread should have already sent off a reply if it
 * was going to send one.
 *
 * - decrement the `non-detached threads' count and increment the
 *   `detached threads' count for the transport
 * - decrement the `non-detached threads' and `reserved threads'
 *   counts and increment the `detached threads' count for the pool
 * - release the rpcmod slot
 * - mark the clone (thread) as detached.
 *
 * No need to return a pointer to the thread's CPR information, since
 * the thread has a userland identity.
 *
 * NOTICE: a thread must not detach itself without making a prior
 * reservation through svc_reserve_thread().
 */
callb_cpr_t *
svc_detach_thread(SVCXPRT *clone_xprt)
{
	SVCMASTERXPRT *xprt = clone_xprt->xp_master;
	SVCPOOL *pool = xprt->xp_pool;

	/* Thread must have a reservation */
	ASSERT(clone_xprt->xp_reserved);
	ASSERT(!clone_xprt->xp_detached);

	/* Bookkeeping for this transport */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_threads--;
	xprt->xp_detached_threads++;
	mutex_exit(&xprt->xp_thread_lock);

	/* Bookkeeping for the pool */
	mutex_enter(&pool->p_thread_lock);
	pool->p_threads--;
	pool->p_reserved_threads--;
	pool->p_detached_threads++;
	mutex_exit(&pool->p_thread_lock);

	/* Release an rpcmod slot for this request */
	(*RELE_PROC(xprt)) (clone_xprt->xp_wq, NULL);

	/* Mark the clone (thread) as detached */
	clone_xprt->xp_reserved = FALSE;
	clone_xprt->xp_detached = TRUE;

	return (NULL);
}
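
/*
 * Illustrative use of the reservation protocol above; a sketch of a
 * hypothetical service routine, not code from any particular consumer
 * of this interface:
 *
 *	if (!svc_reserve_thread(clone_xprt))
 *		return (error);		// pool is at its redline
 *	...
 *	if (must_block_for_a_long_time) {
 *		// reply must already have been sent at this point
 *		(void) svc_detach_thread(clone_xprt);
 *	} else {
 *		// cancel the reservation before returning to svc_run()
 *		svc_unreserve_thread(clone_xprt);
 *	}
 */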

/*
 * This routine is responsible for extracting the RDMA plugin master
 * XPRT, unregistering it from the SVCPOOL, and initiating plugin
 * specific cleanup. It is passed a list/group of rdma transport
 * records that are active in a given registered or unregistered kRPC
 * thread pool. It shuts down all active rdma transports in that pool.
 * If the thread active on the transport happens to be the last thread
 * for that pool, it will signal the creator thread to clean up the
 * pool and destroy the xprt in svc_queueclose().
 */
void
rdma_stop(rdma_xprt_group_t rdma_xprts)
{
	SVCMASTERXPRT *xprt;
	rdma_xprt_record_t *curr_rec;
	queue_t *q;
	mblk_t *mp;
	int i;

	if (rdma_xprts.rtg_count == 0)
		return;

	for (i = 0; i < rdma_xprts.rtg_count; i++) {
		curr_rec = rdma_xprts.rtg_listhead;
		rdma_xprts.rtg_listhead = curr_rec->rtr_next;
		curr_rec->rtr_next = NULL;
		xprt = curr_rec->rtr_xprt_ptr;
		q = xprt->xp_wq;
		svc_rdma_kstop(xprt);

		mutex_enter(&xprt->xp_req_lock);
		while ((mp = xprt->xp_req_head) != NULL) {
			xprt->xp_req_head = mp->b_next;
			mp->b_next = (mblk_t *)0;
			freemsg(mp);
		}
		mutex_exit(&xprt->xp_req_lock);
		svc_queueclose(q);
#ifdef DEBUG
		if (rdma_check)
			cmn_err(CE_NOTE, "rdma_stop: Exited svc_queueclose\n");
#endif
		/*
		 * Free the rdma transport record for the expunged rdma
		 * based master transport handle.
		 */
		kmem_free(curr_rec, sizeof (rdma_xprt_record_t));
		if (!rdma_xprts.rtg_listhead)
			break;
	}
}