/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 1993 OpenVision Technologies, Inc., All Rights Reserved.
 */

/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Server-side remote procedure call interface.
 *
 * Master transport handle (SVCMASTERXPRT).
 *	The master transport handle structure is shared among service
 *	threads processing events on the transport. Some fields in the
 *	master structure are protected by locks:
 *	- xp_req_lock protects the request queue:
 *		xp_req_head, xp_req_tail
 *	- xp_thread_lock protects the thread (clone) counts:
 *		xp_threads, xp_detached_threads, xp_wq
 *	Each master transport is registered to exactly one thread pool.
 *
 * Clone transport handle (SVCXPRT)
 *	The clone transport handle structure is a per-service-thread handle
 *	to the transport. The structure carries all the fields/buffers used
 *	for request processing. A service thread, or in other words a clone
 *	structure, can be linked to an arbitrary master structure to process
 *	requests on this transport. The master handle keeps track of reference
 *	counts of threads (clones) linked to it. A service thread can switch
 *	to another transport by unlinking its clone handle from the current
 *	transport and linking to a new one. Switching is relatively inexpensive
 *	but it involves locking (master's xprt->xp_thread_lock).
 *
 * Pools.
 *	A pool represents a kernel RPC service (NFS, Lock Manager, etc.).
 *	Transports related to the service are registered to the service pool.
 *	Service threads can switch between different transports in the pool.
 *	Thus, each service has its own pool of service threads. The maximum
 *	number of threads in a pool is pool->p_maxthreads. This limit allows
 *	resource usage by the service to be restricted. Some fields are
 *	protected by locks:
 *	- p_req_lock protects several counts and flags:
 *		p_reqs, p_walkers, p_asleep, p_drowsy, p_req_cv
 *	- p_thread_lock governs other thread counts:
 *		p_threads, p_detached_threads, p_reserved_threads, p_closing
 *
 *	In addition, each pool contains a doubly-linked list of transports,
 *	an `xprt-ready' queue and a creator thread (see below). Threads in
 *	the pool share some other parameters such as stack size and
 *	polling timeout.
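 *
 *	As an illustration (mirroring what svc_poll() does later in this
 *	file, not a definitive recipe), a thread adjusting the
 *	pending-request count does so under the pool's request lock:
 *
 *		mutex_enter(&pool->p_req_lock);
 *		pool->p_reqs--;
 *		mutex_exit(&pool->p_req_lock);
 *
 *	while the thread counts are manipulated under pool->p_thread_lock.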
 *
 *	Pools are initialized through the svc_pool_create() function called
 *	from the nfssys() system call. However, thread creation must be done
 *	by the userland agent. This is done by using the SVCPOOL_WAIT and
 *	SVCPOOL_RUN arguments to nfssys(), which call svc_wait() and
 *	svc_do_run(), respectively. Once the pool has been initialized,
 *	the userland process must set up a `creator' thread. This thread
 *	should park itself in the kernel by calling svc_wait(). If
 *	svc_wait() returns successfully, it should fork off a new worker
 *	thread, which then calls svc_do_run() in order to get work. When
 *	that thread is complete, svc_do_run() will return, and the user
 *	program should call thr_exit().
 *
 *	When we try to register a new pool and there is an old pool with
 *	the same id in the doubly linked pool list (this happens when we kill
 *	and restart nfsd or lockd), then we unlink the old pool from the list
 *	and mark its state as `closing'. After that the transports can still
 *	process requests but new transports won't be registered. When all the
 *	transports and service threads associated with the pool are gone the
 *	creator thread (see below) will clean up the pool structure and exit.
 *
 * svc_queuereq() and svc_run().
 *	The kernel RPC server is interrupt driven. The svc_queuereq()
 *	interrupt routine is called to deliver an RPC request. The service
 *	threads loop in svc_run(). The interrupt function queues a request
 *	on the transport's queue and makes sure that the request is serviced.
 *	It may either wake up one of the sleeping threads, or ask for a new
 *	thread to be created, or, if the previous request is just being
 *	picked up, do nothing. In the last case the service thread that is
 *	picking up the previous request will wake up or create the next
 *	thread. After a service thread processes a request and sends a reply
 *	it returns to svc_run() and svc_run() calls svc_poll() to find
 *	new input.
 *
 *	There is an "inconsistent" but "safe" optimization in the
 *	svc_queuereq() code. The request is queued under the transport's
 *	request lock, while the `pending-requests' count is incremented
 *	independently under the pool request lock. Thus, a request can be
 *	picked up by a service thread before the counter is incremented. It
 *	may also happen that the service thread will win the race condition
 *	on the pool lock and will decrement the count even before the
 *	interrupt thread increments it (so the count can be temporarily
 *	negative).
 *
 * svc_poll().
 *	In order to avoid unnecessary locking, which causes performance
 *	problems, we always look for a pending request on the current
 *	transport. If there is none we take a hint from the pool's
 *	`xprt-ready' queue. If the queue had an overflow we switch to the
 *	`drain' mode, checking each transport in the pool's transport list.
 *	Once we find a master transport handle with a pending request we
 *	latch the request lock on this transport and return to svc_run().
 *	If the request belongs to a transport different from the one the
 *	service thread is linked to we need to unlink and link again.
 *
 *	A service thread goes asleep when there are no pending requests on
 *	the transports registered on the pool's transport list. All the
 *	pool's threads sleep on the same condition variable.
 *	If a thread has been sleeping for too long a period of time
 *	(by default 5 seconds) it wakes up and exits. Also, when a transport
 *	is closing, sleeping threads wake up to unlink from this transport.
 *
 * The `xprt-ready' queue.
 *	If a service thread finds no request on the transport it is currently
 *	linked to it will find another transport with a pending request. To
 *	make this search more efficient each pool has an `xprt-ready' queue.
 *	The queue is a FIFO. When the interrupt routine queues a request it
 *	also inserts a pointer to the transport into the `xprt-ready' queue.
 *	A thread looking for a transport with a pending request can pop a
 *	transport and check for a request. The request may already be gone
 *	since it could be taken by a thread linked to that transport. In such
 *	a case we try the next hint. The `xprt-ready' queue has a fixed size
 *	(by default 256 nodes). If it overflows svc_poll() has to switch to
 *	the less efficient but safe `drain' mode and walk through the pool's
 *	transport list.
 *
 *	Both the svc_poll() loop and the `xprt-ready' queue are optimized
 *	for the peak load case, that is, for the situation when the queue is
 *	not empty, there are at all times a few pending requests, and a
 *	service thread which has just processed a request does not go asleep
 *	but immediately picks up the next request.
 *
 * Thread creator.
 *	Each pool has a thread creator associated with it. The creator thread
 *	sleeps on a condition variable and waits for a signal to create a
 *	service thread. The actual thread creation is done in userland by
 *	the method described in "Pools" above.
 *
 *	Signaling threads should turn on the `creator signaled' flag, and
 *	can avoid sending signals when the flag is on. The flag is cleared
 *	when the thread is created.
 *
 *	When the pool is in closing state (i.e. it has already been
 *	unregistered from the pool list) the last thread on the last
 *	transport in the pool should turn the p_creator_exit flag on. The
 *	creator thread will clean up the pool structure and exit.
 *
 * Thread reservation; Detaching service threads.
 *	A service thread can detach itself to block for an extended amount
 *	of time. However, to keep the service active we need to guarantee
 *	at least pool->p_redline non-detached threads that can process
 *	incoming requests. Thus, the maximum number of detached and reserved
 *	threads is p->p_maxthreads - p->p_redline. A service thread should
 *	first acquire a reservation, and if the reservation was granted it
 *	can detach itself. If a reservation was granted but the thread does
 *	not detach itself it should cancel the reservation before it returns
 *	to svc_run().
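 *
 *	An illustrative sketch of that calling pattern, assuming the
 *	reservation interfaces svc_reserve_thread(), svc_unreserve_thread()
 *	and svc_detach_thread() provided by this module:
 *
 *		if (!svc_reserve_thread(clone_xprt))
 *			return (error);	(no reservation was granted)
 *		if (need_to_block_for_a_long_time) {
 *			(void) svc_detach_thread(clone_xprt);
 *			(extended blocking operation)
 *		} else {
 *			svc_unreserve_thread(clone_xprt);
 *		}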
 */

#include <sys/param.h>
#include <sys/types.h>
#include <rpc/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/tiuser.h>
#include <sys/t_kuser.h>
#include <netinet/in.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>
#include <rpc/svc.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/tihdr.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/file.h>
#include <sys/systm.h>
#include <sys/callb.h>
#include <sys/vtrace.h>
#include <sys/zone.h>
#include <nfs/nfs.h>
#include <sys/tsol/label_macro.h>

#define	RQCRED_SIZE	400	/* this size is excessive */

/*
 * Defines for svc_poll()
 */
#define	SVC_EXPRTGONE	((SVCMASTERXPRT *)1)	/* Transport is closing */
#define	SVC_ETIMEDOUT	((SVCMASTERXPRT *)2)	/* Timeout */
#define	SVC_EINTR	((SVCMASTERXPRT *)3)	/* Interrupted by signal */

/*
 * Default stack size for service threads.
 */
#define	DEFAULT_SVC_RUN_STKSIZE		(0)	/* default kernel stack */

int	svc_default_stksize = DEFAULT_SVC_RUN_STKSIZE;

/*
 * Default polling timeout for service threads.
 * Multiplied by hz when used.
 */
#define	DEFAULT_SVC_POLL_TIMEOUT	(5)	/* seconds */

clock_t	svc_default_timeout = DEFAULT_SVC_POLL_TIMEOUT;

/*
 * Size of the `xprt-ready' queue.
 */
#define	DEFAULT_SVC_QSIZE		(256)	/* qnodes */

size_t	svc_default_qsize = DEFAULT_SVC_QSIZE;

/*
 * Default limit for the number of service threads.
 */
#define	DEFAULT_SVC_MAXTHREADS		(INT16_MAX)

int	svc_default_maxthreads = DEFAULT_SVC_MAXTHREADS;

/*
 * Maximum number of requests from the same transport (in `drain' mode).
 */
#define	DEFAULT_SVC_MAX_SAME_XPRT	(8)

int	svc_default_max_same_xprt = DEFAULT_SVC_MAX_SAME_XPRT;

/*
 * Default `redline' of non-detached threads.
 * The total number of detached and reserved threads in an RPC server
 * thread pool is limited to pool->p_maxthreads - svc_redline.
 */
#define	DEFAULT_SVC_REDLINE		(1)

int	svc_default_redline = DEFAULT_SVC_REDLINE;

/*
 * A node for the `xprt-ready' queue.
 * See below.
 */
struct __svcxprt_qnode {
	__SVCXPRT_QNODE	*q_next;
	SVCMASTERXPRT	*q_xprt;
};

/*
 * Global SVC variables (private).
 */
struct svc_globals {
	SVCPOOL		*svc_pools;
	kmutex_t	svc_plock;
};

/*
 * Debug variable to check for rdma based
 * transport startup and cleanup. Controlled
 * through /etc/system. Off by default.
 */
int rdma_check = 0;

/*
 * Authentication parameters list.
 */
static caddr_t rqcred_head;
static kmutex_t rqcred_lock;

/*
 * Pointers to transport specific `rele' routines in rpcmod (set from rpcmod).
 */
void	(*rpc_rele)(queue_t *, mblk_t *) = NULL;
void	(*mir_rele)(queue_t *, mblk_t *) = NULL;

/* ARGSUSED */
void
rpc_rdma_rele(queue_t *q, mblk_t *mp)
{
}

void	(*rdma_rele)(queue_t *, mblk_t *) = rpc_rdma_rele;

/*
 * This macro picks which `rele' routine to use, based on the transport type.
 */
#define	RELE_PROC(xprt) \
	((xprt)->xp_type == T_RDMA ? rdma_rele : \
	(((xprt)->xp_type == T_CLTS) ? \
	    rpc_rele : mir_rele))

/*
 * If true, then keep quiet about version mismatch.
 * This macro is for broadcast RPC only. We have no broadcast RPC in
 * the kernel now but one may define a flag in the transport structure
 * and redefine this macro.
 */
#define	version_keepquiet(xprt)	(FALSE)

/*
 * ZSD key used to retrieve zone-specific svc globals
 */
static zone_key_t svc_zone_key;

static void svc_callout_free(SVCMASTERXPRT *);
static void svc_xprt_qinit(SVCPOOL *, size_t);
static void svc_xprt_qdestroy(SVCPOOL *);
static void svc_thread_creator(SVCPOOL *);
static void svc_creator_signal(SVCPOOL *);
static void svc_creator_signalexit(SVCPOOL *);
static void svc_pool_unregister(struct svc_globals *, SVCPOOL *);
static int svc_run(SVCPOOL *);

/* ARGSUSED */
static void *
svc_zoneinit(zoneid_t zoneid)
{
	struct svc_globals *svc;

	svc = kmem_alloc(sizeof (*svc), KM_SLEEP);
	mutex_init(&svc->svc_plock, NULL, MUTEX_DEFAULT, NULL);
	svc->svc_pools = NULL;
	return (svc);
}

/* ARGSUSED */
static void
svc_zoneshutdown(zoneid_t zoneid, void *arg)
{
	struct svc_globals *svc = arg;
	SVCPOOL *pool;

	mutex_enter(&svc->svc_plock);
	while ((pool = svc->svc_pools) != NULL) {
		svc_pool_unregister(svc, pool);
	}
	mutex_exit(&svc->svc_plock);
}

/* ARGSUSED */
static void
svc_zonefini(zoneid_t zoneid, void *arg)
{
	struct svc_globals *svc = arg;

	ASSERT(svc->svc_pools == NULL);
	mutex_destroy(&svc->svc_plock);
	kmem_free(svc, sizeof (*svc));
}

/*
 * Global SVC init routine.
 * Initialize global generic and transport type specific structures
 * used by the kernel RPC server side. This routine is called only
 * once when the module is being loaded.
 */
void
svc_init()
{
	zone_key_create(&svc_zone_key, svc_zoneinit, svc_zoneshutdown,
	    svc_zonefini);
	svc_cots_init();
	svc_clts_init();
}

/*
 * Destroy the SVCPOOL structure.
 */
static void
svc_pool_cleanup(SVCPOOL *pool)
{
	ASSERT(pool->p_threads + pool->p_detached_threads == 0);
	ASSERT(pool->p_lcount == 0);
	ASSERT(pool->p_closing);

	/*
	 * Call the user supplied shutdown function. This is done
	 * here so the user of the pool will be able to clean up
	 * service related resources.
	 */
	if (pool->p_shutdown != NULL)
		(pool->p_shutdown)();

	/* Destroy `xprt-ready' queue */
	svc_xprt_qdestroy(pool);

	/* Destroy transport list */
	rw_destroy(&pool->p_lrwlock);

	/* Destroy locks and condition variables */
	mutex_destroy(&pool->p_thread_lock);
	mutex_destroy(&pool->p_req_lock);
	cv_destroy(&pool->p_req_cv);

	/* Destroy creator's locks and condition variables */
	mutex_destroy(&pool->p_creator_lock);
	cv_destroy(&pool->p_creator_cv);
	mutex_destroy(&pool->p_user_lock);
	cv_destroy(&pool->p_user_cv);

	/* Free pool structure */
	kmem_free(pool, sizeof (SVCPOOL));
}

/*
 * If all the transports and service threads are already gone
 * signal the creator thread to clean up and exit.
 */
static bool_t
svc_pool_tryexit(SVCPOOL *pool)
{
	ASSERT(MUTEX_HELD(&pool->p_thread_lock));
	ASSERT(pool->p_closing);

	if (pool->p_threads + pool->p_detached_threads == 0) {
		rw_enter(&pool->p_lrwlock, RW_READER);
		if (pool->p_lcount == 0) {
			/*
			 * Release the locks before sending a signal.
			 */
			rw_exit(&pool->p_lrwlock);
			mutex_exit(&pool->p_thread_lock);

			/*
			 * Notify the creator thread to clean up and exit
			 *
			 * NOTICE: No references to the pool beyond this point!
			 *         The pool is being destroyed.
			 */
			ASSERT(!MUTEX_HELD(&pool->p_thread_lock));
			svc_creator_signalexit(pool);

			return (TRUE);
		}
		rw_exit(&pool->p_lrwlock);
	}

	ASSERT(MUTEX_HELD(&pool->p_thread_lock));
	return (FALSE);
}

/*
 * Find a pool with a given id.
 */
static SVCPOOL *
svc_pool_find(struct svc_globals *svc, int id)
{
	SVCPOOL *pool;

	ASSERT(MUTEX_HELD(&svc->svc_plock));

	/*
	 * Search the list for a pool with a matching id.
	 */
	for (pool = svc->svc_pools; pool; pool = pool->p_next)
		if (pool->p_id == id)
			return (pool);

	return (NULL);
}

/*
 * PSARC 2003/523 Contract Private Interface
 * svc_do_run
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
int
svc_do_run(int id)
{
	SVCPOOL *pool;
	int err = 0;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	mutex_enter(&svc->svc_plock);

	pool = svc_pool_find(svc, id);

	mutex_exit(&svc->svc_plock);

	if (pool == NULL)
		return (ENOENT);

	/*
	 * Increment counter of pool threads now
	 * that a thread has been created.
	 */
	mutex_enter(&pool->p_thread_lock);
	pool->p_threads++;
	mutex_exit(&pool->p_thread_lock);

	/* Give work to the new thread. */
	err = svc_run(pool);

	return (err);
}

/*
 * Unregister a pool from the pool list.
 * Set the closing state. If all the transports and service threads
 * are already gone signal the creator thread to clean up and exit.
 */
static void
svc_pool_unregister(struct svc_globals *svc, SVCPOOL *pool)
{
	SVCPOOL *next = pool->p_next;
	SVCPOOL *prev = pool->p_prev;

	ASSERT(MUTEX_HELD(&svc->svc_plock));

	/* Remove from the list */
	if (pool == svc->svc_pools)
		svc->svc_pools = next;
	if (next)
		next->p_prev = prev;
	if (prev)
		prev->p_next = next;
	pool->p_next = pool->p_prev = NULL;

	/*
	 * Offline the pool. Mark the pool as closing.
	 * If there are no transports in this pool notify
	 * the creator thread to clean it up and exit.
	 */
	mutex_enter(&pool->p_thread_lock);
	if (pool->p_offline != NULL)
		(pool->p_offline)();
	pool->p_closing = TRUE;
	if (svc_pool_tryexit(pool))
		return;
	mutex_exit(&pool->p_thread_lock);
}

/*
 * Register a pool with a given id in the global doubly linked pool list.
 * - if there is a pool with the same id in the list then unregister it
 * - insert the new pool into the list.
 */
static void
svc_pool_register(struct svc_globals *svc, SVCPOOL *pool, int id)
{
	SVCPOOL *old_pool;

	/*
	 * If there is a pool with the same id then remove it from
	 * the list and mark the pool as closing.
	 */
	mutex_enter(&svc->svc_plock);

	if ((old_pool = svc_pool_find(svc, id)) != NULL)
		svc_pool_unregister(svc, old_pool);

	/* Insert into the doubly linked list */
	pool->p_id = id;
	pool->p_next = svc->svc_pools;
	pool->p_prev = NULL;
	if (svc->svc_pools)
		svc->svc_pools->p_prev = pool;
	svc->svc_pools = pool;

	mutex_exit(&svc->svc_plock);
}

/*
 * Initialize a newly created pool structure
 */
static int
svc_pool_init(SVCPOOL *pool, uint_t maxthreads, uint_t redline,
	uint_t qsize, uint_t timeout, uint_t stksize, uint_t max_same_xprt)
{
	klwp_t *lwp = ttolwp(curthread);

	ASSERT(pool);

	if (maxthreads == 0)
		maxthreads = svc_default_maxthreads;
	if (redline == 0)
		redline = svc_default_redline;
	if (qsize == 0)
		qsize = svc_default_qsize;
	if (timeout == 0)
		timeout = svc_default_timeout;
	if (stksize == 0)
		stksize = svc_default_stksize;
	if (max_same_xprt == 0)
		max_same_xprt = svc_default_max_same_xprt;

	if (maxthreads < redline)
		return (EINVAL);

	/* Allocate and initialize the `xprt-ready' queue */
	svc_xprt_qinit(pool, qsize);

	/* Initialize doubly-linked xprt list */
	rw_init(&pool->p_lrwlock, NULL, RW_DEFAULT, NULL);

	/*
	 * Set lwp_childstksz on the current lwp so that
	 * descendants of this lwp get the modified stack size, if
	 * it is defined. It is important that either this lwp or
	 * one of its descendants do the actual service pool thread
	 * creation to maintain the stack size inheritance.
	 */
	if (lwp != NULL)
		lwp->lwp_childstksz = stksize;

	/* Initialize thread limits, locks and condition variables */
	pool->p_maxthreads = maxthreads;
	pool->p_redline = redline;
	pool->p_timeout = timeout * hz;
	pool->p_stksize = stksize;
	pool->p_max_same_xprt = max_same_xprt;
	mutex_init(&pool->p_thread_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&pool->p_req_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pool->p_req_cv, NULL, CV_DEFAULT, NULL);

	/* Initialize userland creator */
	pool->p_user_exit = FALSE;
	pool->p_signal_create_thread = FALSE;
	pool->p_user_waiting = FALSE;
	mutex_init(&pool->p_user_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pool->p_user_cv, NULL, CV_DEFAULT, NULL);

	/* Initialize the creator and start the creator thread */
	pool->p_creator_exit = FALSE;
	mutex_init(&pool->p_creator_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pool->p_creator_cv, NULL, CV_DEFAULT, NULL);

	(void) zthread_create(NULL, pool->p_stksize, svc_thread_creator,
	    pool, 0, minclsyspri);

	return (0);
}

/*
 * PSARC 2003/523 Contract Private Interface
 * svc_pool_create
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 *
 * Create a kernel RPC server-side thread/transport pool.
 *
 * This is the public interface for creation of a server RPC thread pool
 * for a given service provider. Transports registered with the pool's id
 * will be served by the pool's threads. This function is called from the
 * nfssys() system call.
 */
int
svc_pool_create(struct svcpool_args *args)
{
	SVCPOOL *pool;
	int error;
	struct svc_globals *svc;

	/*
	 * Caller should check credentials in a way appropriate
	 * in the context of the call.
	 */

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	/* Allocate a new pool */
	pool = kmem_zalloc(sizeof (SVCPOOL), KM_SLEEP);

	/*
	 * Initialize the pool structure and create a creator thread.
	 */
	error = svc_pool_init(pool, args->maxthreads, args->redline,
	    args->qsize, args->timeout, args->stksize, args->max_same_xprt);

	if (error) {
		kmem_free(pool, sizeof (SVCPOOL));
		return (error);
	}

	/* Register the pool with the global pool list */
	svc_pool_register(svc, pool, args->id);

	return (0);
}

int
svc_pool_control(int id, int cmd, void *arg)
{
	SVCPOOL *pool;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);

	switch (cmd) {
	case SVCPSET_SHUTDOWN_PROC:
		/*
		 * Search the list for a pool with a matching id
		 * and register the shutdown procedure with that pool.
		 */
		mutex_enter(&svc->svc_plock);

		if ((pool = svc_pool_find(svc, id)) == NULL) {
			mutex_exit(&svc->svc_plock);
			return (ENOENT);
		}
		/*
		 * Grab the transport list lock before releasing the
		 * pool list lock
		 */
		rw_enter(&pool->p_lrwlock, RW_WRITER);
		mutex_exit(&svc->svc_plock);

		pool->p_shutdown = *((void (*)())arg);

		rw_exit(&pool->p_lrwlock);

		return (0);
	case SVCPSET_UNREGISTER_PROC:
		/*
		 * Search the list for a pool with a matching id
		 * and register the unregister callback with that pool.
		 */
		mutex_enter(&svc->svc_plock);

		if ((pool = svc_pool_find(svc, id)) == NULL) {
			mutex_exit(&svc->svc_plock);
			return (ENOENT);
		}
		/*
		 * Grab the transport list lock before releasing the
		 * pool list lock
		 */
		rw_enter(&pool->p_lrwlock, RW_WRITER);
		mutex_exit(&svc->svc_plock);

		pool->p_offline = *((void (*)())arg);

		rw_exit(&pool->p_lrwlock);

		return (0);
	default:
		return (EINVAL);
	}
}

/*
 * Pool's transport list manipulation routines.
 * - svc_xprt_register()
 * - svc_xprt_unregister()
 *
 * svc_xprt_register() is called from svc_tli_kcreate() to
 * insert a new master transport handle into the doubly linked
 * list of server transport handles (one list per pool).
 *
 * The list is used by svc_poll(), when it operates in `drain'
 * mode, to search for the next transport with a pending request.
 */

int
svc_xprt_register(SVCMASTERXPRT *xprt, int id)
{
	SVCMASTERXPRT *prev, *next;
	SVCPOOL *pool;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	/*
	 * Search the list for a pool with a matching id
	 * and register the transport handle with that pool.
	 */
	mutex_enter(&svc->svc_plock);

	if ((pool = svc_pool_find(svc, id)) == NULL) {
		mutex_exit(&svc->svc_plock);
		return (ENOENT);
	}

	/* Grab the transport list lock before releasing the pool list lock */
	rw_enter(&pool->p_lrwlock, RW_WRITER);
	mutex_exit(&svc->svc_plock);

	/* Don't register new transports when the pool is in closing state */
	if (pool->p_closing) {
		rw_exit(&pool->p_lrwlock);
		return (EBUSY);
	}

	/*
	 * Initialize xp_pool to point to the pool.
	 * We don't want to go through the pool list every time.
	 */
	xprt->xp_pool = pool;

	/*
	 * Insert a transport handle into the list.
	 * The list head points to the most recently inserted transport.
	 */
	if (pool->p_lhead == NULL)
		pool->p_lhead = xprt->xp_prev = xprt->xp_next = xprt;
	else {
		next = pool->p_lhead;
		prev = pool->p_lhead->xp_prev;

		xprt->xp_next = next;
		xprt->xp_prev = prev;

		pool->p_lhead = prev->xp_next = next->xp_prev = xprt;
	}

	/* Increment the transports count */
	pool->p_lcount++;

	rw_exit(&pool->p_lrwlock);
	return (0);
}

/*
 * Called from svc_xprt_cleanup() to remove a master transport handle
 * from the pool's list of server transports (when a transport is
 * being destroyed).
 */
void
svc_xprt_unregister(SVCMASTERXPRT *xprt)
{
	SVCPOOL *pool = xprt->xp_pool;

	/*
	 * Unlink xprt from the list.
	 * If the list head points to this xprt then move it
	 * to the next xprt or reset to NULL if this is the last
	 * xprt in the list.
	 */
	rw_enter(&pool->p_lrwlock, RW_WRITER);

	if (xprt == xprt->xp_next)
		pool->p_lhead = NULL;
	else {
		SVCMASTERXPRT *next = xprt->xp_next;
		SVCMASTERXPRT *prev = xprt->xp_prev;

		next->xp_prev = prev;
		prev->xp_next = next;

		if (pool->p_lhead == xprt)
			pool->p_lhead = next;
	}

	xprt->xp_next = xprt->xp_prev = NULL;

	/* Decrement list count */
	pool->p_lcount--;

	rw_exit(&pool->p_lrwlock);
}

/*
 * Destroy the `xprt-ready' queue of a given pool.
 */
static void
svc_xprt_qdestroy(SVCPOOL *pool)
{
	mutex_destroy(&pool->p_qend_lock);
	kmem_free(pool->p_qbody, pool->p_qsize * sizeof (__SVCXPRT_QNODE));
}

/*
 * Initialize an `xprt-ready' queue for a given pool.
 */
static void
svc_xprt_qinit(SVCPOOL *pool, size_t qsize)
{
	int i;

	pool->p_qsize = qsize;
	pool->p_qbody = kmem_zalloc(pool->p_qsize * sizeof (__SVCXPRT_QNODE),
	    KM_SLEEP);

	for (i = 0; i < pool->p_qsize - 1; i++)
		pool->p_qbody[i].q_next = &(pool->p_qbody[i+1]);

	pool->p_qbody[pool->p_qsize-1].q_next = &(pool->p_qbody[0]);
	pool->p_qtop = &(pool->p_qbody[0]);
	pool->p_qend = &(pool->p_qbody[0]);

	mutex_init(&pool->p_qend_lock, NULL, MUTEX_DEFAULT, NULL);
}

/*
 * Called from the svc_queuereq() interrupt routine to queue
 * a hint for svc_poll() which transport has a pending request.
 * - insert a pointer to xprt into the xprt-ready queue (FIFO)
 * - if the xprt-ready queue is full turn the overflow flag on.
 *
 * NOTICE: pool->p_qtop is protected by the pool's request lock
 * and the caller (svc_queuereq()) must hold the lock.
 */
static void
svc_xprt_qput(SVCPOOL *pool, SVCMASTERXPRT *xprt)
{
	ASSERT(MUTEX_HELD(&pool->p_req_lock));

	/* If the overflow flag is set there is nothing we can do */
	if (pool->p_qoverflow)
		return;

	/* If the queue is full turn the overflow flag on and exit */
	if (pool->p_qtop->q_next == pool->p_qend) {
		mutex_enter(&pool->p_qend_lock);
		if (pool->p_qtop->q_next == pool->p_qend) {
			pool->p_qoverflow = TRUE;
			mutex_exit(&pool->p_qend_lock);
			return;
		}
		mutex_exit(&pool->p_qend_lock);
	}

	/* Insert a hint and move pool->p_qtop */
	pool->p_qtop->q_xprt = xprt;
	pool->p_qtop = pool->p_qtop->q_next;
}

/*
 * Called from svc_poll() to get a hint which transport has a
 * pending request. Returns a pointer to a transport or NULL if the
 * `xprt-ready' queue is empty.
 *
 * Since we do not acquire the pool's request lock while checking if
 * the queue is empty we may miss a request that is just being delivered.
 * However this is ok since svc_poll() will retry again until the
 * count indicates that there are pending requests for this pool.
 */
static SVCMASTERXPRT *
svc_xprt_qget(SVCPOOL *pool)
{
	SVCMASTERXPRT *xprt;

	mutex_enter(&pool->p_qend_lock);
	do {
		/*
		 * If the queue is empty return NULL.
		 * Since we do not acquire the pool's request lock which
		 * protects pool->p_qtop this is not an exact check. However,
		 * this is safe - if we miss a request here svc_poll()
		 * will retry again.
		 */
		if (pool->p_qend == pool->p_qtop) {
			mutex_exit(&pool->p_qend_lock);
			return (NULL);
		}

		/* Get a hint and move pool->p_qend */
		xprt = pool->p_qend->q_xprt;
		pool->p_qend = pool->p_qend->q_next;

		/* Skip fields deleted by svc_xprt_qdelete() */
	} while (xprt == NULL);
	mutex_exit(&pool->p_qend_lock);

	return (xprt);
}

/*
 * Reset an overflow in the xprt-ready queue after
 * all the pending requests have been drained.
 * This switches svc_poll back to getting hints from the
 * xprt-ready queue.
 *
 * NOTICE: pool->p_qtop is protected by the pool's request lock
 * and the caller (svc_poll()) must hold the lock.
 */
static void
svc_xprt_qreset(SVCPOOL *pool)
{
	ASSERT(MUTEX_HELD(&pool->p_req_lock));

	pool->p_qend = pool->p_qtop;
	pool->p_qoverflow = FALSE;
}

/*
 * Delete all the references to a transport handle that
 * is being destroyed from the xprt-ready queue.
 * Deleted pointers are replaced with NULLs.
 */
static void
svc_xprt_qdelete(SVCPOOL *pool, SVCMASTERXPRT *xprt)
{
	__SVCXPRT_QNODE *q = pool->p_qend;
	__SVCXPRT_QNODE *qtop = pool->p_qtop;

	/*
	 * Delete all the references to xprt between the current
	 * position of pool->p_qend and current pool->p_qtop.
	 */
	for (;;) {
		if (q->q_xprt == xprt)
			q->q_xprt = NULL;
		if (q == qtop)
			return;
		q = q->q_next;
	}
}

/*
 * Destructor for a master server transport handle.
 * - if there are no more non-detached threads linked to this transport
 *   then, if requested, call xp_closeproc (we don't wait for detached
 *   threads linked to this transport to complete).
 * - if there are no more threads linked to this
 *   transport then
 *	a) remove references to this transport from the xprt-ready queue
 *	b) remove a reference to this transport from the pool's transport list
 *	c) call a transport specific `destroy' function
 *	d) cancel remaining thread reservations.
 *
 * NOTICE: Caller must hold the transport's thread lock.
 */
static void
svc_xprt_cleanup(SVCMASTERXPRT *xprt, bool_t detached)
{
	ASSERT(MUTEX_HELD(&xprt->xp_thread_lock));
	ASSERT(xprt->xp_wq == NULL);

	/*
	 * If called from the last non-detached thread
	 * it should call the closeproc on this transport.
	 */
	if (!detached && xprt->xp_threads == 0 && xprt->xp_closeproc) {
		(*(xprt->xp_closeproc)) (xprt);
	}

	if (xprt->xp_threads + xprt->xp_detached_threads > 0)
		mutex_exit(&xprt->xp_thread_lock);
	else {
		/* Remove references to xprt from the `xprt-ready' queue */
		svc_xprt_qdelete(xprt->xp_pool, xprt);

		/* Unregister xprt from the pool's transport list */
		svc_xprt_unregister(xprt);
		svc_callout_free(xprt);
		SVC_DESTROY(xprt);
	}
}

/*
 * Find a dispatch routine for a given prog/vers pair.
 * This function is called from svc_getreq() to search the callout
 * table for an entry with a matching RPC program number `prog'
 * and a version range that covers `vers'.
 * - if it finds a matching entry it returns a pointer to the dispatch routine
 * - otherwise it returns NULL and fills `vers_min' and `vers_max' with,
 *   respectively, the lowest and the highest version supported for the
 *   program `prog'
 */
static SVC_DISPATCH *
svc_callout_find(SVCXPRT *xprt, rpcprog_t prog, rpcvers_t vers,
    rpcvers_t *vers_min, rpcvers_t *vers_max)
{
	SVC_CALLOUT_TABLE *sct = xprt->xp_sct;
	int i;

	*vers_min = ~(rpcvers_t)0;
	*vers_max = 0;

	for (i = 0; i < sct->sct_size; i++) {
		SVC_CALLOUT *sc = &sct->sct_sc[i];

		if (prog == sc->sc_prog) {
			if (vers >= sc->sc_versmin && vers <= sc->sc_versmax)
				return (sc->sc_dispatch);

			if (*vers_max < sc->sc_versmax)
				*vers_max = sc->sc_versmax;
			if (*vers_min > sc->sc_versmin)
				*vers_min = sc->sc_versmin;
		}
	}

	return (NULL);
}

/*
 * Optionally free the callout table allocated for this transport by
 * the service provider.
 */
static void
svc_callout_free(SVCMASTERXPRT *xprt)
{
	SVC_CALLOUT_TABLE *sct = xprt->xp_sct;

	if (sct->sct_free) {
		kmem_free(sct->sct_sc, sct->sct_size * sizeof (SVC_CALLOUT));
		kmem_free(sct, sizeof (SVC_CALLOUT_TABLE));
	}
}

/*
 * Send a reply to an RPC request
 *
 * PSARC 2003/523 Contract Private Interface
 * svc_sendreply
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
bool_t
svc_sendreply(const SVCXPRT *clone_xprt, const xdrproc_t xdr_results,
    const caddr_t xdr_location)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = SUCCESS;
	rply.acpted_rply.ar_results.where = xdr_location;
	rply.acpted_rply.ar_results.proc = xdr_results;

	return (SVC_REPLY((SVCXPRT *)clone_xprt, &rply));
}

/*
 * No procedure error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_noproc
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_noproc(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = PROC_UNAVAIL;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Can't decode arguments error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_decode
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_decode(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = GARBAGE_ARGS;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Some system error
 */
void
svcerr_systemerr(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = SYSTEM_ERR;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Authentication error reply
 */
void
svcerr_auth(const SVCXPRT *clone_xprt, const enum auth_stat why)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_DENIED;
	rply.rjcted_rply.rj_stat = AUTH_ERROR;
	rply.rjcted_rply.rj_why = why;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Authentication too weak error reply
 */
void
svcerr_weakauth(const SVCXPRT *clone_xprt)
{
	svcerr_auth((SVCXPRT *)clone_xprt, AUTH_TOOWEAK);
}

/*
 * Program unavailable error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_noprog
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_noprog(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = PROG_UNAVAIL;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Program version mismatch error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_progvers
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_progvers(const SVCXPRT *clone_xprt,
    const rpcvers_t low_vers, const rpcvers_t high_vers)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = PROG_MISMATCH;
	rply.acpted_rply.ar_vers.low = low_vers;
	rply.acpted_rply.ar_vers.high = high_vers;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Get server side input from some transport.
 *
 * Statement of authentication parameters management:
 * This function owns and manages all authentication parameters, specifically
 * the "raw" parameters (msg.rm_call.cb_cred and msg.rm_call.cb_verf) and
 * the "cooked" credentials (rqst->rq_clntcred).
 * However, this function does not know the structure of the cooked
 * credentials, so it makes the following assumptions:
 *   a) the structure is contiguous (no pointers), and
 *   b) the cred structure size does not exceed RQCRED_SIZE bytes.
 * In all events, all three parameters are freed upon exit from this routine.
 * The storage is trivially managed on the call stack in user land, but
 * is malloced in kernel land.
 *
 * Note: the xprt's xp_svc_lock is not held while the service's dispatch
 * routine is running. If we decide to implement svc_unregister(), we'll
 * need to decide whether it's okay for a thread to unregister a service
 * while a request is being processed. If we decide that this is a
 * problem, we can probably use some sort of reference counting scheme to
 * keep the callout entry from going away until the request has completed.
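 *
 * The three parameters share a single allocation; the layout sketched
 * below simply mirrors the cred_area pointer arithmetic in the function
 * body:
 *
 *	cred_area
 *	+------------------+------------------+--------------+
 *	| cb_cred.oa_base  | cb_verf.oa_base  | rq_clntcred  |
 *	|  MAX_AUTH_BYTES  |  MAX_AUTH_BYTES  | RQCRED_SIZE  |
 *	+------------------+------------------+--------------+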
 */
static void
svc_getreq(
	SVCXPRT *clone_xprt,	/* clone transport handle */
	mblk_t *mp)
{
	struct rpc_msg msg;
	struct svc_req r;
	char *cred_area;	/* too big to allocate on call stack */

	TRACE_0(TR_FAC_KRPC, TR_SVC_GETREQ_START,
	    "svc_getreq_start:");

	ASSERT(clone_xprt->xp_master != NULL);
	ASSERT(!is_system_labeled() || DB_CRED(mp) != NULL ||
	    mp->b_datap->db_type != M_DATA);

	/*
	 * Firstly, allocate the authentication parameters' storage
	 */
	mutex_enter(&rqcred_lock);
	if (rqcred_head) {
		cred_area = rqcred_head;

		/* LINTED pointer alignment */
		rqcred_head = *(caddr_t *)rqcred_head;
		mutex_exit(&rqcred_lock);
	} else {
		mutex_exit(&rqcred_lock);
		cred_area = kmem_alloc(2 * MAX_AUTH_BYTES + RQCRED_SIZE,
		    KM_SLEEP);
	}
	msg.rm_call.cb_cred.oa_base = cred_area;
	msg.rm_call.cb_verf.oa_base = &(cred_area[MAX_AUTH_BYTES]);
	r.rq_clntcred = &(cred_area[2 * MAX_AUTH_BYTES]);

	/*
	 * The underlying transport recv routine may modify mblk data
	 * and make it difficult to extract the label afterwards. So
	 * get the label from the raw mblk data now.
	 */
	if (is_system_labeled()) {
		mblk_t *lmp;

		r.rq_label = kmem_alloc(sizeof (bslabel_t), KM_SLEEP);
		if (DB_CRED(mp) != NULL)
			lmp = mp;
		else {
			ASSERT(mp->b_cont != NULL);
			lmp = mp->b_cont;
			ASSERT(DB_CRED(lmp) != NULL);
		}
		bcopy(label2bslabel(crgetlabel(DB_CRED(lmp))), r.rq_label,
		    sizeof (bslabel_t));
	} else {
		r.rq_label = NULL;
	}

	/*
	 * Now receive a message from the transport.
	 */
	if (SVC_RECV(clone_xprt, mp, &msg)) {
		void (*dispatchroutine) (struct svc_req *, SVCXPRT *);
		rpcvers_t vers_min;
		rpcvers_t vers_max;
		bool_t no_dispatch;
		enum auth_stat why;

		/*
		 * Find the registered program and call its
		 * dispatch routine.
		 */
		r.rq_xprt = clone_xprt;
		r.rq_prog = msg.rm_call.cb_prog;
		r.rq_vers = msg.rm_call.cb_vers;
		r.rq_proc = msg.rm_call.cb_proc;
		r.rq_cred = msg.rm_call.cb_cred;

		/*
		 * First authenticate the message.
		 */
		TRACE_0(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_START,
		    "svc_getreq_auth_start:");
		if ((why = sec_svc_msg(&r, &msg, &no_dispatch)) != AUTH_OK) {
			TRACE_1(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_END,
			    "svc_getreq_auth_end:(%S)", "failed");
			svcerr_auth(clone_xprt, why);
			/*
			 * Free the arguments.
			 */
			(void) SVC_FREEARGS(clone_xprt, NULL, NULL);
		} else if (no_dispatch) {
			/*
			 * XXX - when bug id 4053736 is done, remove
			 * the SVC_FREEARGS() call.
			 */
			(void) SVC_FREEARGS(clone_xprt, NULL, NULL);
		} else {
			TRACE_1(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_END,
			    "svc_getreq_auth_end:(%S)", "good");

			dispatchroutine = svc_callout_find(clone_xprt,
			    r.rq_prog, r.rq_vers, &vers_min, &vers_max);

			if (dispatchroutine) {
				(*dispatchroutine) (&r, clone_xprt);
			} else {
				/*
				 * If we got here, the program or version
				 * is not served ...
				 */
				if (vers_max == 0 ||
				    version_keepquiet(clone_xprt))
					svcerr_noprog(clone_xprt);
				else
					svcerr_progvers(clone_xprt, vers_min,
					    vers_max);

				/*
				 * Free the arguments. For successful calls
				 * this is done by the dispatch routine.
				 */
				(void) SVC_FREEARGS(clone_xprt, NULL, NULL);
				/* Fall through to ... */
			}
			/*
			 * Call cleanup procedure for RPCSEC_GSS.
			 * This is a hack since there is currently no
			 * op, such as SVC_CLEANAUTH. rpc_gss_cleanup
			 * should only be called for a non null proc.
			 * Null procs in RPC GSS are overloaded to
			 * provide context setup and control. The main
			 * purpose of rpc_gss_cleanup is to decrement the
			 * reference count associated with the cached
			 * GSS security context. We should never get here
			 * for an RPCSEC_GSS null proc since *no_dispatch
			 * would have been set to true from sec_svc_msg above.
			 */
			if (r.rq_cred.oa_flavor == RPCSEC_GSS)
				rpc_gss_cleanup(clone_xprt);
		}
	}

	if (r.rq_label != NULL)
		kmem_free(r.rq_label, sizeof (bslabel_t));

	/*
	 * Free authentication parameters' storage
	 */
	mutex_enter(&rqcred_lock);
	/* LINTED pointer alignment */
	*(caddr_t *)cred_area = rqcred_head;
	rqcred_head = cred_area;
	mutex_exit(&rqcred_lock);
}

/*
 * Allocate a new clone transport handle.
 */
static SVCXPRT *
svc_clone_init(void)
{
	SVCXPRT *clone_xprt;

	clone_xprt = kmem_zalloc(sizeof (SVCXPRT), KM_SLEEP);
	clone_xprt->xp_cred = crget();
	return (clone_xprt);
}

/*
 * Free memory allocated by svc_clone_init.
 */
static void
svc_clone_free(SVCXPRT *clone_xprt)
{
	/* Free credentials from crget() */
	if (clone_xprt->xp_cred)
		crfree(clone_xprt->xp_cred);
	kmem_free(clone_xprt, sizeof (SVCXPRT));
}

/*
 * Link a per-thread clone transport handle to a master
 * - increment a thread reference count on the master
 * - copy some of the master's fields to the clone
 * - call a transport specific clone routine.
 */
static void
svc_clone_link(SVCMASTERXPRT *xprt, SVCXPRT *clone_xprt)
{
	cred_t *cred = clone_xprt->xp_cred;

	ASSERT(cred);

	/*
	 * Bump up master's thread count.
	 * Linking a per-thread clone transport handle to a master
	 * associates a service thread with the master.
	 */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_threads++;
	mutex_exit(&xprt->xp_thread_lock);

	/* Clear everything */
	bzero(clone_xprt, sizeof (SVCXPRT));

	/* Set pointer to the master transport structure */
	clone_xprt->xp_master = xprt;

	/* Structure copy of all the common fields */
	clone_xprt->xp_xpc = xprt->xp_xpc;

	/* Restore per-thread fields (xp_cred) */
	clone_xprt->xp_cred = cred;

	/*
	 * NOTICE: There is no transport-type specific code now.
	 * If you want to add transport-type specific cloning code
	 * add one more operation (e.g. xp_clone()) to svc_ops,
	 * implement it for each transport type, and call it here
	 * through an appropriate macro (e.g. SVC_CLONE()).
	 */
}

/*
 * Unlink a non-detached clone transport handle from a master
 * - decrement a thread reference count on the master
 * - if the transport is closing (xp_wq is NULL) call svc_xprt_cleanup();
 *   if this is the last non-detached/absolute thread on this transport
 *   then it will close/destroy the transport
 * - call a transport specific function to destroy the clone handle
 * - clear xp_master to avoid recursion.
 */
static void
svc_clone_unlink(SVCXPRT *clone_xprt)
{
	SVCMASTERXPRT *xprt = clone_xprt->xp_master;

	/* This cannot be a detached thread */
	ASSERT(!clone_xprt->xp_detached);
	ASSERT(xprt->xp_threads > 0);

	/* Decrement a reference count on the transport */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_threads--;

	/* svc_xprt_cleanup() unlocks xp_thread_lock or destroys xprt */
	if (xprt->xp_wq)
		mutex_exit(&xprt->xp_thread_lock);
	else
		svc_xprt_cleanup(xprt, FALSE);

	/* Call a transport specific clone `destroy' function */
	SVC_CLONE_DESTROY(clone_xprt);

	/* Clear xp_master */
	clone_xprt->xp_master = NULL;
}

/*
 * Unlink a detached clone transport handle from a master
 * - decrement the thread count on the master
 * - if the transport is closing (xp_wq is NULL) call svc_xprt_cleanup();
 *   if this is the last thread on this transport then it will destroy
 *   the transport.
 * - call a transport specific function to destroy the clone handle
 * - clear xp_master to avoid recursion.
 */
static void
svc_clone_unlinkdetached(SVCXPRT *clone_xprt)
{
	SVCMASTERXPRT *xprt = clone_xprt->xp_master;

	/* This must be a detached thread */
	ASSERT(clone_xprt->xp_detached);
	ASSERT(xprt->xp_detached_threads > 0);
	ASSERT(xprt->xp_threads + xprt->xp_detached_threads > 0);

	/* Grab xprt->xp_thread_lock and decrement link counts */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_detached_threads--;

	/* svc_xprt_cleanup() unlocks xp_thread_lock or destroys xprt */
	if (xprt->xp_wq)
		mutex_exit(&xprt->xp_thread_lock);
	else
		svc_xprt_cleanup(xprt, TRUE);

	/* Call transport specific clone `destroy' function */
	SVC_CLONE_DESTROY(clone_xprt);

	/* Clear xp_master */
	clone_xprt->xp_master = NULL;
}

/*
 * Try to exit a non-detached service thread
 * - check if there are enough threads left
 * - if this thread (i.e. its clone transport handle) is linked
 *   to a master transport then unlink it
 * - free the clone structure
 * - return to userland for thread exit
 *
 * If this is the last non-detached or the last thread on this
 * transport then the call to svc_clone_unlink() will, respectively,
 * close and/or destroy the transport.
 */
static void
svc_thread_exit(SVCPOOL *pool, SVCXPRT *clone_xprt)
{
	if (clone_xprt->xp_master)
		svc_clone_unlink(clone_xprt);
	svc_clone_free(clone_xprt);

	mutex_enter(&pool->p_thread_lock);
	pool->p_threads--;
	if (pool->p_closing && svc_pool_tryexit(pool))
		/* return - thread exit will be handled at user level */
		return;
	mutex_exit(&pool->p_thread_lock);

	/* return - thread exit will be handled at user level */
}

/*
 * Exit a detached service thread that returned to svc_run
 * - decrement the `detached thread' count for the pool
 * - unlink the detached clone transport handle from the master
 * - free the clone structure
 * - return to userland for thread exit
 *
 * If this is the last thread on this transport then the call
 * to svc_clone_unlinkdetached() will destroy the transport.
 */
static void
svc_thread_exitdetached(SVCPOOL *pool, SVCXPRT *clone_xprt)
{
	/* This must be a detached thread */
	ASSERT(clone_xprt->xp_master);
	ASSERT(clone_xprt->xp_detached);
	ASSERT(!MUTEX_HELD(&pool->p_thread_lock));

	svc_clone_unlinkdetached(clone_xprt);
	svc_clone_free(clone_xprt);

	mutex_enter(&pool->p_thread_lock);

	ASSERT(pool->p_reserved_threads >= 0);
	ASSERT(pool->p_detached_threads > 0);

	pool->p_detached_threads--;
	if (pool->p_closing && svc_pool_tryexit(pool))
		/* return - thread exit will be handled at user level */
		return;
	mutex_exit(&pool->p_thread_lock);

	/* return - thread exit will be handled at user level */
}

/*
 * PSARC 2003/523 Contract Private Interface
 * svc_wait
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
int
svc_wait(int id)
{
	SVCPOOL *pool;
	int err = 0;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	mutex_enter(&svc->svc_plock);
	pool = svc_pool_find(svc, id);
	mutex_exit(&svc->svc_plock);

	if (pool == NULL)
		return (ENOENT);

	mutex_enter(&pool->p_user_lock);

	/* Check if there's already a user thread waiting on this pool */
	if (pool->p_user_waiting) {
		mutex_exit(&pool->p_user_lock);
		return (EBUSY);
	}

	pool->p_user_waiting = TRUE;

	/* Go to sleep, waiting for the signaled flag. */
	while (!pool->p_signal_create_thread && !pool->p_user_exit) {
		if (cv_wait_sig(&pool->p_user_cv, &pool->p_user_lock) == 0) {
			/* Interrupted, return to handle exit or signal */
			pool->p_user_waiting = FALSE;
			pool->p_signal_create_thread = FALSE;
			mutex_exit(&pool->p_user_lock);

			/*
			 * The thread has been interrupted and therefore
			 * the service daemon is leaving as well, so
			 * let's go ahead and remove the service
			 * pool at this time.
			 */
			mutex_enter(&svc->svc_plock);
			svc_pool_unregister(svc, pool);
			mutex_exit(&svc->svc_plock);

			return (EINTR);
		}
	}

	pool->p_signal_create_thread = FALSE;
	pool->p_user_waiting = FALSE;

	/*
	 * About to exit the service pool. Set return value
	 * to let the userland code know our intent. Signal
	 * svc_thread_creator() so that it can clean up the
	 * pool structure.
	 */
	if (pool->p_user_exit) {
		err = ECANCELED;
		cv_signal(&pool->p_user_cv);
	}

	mutex_exit(&pool->p_user_lock);

	/* Return to userland with error code, for possible thread creation. */
	return (err);
}

/*
 * `Service threads' creator thread.
 * The creator thread waits for a signal to create a new thread.
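 * Note that the creation is indirect: the creator thread does not create
 * service threads itself. It signals the userland daemon parked in
 * svc_wait(), which forks a worker thread that enters the kernel through
 * svc_do_run() (see the "Pools" description at the top of this file).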
 */
static void
svc_thread_creator(SVCPOOL *pool)
{
	callb_cpr_t cpr_info;	/* CPR info for the creator thread */

	CALLB_CPR_INIT(&cpr_info, &pool->p_creator_lock, callb_generic_cpr,
	    "svc_thread_creator");

	for (;;) {
		mutex_enter(&pool->p_creator_lock);

		/* Check if someone set the exit flag */
		if (pool->p_creator_exit)
			break;

		/* Clear the `signaled' flag and go asleep */
		pool->p_creator_signaled = FALSE;

		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		cv_wait(&pool->p_creator_cv, &pool->p_creator_lock);
		CALLB_CPR_SAFE_END(&cpr_info, &pool->p_creator_lock);

		/* Check if someone signaled to exit */
		if (pool->p_creator_exit)
			break;

		mutex_exit(&pool->p_creator_lock);

		mutex_enter(&pool->p_thread_lock);

		/*
		 * When the pool is in closing state and all the transports
		 * are gone the creator should not create any new threads.
		 */
		if (pool->p_closing) {
			rw_enter(&pool->p_lrwlock, RW_READER);
			if (pool->p_lcount == 0) {
				rw_exit(&pool->p_lrwlock);
				mutex_exit(&pool->p_thread_lock);
				continue;
			}
			rw_exit(&pool->p_lrwlock);
		}

		/*
		 * Create a new service thread now.
		 */
		ASSERT(pool->p_reserved_threads >= 0);
		ASSERT(pool->p_detached_threads >= 0);

		if (pool->p_threads + pool->p_detached_threads <
		    pool->p_maxthreads) {
			/*
			 * Signal the service pool wait thread
			 * only if it hasn't already been signaled.
			 */
			mutex_enter(&pool->p_user_lock);
			if (pool->p_signal_create_thread == FALSE) {
				pool->p_signal_create_thread = TRUE;
				cv_signal(&pool->p_user_cv);
			}
			mutex_exit(&pool->p_user_lock);
		}

		mutex_exit(&pool->p_thread_lock);
	}

	/*
	 * Pool is closed. Cleanup and exit.
	 */

	/* Signal userland creator thread that it can stop now. */
	mutex_enter(&pool->p_user_lock);
	pool->p_user_exit = TRUE;
	cv_broadcast(&pool->p_user_cv);
	mutex_exit(&pool->p_user_lock);

	/* Wait for svc_wait() to be done with the pool */
	mutex_enter(&pool->p_user_lock);
	while (pool->p_user_waiting) {
		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		cv_wait(&pool->p_user_cv, &pool->p_user_lock);
		/* p_user_lock is the lock held here, so pass it to CPR */
		CALLB_CPR_SAFE_END(&cpr_info, &pool->p_user_lock);
	}
	mutex_exit(&pool->p_user_lock);

	CALLB_CPR_EXIT(&cpr_info);
	svc_pool_cleanup(pool);
	zthread_exit();
}

/*
 * If the creator thread is idle signal it to create
 * a new service thread.
 */
static void
svc_creator_signal(SVCPOOL *pool)
{
	mutex_enter(&pool->p_creator_lock);
	if (pool->p_creator_signaled == FALSE) {
		pool->p_creator_signaled = TRUE;
		cv_signal(&pool->p_creator_cv);
	}
	mutex_exit(&pool->p_creator_lock);
}

/*
 * Notify the creator thread to clean up and exit.
 */
static void
svc_creator_signalexit(SVCPOOL *pool)
{
	mutex_enter(&pool->p_creator_lock);
	pool->p_creator_exit = TRUE;
	cv_signal(&pool->p_creator_cv);
	mutex_exit(&pool->p_creator_lock);
}

/*
 * Polling part of the svc_run().
/*
 * Polling part of svc_run().
 * - search for a transport with a pending request
 * - when one is found then latch the request lock and return to svc_run()
 * - if there is no request go asleep and wait for a signal
 * - handle two exceptions:
 *   a) current transport is closing
 *   b) timeout waiting for a new request
 *   in both cases return to svc_run()
 */
static SVCMASTERXPRT *
svc_poll(SVCPOOL *pool, SVCMASTERXPRT *xprt, SVCXPRT *clone_xprt)
{
	/*
	 * Main loop iterates until
	 * a) we find a pending request,
	 * b) detect that the current transport is closing
	 * c) time out waiting for a new request.
	 */
	for (;;) {
		SVCMASTERXPRT *next;
		clock_t timeleft;

		/*
		 * Step 1.
		 * Check if there is a pending request on the current
		 * transport handle so that we can avoid cloning.
		 * If so then decrement the `pending-requests' count for
		 * the pool and return to svc_run().
		 *
		 * We need to prevent a potential starvation. When
		 * a selected transport has all pending requests coming in
		 * all the time then the service threads will never switch to
		 * another transport. With a limited number of service
		 * threads some transports may never be serviced.
		 * To prevent such a scenario we pick up at most
		 * pool->p_max_same_xprt requests from the same transport
		 * and then take a hint from the xprt-ready queue or walk
		 * the transport list.
		 */
		if (xprt && xprt->xp_req_head && (!pool->p_qoverflow ||
		    clone_xprt->xp_same_xprt++ < pool->p_max_same_xprt)) {
			mutex_enter(&xprt->xp_req_lock);
			if (xprt->xp_req_head) {
				mutex_enter(&pool->p_req_lock);
				pool->p_reqs--;
				mutex_exit(&pool->p_req_lock);

				return (xprt);
			}
			mutex_exit(&xprt->xp_req_lock);
		}
		clone_xprt->xp_same_xprt = 0;

		/*
		 * Step 2.
		 * If there is no request on the current transport try to
		 * find another transport with a pending request.
		 */
		mutex_enter(&pool->p_req_lock);
		pool->p_walkers++;
		mutex_exit(&pool->p_req_lock);

		/*
		 * Make sure that transports will not be destroyed just
		 * while we are checking them.
		 */
		rw_enter(&pool->p_lrwlock, RW_READER);

		for (;;) {
			SVCMASTERXPRT *hint;

			/*
			 * Get the next transport from the xprt-ready queue.
			 * This is a hint. There is no guarantee that the
			 * transport still has a pending request since it
			 * could be picked up by another thread in step 1.
			 *
			 * If the transport has a pending request then keep
			 * it locked. Decrement the `pending-requests' and
			 * `walking-threads' counts for the pool, and return
			 * to svc_run().
			 */
			hint = svc_xprt_qget(pool);

			if (hint && hint->xp_req_head) {
				mutex_enter(&hint->xp_req_lock);
				if (hint->xp_req_head) {
					rw_exit(&pool->p_lrwlock);

					mutex_enter(&pool->p_req_lock);
					pool->p_reqs--;
					pool->p_walkers--;
					mutex_exit(&pool->p_req_lock);

					return (hint);
				}
				mutex_exit(&hint->xp_req_lock);
			}

			/*
			 * If there was no hint in the xprt-ready queue then
			 * - if there are fewer pending requests than polling
			 *   threads go asleep
			 * - otherwise check if there was an overflow in the
			 *   xprt-ready queue; if so, then we need to break
			 *   the `drain' mode
			 */
			if (hint == NULL) {
				if (pool->p_reqs < pool->p_walkers) {
					mutex_enter(&pool->p_req_lock);
					if (pool->p_reqs < pool->p_walkers)
						goto sleep;
					mutex_exit(&pool->p_req_lock);
				}
				if (pool->p_qoverflow) {
					break;
				}
			}
		}

		/*
		 * If there was an overflow in the xprt-ready queue then we
		 * need to switch to the `drain' mode, i.e. walk through the
		 * pool's transport list and search for a transport with a
		 * pending request. If we manage to drain all the pending
		 * requests then we can clear the overflow flag. This will
		 * switch svc_poll() back to taking hints from the xprt-ready
		 * queue (which is generally more efficient).
		 *
		 * If there are no registered transports simply go asleep.
		 */
		if (xprt == NULL && pool->p_lhead == NULL) {
			mutex_enter(&pool->p_req_lock);
			goto sleep;
		}

		/*
		 * `Walk' through the pool's list of master server
		 * transport handles. Continue to loop until there are
		 * fewer pending requests than walking threads.
		 */
		next = xprt ? xprt->xp_next : pool->p_lhead;

		for (;;) {
			/*
			 * Check if there is a request on this transport.
			 *
			 * Since blocking on a locked mutex is very expensive
			 * check for a request without a lock first. We may
			 * miss a request that is just being delivered, but
			 * this will cost at most one full walk through the
			 * list.
			 */
			if (next->xp_req_head) {
				/*
				 * Check again, now with a lock.
				 */
				mutex_enter(&next->xp_req_lock);
				if (next->xp_req_head) {
					rw_exit(&pool->p_lrwlock);

					mutex_enter(&pool->p_req_lock);
					pool->p_reqs--;
					pool->p_walkers--;
					mutex_exit(&pool->p_req_lock);

					return (next);
				}
				mutex_exit(&next->xp_req_lock);
			}

			/*
			 * Continue to `walk' through the pool's
			 * transport list until there are fewer requests
			 * than walkers. Check this condition without
			 * a lock first to avoid contention on a mutex.
			 */
			if (pool->p_reqs < pool->p_walkers) {
				/*
				 * Check again, now with the lock.
				 * If all the pending requests have been
				 * picked up then clear the overflow flag.
				 */
				mutex_enter(&pool->p_req_lock);
				if (pool->p_reqs <= 0)
					svc_xprt_qreset(pool);
				if (pool->p_reqs < pool->p_walkers)
					break;	/* goto sleep */
				mutex_exit(&pool->p_req_lock);
			}

			next = next->xp_next;
		}

	sleep:
		/*
		 * No work to do. Stop the `walk' and go asleep.
		 * Decrement the `walking-threads' count for the pool.
		 */
		pool->p_walkers--;
		rw_exit(&pool->p_lrwlock);

		/*
		 * Count us as asleep, mark this thread as safe
		 * for suspend and wait for a request.
		 */
		pool->p_asleep++;
		timeleft = cv_timedwait_sig(&pool->p_req_cv, &pool->p_req_lock,
		    pool->p_timeout + lbolt);

		/*
		 * If the drowsy flag is on this means that
		 * someone has signaled a wakeup. In such a case
		 * the `asleep-threads' count has already been updated
		 * so just clear the flag.
		 *
		 * If the drowsy flag is off then we need to update
		 * the `asleep-threads' count.
		 */
		if (pool->p_drowsy) {
			pool->p_drowsy = FALSE;
			/*
			 * If the thread is here because it timed out,
			 * instead of returning SVC_ETIMEDOUT, it is
			 * time to do some more work.
			 */
			if (timeleft == -1)
				timeleft = 1;
		} else {
			pool->p_asleep--;
		}
		mutex_exit(&pool->p_req_lock);

		/*
		 * If we received a signal while waiting for a
		 * request, inform svc_run(), so that we can return
		 * to user level and restart the call.
		 */
		if (timeleft == 0)
			return (SVC_EINTR);

		/*
		 * If the current transport is gone then notify
		 * svc_run() to unlink from it.
		 */
		if (xprt && xprt->xp_wq == NULL)
			return (SVC_EXPRTGONE);

		/*
		 * If we have timed out waiting for a request inform
		 * svc_run() that we probably don't need this thread.
		 */
		if (timeleft == -1)
			return (SVC_ETIMEDOUT);
	}
}
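/*
 * Illustration only -- the unlocked-peek-then-recheck idiom that
 * svc_poll() applies to xp_req_head above. The cheap unlocked read
 * may race with a delivery, so the decision is always re-validated
 * under the request lock; the helper name below is made up for this
 * sketch.
 */
#if 0
static mblk_t *
peek_then_lock(SVCMASTERXPRT *xprt)
{
	/* Unlocked peek: avoids mutex traffic on empty transports */
	if (xprt->xp_req_head == NULL)
		return (NULL);

	/* Recheck under the lock; the peek may be stale */
	mutex_enter(&xprt->xp_req_lock);
	if (xprt->xp_req_head == NULL) {
		mutex_exit(&xprt->xp_req_lock);
		return (NULL);
	}

	/* Return with xp_req_lock held, as svc_poll() does */
	return (xprt->xp_req_head);
}
#endif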
/*
 * Main loop of the kernel RPC server
 * - wait for input (find a transport with a pending request).
 * - dequeue the request
 * - call a registered server routine to process the request
 *
 * There can be many threads running concurrently in this loop
 * on the same or on different transports.
 */
static int
svc_run(SVCPOOL *pool)
{
	SVCMASTERXPRT *xprt = NULL;	/* master transport handle */
	SVCXPRT *clone_xprt;		/* clone for this thread */
	struct svc_globals *svc;
	proc_t *p = ttoproc(curthread);

	/* Allocate a clone transport handle for this thread */
	clone_xprt = svc_clone_init();

	/*
	 * The loop iterates until the thread becomes
	 * idle too long or the transport is gone.
	 */
	for (;;) {
		SVCMASTERXPRT *next;
		mblk_t *mp;

		TRACE_0(TR_FAC_KRPC, TR_SVC_RUN, "svc_run");

		/*
		 * If the process is exiting/killed, return
		 * immediately without processing any more
		 * requests.
		 */
		if (p->p_flag & (SEXITING | SKILLED)) {
			svc_thread_exit(pool, clone_xprt);

			/*
			 * Thread has been interrupted and therefore
			 * the service daemon is leaving as well so
			 * let's go ahead and remove the service
			 * pool at this time.
			 */
			svc = zone_getspecific(svc_zone_key, curproc->p_zone);
			mutex_enter(&svc->svc_plock);
			svc_pool_unregister(svc, pool);
			mutex_exit(&svc->svc_plock);

			return (0);
		}

		/* Find a transport with a pending request */
		next = svc_poll(pool, xprt, clone_xprt);

		/*
		 * If svc_poll() finds a transport with a request
		 * it latches xp_req_lock on it. Therefore we need
		 * to dequeue the request and release the lock as
		 * soon as possible.
		 */
		ASSERT(next != NULL &&
		    (next == SVC_EXPRTGONE ||
		    next == SVC_ETIMEDOUT ||
		    next == SVC_EINTR ||
		    MUTEX_HELD(&next->xp_req_lock)));

		/* Ooops! Current transport is closing. Unlink now */
		if (next == SVC_EXPRTGONE) {
			svc_clone_unlink(clone_xprt);
			xprt = NULL;
			continue;
		}

		/* Ooops! Timeout while waiting for a request. Exit */
		if (next == SVC_ETIMEDOUT) {
			svc_thread_exit(pool, clone_xprt);
			return (0);
		}

		/*
		 * Interrupted by a signal while waiting for a
		 * request. Return to userspace and restart.
		 */
		if (next == SVC_EINTR) {
			svc_thread_exit(pool, clone_xprt);

			/*
			 * Thread has been interrupted and therefore
			 * the service daemon is leaving as well so
			 * let's go ahead and remove the service
			 * pool at this time.
			 */
			svc = zone_getspecific(svc_zone_key, curproc->p_zone);
			mutex_enter(&svc->svc_plock);
			svc_pool_unregister(svc, pool);
			mutex_exit(&svc->svc_plock);

			return (EINTR);
		}

		/*
		 * De-queue the request and release the request lock
		 * on this transport (latched by svc_poll()).
		 */
		mp = next->xp_req_head;
		next->xp_req_head = mp->b_next;
		mp->b_next = (mblk_t *)0;

		TRACE_2(TR_FAC_KRPC, TR_NFSFP_QUE_REQ_DEQ,
		    "rpc_que_req_deq:pool %p mp %p", pool, mp);
		mutex_exit(&next->xp_req_lock);

		/*
		 * If this is a new request on a current transport then
		 * the clone structure is already properly initialized.
		 * Otherwise, if the request is on a different transport,
		 * unlink from the current master and link to
		 * the one we got a request on.
		 */
		if (next != xprt) {
			if (xprt)
				svc_clone_unlink(clone_xprt);
			svc_clone_link(next, clone_xprt);
			xprt = next;
		}

		/*
		 * If there are more requests and req_cv hasn't
		 * been signaled yet then wake up one more thread now.
		 *
		 * We avoid signaling req_cv until the most recently
		 * signaled thread wakes up and gets CPU to clear
		 * the `drowsy' flag.
		 */
		if (!(pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
		    pool->p_asleep == 0)) {
			mutex_enter(&pool->p_req_lock);

			if (pool->p_drowsy ||
			    pool->p_reqs <= pool->p_walkers ||
			    pool->p_asleep == 0)
				mutex_exit(&pool->p_req_lock);
			else {
				pool->p_asleep--;
				pool->p_drowsy = TRUE;

				cv_signal(&pool->p_req_cv);
				mutex_exit(&pool->p_req_lock);
			}
		}

		/*
		 * If there are no asleep/signaled threads, we are
		 * still below pool->p_maxthreads limit, and no thread is
		 * currently being created then signal the creator
		 * for one more service thread.
		 *
		 * The asleep and drowsy checks are not protected
		 * by a lock since it hurts performance and a wrong
		 * decision is not essential.
		 */
		if (pool->p_asleep == 0 && !pool->p_drowsy &&
		    pool->p_threads + pool->p_detached_threads <
		    pool->p_maxthreads)
			svc_creator_signal(pool);

		/*
		 * Process the request.
		 */
		svc_getreq(clone_xprt, mp);

		/* If thread had a reservation it should have been canceled */
		ASSERT(!clone_xprt->xp_reserved);

		/*
		 * If the clone is marked detached then exit.
		 * The rpcmod slot has already been released
		 * when we detached this thread.
		 */
		if (clone_xprt->xp_detached) {
			svc_thread_exitdetached(pool, clone_xprt);
			return (0);
		}

		/*
		 * Release our reference on the rpcmod
		 * slot attached to xp_wq->q_ptr.
		 */
		(*RELE_PROC(xprt)) (clone_xprt->xp_wq, NULL);
	}
	/* NOTREACHED */
}
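/*
 * Illustration only -- the singly-linked mblk FIFO that svc_run()
 * dequeues from above and svc_queuereq() appends to below, gathered
 * into helpers. The helper names are made up for this sketch; the
 * real code open-codes both operations under xp_req_lock.
 */
#if 0
static void
req_enqueue(SVCMASTERXPRT *xprt, mblk_t *mp)
{
	ASSERT(MUTEX_HELD(&xprt->xp_req_lock));

	/* Append at the tail; an empty queue has a NULL head */
	if (xprt->xp_req_head == NULL)
		xprt->xp_req_head = mp;
	else
		xprt->xp_req_tail->b_next = mp;
	xprt->xp_req_tail = mp;
}

static mblk_t *
req_dequeue(SVCMASTERXPRT *xprt)
{
	mblk_t *mp;

	ASSERT(MUTEX_HELD(&xprt->xp_req_lock));

	/* Pop the head and detach it from the b_next chain */
	mp = xprt->xp_req_head;
	if (mp != NULL) {
		xprt->xp_req_head = mp->b_next;
		mp->b_next = NULL;
	}
	return (mp);
}
#endif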
/*
 * Flush any pending requests for the queue and
 * free the associated mblks.
 */
void
svc_queueclean(queue_t *q)
{
	SVCMASTERXPRT *xprt = ((void **)q->q_ptr)[0];
	mblk_t *mp;

	/*
	 * clean up the requests
	 */
	mutex_enter(&xprt->xp_req_lock);
	while ((mp = xprt->xp_req_head) != NULL) {
		xprt->xp_req_head = mp->b_next;
		mp->b_next = (mblk_t *)0;
		(*RELE_PROC(xprt)) (xprt->xp_wq, mp);
	}
	mutex_exit(&xprt->xp_req_lock);
}

/*
 * This routine is called by rpcmod to inform kernel RPC that a
 * queue is closing. It is called after all the requests have been
 * picked up (that is after all the slots on the queue have
 * been released by kernel RPC). It is also guaranteed that no more
 * requests will be delivered on this transport.
 *
 * - clear xp_wq to mark the master server transport handle as closing
 * - if there are no more threads on this transport close/destroy it
 * - otherwise, broadcast threads sleeping in svc_poll(); the last
 *   thread will close/destroy the transport.
 */
void
svc_queueclose(queue_t *q)
{
	SVCMASTERXPRT *xprt = ((void **)q->q_ptr)[0];

	if (xprt == NULL) {
		/*
		 * If there is no master xprt associated with this stream,
		 * then there is nothing to do. This happens regularly
		 * with connection-oriented listening streams created by
		 * nfsd.
		 */
		return;
	}

	mutex_enter(&xprt->xp_thread_lock);

	ASSERT(xprt->xp_req_head == NULL);
	ASSERT(xprt->xp_wq != NULL);

	xprt->xp_wq = NULL;

	if (xprt->xp_threads == 0) {
		SVCPOOL *pool = xprt->xp_pool;

		/*
		 * svc_xprt_cleanup() destroys the transport
		 * or releases the transport thread lock
		 */
		svc_xprt_cleanup(xprt, FALSE);

		mutex_enter(&pool->p_thread_lock);

		/*
		 * If the pool is in closing state and this was
		 * the last transport in the pool then signal the creator
		 * thread to clean up and exit.
		 */
		if (pool->p_closing && svc_pool_tryexit(pool)) {
			return;
		}
		mutex_exit(&pool->p_thread_lock);
	} else {
		/*
		 * Wake up threads sleeping in svc_poll() so that they
		 * unlink from the transport
		 */
		mutex_enter(&xprt->xp_pool->p_req_lock);
		cv_broadcast(&xprt->xp_pool->p_req_cv);
		mutex_exit(&xprt->xp_pool->p_req_lock);

		/*
		 * NOTICE: No references to the master transport structure
		 * beyond this point!
		 */
		mutex_exit(&xprt->xp_thread_lock);
	}
}

/*
 * Interrupt `request delivery' routine called from rpcmod
 * - put a request at the tail of the transport request queue
 * - insert a hint for svc_poll() into the xprt-ready queue
 * - increment the `pending-requests' count for the pool
 * - wake up a thread sleeping in svc_poll() if necessary
 * - if all the threads are running ask the creator for a new one.
 */
void
svc_queuereq(queue_t *q, mblk_t *mp)
{
	SVCMASTERXPRT *xprt = ((void **)q->q_ptr)[0];
	SVCPOOL *pool = xprt->xp_pool;

	TRACE_0(TR_FAC_KRPC, TR_SVC_QUEUEREQ_START, "svc_queuereq_start");

	ASSERT(!is_system_labeled() || DB_CRED(mp) != NULL ||
	    mp->b_datap->db_type != M_DATA);

	/*
	 * Step 1.
	 * Grab the transport's request lock and put
	 * the request at the tail of the transport's
	 * request queue.
	 */
	mutex_enter(&xprt->xp_req_lock);
	if (xprt->xp_req_head == NULL)
		xprt->xp_req_head = mp;
	else
		xprt->xp_req_tail->b_next = mp;
	xprt->xp_req_tail = mp;

	mutex_exit(&xprt->xp_req_lock);

	/*
	 * Step 2.
	 * Grab the pool request lock, insert a hint into
	 * the xprt-ready queue, increment the `pending-requests'
	 * count for the pool, and wake up a thread sleeping
	 * in svc_poll() if necessary.
	 */
	mutex_enter(&pool->p_req_lock);

	/* Insert pointer to this transport into the xprt-ready queue */
	svc_xprt_qput(pool, xprt);

	/* Increment the `pending-requests' count for the pool */
	pool->p_reqs++;

	TRACE_2(TR_FAC_KRPC, TR_NFSFP_QUE_REQ_ENQ,
	    "rpc_que_req_enq:pool %p mp %p", pool, mp);

	/*
	 * If there are more requests and req_cv hasn't
	 * been signaled yet then wake up one more thread now.
	 *
	 * We avoid signaling req_cv until the most recently
	 * signaled thread wakes up and gets CPU to clear
	 * the `drowsy' flag.
	 */
	if (pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
	    pool->p_asleep == 0) {
		mutex_exit(&pool->p_req_lock);
	} else {
		pool->p_drowsy = TRUE;
		pool->p_asleep--;

		/*
		 * Signal wakeup and drop the request lock.
		 */
		cv_signal(&pool->p_req_cv);
		mutex_exit(&pool->p_req_lock);
	}

	/*
	 * Step 3.
	 * If there are no asleep/signaled threads, we are
	 * still below pool->p_maxthreads limit, and no thread is
	 * currently being created then signal the creator
	 * for one more service thread.
	 *
	 * The asleep and drowsy checks are not protected
	 * by a lock since it hurts performance and a wrong
	 * decision is not essential.
	 */
	if (pool->p_asleep == 0 && !pool->p_drowsy &&
	    pool->p_threads + pool->p_detached_threads < pool->p_maxthreads)
		svc_creator_signal(pool);

	TRACE_1(TR_FAC_KRPC, TR_SVC_QUEUEREQ_END,
	    "svc_queuereq_end:(%S)", "end");
}
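/*
 * Illustration only -- the `drowsy' wakeup throttle shared by
 * svc_queuereq() above and svc_run(), gathered into one helper. At
 * most one sleeping thread is signaled until the woken thread clears
 * the flag, so a burst of deliveries does not cv_signal() the pool
 * once per request. The helper name is made up for this sketch.
 */
#if 0
static void
wake_one_throttled(SVCPOOL *pool)
{
	/* Unlocked check first; a wrong decision is not fatal */
	if (pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
	    pool->p_asleep == 0)
		return;

	/* Recheck and update the counts under the pool request lock */
	mutex_enter(&pool->p_req_lock);
	if (!pool->p_drowsy && pool->p_reqs > pool->p_walkers &&
	    pool->p_asleep > 0) {
		pool->p_asleep--;
		pool->p_drowsy = TRUE;	/* cleared by the woken thread */
		cv_signal(&pool->p_req_cv);
	}
	mutex_exit(&pool->p_req_lock);
}
#endif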
/*
 * Reserve a service thread so that it can be detached later.
 * This reservation is required to make sure that when it tries to
 * detach itself the total number of detached threads does not exceed
 * pool->p_maxthreads - pool->p_redline (i.e. that we can have
 * at least pool->p_redline non-detached threads).
 *
 * If the thread does not detach itself later, it should cancel the
 * reservation before returning to svc_run().
 *
 * - check if there is room for more reserved/detached threads
 * - if so, then increment the `reserved threads' count for the pool
 * - mark the thread as reserved (setting the flag in the clone transport
 *   handle for this thread)
 * - returns 1 if the reservation succeeded, 0 if it failed.
 */
int
svc_reserve_thread(SVCXPRT *clone_xprt)
{
	SVCPOOL *pool = clone_xprt->xp_master->xp_pool;

	/* Recursive reservations are not allowed */
	ASSERT(!clone_xprt->xp_reserved);
	ASSERT(!clone_xprt->xp_detached);

	/* Check pool counts if there is room for reservation */
	mutex_enter(&pool->p_thread_lock);
	if (pool->p_reserved_threads + pool->p_detached_threads >=
	    pool->p_maxthreads - pool->p_redline) {
		mutex_exit(&pool->p_thread_lock);
		return (0);
	}
	pool->p_reserved_threads++;
	mutex_exit(&pool->p_thread_lock);

	/* Mark the thread (clone handle) as reserved */
	clone_xprt->xp_reserved = TRUE;

	return (1);
}

/*
 * Cancel a reservation for a thread.
 * - decrement the `reserved threads' count for the pool
 * - clear the flag in the clone transport handle for this thread.
 */
void
svc_unreserve_thread(SVCXPRT *clone_xprt)
{
	SVCPOOL *pool = clone_xprt->xp_master->xp_pool;

	/* Thread must have a reservation */
	ASSERT(clone_xprt->xp_reserved);
	ASSERT(!clone_xprt->xp_detached);

	/* Decrement global count */
	mutex_enter(&pool->p_thread_lock);
	pool->p_reserved_threads--;
	mutex_exit(&pool->p_thread_lock);

	/* Clear reservation flag */
	clone_xprt->xp_reserved = FALSE;
}

/*
 * Detach a thread from its transport, so that it can block for an
 * extended time. Because the transport can be closed after the thread is
 * detached, the thread should have already sent off a reply if it was
 * going to send one.
 *
 * - decrement the `non-detached threads' count and increment the
 *   `detached threads' count for the transport
 * - decrement the `non-detached threads' and `reserved threads'
 *   counts and increment the `detached threads' count for the pool
 * - release the rpcmod slot
 * - mark the clone (thread) as detached.
 *
 * No need to return a pointer to the thread's CPR information, since
 * the thread has a userland identity.
 *
 * NOTICE: a thread must not detach itself without making a prior
 * reservation through svc_reserve_thread().
 */
callb_cpr_t *
svc_detach_thread(SVCXPRT *clone_xprt)
{
	SVCMASTERXPRT *xprt = clone_xprt->xp_master;
	SVCPOOL *pool = xprt->xp_pool;

	/* Thread must have a reservation */
	ASSERT(clone_xprt->xp_reserved);
	ASSERT(!clone_xprt->xp_detached);

	/* Bookkeeping for this transport */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_threads--;
	xprt->xp_detached_threads++;
	mutex_exit(&xprt->xp_thread_lock);

	/* Bookkeeping for the pool */
	mutex_enter(&pool->p_thread_lock);
	pool->p_threads--;
	pool->p_reserved_threads--;
	pool->p_detached_threads++;
	mutex_exit(&pool->p_thread_lock);

	/* Release an rpcmod slot for this request */
	(*RELE_PROC(xprt)) (clone_xprt->xp_wq, NULL);

	/* Mark the clone (thread) as detached */
	clone_xprt->xp_reserved = FALSE;
	clone_xprt->xp_detached = TRUE;

	return (NULL);
}
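/*
 * Illustration only -- how a service routine is expected to pair
 * svc_reserve_thread() with svc_detach_thread() or
 * svc_unreserve_thread(). The willing-to-block predicate and the
 * long-running work below are placeholders, not real functions.
 */
#if 0
static void
service_routine(SVCXPRT *clone_xprt)
{
	int reserved = svc_reserve_thread(clone_xprt);

	if (reserved && must_block_for_a_long_time()) {
		/* The reply (if any) must already have been sent */
		(void) svc_detach_thread(clone_xprt);
		do_long_running_work();
		/* On return, svc_run() calls svc_thread_exitdetached() */
		return;
	}

	/* Cancel an unused reservation before returning to svc_run() */
	if (reserved)
		svc_unreserve_thread(clone_xprt);
}
#endif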
 * It shuts down all active rdma transports in that pool. If the thread
 * active on the transport happens to be the last thread for that pool,
 * it will signal the creator thread to clean up the pool and destroy
 * the xprt in svc_queueclose().
 */
void
rdma_stop(rdma_xprt_group_t rdma_xprts)
{
	SVCMASTERXPRT *xprt;
	rdma_xprt_record_t *curr_rec;
	queue_t *q;
	mblk_t *mp;
	int i;

	if (rdma_xprts.rtg_count == 0)
		return;

	for (i = 0; i < rdma_xprts.rtg_count; i++) {
		curr_rec = rdma_xprts.rtg_listhead;
		rdma_xprts.rtg_listhead = curr_rec->rtr_next;
		curr_rec->rtr_next = NULL;
		xprt = curr_rec->rtr_xprt_ptr;
		q = xprt->xp_wq;
		svc_rdma_kstop(xprt);

		/* Flush and free any requests still queued on the transport */
		mutex_enter(&xprt->xp_req_lock);
		while ((mp = xprt->xp_req_head) != NULL) {
			xprt->xp_req_head = mp->b_next;
			mp->b_next = (mblk_t *)0;
			freemsg(mp);
		}
		mutex_exit(&xprt->xp_req_lock);
		svc_queueclose(q);
#ifdef DEBUG
		if (rdma_check)
			cmn_err(CE_NOTE, "rdma_stop: Exited svc_queueclose\n");
#endif
		/*
		 * Free the rdma transport record for the expunged rdma
		 * based master transport handle.
		 */
		kmem_free(curr_rec, sizeof (rdma_xprt_record_t));
		if (!rdma_xprts.rtg_listhead)
			break;
	}
}