/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 1993 OpenVision Technologies, Inc., All Rights Reserved.
 */

/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Server-side remote procedure call interface.
 *
 * Master transport handle (SVCMASTERXPRT).
 *	The master transport handle structure is shared among service
 *	threads processing events on the transport. Some fields in the
 *	master structure are protected by locks:
 *	- xp_req_lock protects the request queue:
 *		xp_req_head, xp_req_tail
 *	- xp_thread_lock protects the thread (clone) counts:
 *		xp_threads, xp_detached_threads, xp_wq
 *	Each master transport is registered to exactly one thread pool.
 *
 * Clone transport handle (SVCXPRT).
 *	The clone transport handle structure is a per-service-thread handle
 *	to the transport. The structure carries all the fields/buffers used
 *	for request processing. A service thread, or, in other words, a clone
 *	structure, can be linked to an arbitrary master structure to process
 *	requests on this transport. The master handle keeps track of reference
 *	counts of threads (clones) linked to it. A service thread can switch
 *	to another transport by unlinking its clone handle from the current
 *	transport and linking to a new one. Switching is relatively
 *	inexpensive but it involves locking (the master's xprt->xp_thread_lock).
 *
 * Pools.
 *	A pool represents a kernel RPC service (NFS, Lock Manager, etc.).
 *	Transports related to the service are registered to the service pool.
 *	Service threads can switch between different transports in the pool.
 *	Thus, each service has its own pool of service threads. The maximum
 *	number of threads in a pool is pool->p_maxthreads. This limit makes
 *	it possible to restrict resource usage by the service. Some fields
 *	are protected by locks:
 *	- p_req_lock protects several counts and flags:
 *		p_reqs, p_walkers, p_asleep, p_drowsy, p_req_cv
 *	- p_thread_lock governs other thread counts:
 *		p_threads, p_detached_threads, p_reserved_threads, p_closing
 *
 *	In addition, each pool contains a doubly-linked list of transports,
 *	an `xprt-ready' queue and a creator thread (see below). Threads in
 *	the pool share some other parameters such as stack size and
 *	polling timeout.
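 *
 *	A minimal sketch of the transport switch mentioned above, expressed
 *	with the clone-handle helpers defined later in this file (here
 *	`new_master' stands for an arbitrary target transport):
 *
 *		svc_clone_unlink(clone_xprt);		unlink from old master
 *		svc_clone_link(new_master, clone_xprt);	link to the new one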
 *
 * Pools are initialized through the svc_pool_create() function called from
 * the nfssys() system call. However, thread creation must be done by
 * the userland agent. This is done by using the SVCPOOL_WAIT and
 * SVCPOOL_RUN arguments to nfssys(), which call svc_wait() and
 * svc_do_run(), respectively. Once the pool has been initialized,
 * the userland process must set up a `creator' thread. This thread
 * should park itself in the kernel by calling svc_wait(). If
 * svc_wait() returns successfully, it should fork off a new worker
 * thread, which then calls svc_do_run() in order to get work. When
 * that thread is complete, svc_do_run() will return, and the user
 * program should call thr_exit(). (A sketch of this userland protocol
 * follows svc_creator_signal() below.)
 *
 * When we try to register a new pool and there is an old pool with
 * the same id in the doubly linked pool list (this happens when we kill
 * and restart nfsd or lockd), then we unlink the old pool from the list
 * and mark its state as `closing'. After that the transports can still
 * process requests but new transports won't be registered. When all the
 * transports and service threads associated with the pool are gone the
 * creator thread (see below) will clean up the pool structure and exit.
 *
 * svc_queuereq() and svc_run().
 *	The kernel RPC server is interrupt driven. The svc_queuereq()
 *	interrupt routine is called to deliver an RPC request. The service
 *	threads loop in svc_run(). The interrupt function queues a request
 *	on the transport's queue and makes sure that the request is serviced.
 *	It may either wake up one of the sleeping threads, or ask for a new
 *	thread to be created, or, if the previous request is just being
 *	picked up, do nothing. In the last case the service thread that is
 *	picking up the previous request will wake up or create the next
 *	thread. After a service thread processes a request and sends a reply
 *	it returns to svc_run() and svc_run() calls svc_poll() to find
 *	new input.
 *
 *	There is no longer an "inconsistent" but "safe" optimization in the
 *	svc_queuereq() code. This "inconsistent" state was leading to
 *	inconsistencies between the actual number of requests and the value
 *	of p_reqs (the total number of requests). Because of this, hangs were
 *	occurring in svc_poll() where p_reqs was greater than one and no
 *	requests were found on the request queues.
 *
 * svc_poll().
 *	In order to avoid unnecessary locking, which causes performance
 *	problems, we always look for a pending request on the current
 *	transport. If there is none we take a hint from the pool's
 *	`xprt-ready' queue. If the queue had an overflow we switch to the
 *	`drain' mode, checking each transport in the pool's transport list.
 *	Once we find a master transport handle with a pending request we
 *	latch the request lock on this transport and return to svc_run().
 *	If the request belongs to a transport different from the one the
 *	service thread is linked to we need to unlink and link again.
 *
 *	A service thread goes to sleep when there are no pending requests
 *	on the transports registered on the pool's transport list.
 *	All the pool's threads sleep on the same condition variable.
 *	If a thread has been sleeping for too long a period of time
 *	(by default 5 seconds) it wakes up and exits. Also, when a transport
 *	is closing, sleeping threads wake up to unlink from this transport.
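 *
 *	A condensed sketch of the interrupt-side delivery path described
 *	under `svc_queuereq() and svc_run()' above; the real code lives in
 *	svc_queuereq(), so this is an outline, not the implementation:
 *
 *		mutex_enter(&xprt->xp_req_lock);
 *		... enqueue the request on xp_req_head/xp_req_tail ...
 *		mutex_exit(&xprt->xp_req_lock);
 *
 *		mutex_enter(&pool->p_req_lock);
 *		svc_xprt_qput(pool, xprt);	leave a hint for svc_poll()
 *		pool->p_reqs++;
 *		... wake up a sleeping thread, or signal the creator ...
 *		mutex_exit(&pool->p_req_lock);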
 *
 * The `xprt-ready' queue.
 *	If a service thread finds no request on the transport it is currently
 *	linked to, it will find another transport with a pending request. To
 *	make this search more efficient each pool has an `xprt-ready' queue.
 *	The queue is a FIFO. When the interrupt routine queues a request it
 *	also inserts a pointer to the transport into the `xprt-ready' queue.
 *	A thread looking for a transport with a pending request can pop a
 *	transport from the queue and check for a request. The request can
 *	already be gone since it could be taken by a thread linked to that
 *	transport. In such a case we try the next hint. The `xprt-ready'
 *	queue has a fixed size (by default 256 nodes). If it overflows
 *	svc_poll() has to switch to the less efficient but safe `drain'
 *	mode and walk through the pool's transport list.
 *
 *	Both the svc_poll() loop and the `xprt-ready' queue are optimized
 *	for the peak load case, that is, for the situation when the queue
 *	is not empty, there are always a few pending requests, and a
 *	service thread which has just processed a request does not go to
 *	sleep but immediately picks up the next request.
 *
 * Thread creator.
 *	Each pool has a thread creator associated with it. The creator thread
 *	sleeps on a condition variable and waits for a signal to create a
 *	service thread. The actual thread creation is done in userland by
 *	the method described in "Pools" above.
 *
 *	Signaling threads should turn on the `creator signaled' flag, and
 *	can avoid sending signals when the flag is on. The flag is cleared
 *	when the thread is created.
 *
 *	When the pool is in the closing state (i.e. it has already been
 *	unregistered from the pool list) the last thread on the last
 *	transport in the pool should turn the p_creator_exit flag on. The
 *	creator thread will clean up the pool structure and exit.
 *
 * Thread reservation; Detaching service threads.
 *	A service thread can detach itself to block for an extended amount
 *	of time. However, to keep the service active we need to guarantee
 *	at least pool->p_redline non-detached threads that can process
 *	incoming requests. Thus, the maximum number of detached and reserved
 *	threads is pool->p_maxthreads - pool->p_redline. A service thread
 *	should first acquire a reservation, and if the reservation was
 *	granted it can detach itself. If a reservation was granted but the
 *	thread does not detach itself it should cancel the reservation
 *	before it returns to svc_run().
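 *
 *	For example, with pool->p_maxthreads == 100 and the default
 *	pool->p_redline == 1, at most 99 threads may be detached or
 *	reserved at any moment, so at least one non-detached thread
 *	always remains available to pick up incoming requests.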
 */

#include <sys/param.h>
#include <sys/types.h>
#include <rpc/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/tiuser.h>
#include <sys/t_kuser.h>
#include <netinet/in.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>
#include <rpc/svc.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/tihdr.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/file.h>
#include <sys/systm.h>
#include <sys/callb.h>
#include <sys/vtrace.h>
#include <sys/zone.h>
#include <nfs/nfs.h>
#include <sys/tsol/label_macro.h>

#define	RQCRED_SIZE	400	/* this size is excessive */

/*
 * Defines for svc_poll()
 */
#define	SVC_EXPRTGONE	((SVCMASTERXPRT *)1)	/* Transport is closing */
#define	SVC_ETIMEDOUT	((SVCMASTERXPRT *)2)	/* Timeout */
#define	SVC_EINTR	((SVCMASTERXPRT *)3)	/* Interrupted by signal */

/*
 * Default stack size for service threads.
 */
#define	DEFAULT_SVC_RUN_STKSIZE		(0)	/* default kernel stack */

int	svc_default_stksize = DEFAULT_SVC_RUN_STKSIZE;

/*
 * Default polling timeout for service threads.
 * Multiplied by hz when used.
 */
#define	DEFAULT_SVC_POLL_TIMEOUT	(5)	/* seconds */

clock_t	svc_default_timeout = DEFAULT_SVC_POLL_TIMEOUT;

/*
 * Size of the `xprt-ready' queue.
 */
#define	DEFAULT_SVC_QSIZE	(256)	/* qnodes */

size_t	svc_default_qsize = DEFAULT_SVC_QSIZE;

/*
 * Default limit for the number of service threads.
 */
#define	DEFAULT_SVC_MAXTHREADS	(INT16_MAX)

int	svc_default_maxthreads = DEFAULT_SVC_MAXTHREADS;

/*
 * Maximum number of requests from the same transport (in `drain' mode).
 */
#define	DEFAULT_SVC_MAX_SAME_XPRT	(8)

int	svc_default_max_same_xprt = DEFAULT_SVC_MAX_SAME_XPRT;


/*
 * Default `redline' of non-detached threads.
 * The total number of detached and reserved threads in an RPC server
 * thread pool is limited to pool->p_maxthreads - svc_redline.
 */
#define	DEFAULT_SVC_REDLINE	(1)

int	svc_default_redline = DEFAULT_SVC_REDLINE;

/*
 * A node for the `xprt-ready' queue.
 * See below.
 */
struct __svcxprt_qnode {
	__SVCXPRT_QNODE	*q_next;
	SVCMASTERXPRT	*q_xprt;
};

/*
 * Global SVC variables (private).
 */
struct svc_globals {
	SVCPOOL		*svc_pools;
	kmutex_t	svc_plock;
};

/*
 * Debug variable to check for rdma based
 * transport startup and cleanup. Controlled
 * through /etc/system. Off by default.
 */
int rdma_check = 0;

/*
 * Authentication parameters list.
 */
static caddr_t rqcred_head;
static kmutex_t rqcred_lock;

/*
 * Pointers to transport-specific `rele' routines in rpcmod (set from rpcmod).
 */
void	(*rpc_rele)(queue_t *, mblk_t *) = NULL;
void	(*mir_rele)(queue_t *, mblk_t *) = NULL;

/* ARGSUSED */
void
rpc_rdma_rele(queue_t *q, mblk_t *mp)
{
}
void	(*rdma_rele)(queue_t *, mblk_t *) = rpc_rdma_rele;

/*
 * This macro picks which `rele' routine to use, based on the transport type.
 */
#define	RELE_PROC(xprt) \
	((xprt)->xp_type == T_RDMA ? rdma_rele : \
	(((xprt)->xp_type == T_CLTS) ? rpc_rele : mir_rele))

/*
 * If true, then keep quiet about version mismatch.
 * This macro is for broadcast RPC only. We have no broadcast RPC in
 * the kernel now but one may define a flag in the transport structure
 * and redefine this macro.
 */
#define	version_keepquiet(xprt)	(FALSE)

/*
 * ZSD key used to retrieve zone-specific svc globals
 */
static zone_key_t svc_zone_key;

static void svc_callout_free(SVCMASTERXPRT *);
static void svc_xprt_qinit(SVCPOOL *, size_t);
static void svc_xprt_qdestroy(SVCPOOL *);
static void svc_thread_creator(SVCPOOL *);
static void svc_creator_signal(SVCPOOL *);
static void svc_creator_signalexit(SVCPOOL *);
static void svc_pool_unregister(struct svc_globals *, SVCPOOL *);
static int svc_run(SVCPOOL *);

/* ARGSUSED */
static void *
svc_zoneinit(zoneid_t zoneid)
{
	struct svc_globals *svc;

	svc = kmem_alloc(sizeof (*svc), KM_SLEEP);
	mutex_init(&svc->svc_plock, NULL, MUTEX_DEFAULT, NULL);
	svc->svc_pools = NULL;
	return (svc);
}

/* ARGSUSED */
static void
svc_zoneshutdown(zoneid_t zoneid, void *arg)
{
	struct svc_globals *svc = arg;
	SVCPOOL *pool;

	mutex_enter(&svc->svc_plock);
	while ((pool = svc->svc_pools) != NULL) {
		svc_pool_unregister(svc, pool);
	}
	mutex_exit(&svc->svc_plock);
}

/* ARGSUSED */
static void
svc_zonefini(zoneid_t zoneid, void *arg)
{
	struct svc_globals *svc = arg;

	ASSERT(svc->svc_pools == NULL);
	mutex_destroy(&svc->svc_plock);
	kmem_free(svc, sizeof (*svc));
}

/*
 * Global SVC init routine.
 * Initialize global generic and transport type specific structures
 * used by the kernel RPC server side. This routine is called only
 * once when the module is being loaded.
 */
void
svc_init()
{
	zone_key_create(&svc_zone_key, svc_zoneinit, svc_zoneshutdown,
	    svc_zonefini);
	svc_cots_init();
	svc_clts_init();
}

/*
 * Destroy the SVCPOOL structure.
 */
static void
svc_pool_cleanup(SVCPOOL *pool)
{
	ASSERT(pool->p_threads + pool->p_detached_threads == 0);
	ASSERT(pool->p_lcount == 0);
	ASSERT(pool->p_closing);

	/*
	 * Call the user supplied shutdown function. This is done
	 * here so the user of the pool will be able to clean up
	 * service related resources.
	 */
	if (pool->p_shutdown != NULL)
		(pool->p_shutdown)();

	/* Destroy `xprt-ready' queue */
	svc_xprt_qdestroy(pool);

	/* Destroy transport list */
	rw_destroy(&pool->p_lrwlock);

	/* Destroy locks and condition variables */
	mutex_destroy(&pool->p_thread_lock);
	mutex_destroy(&pool->p_req_lock);
	cv_destroy(&pool->p_req_cv);

	/* Destroy creator's locks and condition variables */
	mutex_destroy(&pool->p_creator_lock);
	cv_destroy(&pool->p_creator_cv);
	mutex_destroy(&pool->p_user_lock);
	cv_destroy(&pool->p_user_cv);

	/* Free pool structure */
	kmem_free(pool, sizeof (SVCPOOL));
}
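/*
 * A hedged sketch of how the p_shutdown callback invoked above gets
 * registered; see svc_pool_control() below for the actual interface.
 * The pool id constant and callback name are hypothetical:
 *
 *	(void) svc_pool_control(EXAMPLE_SVCPOOL_ID,
 *	    SVCPSET_SHUTDOWN_PROC, (void *)example_pool_shutdown);
 */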
/*
 * If all the transports and service threads are already gone
 * signal the creator thread to clean up and exit.
 */
static bool_t
svc_pool_tryexit(SVCPOOL *pool)
{
	ASSERT(MUTEX_HELD(&pool->p_thread_lock));
	ASSERT(pool->p_closing);

	if (pool->p_threads + pool->p_detached_threads == 0) {
		rw_enter(&pool->p_lrwlock, RW_READER);
		if (pool->p_lcount == 0) {
			/*
			 * Release the locks before sending a signal.
			 */
			rw_exit(&pool->p_lrwlock);
			mutex_exit(&pool->p_thread_lock);

			/*
			 * Notify the creator thread to clean up and exit
			 *
			 * NOTICE: No references to the pool beyond this point!
			 * The pool is being destroyed.
			 */
			ASSERT(!MUTEX_HELD(&pool->p_thread_lock));
			svc_creator_signalexit(pool);

			return (TRUE);
		}
		rw_exit(&pool->p_lrwlock);
	}

	ASSERT(MUTEX_HELD(&pool->p_thread_lock));
	return (FALSE);
}

/*
 * Find a pool with a given id.
 */
static SVCPOOL *
svc_pool_find(struct svc_globals *svc, int id)
{
	SVCPOOL *pool;

	ASSERT(MUTEX_HELD(&svc->svc_plock));

	/*
	 * Search the list for a pool with a matching id.
	 */
	for (pool = svc->svc_pools; pool; pool = pool->p_next)
		if (pool->p_id == id)
			return (pool);

	return (NULL);
}

/*
 * PSARC 2003/523 Contract Private Interface
 * svc_do_run
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
int
svc_do_run(int id)
{
	SVCPOOL *pool;
	int err = 0;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	mutex_enter(&svc->svc_plock);

	pool = svc_pool_find(svc, id);

	mutex_exit(&svc->svc_plock);

	if (pool == NULL)
		return (ENOENT);

	/*
	 * Increment the counter of pool threads now
	 * that a thread has been created.
	 */
	mutex_enter(&pool->p_thread_lock);
	pool->p_threads++;
	mutex_exit(&pool->p_thread_lock);

	/* Give work to the new thread. */
	err = svc_run(pool);

	return (err);
}

/*
 * Unregister a pool from the pool list.
 * Set the closing state. If all the transports and service threads
 * are already gone signal the creator thread to clean up and exit.
 */
static void
svc_pool_unregister(struct svc_globals *svc, SVCPOOL *pool)
{
	SVCPOOL *next = pool->p_next;
	SVCPOOL *prev = pool->p_prev;

	ASSERT(MUTEX_HELD(&svc->svc_plock));

	/* Remove from the list */
	if (pool == svc->svc_pools)
		svc->svc_pools = next;
	if (next)
		next->p_prev = prev;
	if (prev)
		prev->p_next = next;
	pool->p_next = pool->p_prev = NULL;

	/*
	 * Offline the pool. Mark the pool as closing.
	 * If there are no transports in this pool notify
	 * the creator thread to clean it up and exit.
	 */
	mutex_enter(&pool->p_thread_lock);
	if (pool->p_offline != NULL)
		(pool->p_offline)();
	pool->p_closing = TRUE;
	if (svc_pool_tryexit(pool))
		return;
	mutex_exit(&pool->p_thread_lock);
}

/*
 * Register a pool with a given id in the global doubly linked pool list.
 * - if there is a pool with the same id in the list then unregister it
 * - insert the new pool into the list.
 */
static void
svc_pool_register(struct svc_globals *svc, SVCPOOL *pool, int id)
{
	SVCPOOL *old_pool;

	/*
	 * If there is a pool with the same id then remove it from
	 * the list and mark the pool as closing.
	 */
	mutex_enter(&svc->svc_plock);

	if ((old_pool = svc_pool_find(svc, id)) != NULL)
		svc_pool_unregister(svc, old_pool);

	/* Insert into the doubly linked list */
	pool->p_id = id;
	pool->p_next = svc->svc_pools;
	pool->p_prev = NULL;
	if (svc->svc_pools)
		svc->svc_pools->p_prev = pool;
	svc->svc_pools = pool;

	mutex_exit(&svc->svc_plock);
}

/*
 * Initialize a newly created pool structure
 */
static int
svc_pool_init(SVCPOOL *pool, uint_t maxthreads, uint_t redline,
	uint_t qsize, uint_t timeout, uint_t stksize, uint_t max_same_xprt)
{
	klwp_t *lwp = ttolwp(curthread);

	ASSERT(pool);

	if (maxthreads == 0)
		maxthreads = svc_default_maxthreads;
	if (redline == 0)
		redline = svc_default_redline;
	if (qsize == 0)
		qsize = svc_default_qsize;
	if (timeout == 0)
		timeout = svc_default_timeout;
	if (stksize == 0)
		stksize = svc_default_stksize;
	if (max_same_xprt == 0)
		max_same_xprt = svc_default_max_same_xprt;

	if (maxthreads < redline)
		return (EINVAL);

	/* Allocate and initialize the `xprt-ready' queue */
	svc_xprt_qinit(pool, qsize);

	/* Initialize doubly-linked xprt list */
	rw_init(&pool->p_lrwlock, NULL, RW_DEFAULT, NULL);

	/*
	 * Set lwp_childstksz on the current lwp so that
	 * descendants of this lwp get the modified stack size, if
	 * it is defined. It is important that either this lwp or
	 * one of its descendants do the actual service pool thread
	 * creation to maintain the stack size inheritance.
	 */
	if (lwp != NULL)
		lwp->lwp_childstksz = stksize;

	/* Initialize thread limits, locks and condition variables */
	pool->p_maxthreads = maxthreads;
	pool->p_redline = redline;
	pool->p_timeout = timeout * hz;
	pool->p_stksize = stksize;
	pool->p_max_same_xprt = max_same_xprt;
	mutex_init(&pool->p_thread_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&pool->p_req_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pool->p_req_cv, NULL, CV_DEFAULT, NULL);

	/* Initialize userland creator */
	pool->p_user_exit = FALSE;
	pool->p_signal_create_thread = FALSE;
	pool->p_user_waiting = FALSE;
	mutex_init(&pool->p_user_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pool->p_user_cv, NULL, CV_DEFAULT, NULL);

	/* Initialize the creator and start the creator thread */
	pool->p_creator_exit = FALSE;
	mutex_init(&pool->p_creator_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pool->p_creator_cv, NULL, CV_DEFAULT, NULL);

	(void) zthread_create(NULL, pool->p_stksize, svc_thread_creator,
	    pool, 0, minclsyspri);

	return (0);
}
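/*
 * A hedged sketch of how a userland daemon supplies the parameters
 * consumed by svc_pool_init() above via svc_pool_create() below;
 * zeroed fields select the defaults. The pool id constant is
 * hypothetical and the exact nfssys() calling convention is simplified:
 *
 *	struct svcpool_args args;
 *
 *	bzero(&args, sizeof (args));
 *	args.id = EXAMPLE_SVCPOOL_ID;
 *	args.maxthreads = 0;		0 => svc_default_maxthreads
 *	(void) nfssys(SVCPOOL_CREATE, &args);
 */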
/*
 * PSARC 2003/523 Contract Private Interface
 * svc_pool_create
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 *
 * Create a kernel RPC server-side thread/transport pool.
 *
 * This is a public interface for creation of a server RPC thread pool
 * for a given service provider. Transports registered with the pool's id
 * will be served by the pool's threads. This function is called from the
 * nfssys() system call.
 */
int
svc_pool_create(struct svcpool_args *args)
{
	SVCPOOL *pool;
	int error;
	struct svc_globals *svc;

	/*
	 * Caller should check credentials in a way appropriate
	 * in the context of the call.
	 */

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);

	/* Allocate a new pool */
	pool = kmem_zalloc(sizeof (SVCPOOL), KM_SLEEP);

	/*
	 * Initialize the pool structure and create a creator thread.
	 */
	error = svc_pool_init(pool, args->maxthreads, args->redline,
	    args->qsize, args->timeout, args->stksize, args->max_same_xprt);

	if (error) {
		kmem_free(pool, sizeof (SVCPOOL));
		return (error);
	}

	/* Register the pool with the global pool list */
	svc_pool_register(svc, pool, args->id);

	return (0);
}

int
svc_pool_control(int id, int cmd, void *arg)
{
	SVCPOOL *pool;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);

	switch (cmd) {
	case SVCPSET_SHUTDOWN_PROC:
		/*
		 * Search the list for a pool with a matching id
		 * and register the shutdown procedure with that pool.
		 */
		mutex_enter(&svc->svc_plock);

		if ((pool = svc_pool_find(svc, id)) == NULL) {
			mutex_exit(&svc->svc_plock);
			return (ENOENT);
		}

		/*
		 * Grab the transport list lock before releasing the
		 * pool list lock
		 */
		rw_enter(&pool->p_lrwlock, RW_WRITER);
		mutex_exit(&svc->svc_plock);

		pool->p_shutdown = *((void (*)())arg);

		rw_exit(&pool->p_lrwlock);

		return (0);
	case SVCPSET_UNREGISTER_PROC:
		/*
		 * Search the list for a pool with a matching id
		 * and register the unregister callback with that pool.
		 */
		mutex_enter(&svc->svc_plock);

		if ((pool = svc_pool_find(svc, id)) == NULL) {
			mutex_exit(&svc->svc_plock);
			return (ENOENT);
		}

		/*
		 * Grab the transport list lock before releasing the
		 * pool list lock
		 */
		rw_enter(&pool->p_lrwlock, RW_WRITER);
		mutex_exit(&svc->svc_plock);

		pool->p_offline = *((void (*)())arg);

		rw_exit(&pool->p_lrwlock);

		return (0);
	default:
		return (EINVAL);
	}
}

/*
 * Pool's transport list manipulation routines.
 * - svc_xprt_register()
 * - svc_xprt_unregister()
 *
 * svc_xprt_register() is called from svc_tli_kcreate() to
 * insert a new master transport handle into the doubly linked
 * list of server transport handles (one list per pool).
 *
 * The list is used by svc_poll(), when it operates in `drain'
 * mode, to search for the next transport with a pending request.
 */

int
svc_xprt_register(SVCMASTERXPRT *xprt, int id)
{
	SVCMASTERXPRT *prev, *next;
	SVCPOOL *pool;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);

	/*
	 * Search the list for a pool with a matching id
	 * and register the transport handle with that pool.
	 */
	mutex_enter(&svc->svc_plock);

	if ((pool = svc_pool_find(svc, id)) == NULL) {
		mutex_exit(&svc->svc_plock);
		return (ENOENT);
	}

	/* Grab the transport list lock before releasing the pool list lock */
	rw_enter(&pool->p_lrwlock, RW_WRITER);
	mutex_exit(&svc->svc_plock);

	/* Don't register new transports when the pool is in closing state */
	if (pool->p_closing) {
		rw_exit(&pool->p_lrwlock);
		return (EBUSY);
	}

	/*
	 * Initialize xp_pool to point to the pool.
	 * We don't want to go through the pool list every time.
	 */
	xprt->xp_pool = pool;

	/*
	 * Insert a transport handle into the list.
	 * The list head points to the most recently inserted transport.
	 */
	if (pool->p_lhead == NULL)
		pool->p_lhead = xprt->xp_prev = xprt->xp_next = xprt;
	else {
		next = pool->p_lhead;
		prev = pool->p_lhead->xp_prev;

		xprt->xp_next = next;
		xprt->xp_prev = prev;

		pool->p_lhead = prev->xp_next = next->xp_prev = xprt;
	}

	/* Increment the transports count */
	pool->p_lcount++;

	rw_exit(&pool->p_lrwlock);
	return (0);
}

/*
 * Called from svc_xprt_cleanup() to remove a master transport handle
 * from the pool's list of server transports (when a transport is
 * being destroyed).
 */
void
svc_xprt_unregister(SVCMASTERXPRT *xprt)
{
	SVCPOOL *pool = xprt->xp_pool;

	/*
	 * Unlink xprt from the list.
	 * If the list head points to this xprt then move it
	 * to the next xprt or reset to NULL if this is the last
	 * xprt in the list.
	 */
	rw_enter(&pool->p_lrwlock, RW_WRITER);

	if (xprt == xprt->xp_next)
		pool->p_lhead = NULL;
	else {
		SVCMASTERXPRT *next = xprt->xp_next;
		SVCMASTERXPRT *prev = xprt->xp_prev;

		next->xp_prev = prev;
		prev->xp_next = next;

		if (pool->p_lhead == xprt)
			pool->p_lhead = next;
	}

	xprt->xp_next = xprt->xp_prev = NULL;

	/* Decrement list count */
	pool->p_lcount--;

	rw_exit(&pool->p_lrwlock);
}

static void
svc_xprt_qdestroy(SVCPOOL *pool)
{
	mutex_destroy(&pool->p_qend_lock);
	kmem_free(pool->p_qbody, pool->p_qsize * sizeof (__SVCXPRT_QNODE));
}

/*
 * Initialize an `xprt-ready' queue for a given pool.
 */
static void
svc_xprt_qinit(SVCPOOL *pool, size_t qsize)
{
	int i;

	pool->p_qsize = qsize;
	pool->p_qbody = kmem_zalloc(pool->p_qsize * sizeof (__SVCXPRT_QNODE),
	    KM_SLEEP);

	for (i = 0; i < pool->p_qsize - 1; i++)
		pool->p_qbody[i].q_next = &(pool->p_qbody[i+1]);

	pool->p_qbody[pool->p_qsize-1].q_next = &(pool->p_qbody[0]);
	pool->p_qtop = &(pool->p_qbody[0]);
	pool->p_qend = &(pool->p_qbody[0]);

	mutex_init(&pool->p_qend_lock, NULL, MUTEX_DEFAULT, NULL);
}

/*
 * Called from the svc_queuereq() interrupt routine to queue
 * a hint for svc_poll() about which transport has a pending request.
 * - insert a pointer to xprt into the xprt-ready queue (FIFO)
 * - if the xprt-ready queue is full turn the overflow flag on.
 *
 * NOTICE: pool->p_qtop is protected by the pool's request lock
 * and the caller (svc_queuereq()) must hold the lock.
 */
static void
svc_xprt_qput(SVCPOOL *pool, SVCMASTERXPRT *xprt)
{
	ASSERT(MUTEX_HELD(&pool->p_req_lock));

	/* If the overflow flag is set there is nothing we can do */
	if (pool->p_qoverflow)
		return;

	/* If the queue is full turn the overflow flag on and exit */
	if (pool->p_qtop->q_next == pool->p_qend) {
		mutex_enter(&pool->p_qend_lock);
		if (pool->p_qtop->q_next == pool->p_qend) {
			pool->p_qoverflow = TRUE;
			mutex_exit(&pool->p_qend_lock);
			return;
		}
		mutex_exit(&pool->p_qend_lock);
	}

	/* Insert a hint and move pool->p_qtop */
	pool->p_qtop->q_xprt = xprt;
	pool->p_qtop = pool->p_qtop->q_next;
}
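/*
 * A note on the ring invariants: the circular queue is empty when
 * p_qend == p_qtop (checked in svc_xprt_qget() below) and is treated
 * as full when p_qtop->q_next == p_qend (checked in svc_xprt_qput()
 * above), so one node always stays unused to keep the two states
 * distinguishable.
 */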
/*
 * Called from svc_poll() to get a hint of which transport has a
 * pending request. Returns a pointer to a transport or NULL if the
 * `xprt-ready' queue is empty.
 *
 * Since we do not acquire the pool's request lock while checking if
 * the queue is empty we may miss a request that is just being delivered.
 * However this is ok since svc_poll() will retry again until the
 * count indicates that there are pending requests for this pool.
 */
static SVCMASTERXPRT *
svc_xprt_qget(SVCPOOL *pool)
{
	SVCMASTERXPRT *xprt;

	mutex_enter(&pool->p_qend_lock);
	do {
		/*
		 * If the queue is empty return NULL.
		 * Since we do not acquire the pool's request lock which
		 * protects pool->p_qtop this is not an exact check. However,
		 * this is safe - if we miss a request here svc_poll()
		 * will retry again.
		 */
		if (pool->p_qend == pool->p_qtop) {
			mutex_exit(&pool->p_qend_lock);
			return (NULL);
		}

		/* Get a hint and move pool->p_qend */
		xprt = pool->p_qend->q_xprt;
		pool->p_qend = pool->p_qend->q_next;

		/* Skip fields deleted by svc_xprt_qdelete() */
	} while (xprt == NULL);
	mutex_exit(&pool->p_qend_lock);

	return (xprt);
}

/*
 * Delete all the references to a transport handle that
 * is being destroyed from the xprt-ready queue.
 * Deleted pointers are replaced with NULLs.
 */
static void
svc_xprt_qdelete(SVCPOOL *pool, SVCMASTERXPRT *xprt)
{
	__SVCXPRT_QNODE *q = pool->p_qend;
	__SVCXPRT_QNODE *qtop = pool->p_qtop;

	/*
	 * Delete all the references to xprt between the current
	 * position of pool->p_qend and the current pool->p_qtop.
	 */
	for (;;) {
		if (q->q_xprt == xprt)
			q->q_xprt = NULL;
		if (q == qtop)
			return;
		q = q->q_next;
	}
}

/*
 * Destructor for a master server transport handle.
 * - if there are no more non-detached threads linked to this transport
 *   then, if requested, call xp_closeproc (we don't wait for detached
 *   threads linked to this transport to complete).
 * - if there are no more threads linked to this
 *   transport then
 *	a) remove references to this transport from the xprt-ready queue
 *	b) remove a reference to this transport from the pool's transport list
 *	c) call a transport specific `destroy' function
 *	d) cancel remaining thread reservations.
 *
 * NOTICE: Caller must hold the transport's thread lock.
 */
static void
svc_xprt_cleanup(SVCMASTERXPRT *xprt, bool_t detached)
{
	ASSERT(MUTEX_HELD(&xprt->xp_thread_lock));
	ASSERT(xprt->xp_wq == NULL);

	/*
	 * If called from the last non-detached thread
	 * it should call the closeproc on this transport.
	 */
	if (!detached && xprt->xp_threads == 0 && xprt->xp_closeproc) {
		(*(xprt->xp_closeproc)) (xprt);
	}

	if (xprt->xp_threads + xprt->xp_detached_threads > 0)
		mutex_exit(&xprt->xp_thread_lock);
	else {
		/* Remove references to xprt from the `xprt-ready' queue */
		svc_xprt_qdelete(xprt->xp_pool, xprt);

		/* Unregister xprt from the pool's transport list */
		svc_xprt_unregister(xprt);
		svc_callout_free(xprt);
		SVC_DESTROY(xprt);
	}
}

/*
 * Find a dispatch routine for a given prog/vers pair.
 * This function is called from svc_getreq() to search the callout
 * table for an entry with a matching RPC program number `prog'
 * and a version range that covers `vers'.
 * - if it finds a matching entry it returns a pointer to the dispatch
 *   routine
 * - otherwise it returns NULL and fills `vers_min' and `vers_max' with,
 *   respectively, the lowest and the highest version supported for the
 *   program `prog'.
 */
static SVC_DISPATCH *
svc_callout_find(SVCXPRT *xprt, rpcprog_t prog, rpcvers_t vers,
	rpcvers_t *vers_min, rpcvers_t *vers_max)
{
	SVC_CALLOUT_TABLE *sct = xprt->xp_sct;
	int i;

	*vers_min = ~(rpcvers_t)0;
	*vers_max = 0;

	for (i = 0; i < sct->sct_size; i++) {
		SVC_CALLOUT *sc = &sct->sct_sc[i];

		if (prog == sc->sc_prog) {
			if (vers >= sc->sc_versmin && vers <= sc->sc_versmax)
				return (sc->sc_dispatch);

			if (*vers_max < sc->sc_versmax)
				*vers_max = sc->sc_versmax;
			if (*vers_min > sc->sc_versmin)
				*vers_min = sc->sc_versmin;
		}
	}

	return (NULL);
}

/*
 * Optionally free the callout table allocated for this transport by
 * the service provider.
 */
static void
svc_callout_free(SVCMASTERXPRT *xprt)
{
	SVC_CALLOUT_TABLE *sct = xprt->xp_sct;

	if (sct->sct_free) {
		kmem_free(sct->sct_sc, sct->sct_size * sizeof (SVC_CALLOUT));
		kmem_free(sct, sizeof (SVC_CALLOUT_TABLE));
	}
}

/*
 * Send a reply to an RPC request
 *
 * PSARC 2003/523 Contract Private Interface
 * svc_sendreply
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
bool_t
svc_sendreply(const SVCXPRT *clone_xprt, const xdrproc_t xdr_results,
	const caddr_t xdr_location)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = SUCCESS;
	rply.acpted_rply.ar_results.where = xdr_location;
	rply.acpted_rply.ar_results.proc = xdr_results;

	return (SVC_REPLY((SVCXPRT *)clone_xprt, &rply));
}

/*
 * No procedure error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_noproc
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_noproc(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = PROC_UNAVAIL;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Can't decode arguments error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_decode
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_decode(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = GARBAGE_ARGS;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}
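/*
 * A hedged sketch of a dispatch routine of the kind returned by
 * svc_callout_find() and driven by svc_getreq() below; it replies with
 * svc_sendreply() or the svcerr_*() routines above. The procedure
 * numbers and argument type are hypothetical:
 *
 *	static void
 *	example_dispatch(struct svc_req *req, SVCXPRT *clone_xprt)
 *	{
 *		int arg;
 *
 *		switch (req->rq_proc) {
 *		case 0:
 *			(void) svc_sendreply(clone_xprt, xdr_void, NULL);
 *			break;
 *		case 1:
 *			if (!SVC_GETARGS(clone_xprt, xdr_int,
 *			    (char *)&arg)) {
 *				svcerr_decode(clone_xprt);
 *				break;
 *			}
 *			... service the request ...
 *			(void) svc_sendreply(clone_xprt, xdr_int,
 *			    (caddr_t)&arg);
 *			(void) SVC_FREEARGS(clone_xprt, xdr_int,
 *			    (char *)&arg);
 *			break;
 *		default:
 *			svcerr_noproc(clone_xprt);
 *			break;
 *		}
 *	}
 */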
/*
 * Some system error
 */
void
svcerr_systemerr(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = SYSTEM_ERR;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Authentication error reply
 */
void
svcerr_auth(const SVCXPRT *clone_xprt, const enum auth_stat why)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_DENIED;
	rply.rjcted_rply.rj_stat = AUTH_ERROR;
	rply.rjcted_rply.rj_why = why;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Authentication too weak error reply
 */
void
svcerr_weakauth(const SVCXPRT *clone_xprt)
{
	svcerr_auth((SVCXPRT *)clone_xprt, AUTH_TOOWEAK);
}

/*
 * Authentication error; bad credentials
 */
void
svcerr_badcred(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_DENIED;
	rply.rjcted_rply.rj_stat = AUTH_ERROR;
	rply.rjcted_rply.rj_why = AUTH_BADCRED;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Program unavailable error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_noprog
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_noprog(const SVCXPRT *clone_xprt)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = PROG_UNAVAIL;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}

/*
 * Program version mismatch error reply
 *
 * PSARC 2003/523 Contract Private Interface
 * svcerr_progvers
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
void
svcerr_progvers(const SVCXPRT *clone_xprt,
	const rpcvers_t low_vers, const rpcvers_t high_vers)
{
	struct rpc_msg rply;

	rply.rm_direction = REPLY;
	rply.rm_reply.rp_stat = MSG_ACCEPTED;
	rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
	rply.acpted_rply.ar_stat = PROG_MISMATCH;
	rply.acpted_rply.ar_vers.low = low_vers;
	rply.acpted_rply.ar_vers.high = high_vers;
	SVC_FREERES((SVCXPRT *)clone_xprt);
	SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
}
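/*
 * A hedged sketch of the callout table searched by svc_callout_find()
 * above. A service provider hands a table like this to the transport;
 * the program/version numbers and names are hypothetical (including the
 * example_dispatch routine from the sketch above) and the field layout
 * follows the sct_size/sct_free/sct_sc usage in this file:
 *
 *	static SVC_CALLOUT example_sc[] = {
 *		{ EXAMPLE_PROG, EXAMPLE_VERSMIN, EXAMPLE_VERSMAX,
 *		    example_dispatch }
 *	};
 *
 *	static SVC_CALLOUT_TABLE example_sct = {
 *		sizeof (example_sc) / sizeof (example_sc[0]),
 *		FALSE,		not kmem-allocated; skip svc_callout_free()
 *		example_sc
 *	};
 */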
/*
 * Get server side input from some transport.
 *
 * Statement of authentication parameters management:
 * This function owns and manages all authentication parameters, specifically
 * the "raw" parameters (msg.rm_call.cb_cred and msg.rm_call.cb_verf) and
 * the "cooked" credentials (rqst->rq_clntcred).
 * However, this function does not know the structure of the cooked
 * credentials, so it makes the following assumptions:
 *	a) the structure is contiguous (no pointers), and
 *	b) the cred structure size does not exceed RQCRED_SIZE bytes.
 * In all events, all three parameters are freed upon exit from this routine.
 * The storage is trivially managed on the call stack in user land, but
 * is malloced in kernel land.
 *
 * Note: the xprt's xp_svc_lock is not held while the service's dispatch
 * routine is running. If we decide to implement svc_unregister(), we'll
 * need to decide whether it's okay for a thread to unregister a service
 * while a request is being processed. If we decide that this is a
 * problem, we can probably use some sort of reference counting scheme to
 * keep the callout entry from going away until the request has completed.
 */
static void
svc_getreq(
	SVCXPRT *clone_xprt,	/* clone transport handle */
	mblk_t *mp)
{
	struct rpc_msg msg;
	struct svc_req r;
	char *cred_area;	/* too big to allocate on the call stack */

	TRACE_0(TR_FAC_KRPC, TR_SVC_GETREQ_START,
	    "svc_getreq_start:");

	ASSERT(clone_xprt->xp_master != NULL);
	ASSERT(!is_system_labeled() || DB_CRED(mp) != NULL ||
	    mp->b_datap->db_type != M_DATA);

	/*
	 * Firstly, allocate the authentication parameters' storage
	 */
	mutex_enter(&rqcred_lock);
	if (rqcred_head) {
		cred_area = rqcred_head;

		/* LINTED pointer alignment */
		rqcred_head = *(caddr_t *)rqcred_head;
		mutex_exit(&rqcred_lock);
	} else {
		mutex_exit(&rqcred_lock);
		cred_area = kmem_alloc(2 * MAX_AUTH_BYTES + RQCRED_SIZE,
		    KM_SLEEP);
	}
	msg.rm_call.cb_cred.oa_base = cred_area;
	msg.rm_call.cb_verf.oa_base = &(cred_area[MAX_AUTH_BYTES]);
	r.rq_clntcred = &(cred_area[2 * MAX_AUTH_BYTES]);

	/*
	 * The underlying transport recv routine may modify mblk data
	 * and make it difficult to extract the label afterwards. So
	 * get the label from the raw mblk data now.
	 */
	if (is_system_labeled()) {
		mblk_t *lmp;

		r.rq_label = kmem_alloc(sizeof (bslabel_t), KM_SLEEP);
		if (DB_CRED(mp) != NULL)
			lmp = mp;
		else {
			ASSERT(mp->b_cont != NULL);
			lmp = mp->b_cont;
			ASSERT(DB_CRED(lmp) != NULL);
		}
		bcopy(label2bslabel(crgetlabel(DB_CRED(lmp))), r.rq_label,
		    sizeof (bslabel_t));
	} else {
		r.rq_label = NULL;
	}

	/*
	 * Now receive a message from the transport.
	 */
	if (SVC_RECV(clone_xprt, mp, &msg)) {
		void (*dispatchroutine) (struct svc_req *, SVCXPRT *);
		rpcvers_t vers_min;
		rpcvers_t vers_max;
		bool_t no_dispatch;
		enum auth_stat why;

		/*
		 * Find the registered program and call its
		 * dispatch routine.
		 */
		r.rq_xprt = clone_xprt;
		r.rq_prog = msg.rm_call.cb_prog;
		r.rq_vers = msg.rm_call.cb_vers;
		r.rq_proc = msg.rm_call.cb_proc;
		r.rq_cred = msg.rm_call.cb_cred;

		/*
		 * First authenticate the message.
		 */
		TRACE_0(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_START,
		    "svc_getreq_auth_start:");
		if ((why = sec_svc_msg(&r, &msg, &no_dispatch)) != AUTH_OK) {
			TRACE_1(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_END,
			    "svc_getreq_auth_end:(%S)", "failed");
			svcerr_auth(clone_xprt, why);
			/*
			 * Free the arguments.
			 */
			(void) SVC_FREEARGS(clone_xprt, NULL, NULL);
		} else if (no_dispatch) {
			/*
			 * XXX - when bug id 4053736 is done, remove
			 * the SVC_FREEARGS() call.
			 */
			(void) SVC_FREEARGS(clone_xprt, NULL, NULL);
		} else {
			TRACE_1(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_END,
			    "svc_getreq_auth_end:(%S)", "good");

			dispatchroutine = svc_callout_find(clone_xprt,
			    r.rq_prog, r.rq_vers, &vers_min, &vers_max);

			if (dispatchroutine) {
				(*dispatchroutine) (&r, clone_xprt);
			} else {
				/*
				 * If we got here, the program or version
				 * is not served ...
				 */
				if (vers_max == 0 ||
				    version_keepquiet(clone_xprt))
					svcerr_noprog(clone_xprt);
				else
					svcerr_progvers(clone_xprt, vers_min,
					    vers_max);

				/*
				 * Free the arguments. For successful calls
				 * this is done by the dispatch routine.
				 */
				(void) SVC_FREEARGS(clone_xprt, NULL, NULL);
				/* Fall through to ... */
			}

			/*
			 * Call the cleanup procedure for RPCSEC_GSS.
			 * This is a hack since there is currently no
			 * op, such as SVC_CLEANAUTH. rpc_gss_cleanup
			 * should only be called for a non null proc.
			 * Null procs in RPC GSS are overloaded to
			 * provide context setup and control. The main
			 * purpose of rpc_gss_cleanup is to decrement the
			 * reference count associated with the cached
			 * GSS security context. We should never get here
			 * for an RPCSEC_GSS null proc since *no_dispatch
			 * would have been set to true from sec_svc_msg above.
			 */
			if (r.rq_cred.oa_flavor == RPCSEC_GSS)
				rpc_gss_cleanup(clone_xprt);
		}
	}

	if (r.rq_label != NULL)
		kmem_free(r.rq_label, sizeof (bslabel_t));

	/*
	 * Free the authentication parameters' storage
	 */
	mutex_enter(&rqcred_lock);
	/* LINTED pointer alignment */
	*(caddr_t *)cred_area = rqcred_head;
	rqcred_head = cred_area;
	mutex_exit(&rqcred_lock);
}

/*
 * Allocate a new clone transport handle.
 */
static SVCXPRT *
svc_clone_init(void)
{
	SVCXPRT *clone_xprt;

	clone_xprt = kmem_zalloc(sizeof (SVCXPRT), KM_SLEEP);
	clone_xprt->xp_cred = crget();
	return (clone_xprt);
}

/*
 * Free memory allocated by svc_clone_init.
 */
static void
svc_clone_free(SVCXPRT *clone_xprt)
{
	/* Free credentials from crget() */
	if (clone_xprt->xp_cred)
		crfree(clone_xprt->xp_cred);
	kmem_free(clone_xprt, sizeof (SVCXPRT));
}

/*
 * Link a per-thread clone transport handle to a master
 * - increment a thread reference count on the master
 * - copy some of the master's fields to the clone
 * - call a transport specific clone routine.
 */
static void
svc_clone_link(SVCMASTERXPRT *xprt, SVCXPRT *clone_xprt)
{
	cred_t *cred = clone_xprt->xp_cred;

	ASSERT(cred);

	/*
	 * Bump up the master's thread count.
	 * Linking a per-thread clone transport handle to a master
	 * associates a service thread with the master.
	 */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_threads++;
	mutex_exit(&xprt->xp_thread_lock);

	/* Clear everything */
	bzero(clone_xprt, sizeof (SVCXPRT));

	/* Set a pointer to the master transport structure */
	clone_xprt->xp_master = xprt;

	/* Structure copy of all the common fields */
	clone_xprt->xp_xpc = xprt->xp_xpc;

	/* Restore per-thread fields (xp_cred) */
	clone_xprt->xp_cred = cred;

	/*
	 * NOTICE: There is no transport-type specific code now.
	 * If you want to add transport-type specific cloning code
	 * add one more operation (e.g. xp_clone()) to svc_ops,
	 * implement it for each transport type, and call it here
	 * through an appropriate macro (e.g. SVC_CLONE()).
	 */
}

/*
 * Unlink a non-detached clone transport handle from a master
 * - decrement a thread reference count on the master
 * - if the transport is closing (xp_wq is NULL) call svc_xprt_cleanup();
 *   if this is the last non-detached thread, or the last thread overall,
 *   on this transport then it will close and/or destroy the transport
 * - call a transport specific function to destroy the clone handle
 * - clear xp_master to avoid recursion.
 */
static void
svc_clone_unlink(SVCXPRT *clone_xprt)
{
	SVCMASTERXPRT *xprt = clone_xprt->xp_master;

	/* This cannot be a detached thread */
	ASSERT(!clone_xprt->xp_detached);
	ASSERT(xprt->xp_threads > 0);

	/* Decrement a reference count on the transport */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_threads--;

	/* svc_xprt_cleanup() unlocks xp_thread_lock or destroys xprt */
	if (xprt->xp_wq)
		mutex_exit(&xprt->xp_thread_lock);
	else
		svc_xprt_cleanup(xprt, FALSE);

	/* Call a transport specific clone `destroy' function */
	SVC_CLONE_DESTROY(clone_xprt);

	/* Clear xp_master */
	clone_xprt->xp_master = NULL;
}

/*
 * Unlink a detached clone transport handle from a master
 * - decrement the thread count on the master
 * - if the transport is closing (xp_wq is NULL) call svc_xprt_cleanup();
 *   if this is the last thread on this transport then it will destroy
 *   the transport.
 * - call a transport specific function to destroy the clone handle
 * - clear xp_master to avoid recursion.
 */
static void
svc_clone_unlinkdetached(SVCXPRT *clone_xprt)
{
	SVCMASTERXPRT *xprt = clone_xprt->xp_master;

	/* This must be a detached thread */
	ASSERT(clone_xprt->xp_detached);
	ASSERT(xprt->xp_detached_threads > 0);
	ASSERT(xprt->xp_threads + xprt->xp_detached_threads > 0);

	/* Grab xprt->xp_thread_lock and decrement link counts */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_detached_threads--;

	/* svc_xprt_cleanup() unlocks xp_thread_lock or destroys xprt */
	if (xprt->xp_wq)
		mutex_exit(&xprt->xp_thread_lock);
	else
		svc_xprt_cleanup(xprt, TRUE);

	/* Call a transport specific clone `destroy' function */
	SVC_CLONE_DESTROY(clone_xprt);

	/* Clear xp_master */
	clone_xprt->xp_master = NULL;
}

/*
 * Try to exit a non-detached service thread
 * - check if there are enough threads left
 * - if this thread (i.e. its clone transport handle) is linked
 *   to a master transport then unlink it
 * - free the clone structure
 * - return to userland for thread exit
 *
 * If this is the last non-detached or the last thread on this
 * transport then the call to svc_clone_unlink() will, respectively,
 * close and/or destroy the transport.
 */
static void
svc_thread_exit(SVCPOOL *pool, SVCXPRT *clone_xprt)
{
	if (clone_xprt->xp_master)
		svc_clone_unlink(clone_xprt);
	svc_clone_free(clone_xprt);

	mutex_enter(&pool->p_thread_lock);
	pool->p_threads--;
	if (pool->p_closing && svc_pool_tryexit(pool))
		/* return - thread exit will be handled at user level */
		return;
	mutex_exit(&pool->p_thread_lock);

	/* return - thread exit will be handled at user level */
}

/*
 * Exit a detached service thread that returned to svc_run
 * - decrement the `detached thread' count for the pool
 * - unlink the detached clone transport handle from the master
 * - free the clone structure
 * - return to userland for thread exit
 *
 * If this is the last thread on this transport then the call
 * to svc_clone_unlinkdetached() will destroy the transport.
 */
static void
svc_thread_exitdetached(SVCPOOL *pool, SVCXPRT *clone_xprt)
{
	/* This must be a detached thread */
	ASSERT(clone_xprt->xp_master);
	ASSERT(clone_xprt->xp_detached);
	ASSERT(!MUTEX_HELD(&pool->p_thread_lock));

	svc_clone_unlinkdetached(clone_xprt);
	svc_clone_free(clone_xprt);

	mutex_enter(&pool->p_thread_lock);

	ASSERT(pool->p_reserved_threads >= 0);
	ASSERT(pool->p_detached_threads > 0);

	pool->p_detached_threads--;
	if (pool->p_closing && svc_pool_tryexit(pool))
		/* return - thread exit will be handled at user level */
		return;
	mutex_exit(&pool->p_thread_lock);

	/* return - thread exit will be handled at user level */
}

/*
 * PSARC 2003/523 Contract Private Interface
 * svc_wait
 * Changes must be reviewed by Solaris File Sharing
 * Changes must be communicated to contract-2003-523@sun.com
 */
int
svc_wait(int id)
{
	SVCPOOL *pool;
	int err = 0;
	struct svc_globals *svc;

	svc = zone_getspecific(svc_zone_key, curproc->p_zone);
	mutex_enter(&svc->svc_plock);
	pool = svc_pool_find(svc, id);
	mutex_exit(&svc->svc_plock);

	if (pool == NULL)
		return (ENOENT);

	mutex_enter(&pool->p_user_lock);

	/* Check if there's already a user thread waiting on this pool */
	if (pool->p_user_waiting) {
		mutex_exit(&pool->p_user_lock);
		return (EBUSY);
	}

	pool->p_user_waiting = TRUE;

	/* Go to sleep, waiting for the signaled flag. */
	while (!pool->p_signal_create_thread && !pool->p_user_exit) {
		if (cv_wait_sig(&pool->p_user_cv, &pool->p_user_lock) == 0) {
			/* Interrupted, return to handle exit or signal */
			pool->p_user_waiting = FALSE;
			pool->p_signal_create_thread = FALSE;
			mutex_exit(&pool->p_user_lock);

			/*
			 * The thread has been interrupted and therefore
			 * the service daemon is leaving as well, so
			 * let's go ahead and remove the service
			 * pool at this time.
			 */
			mutex_enter(&svc->svc_plock);
			svc_pool_unregister(svc, pool);
			mutex_exit(&svc->svc_plock);

			return (EINTR);
		}
	}

	pool->p_signal_create_thread = FALSE;
	pool->p_user_waiting = FALSE;

	/*
	 * About to exit the service pool. Set the return value
	 * to let the userland code know our intent. Signal
	 * svc_thread_creator() so that it can clean up the
	 * pool structure.
	 */
	if (pool->p_user_exit) {
		err = ECANCELED;
		cv_signal(&pool->p_user_cv);
	}

	mutex_exit(&pool->p_user_lock);

	/* Return to userland with error code, for possible thread creation. */
	return (err);
}

/*
 * `Service threads' creator thread.
 * The creator thread waits for a signal to create a new service thread.
 */
static void
svc_thread_creator(SVCPOOL *pool)
{
	callb_cpr_t cpr_info;	/* CPR info for the creator thread */

	CALLB_CPR_INIT(&cpr_info, &pool->p_creator_lock, callb_generic_cpr,
	    "svc_thread_creator");

	for (;;) {
		mutex_enter(&pool->p_creator_lock);

		/* Check if someone set the exit flag */
		if (pool->p_creator_exit)
			break;

		/* Clear the `signaled' flag and go to sleep */
		pool->p_creator_signaled = FALSE;

		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		cv_wait(&pool->p_creator_cv, &pool->p_creator_lock);
		CALLB_CPR_SAFE_END(&cpr_info, &pool->p_creator_lock);

		/* Check if someone signaled to exit */
		if (pool->p_creator_exit)
			break;

		mutex_exit(&pool->p_creator_lock);

		mutex_enter(&pool->p_thread_lock);

		/*
		 * When the pool is in closing state and all the transports
		 * are gone the creator should not create any new threads.
		 */
		if (pool->p_closing) {
			rw_enter(&pool->p_lrwlock, RW_READER);
			if (pool->p_lcount == 0) {
				rw_exit(&pool->p_lrwlock);
				mutex_exit(&pool->p_thread_lock);
				continue;
			}
			rw_exit(&pool->p_lrwlock);
		}

		/*
		 * Create a new service thread now.
		 */
		ASSERT(pool->p_reserved_threads >= 0);
		ASSERT(pool->p_detached_threads >= 0);

		if (pool->p_threads + pool->p_detached_threads <
		    pool->p_maxthreads) {
			/*
			 * Signal the service pool wait thread
			 * only if it hasn't already been signaled.
			 */
			mutex_enter(&pool->p_user_lock);
			if (pool->p_signal_create_thread == FALSE) {
				pool->p_signal_create_thread = TRUE;
				cv_signal(&pool->p_user_cv);
			}
			mutex_exit(&pool->p_user_lock);
		}

		mutex_exit(&pool->p_thread_lock);
	}

	/*
	 * Pool is closed. Cleanup and exit.
	 */

	/* Signal the userland creator thread that it can stop now. */
	mutex_enter(&pool->p_user_lock);
	pool->p_user_exit = TRUE;
	cv_broadcast(&pool->p_user_cv);
	mutex_exit(&pool->p_user_lock);

	/* Wait for svc_wait() to be done with the pool */
	mutex_enter(&pool->p_user_lock);
	while (pool->p_user_waiting) {
		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		cv_wait(&pool->p_user_cv, &pool->p_user_lock);
		CALLB_CPR_SAFE_END(&cpr_info, &pool->p_creator_lock);
	}
	mutex_exit(&pool->p_user_lock);

	CALLB_CPR_EXIT(&cpr_info);
	svc_pool_cleanup(pool);
	zthread_exit();
}

/*
 * If the creator thread is idle signal it to create
 * a new service thread.
 */
static void
svc_creator_signal(SVCPOOL *pool)
{
	mutex_enter(&pool->p_creator_lock);
	if (pool->p_creator_signaled == FALSE) {
		pool->p_creator_signaled = TRUE;
		cv_signal(&pool->p_creator_cv);
	}
	mutex_exit(&pool->p_creator_lock);
}
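/*
 * A hedged userland-side sketch of the creator protocol implemented by
 * svc_wait() and svc_do_run() above, as referenced in the overview at
 * the top of this file. Error handling and the exact nfssys() calling
 * convention are simplified, and `poolid' is assumed to match the pool:
 *
 *	static void *
 *	work(void *arg)
 *	{
 *		(void) nfssys(SVCPOOL_RUN, arg);	svc_do_run()
 *		thr_exit(NULL);
 *		return (NULL);
 *	}
 *
 *	...
 *	while (nfssys(SVCPOOL_WAIT, &poolid) == 0)	svc_wait()
 *		(void) thr_create(NULL, 0, work, &poolid,
 *		    THR_DETACHED, NULL);
 */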
1836 */ 1837 static void 1838 svc_creator_signalexit(SVCPOOL *pool) 1839 { 1840 mutex_enter(&pool->p_creator_lock); 1841 pool->p_creator_exit = TRUE; 1842 cv_signal(&pool->p_creator_cv); 1843 mutex_exit(&pool->p_creator_lock); 1844 } 1845 1846 /* 1847 * Polling part of the svc_run(). 1848 * - search for a transport with a pending request 1849 * - when one is found then latch the request lock and return to svc_run() 1850 * - if there is no request go asleep and wait for a signal 1851 * - handle two exceptions: 1852 * a) current transport is closing 1853 * b) timeout waiting for a new request 1854 * in both cases return to svc_run() 1855 */ 1856 static SVCMASTERXPRT * 1857 svc_poll(SVCPOOL *pool, SVCMASTERXPRT *xprt, SVCXPRT *clone_xprt) 1858 { 1859 /* 1860 * Main loop iterates until 1861 * a) we find a pending request, 1862 * b) detect that the current transport is closing 1863 * c) time out waiting for a new request. 1864 */ 1865 for (;;) { 1866 SVCMASTERXPRT *next; 1867 clock_t timeleft; 1868 1869 /* 1870 * Step 1. 1871 * Check if there is a pending request on the current 1872 * transport handle so that we can avoid cloning. 1873 * If so then decrement the `pending-request' count for 1874 * the pool and return to svc_run(). 1875 * 1876 * We need to prevent a potential starvation. When 1877 * a selected transport has all pending requests coming in 1878 * all the time then the service threads will never switch to 1879 * another transport. With a limited number of service 1880 * threads some transports may be never serviced. 1881 * To prevent such a scenario we pick up at most 1882 * pool->p_max_same_xprt requests from the same transport 1883 * and then take a hint from the xprt-ready queue or walk 1884 * the transport list. 1885 */ 1886 if (xprt && xprt->xp_req_head && (!pool->p_qoverflow || 1887 clone_xprt->xp_same_xprt++ < pool->p_max_same_xprt)) { 1888 mutex_enter(&xprt->xp_req_lock); 1889 if (xprt->xp_req_head) { 1890 mutex_enter(&pool->p_req_lock); 1891 pool->p_reqs--; 1892 if (pool->p_reqs == 0) 1893 pool->p_qoverflow = FALSE; 1894 mutex_exit(&pool->p_req_lock); 1895 1896 return (xprt); 1897 } 1898 mutex_exit(&xprt->xp_req_lock); 1899 } 1900 clone_xprt->xp_same_xprt = 0; 1901 1902 /* 1903 * Step 2. 1904 * If there is no request on the current transport try to 1905 * find another transport with a pending request. 1906 */ 1907 mutex_enter(&pool->p_req_lock); 1908 pool->p_walkers++; 1909 mutex_exit(&pool->p_req_lock); 1910 1911 /* 1912 * Make sure that transports will not be destroyed just 1913 * while we are checking them. 1914 */ 1915 rw_enter(&pool->p_lrwlock, RW_READER); 1916 1917 for (;;) { 1918 SVCMASTERXPRT *hint; 1919 1920 /* 1921 * Get the next transport from the xprt-ready queue. 1922 * This is a hint. There is no guarantee that the 1923 * transport still has a pending request since it 1924 * could be picked up by another thread in step 1. 1925 * 1926 * If the transport has a pending request then keep 1927 * it locked. Decrement the `pending-requests' for 1928 * the pool and `walking-threads' counts, and return 1929 * to svc_run(). 
			 */
			hint = svc_xprt_qget(pool);

			if (hint && hint->xp_req_head) {
				mutex_enter(&hint->xp_req_lock);
				if (hint->xp_req_head) {
					rw_exit(&pool->p_lrwlock);

					mutex_enter(&pool->p_req_lock);
					pool->p_reqs--;
					if (pool->p_reqs == 0)
						pool->p_qoverflow = FALSE;
					pool->p_walkers--;
					mutex_exit(&pool->p_req_lock);

					return (hint);
				}
				mutex_exit(&hint->xp_req_lock);
			}

			/*
			 * If there was no hint in the xprt-ready queue then
			 * - if there are fewer pending requests than polling
			 *   threads go asleep
			 * - otherwise check if there was an overflow in the
			 *   xprt-ready queue; if so, then we need to break
			 *   the `drain' mode
			 */
			if (hint == NULL) {
				if (pool->p_reqs < pool->p_walkers) {
					mutex_enter(&pool->p_req_lock);
					if (pool->p_reqs < pool->p_walkers)
						goto sleep;
					mutex_exit(&pool->p_req_lock);
				}
				if (pool->p_qoverflow) {
					break;
				}
			}
		}

		/*
		 * If there was an overflow in the xprt-ready queue then we
		 * need to switch to the `drain' mode, i.e. walk through the
		 * pool's transport list and search for a transport with a
		 * pending request. If we manage to drain all the pending
		 * requests then we can clear the overflow flag. This will
		 * switch svc_poll() back to taking hints from the xprt-ready
		 * queue (which is generally more efficient).
		 *
		 * If there are no registered transports simply go asleep.
		 */
		if (xprt == NULL && pool->p_lhead == NULL) {
			mutex_enter(&pool->p_req_lock);
			goto sleep;
		}

		/*
		 * `Walk' through the pool's list of master server
		 * transport handles. Continue to loop until there are
		 * fewer pending requests than walking threads.
		 */
		next = xprt ? xprt->xp_next : pool->p_lhead;

		for (;;) {
			/*
			 * Check if there is a request on this transport.
			 *
			 * Since blocking on a locked mutex is very expensive
			 * check for a request without a lock first. We may
			 * miss a request that is just being delivered, but
			 * this will cost at most one full walk through the
			 * list.
			 */
			if (next->xp_req_head) {
				/*
				 * Check again, now with a lock.
				 */
				mutex_enter(&next->xp_req_lock);
				if (next->xp_req_head) {
					rw_exit(&pool->p_lrwlock);

					mutex_enter(&pool->p_req_lock);
					pool->p_reqs--;
					if (pool->p_reqs == 0)
						pool->p_qoverflow = FALSE;
					pool->p_walkers--;
					mutex_exit(&pool->p_req_lock);

					return (next);
				}
				mutex_exit(&next->xp_req_lock);
			}

			/*
			 * Continue to `walk' through the pool's
			 * transport list until there are fewer requests
			 * than walkers. Check this condition without
			 * a lock first to avoid contention on a mutex.
			 */
			if (pool->p_reqs < pool->p_walkers) {
				/* Check again, now with the lock. */
				mutex_enter(&pool->p_req_lock);
				if (pool->p_reqs < pool->p_walkers)
					break;	/* goto sleep */
				mutex_exit(&pool->p_req_lock);
			}

			next = next->xp_next;
		}

	sleep:
		/*
		 * No work to do. Stop the `walk' and go asleep.
		 * Decrement the `walking-threads' count for the pool.
		 */
		pool->p_walkers--;
		rw_exit(&pool->p_lrwlock);

		/*
		 * Count us as asleep, mark this thread as safe
		 * for suspend and wait for a request.
		 */
		pool->p_asleep++;
		timeleft = cv_timedwait_sig(&pool->p_req_cv, &pool->p_req_lock,
		    pool->p_timeout + lbolt);

		/*
		 * If the drowsy flag is on this means that
		 * someone has signaled a wakeup. In such a case
		 * the `asleep-threads' count has already been updated
		 * so just clear the flag.
		 *
		 * If the drowsy flag is off then we need to update
		 * the `asleep-threads' count.
		 */
		if (pool->p_drowsy) {
			pool->p_drowsy = FALSE;
			/*
			 * If the thread is here because it timed out,
			 * instead of returning SVC_ETIMEDOUT, it is
			 * time to do some more work.
			 */
			if (timeleft == -1)
				timeleft = 1;
		} else {
			pool->p_asleep--;
		}
		mutex_exit(&pool->p_req_lock);

		/*
		 * If we received a signal while waiting for a
		 * request, inform svc_run(), so that we can return
		 * to user level and restart the call.
		 */
		if (timeleft == 0)
			return (SVC_EINTR);

		/*
		 * If the current transport is gone then notify
		 * svc_run() to unlink from it.
		 */
		if (xprt && xprt->xp_wq == NULL)
			return (SVC_EXPRTGONE);

		/*
		 * If we have timed out waiting for a request inform
		 * svc_run() that we probably don't need this thread.
		 */
		if (timeleft == -1)
			return (SVC_ETIMEDOUT);
	}
}
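/*
 * Illustration only -- not part of this file.  The unlocked-check /
 * locked-recheck pattern that svc_poll() applies above to xp_req_head
 * and to the p_reqs/p_walkers comparison, shown in isolation.  A racy
 * miss is harmless because the caller simply keeps walking.  The names
 * foo_t, foo_work_t, foo_head, foo_lock and fw_next are hypothetical.
 */
#if 0
static foo_work_t *
foo_trydequeue(foo_t *foo)
{
	foo_work_t *w;

	/*
	 * Cheap unlocked peek first: blocking on a contended mutex is
	 * expensive, and a stale NULL only delays pickup by one pass.
	 */
	if (foo->foo_head == NULL)
		return (NULL);

	/* Recheck under the lock - another thread may have raced us. */
	mutex_enter(&foo->foo_lock);
	if ((w = foo->foo_head) != NULL)
		foo->foo_head = w->fw_next;
	mutex_exit(&foo->foo_lock);

	return (w);
}
#endif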
/*
 * Main loop of the kernel RPC server
 * - wait for input (find a transport with a pending request).
 * - dequeue the request
 * - call a registered server routine to process the request
 *
 * There can be many threads running concurrently in this loop
 * on the same or on different transports.
 */
static int
svc_run(SVCPOOL *pool)
{
	SVCMASTERXPRT *xprt = NULL;	/* master transport handle */
	SVCXPRT *clone_xprt;		/* clone for this thread */
	struct svc_globals *svc;
	proc_t *p = ttoproc(curthread);

	/* Allocate a clone transport handle for this thread */
	clone_xprt = svc_clone_init();

	/*
	 * The loop iterates until the thread becomes
	 * idle too long or the transport is gone.
	 */
	for (;;) {
		SVCMASTERXPRT *next;
		mblk_t *mp;

		TRACE_0(TR_FAC_KRPC, TR_SVC_RUN, "svc_run");

		/*
		 * If the process is exiting/killed, return
		 * immediately without processing any more
		 * requests.
		 */
		if (p->p_flag & (SEXITING | SKILLED)) {
			svc_thread_exit(pool, clone_xprt);

			/*
			 * Thread has been interrupted and therefore
			 * the service daemon is leaving as well so
			 * let's go ahead and remove the service
			 * pool at this time.
			 */
			svc = zone_getspecific(svc_zone_key, curproc->p_zone);
			mutex_enter(&svc->svc_plock);
			svc_pool_unregister(svc, pool);
			mutex_exit(&svc->svc_plock);

			return (0);
		}

		/* Find a transport with a pending request */
		next = svc_poll(pool, xprt, clone_xprt);

		/*
		 * If svc_poll() finds a transport with a request
		 * it latches xp_req_lock on it. Therefore we need
		 * to dequeue the request and release the lock as
		 * soon as possible.
		 */
		ASSERT(next != NULL &&
		    (next == SVC_EXPRTGONE ||
		    next == SVC_ETIMEDOUT ||
		    next == SVC_EINTR ||
		    MUTEX_HELD(&next->xp_req_lock)));

		/* Ooops! Current transport is closing. Unlink now */
		if (next == SVC_EXPRTGONE) {
			svc_clone_unlink(clone_xprt);
			xprt = NULL;
			continue;
		}

		/* Ooops! Timeout while waiting for a request. Exit */
		if (next == SVC_ETIMEDOUT) {
			svc_thread_exit(pool, clone_xprt);
			return (0);
		}

		/*
		 * Interrupted by a signal while waiting for a
		 * request. Return to userspace and restart.
		 */
		if (next == SVC_EINTR) {
			svc_thread_exit(pool, clone_xprt);

			/*
			 * Thread has been interrupted and therefore
			 * the service daemon is leaving as well so
			 * let's go ahead and remove the service
			 * pool at this time.
			 */
			svc = zone_getspecific(svc_zone_key, curproc->p_zone);
			mutex_enter(&svc->svc_plock);
			svc_pool_unregister(svc, pool);
			mutex_exit(&svc->svc_plock);

			return (EINTR);
		}

		/*
		 * De-queue the request and release the request lock
		 * on this transport (latched by svc_poll()).
		 */
		mp = next->xp_req_head;
		next->xp_req_head = mp->b_next;
		mp->b_next = (mblk_t *)0;

		TRACE_2(TR_FAC_KRPC, TR_NFSFP_QUE_REQ_DEQ,
		    "rpc_que_req_deq:pool %p mp %p", pool, mp);
		mutex_exit(&next->xp_req_lock);

		/*
		 * If this is a new request on a current transport then
		 * the clone structure is already properly initialized.
		 * Otherwise, if the request is on a different transport,
		 * unlink from the current master and link to
		 * the one we got a request on.
		 */
		if (next != xprt) {
			if (xprt)
				svc_clone_unlink(clone_xprt);
			svc_clone_link(next, clone_xprt);
			xprt = next;
		}

		/*
		 * If there are more requests and req_cv hasn't
		 * been signaled yet then wake up one more thread now.
		 *
		 * We avoid signaling req_cv until the most recently
		 * signaled thread wakes up and gets CPU to clear
		 * the `drowsy' flag.
		 */
		if (!(pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
		    pool->p_asleep == 0)) {
			mutex_enter(&pool->p_req_lock);

			if (pool->p_drowsy ||
			    pool->p_reqs <= pool->p_walkers ||
			    pool->p_asleep == 0)
				mutex_exit(&pool->p_req_lock);
			else {
				pool->p_asleep--;
				pool->p_drowsy = TRUE;

				cv_signal(&pool->p_req_cv);
				mutex_exit(&pool->p_req_lock);
			}
		}

		/*
		 * If there are no asleep/signaled threads, we are
		 * still below the pool->p_maxthreads limit, and no thread is
		 * currently being created then signal the creator
		 * for one more service thread.
		 *
		 * The asleep and drowsy checks are not protected
		 * by a lock since it hurts performance and a wrong
		 * decision is not essential.
		 */
		if (pool->p_asleep == 0 && !pool->p_drowsy &&
		    pool->p_threads + pool->p_detached_threads <
		    pool->p_maxthreads)
			svc_creator_signal(pool);

		/*
		 * Process the request.
		 */
		svc_getreq(clone_xprt, mp);

		/* If thread had a reservation it should have been canceled */
		ASSERT(!clone_xprt->xp_reserved);

		/*
		 * If the clone is marked detached then exit.
		 * The rpcmod slot has already been released
		 * when we detached this thread.
		 */
		if (clone_xprt->xp_detached) {
			svc_thread_exitdetached(pool, clone_xprt);
			return (0);
		}

		/*
		 * Release our reference on the rpcmod
		 * slot attached to xp_wq->q_ptr.
		 */
		(*RELE_PROC(xprt)) (clone_xprt->xp_wq, NULL);
	}
	/* NOTREACHED */
}

/*
 * Flush any pending requests for the queue and
 * free the associated mblks.
 */
void
svc_queueclean(queue_t *q)
{
	SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0];
	mblk_t *mp;
	SVCPOOL *pool;

	/*
	 * clean up the requests
	 */
	mutex_enter(&xprt->xp_req_lock);
	pool = xprt->xp_pool;
	while ((mp = xprt->xp_req_head) != NULL) {
		/* remove the request from the list and decrement p_reqs */
		xprt->xp_req_head = mp->b_next;
		mutex_enter(&pool->p_req_lock);
		mp->b_next = (mblk_t *)0;
		pool->p_reqs--;
		mutex_exit(&pool->p_req_lock);
		(*RELE_PROC(xprt)) (xprt->xp_wq, mp);
	}
	mutex_exit(&xprt->xp_req_lock);
}

/*
 * This routine is called by rpcmod to inform kernel RPC that a
 * queue is closing. It is called after all the requests have been
 * picked up (that is after all the slots on the queue have
 * been released by kernel RPC). It is also guaranteed that no more
 * requests will be delivered on this transport.
 *
 * - clear xp_wq to mark the master server transport handle as closing
 * - if there are no more threads on this transport close/destroy it
 * - otherwise, broadcast threads sleeping in svc_poll(); the last
 *   thread will close/destroy the transport.
 */
void
svc_queueclose(queue_t *q)
{
	SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0];

	if (xprt == NULL) {
		/*
		 * If there is no master xprt associated with this stream,
		 * then there is nothing to do. This happens regularly
		 * with connection-oriented listening streams created by
		 * nfsd.
		 */
		return;
	}

	mutex_enter(&xprt->xp_thread_lock);

	ASSERT(xprt->xp_req_head == NULL);
	ASSERT(xprt->xp_wq != NULL);

	xprt->xp_wq = NULL;

	if (xprt->xp_threads == 0) {
		SVCPOOL *pool = xprt->xp_pool;

		/*
		 * svc_xprt_cleanup() destroys the transport
		 * or releases the transport thread lock
		 */
		svc_xprt_cleanup(xprt, FALSE);

		mutex_enter(&pool->p_thread_lock);

		/*
		 * If the pool is in closing state and this was
		 * the last transport in the pool then signal the creator
		 * thread to clean up and exit.
		 */
		if (pool->p_closing && svc_pool_tryexit(pool)) {
			return;
		}
		mutex_exit(&pool->p_thread_lock);
	} else {
		/*
		 * Wakeup threads sleeping in svc_poll() so that they
		 * unlink from the transport
		 */
		mutex_enter(&xprt->xp_pool->p_req_lock);
		cv_broadcast(&xprt->xp_pool->p_req_cv);
		mutex_exit(&xprt->xp_pool->p_req_lock);

		/*
		 * NOTICE: No references to the master transport structure
		 * beyond this point!
		 */
		mutex_exit(&xprt->xp_thread_lock);
	}
}
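/*
 * Illustration only -- not part of this file.  A minimal sketch of the
 * kind of fixed-size hint ring that svc_xprt_qput()/svc_xprt_qget()
 * (defined elsewhere in this file) provide: on overflow a dropped hint
 * just sets the overflow flag, and svc_poll() later falls back to
 * walking the transport list in `drain' mode.  The hintq_t layout and
 * all q_* names here are hypothetical, not the real SVCPOOL fields.
 */
#if 0
#define	HINTQ_SLOTS	64	/* arbitrary illustrative size */

typedef struct hintq {
	kmutex_t	q_lock;
	SVCMASTERXPRT	*q_slot[HINTQ_SLOTS];
	int		q_head;
	int		q_tail;
	bool_t		q_overflow;
} hintq_t;

static void
hintq_put(hintq_t *hq, SVCMASTERXPRT *xprt)
{
	int next;

	mutex_enter(&hq->q_lock);
	next = (hq->q_tail + 1) % HINTQ_SLOTS;
	if (next == hq->q_head) {
		/* Hint dropped; a consumer must drain the full list. */
		hq->q_overflow = TRUE;
	} else {
		hq->q_slot[hq->q_tail] = xprt;
		hq->q_tail = next;
	}
	mutex_exit(&hq->q_lock);
}

static SVCMASTERXPRT *
hintq_get(hintq_t *hq)
{
	SVCMASTERXPRT *xprt = NULL;

	mutex_enter(&hq->q_lock);
	if (hq->q_head != hq->q_tail) {
		xprt = hq->q_slot[hq->q_head];
		hq->q_head = (hq->q_head + 1) % HINTQ_SLOTS;
	}
	mutex_exit(&hq->q_lock);
	return (xprt);	/* NULL means no hint available */
}
#endif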
/*
 * Interrupt `request delivery' routine called from rpcmod
 * - put a request at the tail of the transport request queue
 * - insert a hint for svc_poll() into the xprt-ready queue
 * - increment the `pending-requests' count for the pool
 * - wake up a thread sleeping in svc_poll() if necessary
 * - if all the threads are running ask the creator for a new one.
 */
void
svc_queuereq(queue_t *q, mblk_t *mp)
{
	SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0];
	SVCPOOL *pool = xprt->xp_pool;

	TRACE_0(TR_FAC_KRPC, TR_SVC_QUEUEREQ_START, "svc_queuereq_start");

	ASSERT(!is_system_labeled() || DB_CRED(mp) != NULL ||
	    mp->b_datap->db_type != M_DATA);

	/*
	 * Step 1.
	 * Grab the transport's request lock and the
	 * pool's request lock so that when we put
	 * the request at the tail of the transport's
	 * request queue, possibly put the request on
	 * the xprt ready queue and increment the
	 * pending request count it looks atomic.
	 */
	mutex_enter(&xprt->xp_req_lock);
	mutex_enter(&pool->p_req_lock);
	if (xprt->xp_req_head == NULL)
		xprt->xp_req_head = mp;
	else
		xprt->xp_req_tail->b_next = mp;
	xprt->xp_req_tail = mp;

	/*
	 * Step 2.
	 * Insert a hint into the xprt-ready queue, increment
	 * the `pending-requests' count for the pool, and wake up
	 * a thread sleeping in svc_poll() if necessary.
	 */

	/* Insert pointer to this transport into the xprt-ready queue */
	svc_xprt_qput(pool, xprt);

	/* Increment the `pending-requests' count for the pool */
	pool->p_reqs++;

	TRACE_2(TR_FAC_KRPC, TR_NFSFP_QUE_REQ_ENQ,
	    "rpc_que_req_enq:pool %p mp %p", pool, mp);

	/*
	 * If there are more requests and req_cv hasn't
	 * been signaled yet then wake up one more thread now.
	 *
	 * We avoid signaling req_cv until the most recently
	 * signaled thread wakes up and gets CPU to clear
	 * the `drowsy' flag.
	 */
	if (pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
	    pool->p_asleep == 0) {
		mutex_exit(&pool->p_req_lock);
	} else {
		pool->p_drowsy = TRUE;
		pool->p_asleep--;

		/*
		 * Signal wakeup and drop the request lock.
		 */
		cv_signal(&pool->p_req_cv);
		mutex_exit(&pool->p_req_lock);
	}
	mutex_exit(&xprt->xp_req_lock);

	/*
	 * Step 3.
	 * If there are no asleep/signaled threads, we are
	 * still below the pool->p_maxthreads limit, and no thread is
	 * currently being created then signal the creator
	 * for one more service thread.
	 *
	 * The asleep and drowsy checks are not protected
	 * by a lock since it hurts performance and a wrong
	 * decision is not essential.
	 */
	if (pool->p_asleep == 0 && !pool->p_drowsy &&
	    pool->p_threads + pool->p_detached_threads < pool->p_maxthreads)
		svc_creator_signal(pool);

	TRACE_1(TR_FAC_KRPC, TR_SVC_QUEUEREQ_END,
	    "svc_queuereq_end:(%S)", "end");
}
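/*
 * Illustration only -- not part of this file.  The `drowsy' handshake
 * used by svc_queuereq() and svc_poll() above, reduced to a generic
 * producer/consumer: the producer issues at most one wakeup while a
 * previous wakeup is still in flight, and the woken thread clears the
 * flag to re-arm signaling.  This sketch omits the p_reqs/p_walkers
 * test, and all wq_* names (including the wq_enqueue()/wq_dequeue()
 * helpers) are hypothetical.
 */
#if 0
static void
wq_produce(wq_t *wq, work_t *w)
{
	mutex_enter(&wq->wq_lock);
	wq_enqueue(wq, w);
	if (!wq->wq_drowsy && wq->wq_asleep > 0) {
		wq->wq_drowsy = TRUE;	/* one wakeup now in flight */
		wq->wq_asleep--;	/* account for the woken thread */
		cv_signal(&wq->wq_cv);
	}
	mutex_exit(&wq->wq_lock);
}

static work_t *
wq_consume(wq_t *wq)
{
	work_t *w;

	mutex_enter(&wq->wq_lock);
	while ((w = wq_dequeue(wq)) == NULL) {
		wq->wq_asleep++;
		cv_wait(&wq->wq_cv, &wq->wq_lock);
		if (wq->wq_drowsy)
			wq->wq_drowsy = FALSE;	/* re-arm wakeups */
		else
			wq->wq_asleep--;	/* spurious wakeup */
	}
	mutex_exit(&wq->wq_lock);
	return (w);
}
#endif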
/*
 * Reserve a service thread so that it can be detached later.
 * This reservation is required to make sure that when it tries to
 * detach itself the total number of detached threads does not exceed
 * pool->p_maxthreads - pool->p_redline (i.e. that we keep at least
 * pool->p_redline non-detached threads).
 *
 * If the thread does not detach itself later, it should cancel the
 * reservation before returning to svc_run().
 *
 * - check if there is room for more reserved/detached threads
 * - if so, then increment the `reserved threads' count for the pool
 * - mark the thread as reserved (by setting the flag in the clone
 *   transport handle for this thread)
 * - returns 1 if the reservation succeeded, 0 if it failed.
 */
int
svc_reserve_thread(SVCXPRT *clone_xprt)
{
	SVCPOOL *pool = clone_xprt->xp_master->xp_pool;

	/* Recursive reservations are not allowed */
	ASSERT(!clone_xprt->xp_reserved);
	ASSERT(!clone_xprt->xp_detached);

	/* Check pool counts if there is room for reservation */
	mutex_enter(&pool->p_thread_lock);
	if (pool->p_reserved_threads + pool->p_detached_threads >=
	    pool->p_maxthreads - pool->p_redline) {
		mutex_exit(&pool->p_thread_lock);
		return (0);
	}
	pool->p_reserved_threads++;
	mutex_exit(&pool->p_thread_lock);

	/* Mark the thread (clone handle) as reserved */
	clone_xprt->xp_reserved = TRUE;

	return (1);
}

/*
 * Cancel a reservation for a thread.
 * - decrement the `reserved threads' count for the pool
 * - clear the flag in the clone transport handle for this thread.
 */
void
svc_unreserve_thread(SVCXPRT *clone_xprt)
{
	SVCPOOL *pool = clone_xprt->xp_master->xp_pool;

	/* Thread must have a reservation */
	ASSERT(clone_xprt->xp_reserved);
	ASSERT(!clone_xprt->xp_detached);

	/* Decrement global count */
	mutex_enter(&pool->p_thread_lock);
	pool->p_reserved_threads--;
	mutex_exit(&pool->p_thread_lock);

	/* Clear reservation flag */
	clone_xprt->xp_reserved = FALSE;
}

/*
 * Detach a thread from its transport, so that it can block for an
 * extended time. Because the transport can be closed after the thread is
 * detached, the thread should have already sent off a reply if it was
 * going to send one.
 *
 * - decrement the `non-detached threads' count and increment the
 *   `detached threads' count for the transport
 * - decrement the `non-detached threads' and `reserved threads'
 *   counts and increment the `detached threads' count for the pool
 * - release the rpcmod slot
 * - mark the clone (thread) as detached.
 *
 * No need to return a pointer to the thread's CPR information, since
 * the thread has a userland identity.
 *
 * NOTICE: a thread must not detach itself without making a prior
 * reservation through svc_reserve_thread().
 */
callb_cpr_t *
svc_detach_thread(SVCXPRT *clone_xprt)
{
	SVCMASTERXPRT *xprt = clone_xprt->xp_master;
	SVCPOOL *pool = xprt->xp_pool;

	/* Thread must have a reservation */
	ASSERT(clone_xprt->xp_reserved);
	ASSERT(!clone_xprt->xp_detached);

	/* Bookkeeping for this transport */
	mutex_enter(&xprt->xp_thread_lock);
	xprt->xp_threads--;
	xprt->xp_detached_threads++;
	mutex_exit(&xprt->xp_thread_lock);

	/* Bookkeeping for the pool */
	mutex_enter(&pool->p_thread_lock);
	pool->p_threads--;
	pool->p_reserved_threads--;
	pool->p_detached_threads++;
	mutex_exit(&pool->p_thread_lock);

	/* Release an rpcmod slot for this request */
	(*RELE_PROC(xprt)) (clone_xprt->xp_wq, NULL);

	/* Mark the clone (thread) as detached */
	clone_xprt->xp_reserved = FALSE;
	clone_xprt->xp_detached = TRUE;

	return (NULL);
}
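/*
 * Illustration only -- not part of this file.  A minimal sketch of how
 * a service routine that must block for a long time would use the
 * reservation/detach protocol above.  The dispatch shape and the
 * fast_path(), send_reply() and slow_operation() helpers are
 * hypothetical; the real callers live in the NFS server code.
 */
#if 0
static void
my_dispatch(struct svc_req *req, SVCXPRT *clone_xprt)
{
	/* Reserve early, while detaching is still guaranteed to work. */
	if (!svc_reserve_thread(clone_xprt)) {
		/* Redline reached - handle synchronously instead. */
		send_reply(clone_xprt, req);
		return;
	}

	if (fast_path(req)) {
		/* Not detaching after all - cancel before svc_run(). */
		svc_unreserve_thread(clone_xprt);
		send_reply(clone_xprt, req);
		return;
	}

	/* The reply must be sent first; the transport may close now. */
	send_reply(clone_xprt, req);
	(void) svc_detach_thread(clone_xprt);

	slow_operation(req);	/* may block for an extended time */
	/* svc_run() will see xp_detached and exit this thread. */
}
#endif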
/*
 * This routine is responsible for extracting each RDMA plugin master
 * XPRT, unregistering it from the SVCPOOL, and initiating plugin
 * specific cleanup. It is passed a list/group of rdma transport
 * records that are active in a given registered or unregistered kRPC
 * thread pool. It shuts down all active rdma transports in that pool.
 * If the thread active on the transport happens to be the last thread
 * for that pool, it will signal the creator thread to clean up the
 * pool and destroy the xprt in svc_queueclose().
 */
void
rdma_stop(rdma_xprt_group_t rdma_xprts)
{
	SVCMASTERXPRT *xprt;
	rdma_xprt_record_t *curr_rec;
	queue_t *q;
	mblk_t *mp;
	int i;
	SVCPOOL *pool;

	if (rdma_xprts.rtg_count == 0)
		return;

	for (i = 0; i < rdma_xprts.rtg_count; i++) {
		curr_rec = rdma_xprts.rtg_listhead;
		rdma_xprts.rtg_listhead = curr_rec->rtr_next;
		curr_rec->rtr_next = NULL;
		xprt = curr_rec->rtr_xprt_ptr;
		q = xprt->xp_wq;
		svc_rdma_kstop(xprt);

		mutex_enter(&xprt->xp_req_lock);
		pool = xprt->xp_pool;
		while ((mp = xprt->xp_req_head) != NULL) {
			/*
			 * remove the request from the list and
			 * decrement p_reqs
			 */
			xprt->xp_req_head = mp->b_next;
			mutex_enter(&pool->p_req_lock);
			mp->b_next = (mblk_t *)0;
			pool->p_reqs--;
			mutex_exit(&pool->p_req_lock);
			freemsg(mp);
		}
		mutex_exit(&xprt->xp_req_lock);
		svc_queueclose(q);
#ifdef DEBUG
		if (rdma_check)
			cmn_err(CE_NOTE, "rdma_stop: Exited svc_queueclose\n");
#endif
		/*
		 * Free the rdma transport record for the expunged rdma
		 * based master transport handle.
		 */
		kmem_free(curr_rec, sizeof (rdma_xprt_record_t));
		if (!rdma_xprts.rtg_listhead)
			break;
	}
}