17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5f9c1591dSVallish Vaidyeshwara * Common Development and Distribution License (the "License"). 6f9c1591dSVallish Vaidyeshwara * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22f9c1591dSVallish Vaidyeshwara * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #include <thread.h> 277c478bd9Sstevel@tonic-gate #include <stdlib.h> 287c478bd9Sstevel@tonic-gate #include <errno.h> 297c478bd9Sstevel@tonic-gate #include <strings.h> 307c478bd9Sstevel@tonic-gate #include <tiuser.h> 317c478bd9Sstevel@tonic-gate #include <syslog.h> 327c478bd9Sstevel@tonic-gate #include <zone.h> 337c478bd9Sstevel@tonic-gate #include <sys/priocntl.h> 347c478bd9Sstevel@tonic-gate #include <sys/fxpriocntl.h> 357c478bd9Sstevel@tonic-gate #include <nfs/nfs.h> 367c478bd9Sstevel@tonic-gate #include <nfs/nfssys.h> 377c478bd9Sstevel@tonic-gate #include "thrpool.h" 387c478bd9Sstevel@tonic-gate 397c478bd9Sstevel@tonic-gate extern int _nfssys(int, void *); 407c478bd9Sstevel@tonic-gate 417c478bd9Sstevel@tonic-gate /* 427c478bd9Sstevel@tonic-gate * Thread to call into the kernel and do work on behalf of NFS. 437c478bd9Sstevel@tonic-gate */ 447c478bd9Sstevel@tonic-gate static void * 457c478bd9Sstevel@tonic-gate svcstart(void *arg) 467c478bd9Sstevel@tonic-gate { 477c478bd9Sstevel@tonic-gate int id = (int)arg; 487c478bd9Sstevel@tonic-gate 497c478bd9Sstevel@tonic-gate /* 50*f7b93e0cSVallish Vaidyeshwara * Create a kernel worker thread to service 51*f7b93e0cSVallish Vaidyeshwara * new incoming requests on a pool. 527c478bd9Sstevel@tonic-gate */ 53*f7b93e0cSVallish Vaidyeshwara _nfssys(SVCPOOL_RUN, &id); 547c478bd9Sstevel@tonic-gate 557c478bd9Sstevel@tonic-gate /* 56*f7b93e0cSVallish Vaidyeshwara * Returned from the kernel, this thread's work is done, 57*f7b93e0cSVallish Vaidyeshwara * and it should exit. For new incoming requests, 58*f7b93e0cSVallish Vaidyeshwara * svcblock() will spawn another worker thread by 59*f7b93e0cSVallish Vaidyeshwara * calling svcstart() again. 607c478bd9Sstevel@tonic-gate */ 617c478bd9Sstevel@tonic-gate thr_exit(NULL); 627c478bd9Sstevel@tonic-gate return (NULL); 637c478bd9Sstevel@tonic-gate } 647c478bd9Sstevel@tonic-gate 657c478bd9Sstevel@tonic-gate static void * 667c478bd9Sstevel@tonic-gate svc_rdma_creator(void *arg) 677c478bd9Sstevel@tonic-gate { 687c478bd9Sstevel@tonic-gate struct rdma_svc_args *rsap = (struct rdma_svc_args *)arg; 697c478bd9Sstevel@tonic-gate 70f9c1591dSVallish Vaidyeshwara if (_nfssys(RDMA_SVC_INIT, rsap) < 0) { 71f9c1591dSVallish Vaidyeshwara if (errno != ENODEV) { 727c478bd9Sstevel@tonic-gate (void) syslog(LOG_INFO, "RDMA transport startup " 737c478bd9Sstevel@tonic-gate "failed with %m"); 747c478bd9Sstevel@tonic-gate } 757c478bd9Sstevel@tonic-gate } 767c478bd9Sstevel@tonic-gate free(rsap); 777c478bd9Sstevel@tonic-gate thr_exit(NULL); 787c478bd9Sstevel@tonic-gate return (NULL); 797c478bd9Sstevel@tonic-gate } 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate /* 827c478bd9Sstevel@tonic-gate * User-space "creator" thread. This thread blocks in the kernel 837c478bd9Sstevel@tonic-gate * until new worker threads need to be created for the service 847c478bd9Sstevel@tonic-gate * pool. On return to userspace, if there is no error, create a 857c478bd9Sstevel@tonic-gate * new thread for the service pool. 867c478bd9Sstevel@tonic-gate */ 877c478bd9Sstevel@tonic-gate static void * 887c478bd9Sstevel@tonic-gate svcblock(void *arg) 897c478bd9Sstevel@tonic-gate { 907c478bd9Sstevel@tonic-gate int id = (int)arg; 917c478bd9Sstevel@tonic-gate 927c478bd9Sstevel@tonic-gate /* CONSTCOND */ 937c478bd9Sstevel@tonic-gate while (1) { 947c478bd9Sstevel@tonic-gate thread_t tid; 957c478bd9Sstevel@tonic-gate 967c478bd9Sstevel@tonic-gate /* 977c478bd9Sstevel@tonic-gate * Call into the kernel, and hang out there 987c478bd9Sstevel@tonic-gate * until a thread needs to be created. 997c478bd9Sstevel@tonic-gate */ 100f9c1591dSVallish Vaidyeshwara if (_nfssys(SVCPOOL_WAIT, &id) < 0) { 101*f7b93e0cSVallish Vaidyeshwara if (errno == ECANCELED || errno == EINTR || 102*f7b93e0cSVallish Vaidyeshwara errno == EBUSY) 1037c478bd9Sstevel@tonic-gate /* 104*f7b93e0cSVallish Vaidyeshwara * If we get back ECANCELED or EINTR, 105*f7b93e0cSVallish Vaidyeshwara * the service pool is exiting, and we 106*f7b93e0cSVallish Vaidyeshwara * may as well clean up this thread. If 107*f7b93e0cSVallish Vaidyeshwara * EBUSY is returned, there's already a 108*f7b93e0cSVallish Vaidyeshwara * thread looping on this pool, so we 109*f7b93e0cSVallish Vaidyeshwara * should give up. 1107c478bd9Sstevel@tonic-gate */ 1117c478bd9Sstevel@tonic-gate break; 1127c478bd9Sstevel@tonic-gate else 1137c478bd9Sstevel@tonic-gate continue; 1147c478bd9Sstevel@tonic-gate } 1157c478bd9Sstevel@tonic-gate 1167c478bd9Sstevel@tonic-gate /* 1177c478bd9Sstevel@tonic-gate * User portion of the thread does no real work since 1187c478bd9Sstevel@tonic-gate * the svcpool threads actually spend their entire 1197c478bd9Sstevel@tonic-gate * lives in the kernel. So, user portion of the thread 1207c478bd9Sstevel@tonic-gate * should have the smallest stack possible. 1217c478bd9Sstevel@tonic-gate */ 1227c478bd9Sstevel@tonic-gate (void) thr_create(NULL, THR_MIN_STACK, svcstart, (void *)id, 1237c478bd9Sstevel@tonic-gate THR_BOUND | THR_DETACHED, &tid); 1247c478bd9Sstevel@tonic-gate } 1257c478bd9Sstevel@tonic-gate 1267c478bd9Sstevel@tonic-gate thr_exit(NULL); 1277c478bd9Sstevel@tonic-gate return (NULL); 1287c478bd9Sstevel@tonic-gate } 1297c478bd9Sstevel@tonic-gate 1307c478bd9Sstevel@tonic-gate void 1317c478bd9Sstevel@tonic-gate svcsetprio(void) 1327c478bd9Sstevel@tonic-gate { 1337c478bd9Sstevel@tonic-gate pcinfo_t pcinfo; 1347c478bd9Sstevel@tonic-gate pri_t maxupri; 1357c478bd9Sstevel@tonic-gate 1367c478bd9Sstevel@tonic-gate /* 1377c478bd9Sstevel@tonic-gate * By default, all threads should be part of the FX scheduler 1387c478bd9Sstevel@tonic-gate * class. As nfsd/lockd server threads used to be part of the 1397c478bd9Sstevel@tonic-gate * kernel, they're used to being scheduled in the SYS class. 1407c478bd9Sstevel@tonic-gate * Userland threads shouldn't be in SYS, but they can be given a 1417c478bd9Sstevel@tonic-gate * higher priority by default. This change still renders nfsd/lockd 1427c478bd9Sstevel@tonic-gate * managable by an admin by utilizing commands to change scheduling 1437c478bd9Sstevel@tonic-gate * manually, or by using resource management tools such as pools 1447c478bd9Sstevel@tonic-gate * to associate them with a different scheduling class and segregate 1457c478bd9Sstevel@tonic-gate * the workload. 1467c478bd9Sstevel@tonic-gate * 1477c478bd9Sstevel@tonic-gate * We set the threads' priority to the upper bound for priorities 1487c478bd9Sstevel@tonic-gate * in FX. This should be 60, but since the desired action is to 1497c478bd9Sstevel@tonic-gate * make nfsd/lockd more important than TS threads, we bow to the 1507c478bd9Sstevel@tonic-gate * system's knowledge rather than setting it manually. Furthermore, 1517c478bd9Sstevel@tonic-gate * since the SYS class doesn't timeslice, use an "infinite" quantum. 1527c478bd9Sstevel@tonic-gate * If anything fails, just log the failure and let the daemon 1537c478bd9Sstevel@tonic-gate * default to TS. 1547c478bd9Sstevel@tonic-gate * 1557c478bd9Sstevel@tonic-gate * The change of scheduling class is expected to fail in a non-global 1567c478bd9Sstevel@tonic-gate * zone, so we avoid worrying the zone administrator unnecessarily. 1577c478bd9Sstevel@tonic-gate */ 1587c478bd9Sstevel@tonic-gate (void) strcpy(pcinfo.pc_clname, "FX"); 1597c478bd9Sstevel@tonic-gate if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) != -1) { 1607c478bd9Sstevel@tonic-gate maxupri = ((fxinfo_t *)pcinfo.pc_clinfo)->fx_maxupri; 1617c478bd9Sstevel@tonic-gate if (priocntl(P_LWPID, P_MYID, PC_SETXPARMS, "FX", 1627c478bd9Sstevel@tonic-gate FX_KY_UPRILIM, maxupri, FX_KY_UPRI, maxupri, 1637c478bd9Sstevel@tonic-gate FX_KY_TQNSECS, FX_TQINF, NULL) != 0 && 1647c478bd9Sstevel@tonic-gate getzoneid() == GLOBAL_ZONEID) 1657c478bd9Sstevel@tonic-gate (void) syslog(LOG_ERR, "Unable to use FX scheduler: " 1667c478bd9Sstevel@tonic-gate "%m. Using system default scheduler."); 1677c478bd9Sstevel@tonic-gate } else 1687c478bd9Sstevel@tonic-gate (void) syslog(LOG_ERR, "Unable to determine parameters " 1697c478bd9Sstevel@tonic-gate "for FX scheduler. Using system default scheduler."); 1707c478bd9Sstevel@tonic-gate } 1717c478bd9Sstevel@tonic-gate 1727c478bd9Sstevel@tonic-gate int 1737c478bd9Sstevel@tonic-gate svcrdma(int id, int versmin, int versmax, int delegation) 1747c478bd9Sstevel@tonic-gate { 1757c478bd9Sstevel@tonic-gate thread_t tid; 1767c478bd9Sstevel@tonic-gate struct rdma_svc_args *rsa; 1777c478bd9Sstevel@tonic-gate 1787c478bd9Sstevel@tonic-gate rsa = (struct rdma_svc_args *)malloc(sizeof (struct rdma_svc_args)); 1797c478bd9Sstevel@tonic-gate rsa->poolid = (uint32_t)id; 1807c478bd9Sstevel@tonic-gate rsa->netid = NULL; 1817c478bd9Sstevel@tonic-gate rsa->nfs_versmin = versmin; 1827c478bd9Sstevel@tonic-gate rsa->nfs_versmax = versmax; 1837c478bd9Sstevel@tonic-gate rsa->delegation = delegation; 1847c478bd9Sstevel@tonic-gate 1857c478bd9Sstevel@tonic-gate /* 1867c478bd9Sstevel@tonic-gate * Create a thread to handle RDMA start and stop. 1877c478bd9Sstevel@tonic-gate */ 1887c478bd9Sstevel@tonic-gate if (thr_create(NULL, THR_MIN_STACK * 2, svc_rdma_creator, (void *)rsa, 1897c478bd9Sstevel@tonic-gate THR_BOUND | THR_DETACHED, &tid)) 1907c478bd9Sstevel@tonic-gate return (1); 1917c478bd9Sstevel@tonic-gate 1927c478bd9Sstevel@tonic-gate return (0); 1937c478bd9Sstevel@tonic-gate } 1947c478bd9Sstevel@tonic-gate 1957c478bd9Sstevel@tonic-gate int 1967c478bd9Sstevel@tonic-gate svcwait(int id) 1977c478bd9Sstevel@tonic-gate { 1987c478bd9Sstevel@tonic-gate thread_t tid; 1997c478bd9Sstevel@tonic-gate 2007c478bd9Sstevel@tonic-gate /* 2017c478bd9Sstevel@tonic-gate * Create a bound thread to wait for kernel LWPs that 2027c478bd9Sstevel@tonic-gate * need to be created. This thread also has little need 2037c478bd9Sstevel@tonic-gate * of stackspace, so should be created with that in mind. 2047c478bd9Sstevel@tonic-gate */ 2057c478bd9Sstevel@tonic-gate if (thr_create(NULL, THR_MIN_STACK * 2, svcblock, (void *)id, 2067c478bd9Sstevel@tonic-gate THR_BOUND | THR_DETACHED, &tid)) 2077c478bd9Sstevel@tonic-gate return (1); 2087c478bd9Sstevel@tonic-gate 2097c478bd9Sstevel@tonic-gate return (0); 2107c478bd9Sstevel@tonic-gate } 211