19ec7b004SRick Macklem /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 49ec7b004SRick Macklem * Copyright (c) 1989, 1993 59ec7b004SRick Macklem * The Regents of the University of California. All rights reserved. 69ec7b004SRick Macklem * 79ec7b004SRick Macklem * This code is derived from software contributed to Berkeley by 89ec7b004SRick Macklem * Rick Macklem at The University of Guelph. 99ec7b004SRick Macklem * 109ec7b004SRick Macklem * Redistribution and use in source and binary forms, with or without 119ec7b004SRick Macklem * modification, are permitted provided that the following conditions 129ec7b004SRick Macklem * are met: 139ec7b004SRick Macklem * 1. Redistributions of source code must retain the above copyright 149ec7b004SRick Macklem * notice, this list of conditions and the following disclaimer. 159ec7b004SRick Macklem * 2. Redistributions in binary form must reproduce the above copyright 169ec7b004SRick Macklem * notice, this list of conditions and the following disclaimer in the 179ec7b004SRick Macklem * documentation and/or other materials provided with the distribution. 18fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 199ec7b004SRick Macklem * may be used to endorse or promote products derived from this software 209ec7b004SRick Macklem * without specific prior written permission. 219ec7b004SRick Macklem * 229ec7b004SRick Macklem * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 239ec7b004SRick Macklem * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 249ec7b004SRick Macklem * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 259ec7b004SRick Macklem * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 269ec7b004SRick Macklem * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 279ec7b004SRick Macklem * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 289ec7b004SRick Macklem * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 299ec7b004SRick Macklem * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 309ec7b004SRick Macklem * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 319ec7b004SRick Macklem * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 329ec7b004SRick Macklem * SUCH DAMAGE. 339ec7b004SRick Macklem * 349ec7b004SRick Macklem * from nfs_syscalls.c 8.5 (Berkeley) 3/30/95 359ec7b004SRick Macklem */ 369ec7b004SRick Macklem 379ec7b004SRick Macklem #include <sys/param.h> 389ec7b004SRick Macklem #include <sys/systm.h> 399ec7b004SRick Macklem #include <sys/sysproto.h> 409ec7b004SRick Macklem #include <sys/kernel.h> 419ec7b004SRick Macklem #include <sys/sysctl.h> 429ec7b004SRick Macklem #include <sys/file.h> 439ec7b004SRick Macklem #include <sys/vnode.h> 449ec7b004SRick Macklem #include <sys/malloc.h> 459ec7b004SRick Macklem #include <sys/mount.h> 469ec7b004SRick Macklem #include <sys/proc.h> 479ec7b004SRick Macklem #include <sys/bio.h> 489ec7b004SRick Macklem #include <sys/buf.h> 499ec7b004SRick Macklem #include <sys/mbuf.h> 509ec7b004SRick Macklem #include <sys/socket.h> 519ec7b004SRick Macklem #include <sys/socketvar.h> 529ec7b004SRick Macklem #include <sys/domain.h> 539ec7b004SRick Macklem #include <sys/protosw.h> 549ec7b004SRick Macklem #include <sys/namei.h> 559ec7b004SRick Macklem #include <sys/unistd.h> 569ec7b004SRick Macklem #include <sys/kthread.h> 579ec7b004SRick Macklem #include <sys/fcntl.h> 589ec7b004SRick Macklem #include <sys/lockf.h> 599ec7b004SRick Macklem #include <sys/mutex.h> 607b8c319bSRick Macklem #include <sys/taskqueue.h> 619ec7b004SRick Macklem 629ec7b004SRick Macklem #include <netinet/in.h> 639ec7b004SRick Macklem #include <netinet/tcp.h> 649ec7b004SRick Macklem 659ec7b004SRick Macklem #include <fs/nfs/nfsport.h> 669ec7b004SRick Macklem #include <fs/nfsclient/nfsmount.h> 679ec7b004SRick Macklem #include <fs/nfsclient/nfs.h> 689ec7b004SRick Macklem #include <fs/nfsclient/nfsnode.h> 699ec7b004SRick Macklem 709ec7b004SRick Macklem extern struct mtx ncl_iod_mutex; 717b8c319bSRick Macklem extern struct task ncl_nfsiodnew_task; 729ec7b004SRick Macklem 739ec7b004SRick Macklem int ncl_numasync; 747b8c319bSRick Macklem enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; 757b8c319bSRick Macklem struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON]; 769ec7b004SRick Macklem 779ec7b004SRick Macklem static void nfssvc_iod(void *); 789ec7b004SRick Macklem 797b8c319bSRick Macklem static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; 809ec7b004SRick Macklem 811f376590SRick Macklem SYSCTL_DECL(_vfs_nfs); 829ec7b004SRick Macklem 839ec7b004SRick Macklem /* Maximum number of seconds a nfsiod kthread will sleep before exiting */ 847b8c319bSRick Macklem static unsigned int nfs_iodmaxidle = 120; 851f376590SRick Macklem SYSCTL_UINT(_vfs_nfs, OID_AUTO, iodmaxidle, CTLFLAG_RW, &nfs_iodmaxidle, 0, 867b8c319bSRick Macklem "Max number of seconds an nfsiod kthread will sleep before exiting"); 879ec7b004SRick Macklem 889ec7b004SRick Macklem /* Maximum number of nfsiod kthreads */ 897b8c319bSRick Macklem unsigned int ncl_iodmax = 20; 909ec7b004SRick Macklem 919ec7b004SRick Macklem /* Minimum number of nfsiod kthreads to keep as spares */ 929ec7b004SRick Macklem static unsigned int nfs_iodmin = 0; 939ec7b004SRick Macklem 947b8c319bSRick Macklem static int nfs_nfsiodnew_sync(void); 957b8c319bSRick Macklem 969ec7b004SRick Macklem static int 979ec7b004SRick Macklem sysctl_iodmin(SYSCTL_HANDLER_ARGS) 989ec7b004SRick Macklem { 999ec7b004SRick Macklem int error, i; 1009ec7b004SRick Macklem int newmin; 1019ec7b004SRick Macklem 1029ec7b004SRick Macklem newmin = nfs_iodmin; 1039ec7b004SRick Macklem error = sysctl_handle_int(oidp, &newmin, 0, req); 1049ec7b004SRick Macklem if (error || (req->newptr == NULL)) 1059ec7b004SRick Macklem return (error); 106b662b41eSRick Macklem NFSLOCKIOD(); 1079ec7b004SRick Macklem if (newmin > ncl_iodmax) { 1089ec7b004SRick Macklem error = EINVAL; 1099ec7b004SRick Macklem goto out; 1109ec7b004SRick Macklem } 1119ec7b004SRick Macklem nfs_iodmin = newmin; 1129ec7b004SRick Macklem if (ncl_numasync >= nfs_iodmin) 1139ec7b004SRick Macklem goto out; 1149ec7b004SRick Macklem /* 1159ec7b004SRick Macklem * If the current number of nfsiod is lower 1169ec7b004SRick Macklem * than the new minimum, create some more. 1179ec7b004SRick Macklem */ 1189ec7b004SRick Macklem for (i = nfs_iodmin - ncl_numasync; i > 0; i--) 1197b8c319bSRick Macklem nfs_nfsiodnew_sync(); 1209ec7b004SRick Macklem out: 121b662b41eSRick Macklem NFSUNLOCKIOD(); 1229ec7b004SRick Macklem return (0); 1239ec7b004SRick Macklem } 1247029da5cSPawel Biernacki SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmin, 12595c01e9bSZhenlei Huang CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 12695c01e9bSZhenlei Huang 0, sizeof (nfs_iodmin), sysctl_iodmin, "IU", 1277b8c319bSRick Macklem "Min number of nfsiod kthreads to keep as spares"); 1289ec7b004SRick Macklem 1299ec7b004SRick Macklem static int 1309ec7b004SRick Macklem sysctl_iodmax(SYSCTL_HANDLER_ARGS) 1319ec7b004SRick Macklem { 1329ec7b004SRick Macklem int error, i; 1339ec7b004SRick Macklem int iod, newmax; 1349ec7b004SRick Macklem 1359ec7b004SRick Macklem newmax = ncl_iodmax; 1369ec7b004SRick Macklem error = sysctl_handle_int(oidp, &newmax, 0, req); 1379ec7b004SRick Macklem if (error || (req->newptr == NULL)) 1389ec7b004SRick Macklem return (error); 1397b8c319bSRick Macklem if (newmax > NFS_MAXASYNCDAEMON) 1409ec7b004SRick Macklem return (EINVAL); 141b662b41eSRick Macklem NFSLOCKIOD(); 1429ec7b004SRick Macklem ncl_iodmax = newmax; 1439ec7b004SRick Macklem if (ncl_numasync <= ncl_iodmax) 1449ec7b004SRick Macklem goto out; 1459ec7b004SRick Macklem /* 1469ec7b004SRick Macklem * If there are some asleep nfsiods that should 1479ec7b004SRick Macklem * exit, wakeup() them so that they check ncl_iodmax 1489ec7b004SRick Macklem * and exit. Those who are active will exit as 1499ec7b004SRick Macklem * soon as they finish I/O. 1509ec7b004SRick Macklem */ 1519ec7b004SRick Macklem iod = ncl_numasync - 1; 1529ec7b004SRick Macklem for (i = 0; i < ncl_numasync - ncl_iodmax; i++) { 15380169e41SRick Macklem if (ncl_iodwant[iod] == NFSIOD_AVAILABLE) 1549ec7b004SRick Macklem wakeup(&ncl_iodwant[iod]); 1559ec7b004SRick Macklem iod--; 1569ec7b004SRick Macklem } 1579ec7b004SRick Macklem out: 158b662b41eSRick Macklem NFSUNLOCKIOD(); 1599ec7b004SRick Macklem return (0); 1609ec7b004SRick Macklem } 1617029da5cSPawel Biernacki SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmax, 1627029da5cSPawel Biernacki CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof (ncl_iodmax), 1637029da5cSPawel Biernacki sysctl_iodmax, "IU", 1647b8c319bSRick Macklem "Max number of nfsiod kthreads"); 1659ec7b004SRick Macklem 1667b8c319bSRick Macklem static int 1677b8c319bSRick Macklem nfs_nfsiodnew_sync(void) 1689ec7b004SRick Macklem { 1699ec7b004SRick Macklem int error, i; 1709ec7b004SRick Macklem 171ee7201a7SRick Macklem NFSASSERTIOD(); 1727b8c319bSRick Macklem for (i = 0; i < ncl_iodmax; i++) { 1739ec7b004SRick Macklem if (nfs_asyncdaemon[i] == 0) { 1747b8c319bSRick Macklem nfs_asyncdaemon[i] = 1; 1759ec7b004SRick Macklem break; 1769ec7b004SRick Macklem } 17780169e41SRick Macklem } 1787b8c319bSRick Macklem if (i == ncl_iodmax) 1797b8c319bSRick Macklem return (0); 180b662b41eSRick Macklem NFSUNLOCKIOD(); 1817b8c319bSRick Macklem error = kproc_create(nfssvc_iod, nfs_asyncdaemon + i, NULL, 1827b8c319bSRick Macklem RFHIGHPID, 0, "newnfs %d", i); 183b662b41eSRick Macklem NFSLOCKIOD(); 1847b8c319bSRick Macklem if (error == 0) { 1859ec7b004SRick Macklem ncl_numasync++; 1867b8c319bSRick Macklem ncl_iodwant[i] = NFSIOD_AVAILABLE; 1877b8c319bSRick Macklem } else 1887b8c319bSRick Macklem nfs_asyncdaemon[i] = 0; 1897b8c319bSRick Macklem return (error); 1907b8c319bSRick Macklem } 1917b8c319bSRick Macklem 1927b8c319bSRick Macklem void 1937b8c319bSRick Macklem ncl_nfsiodnew_tq(__unused void *arg, int pending) 1947b8c319bSRick Macklem { 1957b8c319bSRick Macklem 196b662b41eSRick Macklem NFSLOCKIOD(); 1977b8c319bSRick Macklem while (pending > 0) { 1987b8c319bSRick Macklem pending--; 1997b8c319bSRick Macklem nfs_nfsiodnew_sync(); 2007b8c319bSRick Macklem } 201b662b41eSRick Macklem NFSUNLOCKIOD(); 2027b8c319bSRick Macklem } 2037b8c319bSRick Macklem 2047b8c319bSRick Macklem void 2057b8c319bSRick Macklem ncl_nfsiodnew(void) 2067b8c319bSRick Macklem { 2077b8c319bSRick Macklem 208ee7201a7SRick Macklem NFSASSERTIOD(); 2097b8c319bSRick Macklem taskqueue_enqueue(taskqueue_thread, &ncl_nfsiodnew_task); 2109ec7b004SRick Macklem } 2119ec7b004SRick Macklem 2129ec7b004SRick Macklem static void 2139ec7b004SRick Macklem nfsiod_setup(void *dummy) 2149ec7b004SRick Macklem { 2159ec7b004SRick Macklem int error; 2169ec7b004SRick Macklem 2171f376590SRick Macklem TUNABLE_INT_FETCH("vfs.nfs.iodmin", &nfs_iodmin); 2189ec7b004SRick Macklem nfscl_init(); 219b662b41eSRick Macklem NFSLOCKIOD(); 2209ec7b004SRick Macklem /* Silently limit the start number of nfsiod's */ 2217b8c319bSRick Macklem if (nfs_iodmin > NFS_MAXASYNCDAEMON) 2227b8c319bSRick Macklem nfs_iodmin = NFS_MAXASYNCDAEMON; 2239ec7b004SRick Macklem 2247b8c319bSRick Macklem while (ncl_numasync < nfs_iodmin) { 2257b8c319bSRick Macklem error = nfs_nfsiodnew_sync(); 2269ec7b004SRick Macklem if (error == -1) 2277b8c319bSRick Macklem panic("nfsiod_setup: nfs_nfsiodnew failed"); 2289ec7b004SRick Macklem } 229b662b41eSRick Macklem NFSUNLOCKIOD(); 2309ec7b004SRick Macklem } 2319ec7b004SRick Macklem SYSINIT(newnfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL); 2329ec7b004SRick Macklem 2339ec7b004SRick Macklem static int nfs_defect = 0; 2341f376590SRick Macklem SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, 2357b8c319bSRick Macklem "Allow nfsiods to migrate serving different mounts"); 2369ec7b004SRick Macklem 2379ec7b004SRick Macklem /* 2389ec7b004SRick Macklem * Asynchronous I/O daemons for client nfs. 2399ec7b004SRick Macklem * They do read-ahead and write-behind operations on the block I/O cache. 2409ec7b004SRick Macklem * Returns if we hit the timeout defined by the iodmaxidle sysctl. 2419ec7b004SRick Macklem */ 2429ec7b004SRick Macklem static void 2439ec7b004SRick Macklem nfssvc_iod(void *instance) 2449ec7b004SRick Macklem { 2459ec7b004SRick Macklem struct buf *bp; 2469ec7b004SRick Macklem struct nfsmount *nmp; 2479ec7b004SRick Macklem int myiod, timo; 2489ec7b004SRick Macklem int error = 0; 2499ec7b004SRick Macklem 250b662b41eSRick Macklem NFSLOCKIOD(); 2519ec7b004SRick Macklem myiod = (int *)instance - nfs_asyncdaemon; 2529ec7b004SRick Macklem /* 2539ec7b004SRick Macklem * Main loop 2549ec7b004SRick Macklem */ 2559ec7b004SRick Macklem for (;;) { 2569ec7b004SRick Macklem while (((nmp = ncl_iodmount[myiod]) == NULL) 2579ec7b004SRick Macklem || !TAILQ_FIRST(&nmp->nm_bufq)) { 2589ec7b004SRick Macklem if (myiod >= ncl_iodmax) 2599ec7b004SRick Macklem goto finish; 2609ec7b004SRick Macklem if (nmp) 2619ec7b004SRick Macklem nmp->nm_bufqiods--; 26280169e41SRick Macklem if (ncl_iodwant[myiod] == NFSIOD_NOT_AVAILABLE) 26380169e41SRick Macklem ncl_iodwant[myiod] = NFSIOD_AVAILABLE; 2649ec7b004SRick Macklem ncl_iodmount[myiod] = NULL; 2659ec7b004SRick Macklem /* 2669ec7b004SRick Macklem * Always keep at least nfs_iodmin kthreads. 2679ec7b004SRick Macklem */ 2687b8c319bSRick Macklem timo = (myiod < nfs_iodmin) ? 0 : nfs_iodmaxidle * hz; 2699ec7b004SRick Macklem error = msleep(&ncl_iodwant[myiod], &ncl_iod_mutex, PWAIT | PCATCH, 2709ec7b004SRick Macklem "-", timo); 2719ec7b004SRick Macklem if (error) { 2729ec7b004SRick Macklem nmp = ncl_iodmount[myiod]; 2739ec7b004SRick Macklem /* 2749ec7b004SRick Macklem * Rechecking the nm_bufq closes a rare race where the 2759ec7b004SRick Macklem * nfsiod is woken up at the exact time the idle timeout 2769ec7b004SRick Macklem * fires 2779ec7b004SRick Macklem */ 2789ec7b004SRick Macklem if (nmp && TAILQ_FIRST(&nmp->nm_bufq)) 2799ec7b004SRick Macklem error = 0; 2809ec7b004SRick Macklem break; 2819ec7b004SRick Macklem } 2829ec7b004SRick Macklem } 2839ec7b004SRick Macklem if (error) 2849ec7b004SRick Macklem break; 2859ec7b004SRick Macklem while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) { 2869ec7b004SRick Macklem /* Take one off the front of the list */ 2879ec7b004SRick Macklem TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist); 2889ec7b004SRick Macklem nmp->nm_bufqlen--; 2899ec7b004SRick Macklem if (nmp->nm_bufqwant && nmp->nm_bufqlen <= ncl_numasync) { 2909ec7b004SRick Macklem nmp->nm_bufqwant = 0; 2919ec7b004SRick Macklem wakeup(&nmp->nm_bufq); 2929ec7b004SRick Macklem } 293b662b41eSRick Macklem NFSUNLOCKIOD(); 294*03a39a17SRick Macklem KASSERT((bp->b_flags & B_DIRECT) == 0, 295*03a39a17SRick Macklem ("nfssvc_iod: B_DIRECT set")); 2969ec7b004SRick Macklem if (bp->b_iocmd == BIO_READ) 29767c5c2d2SRick Macklem (void) ncl_doio(bp->b_vp, bp, bp->b_rcred, 29867c5c2d2SRick Macklem NULL, 0); 2999ec7b004SRick Macklem else 30067c5c2d2SRick Macklem (void) ncl_doio(bp->b_vp, bp, bp->b_wcred, 30167c5c2d2SRick Macklem NULL, 0); 302b662b41eSRick Macklem NFSLOCKIOD(); 3039ec7b004SRick Macklem /* 30464a0e848SRick Macklem * Make sure the nmp hasn't been dismounted as soon as 30564a0e848SRick Macklem * ncl_doio() completes for the last buffer. 30664a0e848SRick Macklem */ 30764a0e848SRick Macklem nmp = ncl_iodmount[myiod]; 30864a0e848SRick Macklem if (nmp == NULL) 30964a0e848SRick Macklem break; 31064a0e848SRick Macklem 31164a0e848SRick Macklem /* 3129ec7b004SRick Macklem * If there are more than one iod on this mount, then defect 3139ec7b004SRick Macklem * so that the iods can be shared out fairly between the mounts 3149ec7b004SRick Macklem */ 3159ec7b004SRick Macklem if (nfs_defect && nmp->nm_bufqiods > 1) { 3169ec7b004SRick Macklem NFS_DPF(ASYNCIO, 3179ec7b004SRick Macklem ("nfssvc_iod: iod %d defecting from mount %p\n", 3189ec7b004SRick Macklem myiod, nmp)); 3199ec7b004SRick Macklem ncl_iodmount[myiod] = NULL; 3209ec7b004SRick Macklem nmp->nm_bufqiods--; 3219ec7b004SRick Macklem break; 3229ec7b004SRick Macklem } 3239ec7b004SRick Macklem } 3249ec7b004SRick Macklem } 3259ec7b004SRick Macklem finish: 3269ec7b004SRick Macklem nfs_asyncdaemon[myiod] = 0; 3279ec7b004SRick Macklem if (nmp) 3289ec7b004SRick Macklem nmp->nm_bufqiods--; 32980169e41SRick Macklem ncl_iodwant[myiod] = NFSIOD_NOT_AVAILABLE; 3309ec7b004SRick Macklem ncl_iodmount[myiod] = NULL; 3319ec7b004SRick Macklem /* Someone may be waiting for the last nfsiod to terminate. */ 3329ec7b004SRick Macklem if (--ncl_numasync == 0) 3339ec7b004SRick Macklem wakeup(&ncl_numasync); 334b662b41eSRick Macklem NFSUNLOCKIOD(); 3359ec7b004SRick Macklem if ((error == 0) || (error == EWOULDBLOCK)) 3369ec7b004SRick Macklem kproc_exit(0); 3379ec7b004SRick Macklem /* Abnormal termination */ 3389ec7b004SRick Macklem kproc_exit(1); 3399ec7b004SRick Macklem } 340