xref: /freebsd/sys/fs/nfsserver/nfs_fha_new.c (revision 258a0d760aa8b42899a000e30f610f900a402556)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
5  * Copyright (c) 2013 Spectra Logic Corporation
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/types.h>
33 #include <sys/mbuf.h>
34 #include <sys/sbuf.h>
35 
36 #include <fs/nfs/nfsport.h>
37 #include <fs/nfsserver/nfs_fha_new.h>
38 
39 #include <rpc/rpc.h>
40 
/* Malloc type used for the file handle hash entries allocated in this file. */
static MALLOC_DEFINE(M_NFS_FHA, "NFS FHA", "NFS FHA");

/*
 * Forward declarations for the file-local helpers defined below.
 */
static void		fhanew_init(void *foo);
static void		fhanew_uninit(void *foo);
static rpcproc_t	fhanew_get_procnum(rpcproc_t procnum);
static int		fhanew_get_fh(uint64_t *fh, int v3, struct mbuf **md,
			    caddr_t *dpos);
static int		fhanew_is_read(rpcproc_t procnum);
static int		fhanew_is_write(rpcproc_t procnum);
static int		fhanew_get_offset(struct mbuf **md, caddr_t *dpos,
			    int v3, struct fha_info *info);
static int		fhanew_no_offset(rpcproc_t procnum);
static void		fhanew_set_locktype(rpcproc_t procnum,
			    struct fha_info *info);
static int		fhenew_stats_sysctl(SYSCTL_HANDLER_ARGS);
static void		fha_extract_info(struct svc_req *req,
			    struct fha_info *i);
58 
/*
 * Per-vnet FHA state: the softc holding the file handle hash table, and
 * the set of tuning knobs that the sysctls below expose.
 */
NFSD_VNET_DEFINE_STATIC(struct fha_params *, fhanew_softc);
NFSD_VNET_DEFINE_STATIC(struct fha_ctls, nfsfha_ctls);

/* The "fha" sysctl node, created underneath the _vfs_nfsd node. */
SYSCTL_DECL(_vfs_nfsd);
SYSCTL_NODE(_vfs_nfsd, OID_AUTO, fha, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "NFS File Handle Affinity (FHA)");

/*
 * Knobs declared with CTLFLAG_RWTUN, each backed directly by the
 * same-named field of nfsfha_ctls.  fhanew_init() stores the FHA_DEF_*
 * defaults into those fields.
 */
SYSCTL_UINT(_vfs_nfsd_fha,
    OID_AUTO, enable, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN,
    &NFSD_VNET_NAME(nfsfha_ctls).enable, 0,
    "Enable NFS File Handle Affinity (FHA)");

SYSCTL_UINT(_vfs_nfsd_fha,
    OID_AUTO, read, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN,
    &NFSD_VNET_NAME(nfsfha_ctls).read, 0,
    "Enable NFS FHA read locality");

SYSCTL_UINT(_vfs_nfsd_fha,
    OID_AUTO, write, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN,
    &NFSD_VNET_NAME(nfsfha_ctls).write, 0,
    "Enable NFS FHA write locality");

SYSCTL_UINT(_vfs_nfsd_fha,
    OID_AUTO, bin_shift, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN,
    &NFSD_VNET_NAME(nfsfha_ctls).bin_shift, 0,
    "Maximum locality distance 2^(bin_shift) bytes");

/* A value of 0 is treated as "no limit" by fha_hash_entry_choose_thread(). */
SYSCTL_UINT(_vfs_nfsd_fha,
    OID_AUTO, max_nfsds_per_fh, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN,
    &NFSD_VNET_NAME(nfsfha_ctls).max_nfsds_per_fh, 0,
    "Maximum nfsd threads that "
    "should be working on requests for the same file handle");

/* A value of 0 is treated as "no limit" by fha_hash_entry_choose_thread(). */
SYSCTL_UINT(_vfs_nfsd_fha,
    OID_AUTO, max_reqs_per_nfsd, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN,
    &NFSD_VNET_NAME(nfsfha_ctls).max_reqs_per_nfsd, 0, "Maximum requests that "
    "single nfsd thread should be working on at any time");

/*
 * Read-only string node whose text is produced on demand by
 * fhenew_stats_sysctl(): a dump of the file handle hash table.
 */
SYSCTL_PROC(_vfs_nfsd_fha, OID_AUTO, fhe_stats,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    fhenew_stats_sysctl, "A", "");
100 
/*
 * Defined elsewhere; fhanew_get_procnum() indexes it with an NFSv2
 * procedure number to obtain the procedure number used in this file.
 */
extern int newnfs_nfsv3_procid[];

/* Hook fhanew_init()/fhanew_uninit() into the VNET sysinit/sysuninit lists. */
VNET_SYSINIT(nfs_fhanew, SI_SUB_VNET_DONE, SI_ORDER_ANY, fhanew_init, NULL);
VNET_SYSUNINIT(nfs_fhanew, SI_SUB_VNET_DONE, SI_ORDER_ANY, fhanew_uninit, NULL);
105 
106 static void
107 fhanew_init(void *foo)
108 {
109 	struct fha_params *softc;
110 	int i;
111 
112 	NFSD_VNET(fhanew_softc) = malloc(sizeof(struct fha_params), M_TEMP,
113 	    M_WAITOK | M_ZERO);
114 	softc = NFSD_VNET(fhanew_softc);
115 
116 	snprintf(softc->server_name, sizeof(softc->server_name),
117 	    FHANEW_SERVER_NAME);
118 
119 	for (i = 0; i < FHA_HASH_SIZE; i++)
120 		mtx_init(&softc->fha_hash[i].mtx, "fhalock", NULL, MTX_DEF);
121 
122 	/*
123 	 * Set the default tuning parameters.
124 	 */
125 	NFSD_VNET(nfsfha_ctls).enable = FHA_DEF_ENABLE;
126 	NFSD_VNET(nfsfha_ctls).read = FHA_DEF_READ;
127 	NFSD_VNET(nfsfha_ctls).write = FHA_DEF_WRITE;
128 	NFSD_VNET(nfsfha_ctls).bin_shift = FHA_DEF_BIN_SHIFT;
129 	NFSD_VNET(nfsfha_ctls).max_nfsds_per_fh = FHA_DEF_MAX_NFSDS_PER_FH;
130 	NFSD_VNET(nfsfha_ctls).max_reqs_per_nfsd = FHA_DEF_MAX_REQS_PER_NFSD;
131 
132 }
133 
134 static void
135 fhanew_uninit(void *foo)
136 {
137 	struct fha_params *softc;
138 	int i;
139 
140 	softc = NFSD_VNET(fhanew_softc);
141 
142 	for (i = 0; i < FHA_HASH_SIZE; i++)
143 		mtx_destroy(&softc->fha_hash[i].mtx);
144 	free(softc, M_TEMP);
145 }
146 
147 static rpcproc_t
148 fhanew_get_procnum(rpcproc_t procnum)
149 {
150 	if (procnum > NFSV2PROC_STATFS)
151 		return (-1);
152 
153 	return (newnfs_nfsv3_procid[procnum]);
154 }
155 
156 static int
157 fhanew_get_fh(uint64_t *fh, int v3, struct mbuf **md, caddr_t *dpos)
158 {
159 	struct nfsrv_descript lnd, *nd;
160 	uint32_t *tl;
161 	uint8_t *buf;
162 	uint64_t t;
163 	int error, len, i;
164 
165 	error = 0;
166 	len = 0;
167 	nd = &lnd;
168 
169 	nd->nd_md = *md;
170 	nd->nd_dpos = *dpos;
171 
172 	if (v3) {
173 		NFSM_DISSECT_NONBLOCK(tl, uint32_t *, NFSX_UNSIGNED);
174 		if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) {
175 			error = EBADRPC;
176 			goto nfsmout;
177 		}
178 	} else {
179 		len = NFSX_V2FH;
180 	}
181 
182 	t = 0;
183 	if (len != 0) {
184 		NFSM_DISSECT_NONBLOCK(buf, uint8_t *, len);
185 		for (i = 0; i < len; i++)
186 			t ^= ((uint64_t)buf[i] << (i & 7) * 8);
187 	}
188 	*fh = t;
189 
190 nfsmout:
191 	*md = nd->nd_md;
192 	*dpos = nd->nd_dpos;
193 
194 	return (error);
195 }
196 
197 static int
198 fhanew_is_read(rpcproc_t procnum)
199 {
200 	if (procnum == NFSPROC_READ)
201 		return (1);
202 	else
203 		return (0);
204 }
205 
206 static int
207 fhanew_is_write(rpcproc_t procnum)
208 {
209 	if (procnum == NFSPROC_WRITE)
210 		return (1);
211 	else
212 		return (0);
213 }
214 
215 static int
216 fhanew_get_offset(struct mbuf **md, caddr_t *dpos, int v3,
217     struct fha_info *info)
218 {
219 	struct nfsrv_descript lnd, *nd;
220 	uint32_t *tl;
221 	int error;
222 
223 	error = 0;
224 
225 	nd = &lnd;
226 	nd->nd_md = *md;
227 	nd->nd_dpos = *dpos;
228 
229 	if (v3) {
230 		NFSM_DISSECT_NONBLOCK(tl, uint32_t *, 2 * NFSX_UNSIGNED);
231 		info->offset = fxdr_hyper(tl);
232 	} else {
233 		NFSM_DISSECT_NONBLOCK(tl, uint32_t *, NFSX_UNSIGNED);
234 		info->offset = fxdr_unsigned(uint32_t, *tl);
235 	}
236 
237 nfsmout:
238 	*md = nd->nd_md;
239 	*dpos = nd->nd_dpos;
240 
241 	return (error);
242 }
243 
244 static int
245 fhanew_no_offset(rpcproc_t procnum)
246 {
247 	if (procnum == NFSPROC_FSSTAT ||
248 	    procnum == NFSPROC_FSINFO ||
249 	    procnum == NFSPROC_PATHCONF ||
250 	    procnum == NFSPROC_NOOP ||
251 	    procnum == NFSPROC_NULL)
252 		return (1);
253 	else
254 		return (0);
255 }
256 
257 static void
258 fhanew_set_locktype(rpcproc_t procnum, struct fha_info *info)
259 {
260 	switch (procnum) {
261 	case NFSPROC_NULL:
262 	case NFSPROC_GETATTR:
263 	case NFSPROC_LOOKUP:
264 	case NFSPROC_ACCESS:
265 	case NFSPROC_READLINK:
266 	case NFSPROC_READ:
267 	case NFSPROC_READDIR:
268 	case NFSPROC_READDIRPLUS:
269 	case NFSPROC_WRITE:
270 		info->locktype = LK_SHARED;
271 		break;
272 	case NFSPROC_SETATTR:
273 	case NFSPROC_CREATE:
274 	case NFSPROC_MKDIR:
275 	case NFSPROC_SYMLINK:
276 	case NFSPROC_MKNOD:
277 	case NFSPROC_REMOVE:
278 	case NFSPROC_RMDIR:
279 	case NFSPROC_RENAME:
280 	case NFSPROC_LINK:
281 	case NFSPROC_FSSTAT:
282 	case NFSPROC_FSINFO:
283 	case NFSPROC_PATHCONF:
284 	case NFSPROC_COMMIT:
285 	case NFSPROC_NOOP:
286 		info->locktype = LK_EXCLUSIVE;
287 		break;
288 	}
289 }
290 
291 /*
292  * This just specifies that offsets should obey affinity when within
293  * the same 1Mbyte (1<<20) chunk for the file (reads only for now).
294  */
295 static void
296 fha_extract_info(struct svc_req *req, struct fha_info *i)
297 {
298 	struct mbuf *md;
299 	caddr_t dpos;
300 	static u_int64_t random_fh = 0;
301 	int error;
302 	int v3 = (req->rq_vers == 3);
303 	rpcproc_t procnum;
304 
305 	/*
306 	 * We start off with a random fh.  If we get a reasonable
307 	 * procnum, we set the fh.  If there's a concept of offset
308 	 * that we're interested in, we set that.
309 	 */
310 	i->fh = ++random_fh;
311 	i->offset = 0;
312 	i->locktype = LK_EXCLUSIVE;
313 	i->read = i->write = 0;
314 
315 	/*
316 	 * Extract the procnum and convert to v3 form if necessary,
317 	 * taking care to deal with out-of-range procnums.  Caller will
318 	 * ensure that rq_vers is either 2 or 3.
319 	 */
320 	procnum = req->rq_proc;
321 	if (!v3) {
322 		rpcproc_t tmp_procnum;
323 
324 		tmp_procnum = fhanew_get_procnum(procnum);
325 		if (tmp_procnum == -1)
326 			goto out;
327 		procnum = tmp_procnum;
328 	}
329 
330 	/*
331 	 * We do affinity for most.  However, we divide a realm of affinity
332 	 * by file offset so as to allow for concurrent random access.  We
333 	 * only do this for reads today, but this may change when IFS supports
334 	 * efficient concurrent writes.
335 	 */
336 	if (fhanew_no_offset(procnum))
337 		goto out;
338 
339 	i->read = fhanew_is_read(procnum);
340 	i->write = fhanew_is_write(procnum);
341 
342 	error = newnfs_realign(&req->rq_args, M_NOWAIT);
343 	if (error)
344 		goto out;
345 	md = req->rq_args;
346 	dpos = mtod(md, caddr_t);
347 
348 	/* Grab the filehandle. */
349 	error = fhanew_get_fh(&i->fh, v3, &md, &dpos);
350 	if (error)
351 		goto out;
352 
353 	/* Content ourselves with zero offset for all but reads. */
354 	if (i->read || i->write)
355 		fhanew_get_offset(&md, &dpos, v3, i);
356 
357 out:
358 	fhanew_set_locktype(procnum, i);
359 }
360 
361 static struct fha_hash_entry *
362 fha_hash_entry_new(u_int64_t fh)
363 {
364 	struct fha_hash_entry *e;
365 
366 	e = malloc(sizeof(*e), M_NFS_FHA, M_WAITOK);
367 	e->fh = fh;
368 	e->num_rw = 0;
369 	e->num_exclusive = 0;
370 	e->num_threads = 0;
371 	LIST_INIT(&e->threads);
372 
373 	return (e);
374 }
375 
376 static void
377 fha_hash_entry_destroy(struct fha_hash_entry *e)
378 {
379 
380 	mtx_assert(e->mtx, MA_OWNED);
381 	KASSERT(e->num_rw == 0,
382 	    ("%d reqs on destroyed fhe %p", e->num_rw, e));
383 	KASSERT(e->num_exclusive == 0,
384 	    ("%d exclusive reqs on destroyed fhe %p", e->num_exclusive, e));
385 	KASSERT(e->num_threads == 0,
386 	    ("%d threads on destroyed fhe %p", e->num_threads, e));
387 	free(e, M_NFS_FHA);
388 }
389 
390 static void
391 fha_hash_entry_remove(struct fha_hash_entry *e)
392 {
393 
394 	mtx_assert(e->mtx, MA_OWNED);
395 	LIST_REMOVE(e, link);
396 	fha_hash_entry_destroy(e);
397 }
398 
399 static struct fha_hash_entry *
400 fha_hash_entry_lookup(struct fha_params *softc, u_int64_t fh)
401 {
402 	struct fha_hash_slot *fhs;
403 	struct fha_hash_entry *fhe, *new_fhe;
404 
405 	fhs = &softc->fha_hash[fh % FHA_HASH_SIZE];
406 	new_fhe = fha_hash_entry_new(fh);
407 	new_fhe->mtx = &fhs->mtx;
408 	mtx_lock(&fhs->mtx);
409 	LIST_FOREACH(fhe, &fhs->list, link)
410 		if (fhe->fh == fh)
411 			break;
412 	if (!fhe) {
413 		fhe = new_fhe;
414 		LIST_INSERT_HEAD(&fhs->list, fhe, link);
415 	} else
416 		fha_hash_entry_destroy(new_fhe);
417 	return (fhe);
418 }
419 
420 static void
421 fha_hash_entry_add_thread(struct fha_hash_entry *fhe, SVCTHREAD *thread)
422 {
423 
424 	mtx_assert(fhe->mtx, MA_OWNED);
425 	thread->st_p2 = 0;
426 	LIST_INSERT_HEAD(&fhe->threads, thread, st_alink);
427 	fhe->num_threads++;
428 }
429 
430 static void
431 fha_hash_entry_remove_thread(struct fha_hash_entry *fhe, SVCTHREAD *thread)
432 {
433 
434 	mtx_assert(fhe->mtx, MA_OWNED);
435 	KASSERT(thread->st_p2 == 0,
436 	    ("%d reqs on removed thread %p", thread->st_p2, thread));
437 	LIST_REMOVE(thread, st_alink);
438 	fhe->num_threads--;
439 }
440 
441 /*
442  * Account for an ongoing operation associated with this file.
443  */
444 static void
445 fha_hash_entry_add_op(struct fha_hash_entry *fhe, int locktype, int count)
446 {
447 
448 	mtx_assert(fhe->mtx, MA_OWNED);
449 	if (LK_EXCLUSIVE == locktype)
450 		fhe->num_exclusive += count;
451 	else
452 		fhe->num_rw += count;
453 }
454 
455 /*
456  * Get the service thread currently associated with the fhe that is
457  * appropriate to handle this operation.
458  */
459 static SVCTHREAD *
460 fha_hash_entry_choose_thread(struct fha_params *softc,
461     struct fha_hash_entry *fhe, struct fha_info *i, SVCTHREAD *this_thread)
462 {
463 	SVCTHREAD *thread, *min_thread = NULL;
464 	int req_count, min_count = 0;
465 	off_t offset1, offset2;
466 
467 	LIST_FOREACH(thread, &fhe->threads, st_alink) {
468 		req_count = thread->st_p2;
469 
470 		/* If there are any writes in progress, use the first thread. */
471 		if (fhe->num_exclusive) {
472 #if 0
473 			ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
474 			    "fha: %p(%d)w", thread, req_count);
475 #endif
476 			return (thread);
477 		}
478 
479 		/* Check whether we should consider locality. */
480 		if ((i->read && !NFSD_VNET(nfsfha_ctls).read) ||
481 		    (i->write && !NFSD_VNET(nfsfha_ctls).write))
482 			goto noloc;
483 
484 		/*
485 		 * Check for locality, making sure that we won't
486 		 * exceed our per-thread load limit in the process.
487 		 */
488 		offset1 = i->offset;
489 		offset2 = thread->st_p3;
490 
491 		if (((offset1 >= offset2)
492 		  && ((offset1 - offset2) < (1 << NFSD_VNET(nfsfha_ctls).bin_shift)))
493 		 || ((offset2 > offset1)
494 		  && ((offset2 - offset1) < (1 << NFSD_VNET(nfsfha_ctls).bin_shift)))) {
495 			if ((NFSD_VNET(nfsfha_ctls).max_reqs_per_nfsd == 0) ||
496 			    (req_count < NFSD_VNET(nfsfha_ctls).max_reqs_per_nfsd)) {
497 #if 0
498 				ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
499 				    "fha: %p(%d)r", thread, req_count);
500 #endif
501 				return (thread);
502 			}
503 		}
504 
505 noloc:
506 		/*
507 		 * We don't have a locality match, so skip this thread,
508 		 * but keep track of the most attractive thread in case
509 		 * we need to come back to it later.
510 		 */
511 #if 0
512 		ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
513 		    "fha: %p(%d)s off1 %llu off2 %llu", thread,
514 		    req_count, offset1, offset2);
515 #endif
516 		if ((min_thread == NULL) || (req_count < min_count)) {
517 			min_count = req_count;
518 			min_thread = thread;
519 		}
520 	}
521 
522 	/*
523 	 * We didn't find a good match yet.  See if we can add
524 	 * a new thread to this file handle entry's thread list.
525 	 */
526 	if ((NFSD_VNET(nfsfha_ctls).max_nfsds_per_fh == 0) ||
527 	    (fhe->num_threads < NFSD_VNET(nfsfha_ctls).max_nfsds_per_fh)) {
528 		thread = this_thread;
529 #if 0
530 		ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
531 		    "fha: %p(%d)t", thread, thread->st_p2);
532 #endif
533 		fha_hash_entry_add_thread(fhe, thread);
534 	} else {
535 		/*
536 		 * We don't want to use any more threads for this file, so
537 		 * go back to the most attractive nfsd we're already using.
538 		 */
539 		thread = min_thread;
540 	}
541 
542 	return (thread);
543 }
544 
545 /*
546  * After getting a request, try to assign it to some thread.  Usually we
547  * handle it ourselves.
548  */
549 SVCTHREAD *
550 fhanew_assign(SVCTHREAD *this_thread, struct svc_req *req)
551 {
552 	struct fha_params *softc;
553 	SVCTHREAD *thread;
554 	struct fha_info i;
555 	struct fha_hash_entry *fhe;
556 
557 	NFSD_CURVNET_SET(NFSD_TD_TO_VNET(curthread));
558 	softc = NFSD_VNET(fhanew_softc);
559 	/* Check to see whether we're enabled. */
560 	if (NFSD_VNET(nfsfha_ctls).enable == 0)
561 		goto thist;
562 
563 	/*
564 	 * Only do placement if this is an NFS request.
565 	 */
566 	if (req->rq_prog != NFS_PROG)
567 		goto thist;
568 
569 	if (req->rq_vers != 2 && req->rq_vers != 3)
570 		goto thist;
571 
572 	fha_extract_info(req, &i);
573 
574 	/*
575 	 * We save the offset associated with this request for later
576 	 * nfsd matching.
577 	 */
578 	fhe = fha_hash_entry_lookup(softc, i.fh);
579 	req->rq_p1 = fhe;
580 	req->rq_p2 = i.locktype;
581 	req->rq_p3 = i.offset;
582 
583 	/*
584 	 * Choose a thread, taking into consideration locality, thread load,
585 	 * and the number of threads already working on this file.
586 	 */
587 	thread = fha_hash_entry_choose_thread(softc, fhe, &i, this_thread);
588 	KASSERT(thread, ("fha_assign: NULL thread!"));
589 	fha_hash_entry_add_op(fhe, i.locktype, 1);
590 	thread->st_p2++;
591 	thread->st_p3 = i.offset;
592 
593 	/*
594 	 * Grab the pool lock here to not let chosen thread go away before
595 	 * the new request inserted to its queue while we drop fhe lock.
596 	 */
597 	mtx_lock(&thread->st_lock);
598 	mtx_unlock(fhe->mtx);
599 
600 	NFSD_CURVNET_RESTORE();
601 	return (thread);
602 thist:
603 	req->rq_p1 = NULL;
604 	NFSD_CURVNET_RESTORE();
605 	mtx_lock(&this_thread->st_lock);
606 	return (this_thread);
607 }
608 
609 /*
610  * Called when we're done with an operation.  The request has already
611  * been de-queued.
612  */
613 void
614 fhanew_nd_complete(SVCTHREAD *thread, struct svc_req *req)
615 {
616 	struct fha_hash_entry *fhe = req->rq_p1;
617 	struct mtx *mtx;
618 
619 	NFSD_CURVNET_SET(NFSD_TD_TO_VNET(curthread));
620 	/*
621 	 * This may be called for reqs that didn't go through
622 	 * fha_assign (e.g. extra NULL ops used for RPCSEC_GSS.
623 	 */
624 	if (!fhe) {
625 		NFSD_CURVNET_RESTORE();
626 		return;
627 	}
628 
629 	mtx = fhe->mtx;
630 	mtx_lock(mtx);
631 	fha_hash_entry_add_op(fhe, req->rq_p2, -1);
632 	thread->st_p2--;
633 	KASSERT(thread->st_p2 >= 0, ("Negative request count %d on %p",
634 	    thread->st_p2, thread));
635 	if (thread->st_p2 == 0) {
636 		fha_hash_entry_remove_thread(fhe, thread);
637 		if (0 == fhe->num_rw + fhe->num_exclusive)
638 			fha_hash_entry_remove(fhe);
639 	}
640 	mtx_unlock(mtx);
641 	NFSD_CURVNET_RESTORE();
642 }
643 
644 static int
645 fhenew_stats_sysctl(SYSCTL_HANDLER_ARGS)
646 {
647 	struct fha_params *softc;
648 	int error, i;
649 	struct sbuf sb;
650 	struct fha_hash_entry *fhe;
651 	bool_t first, hfirst;
652 	SVCTHREAD *thread;
653 
654 	sbuf_new(&sb, NULL, 65536, SBUF_FIXEDLEN);
655 
656 	NFSD_CURVNET_SET(NFSD_TD_TO_VNET(curthread));
657 	softc = NFSD_VNET(fhanew_softc);
658 	for (i = 0; i < FHA_HASH_SIZE; i++)
659 		if (!LIST_EMPTY(&softc->fha_hash[i].list))
660 			break;
661 
662 	if (i == FHA_HASH_SIZE) {
663 		sbuf_printf(&sb, "No file handle entries.\n");
664 		goto out;
665 	}
666 
667 	hfirst = TRUE;
668 	for (; i < FHA_HASH_SIZE; i++) {
669 		mtx_lock(&softc->fha_hash[i].mtx);
670 		if (LIST_EMPTY(&softc->fha_hash[i].list)) {
671 			mtx_unlock(&softc->fha_hash[i].mtx);
672 			continue;
673 		}
674 		sbuf_printf(&sb, "%shash %d: {\n", hfirst ? "" : ", ", i);
675 		first = TRUE;
676 		LIST_FOREACH(fhe, &softc->fha_hash[i].list, link) {
677 			sbuf_printf(&sb, "%sfhe %p: {\n", first ? "  " : ", ",
678 			    fhe);
679 			sbuf_printf(&sb, "    fh: %ju\n", (uintmax_t) fhe->fh);
680 			sbuf_printf(&sb, "    num_rw/exclusive: %d/%d\n",
681 			    fhe->num_rw, fhe->num_exclusive);
682 			sbuf_printf(&sb, "    num_threads: %d\n",
683 			    fhe->num_threads);
684 
685 			LIST_FOREACH(thread, &fhe->threads, st_alink) {
686 				sbuf_printf(&sb, "      thread %p offset %ju "
687 				    "reqs %d\n", thread,
688 				    thread->st_p3, thread->st_p2);
689 			}
690 
691 			sbuf_printf(&sb, "  }");
692 			first = FALSE;
693 		}
694 		sbuf_printf(&sb, "\n}");
695 		mtx_unlock(&softc->fha_hash[i].mtx);
696 		hfirst = FALSE;
697 	}
698 
699  out:
700 	NFSD_CURVNET_RESTORE();
701 	sbuf_trim(&sb);
702 	sbuf_finish(&sb);
703 	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
704 	sbuf_delete(&sb);
705 	return (error);
706 }
707