xref: /freebsd/sys/fs/nfsserver/nfs_fha_new.c (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
5  * Copyright (c) 2013 Spectra Logic Corporation
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/types.h>
33 #include <sys/mbuf.h>
34 #include <sys/sbuf.h>
35 
36 #include <fs/nfs/nfsport.h>
37 #include <fs/nfsserver/nfs_fha_new.h>
38 
39 #include <rpc/rpc.h>
40 
/* Malloc type used for the FHA hash entries allocated in this file. */
static MALLOC_DEFINE(M_NFS_FHA, "NFS FHA", "NFS FHA");

/* Forward declarations for the file-local helpers defined below. */
static void		fhanew_init(void *foo);
static void		fhanew_uninit(void *foo);
static rpcproc_t	fhanew_get_procnum(rpcproc_t procnum);
static int		fhanew_get_fh(uint64_t *fh, int v3, struct mbuf **md,
			    caddr_t *dpos);
static int		fhanew_is_read(rpcproc_t procnum);
static int		fhanew_is_write(rpcproc_t procnum);
static int		fhanew_get_offset(struct mbuf **md, caddr_t *dpos,
			    int v3, struct fha_info *info);
static int		fhanew_no_offset(rpcproc_t procnum);
static void		fhanew_set_locktype(rpcproc_t procnum,
			    struct fha_info *info);
static int		fhenew_stats_sysctl(SYSCTL_HANDLER_ARGS);
static void		fha_extract_info(struct svc_req *req,
			    struct fha_info *i);

/* File-wide FHA state: tunables, sysctl context and the hash table. */
static struct fha_params fhanew_softc;

SYSCTL_DECL(_vfs_nfsd);

/*
 * Defined elsewhere: the table fhanew_get_procnum() indexes to convert
 * NFSv2 procedure numbers, and the nfsd service pool pointer.
 */
extern int newnfs_nfsv3_procid[];
extern SVCPOOL	*nfsrvd_pool;

/* Register fhanew_init()/fhanew_uninit() as the setup/teardown hooks. */
SYSINIT(nfs_fhanew, SI_SUB_ROOT_CONF, SI_ORDER_ANY, fhanew_init, NULL);
SYSUNINIT(nfs_fhanew, SI_SUB_ROOT_CONF, SI_ORDER_ANY, fhanew_uninit, NULL);
68 
69 static void
70 fhanew_init(void *foo)
71 {
72 	struct fha_params *softc;
73 	int i;
74 
75 	softc = &fhanew_softc;
76 
77 	bzero(softc, sizeof(*softc));
78 
79 	snprintf(softc->server_name, sizeof(softc->server_name),
80 	    FHANEW_SERVER_NAME);
81 
82 	softc->pool = &nfsrvd_pool;
83 
84 	/*
85 	 * Initialize the sysctl context list for the fha module.
86 	 */
87 	sysctl_ctx_init(&softc->sysctl_ctx);
88 	softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx,
89 	    SYSCTL_STATIC_CHILDREN(_vfs_nfsd), OID_AUTO, "fha",
90 	    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "NFS File Handle Affinity (FHA)");
91 	if (softc->sysctl_tree == NULL) {
92 		printf("%s: unable to allocate sysctl tree\n", __func__);
93 		return;
94 	}
95 
96 	for (i = 0; i < FHA_HASH_SIZE; i++)
97 		mtx_init(&softc->fha_hash[i].mtx, "fhalock", NULL, MTX_DEF);
98 
99 	/*
100 	 * Set the default tuning parameters.
101 	 */
102 	softc->ctls.enable = FHA_DEF_ENABLE;
103 	softc->ctls.read = FHA_DEF_READ;
104 	softc->ctls.write = FHA_DEF_WRITE;
105 	softc->ctls.bin_shift = FHA_DEF_BIN_SHIFT;
106 	softc->ctls.max_nfsds_per_fh = FHA_DEF_MAX_NFSDS_PER_FH;
107 	softc->ctls.max_reqs_per_nfsd = FHA_DEF_MAX_REQS_PER_NFSD;
108 
109 	/*
110 	 * Add sysctls so the user can change the tuning parameters.
111 	 */
112 	SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
113 	    OID_AUTO, "enable", CTLFLAG_RWTUN,
114 	    &softc->ctls.enable, 0, "Enable NFS File Handle Affinity (FHA)");
115 
116 	SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
117 	    OID_AUTO, "read", CTLFLAG_RWTUN,
118 	    &softc->ctls.read, 0, "Enable NFS FHA read locality");
119 
120 	SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
121 	    OID_AUTO, "write", CTLFLAG_RWTUN,
122 	    &softc->ctls.write, 0, "Enable NFS FHA write locality");
123 
124 	SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
125 	    OID_AUTO, "bin_shift", CTLFLAG_RWTUN,
126 	    &softc->ctls.bin_shift, 0,
127 	    "Maximum locality distance 2^(bin_shift) bytes");
128 
129 	SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
130 	    OID_AUTO, "max_nfsds_per_fh", CTLFLAG_RWTUN,
131 	    &softc->ctls.max_nfsds_per_fh, 0, "Maximum nfsd threads that "
132 	    "should be working on requests for the same file handle");
133 
134 	SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
135 	    OID_AUTO, "max_reqs_per_nfsd", CTLFLAG_RWTUN,
136 	    &softc->ctls.max_reqs_per_nfsd, 0, "Maximum requests that "
137 	    "single nfsd thread should be working on at any time");
138 
139 	SYSCTL_ADD_OID(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
140 	    OID_AUTO, "fhe_stats", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
141 	    0, 0, fhenew_stats_sysctl, "A", "");
142 }
143 
144 static void
145 fhanew_uninit(void *foo)
146 {
147 	struct fha_params *softc;
148 	int i;
149 
150 	softc = &fhanew_softc;
151 
152 	sysctl_ctx_free(&softc->sysctl_ctx);
153 	for (i = 0; i < FHA_HASH_SIZE; i++)
154 		mtx_destroy(&softc->fha_hash[i].mtx);
155 }
156 
157 static rpcproc_t
158 fhanew_get_procnum(rpcproc_t procnum)
159 {
160 	if (procnum > NFSV2PROC_STATFS)
161 		return (-1);
162 
163 	return (newnfs_nfsv3_procid[procnum]);
164 }
165 
166 static int
167 fhanew_get_fh(uint64_t *fh, int v3, struct mbuf **md, caddr_t *dpos)
168 {
169 	struct nfsrv_descript lnd, *nd;
170 	uint32_t *tl;
171 	uint8_t *buf;
172 	uint64_t t;
173 	int error, len, i;
174 
175 	error = 0;
176 	len = 0;
177 	nd = &lnd;
178 
179 	nd->nd_md = *md;
180 	nd->nd_dpos = *dpos;
181 
182 	if (v3) {
183 		NFSM_DISSECT_NONBLOCK(tl, uint32_t *, NFSX_UNSIGNED);
184 		if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) {
185 			error = EBADRPC;
186 			goto nfsmout;
187 		}
188 	} else {
189 		len = NFSX_V2FH;
190 	}
191 
192 	t = 0;
193 	if (len != 0) {
194 		NFSM_DISSECT_NONBLOCK(buf, uint8_t *, len);
195 		for (i = 0; i < len; i++)
196 			t ^= ((uint64_t)buf[i] << (i & 7) * 8);
197 	}
198 	*fh = t;
199 
200 nfsmout:
201 	*md = nd->nd_md;
202 	*dpos = nd->nd_dpos;
203 
204 	return (error);
205 }
206 
207 static int
208 fhanew_is_read(rpcproc_t procnum)
209 {
210 	if (procnum == NFSPROC_READ)
211 		return (1);
212 	else
213 		return (0);
214 }
215 
216 static int
217 fhanew_is_write(rpcproc_t procnum)
218 {
219 	if (procnum == NFSPROC_WRITE)
220 		return (1);
221 	else
222 		return (0);
223 }
224 
225 static int
226 fhanew_get_offset(struct mbuf **md, caddr_t *dpos, int v3,
227     struct fha_info *info)
228 {
229 	struct nfsrv_descript lnd, *nd;
230 	uint32_t *tl;
231 	int error;
232 
233 	error = 0;
234 
235 	nd = &lnd;
236 	nd->nd_md = *md;
237 	nd->nd_dpos = *dpos;
238 
239 	if (v3) {
240 		NFSM_DISSECT_NONBLOCK(tl, uint32_t *, 2 * NFSX_UNSIGNED);
241 		info->offset = fxdr_hyper(tl);
242 	} else {
243 		NFSM_DISSECT_NONBLOCK(tl, uint32_t *, NFSX_UNSIGNED);
244 		info->offset = fxdr_unsigned(uint32_t, *tl);
245 	}
246 
247 nfsmout:
248 	*md = nd->nd_md;
249 	*dpos = nd->nd_dpos;
250 
251 	return (error);
252 }
253 
254 static int
255 fhanew_no_offset(rpcproc_t procnum)
256 {
257 	if (procnum == NFSPROC_FSSTAT ||
258 	    procnum == NFSPROC_FSINFO ||
259 	    procnum == NFSPROC_PATHCONF ||
260 	    procnum == NFSPROC_NOOP ||
261 	    procnum == NFSPROC_NULL)
262 		return (1);
263 	else
264 		return (0);
265 }
266 
267 static void
268 fhanew_set_locktype(rpcproc_t procnum, struct fha_info *info)
269 {
270 	switch (procnum) {
271 	case NFSPROC_NULL:
272 	case NFSPROC_GETATTR:
273 	case NFSPROC_LOOKUP:
274 	case NFSPROC_ACCESS:
275 	case NFSPROC_READLINK:
276 	case NFSPROC_READ:
277 	case NFSPROC_READDIR:
278 	case NFSPROC_READDIRPLUS:
279 	case NFSPROC_WRITE:
280 		info->locktype = LK_SHARED;
281 		break;
282 	case NFSPROC_SETATTR:
283 	case NFSPROC_CREATE:
284 	case NFSPROC_MKDIR:
285 	case NFSPROC_SYMLINK:
286 	case NFSPROC_MKNOD:
287 	case NFSPROC_REMOVE:
288 	case NFSPROC_RMDIR:
289 	case NFSPROC_RENAME:
290 	case NFSPROC_LINK:
291 	case NFSPROC_FSSTAT:
292 	case NFSPROC_FSINFO:
293 	case NFSPROC_PATHCONF:
294 	case NFSPROC_COMMIT:
295 	case NFSPROC_NOOP:
296 		info->locktype = LK_EXCLUSIVE;
297 		break;
298 	}
299 }
300 
301 /*
302  * This just specifies that offsets should obey affinity when within
303  * the same 1Mbyte (1<<20) chunk for the file (reads only for now).
304  */
305 static void
306 fha_extract_info(struct svc_req *req, struct fha_info *i)
307 {
308 	struct mbuf *md;
309 	caddr_t dpos;
310 	static u_int64_t random_fh = 0;
311 	int error;
312 	int v3 = (req->rq_vers == 3);
313 	rpcproc_t procnum;
314 
315 	/*
316 	 * We start off with a random fh.  If we get a reasonable
317 	 * procnum, we set the fh.  If there's a concept of offset
318 	 * that we're interested in, we set that.
319 	 */
320 	i->fh = ++random_fh;
321 	i->offset = 0;
322 	i->locktype = LK_EXCLUSIVE;
323 	i->read = i->write = 0;
324 
325 	/*
326 	 * Extract the procnum and convert to v3 form if necessary,
327 	 * taking care to deal with out-of-range procnums.  Caller will
328 	 * ensure that rq_vers is either 2 or 3.
329 	 */
330 	procnum = req->rq_proc;
331 	if (!v3) {
332 		rpcproc_t tmp_procnum;
333 
334 		tmp_procnum = fhanew_get_procnum(procnum);
335 		if (tmp_procnum == -1)
336 			goto out;
337 		procnum = tmp_procnum;
338 	}
339 
340 	/*
341 	 * We do affinity for most.  However, we divide a realm of affinity
342 	 * by file offset so as to allow for concurrent random access.  We
343 	 * only do this for reads today, but this may change when IFS supports
344 	 * efficient concurrent writes.
345 	 */
346 	if (fhanew_no_offset(procnum))
347 		goto out;
348 
349 	i->read = fhanew_is_read(procnum);
350 	i->write = fhanew_is_write(procnum);
351 
352 	error = newnfs_realign(&req->rq_args, M_NOWAIT);
353 	if (error)
354 		goto out;
355 	md = req->rq_args;
356 	dpos = mtod(md, caddr_t);
357 
358 	/* Grab the filehandle. */
359 	error = fhanew_get_fh(&i->fh, v3, &md, &dpos);
360 	if (error)
361 		goto out;
362 
363 	/* Content ourselves with zero offset for all but reads. */
364 	if (i->read || i->write)
365 		fhanew_get_offset(&md, &dpos, v3, i);
366 
367 out:
368 	fhanew_set_locktype(procnum, i);
369 }
370 
371 static struct fha_hash_entry *
372 fha_hash_entry_new(u_int64_t fh)
373 {
374 	struct fha_hash_entry *e;
375 
376 	e = malloc(sizeof(*e), M_NFS_FHA, M_WAITOK);
377 	e->fh = fh;
378 	e->num_rw = 0;
379 	e->num_exclusive = 0;
380 	e->num_threads = 0;
381 	LIST_INIT(&e->threads);
382 
383 	return (e);
384 }
385 
386 static void
387 fha_hash_entry_destroy(struct fha_hash_entry *e)
388 {
389 
390 	mtx_assert(e->mtx, MA_OWNED);
391 	KASSERT(e->num_rw == 0,
392 	    ("%d reqs on destroyed fhe %p", e->num_rw, e));
393 	KASSERT(e->num_exclusive == 0,
394 	    ("%d exclusive reqs on destroyed fhe %p", e->num_exclusive, e));
395 	KASSERT(e->num_threads == 0,
396 	    ("%d threads on destroyed fhe %p", e->num_threads, e));
397 	free(e, M_NFS_FHA);
398 }
399 
400 static void
401 fha_hash_entry_remove(struct fha_hash_entry *e)
402 {
403 
404 	mtx_assert(e->mtx, MA_OWNED);
405 	LIST_REMOVE(e, link);
406 	fha_hash_entry_destroy(e);
407 }
408 
409 static struct fha_hash_entry *
410 fha_hash_entry_lookup(struct fha_params *softc, u_int64_t fh)
411 {
412 	struct fha_hash_slot *fhs;
413 	struct fha_hash_entry *fhe, *new_fhe;
414 
415 	fhs = &softc->fha_hash[fh % FHA_HASH_SIZE];
416 	new_fhe = fha_hash_entry_new(fh);
417 	new_fhe->mtx = &fhs->mtx;
418 	mtx_lock(&fhs->mtx);
419 	LIST_FOREACH(fhe, &fhs->list, link)
420 		if (fhe->fh == fh)
421 			break;
422 	if (!fhe) {
423 		fhe = new_fhe;
424 		LIST_INSERT_HEAD(&fhs->list, fhe, link);
425 	} else
426 		fha_hash_entry_destroy(new_fhe);
427 	return (fhe);
428 }
429 
430 static void
431 fha_hash_entry_add_thread(struct fha_hash_entry *fhe, SVCTHREAD *thread)
432 {
433 
434 	mtx_assert(fhe->mtx, MA_OWNED);
435 	thread->st_p2 = 0;
436 	LIST_INSERT_HEAD(&fhe->threads, thread, st_alink);
437 	fhe->num_threads++;
438 }
439 
440 static void
441 fha_hash_entry_remove_thread(struct fha_hash_entry *fhe, SVCTHREAD *thread)
442 {
443 
444 	mtx_assert(fhe->mtx, MA_OWNED);
445 	KASSERT(thread->st_p2 == 0,
446 	    ("%d reqs on removed thread %p", thread->st_p2, thread));
447 	LIST_REMOVE(thread, st_alink);
448 	fhe->num_threads--;
449 }
450 
451 /*
452  * Account for an ongoing operation associated with this file.
453  */
454 static void
455 fha_hash_entry_add_op(struct fha_hash_entry *fhe, int locktype, int count)
456 {
457 
458 	mtx_assert(fhe->mtx, MA_OWNED);
459 	if (LK_EXCLUSIVE == locktype)
460 		fhe->num_exclusive += count;
461 	else
462 		fhe->num_rw += count;
463 }
464 
465 /*
466  * Get the service thread currently associated with the fhe that is
467  * appropriate to handle this operation.
468  */
469 static SVCTHREAD *
470 fha_hash_entry_choose_thread(struct fha_params *softc,
471     struct fha_hash_entry *fhe, struct fha_info *i, SVCTHREAD *this_thread)
472 {
473 	SVCTHREAD *thread, *min_thread = NULL;
474 	int req_count, min_count = 0;
475 	off_t offset1, offset2;
476 
477 	LIST_FOREACH(thread, &fhe->threads, st_alink) {
478 		req_count = thread->st_p2;
479 
480 		/* If there are any writes in progress, use the first thread. */
481 		if (fhe->num_exclusive) {
482 #if 0
483 			ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
484 			    "fha: %p(%d)w", thread, req_count);
485 #endif
486 			return (thread);
487 		}
488 
489 		/* Check whether we should consider locality. */
490 		if ((i->read && !softc->ctls.read) ||
491 		    (i->write && !softc->ctls.write))
492 			goto noloc;
493 
494 		/*
495 		 * Check for locality, making sure that we won't
496 		 * exceed our per-thread load limit in the process.
497 		 */
498 		offset1 = i->offset;
499 		offset2 = thread->st_p3;
500 
501 		if (((offset1 >= offset2)
502 		  && ((offset1 - offset2) < (1 << softc->ctls.bin_shift)))
503 		 || ((offset2 > offset1)
504 		  && ((offset2 - offset1) < (1 << softc->ctls.bin_shift)))) {
505 			if ((softc->ctls.max_reqs_per_nfsd == 0) ||
506 			    (req_count < softc->ctls.max_reqs_per_nfsd)) {
507 #if 0
508 				ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
509 				    "fha: %p(%d)r", thread, req_count);
510 #endif
511 				return (thread);
512 			}
513 		}
514 
515 noloc:
516 		/*
517 		 * We don't have a locality match, so skip this thread,
518 		 * but keep track of the most attractive thread in case
519 		 * we need to come back to it later.
520 		 */
521 #if 0
522 		ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
523 		    "fha: %p(%d)s off1 %llu off2 %llu", thread,
524 		    req_count, offset1, offset2);
525 #endif
526 		if ((min_thread == NULL) || (req_count < min_count)) {
527 			min_count = req_count;
528 			min_thread = thread;
529 		}
530 	}
531 
532 	/*
533 	 * We didn't find a good match yet.  See if we can add
534 	 * a new thread to this file handle entry's thread list.
535 	 */
536 	if ((softc->ctls.max_nfsds_per_fh == 0) ||
537 	    (fhe->num_threads < softc->ctls.max_nfsds_per_fh)) {
538 		thread = this_thread;
539 #if 0
540 		ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
541 		    "fha: %p(%d)t", thread, thread->st_p2);
542 #endif
543 		fha_hash_entry_add_thread(fhe, thread);
544 	} else {
545 		/*
546 		 * We don't want to use any more threads for this file, so
547 		 * go back to the most attractive nfsd we're already using.
548 		 */
549 		thread = min_thread;
550 	}
551 
552 	return (thread);
553 }
554 
555 /*
556  * After getting a request, try to assign it to some thread.  Usually we
557  * handle it ourselves.
558  */
559 SVCTHREAD *
560 fhanew_assign(SVCTHREAD *this_thread, struct svc_req *req)
561 {
562 	struct fha_params *softc = &fhanew_softc;
563 	SVCTHREAD *thread;
564 	struct fha_info i;
565 	struct fha_hash_entry *fhe;
566 
567 	/* Check to see whether we're enabled. */
568 	if (softc->ctls.enable == 0)
569 		goto thist;
570 
571 	/*
572 	 * Only do placement if this is an NFS request.
573 	 */
574 	if (req->rq_prog != NFS_PROG)
575 		goto thist;
576 
577 	if (req->rq_vers != 2 && req->rq_vers != 3)
578 		goto thist;
579 
580 	fha_extract_info(req, &i);
581 
582 	/*
583 	 * We save the offset associated with this request for later
584 	 * nfsd matching.
585 	 */
586 	fhe = fha_hash_entry_lookup(softc, i.fh);
587 	req->rq_p1 = fhe;
588 	req->rq_p2 = i.locktype;
589 	req->rq_p3 = i.offset;
590 
591 	/*
592 	 * Choose a thread, taking into consideration locality, thread load,
593 	 * and the number of threads already working on this file.
594 	 */
595 	thread = fha_hash_entry_choose_thread(softc, fhe, &i, this_thread);
596 	KASSERT(thread, ("fha_assign: NULL thread!"));
597 	fha_hash_entry_add_op(fhe, i.locktype, 1);
598 	thread->st_p2++;
599 	thread->st_p3 = i.offset;
600 
601 	/*
602 	 * Grab the pool lock here to not let chosen thread go away before
603 	 * the new request inserted to its queue while we drop fhe lock.
604 	 */
605 	mtx_lock(&thread->st_lock);
606 	mtx_unlock(fhe->mtx);
607 
608 	return (thread);
609 thist:
610 	req->rq_p1 = NULL;
611 	mtx_lock(&this_thread->st_lock);
612 	return (this_thread);
613 }
614 
615 /*
616  * Called when we're done with an operation.  The request has already
617  * been de-queued.
618  */
619 void
620 fhanew_nd_complete(SVCTHREAD *thread, struct svc_req *req)
621 {
622 	struct fha_hash_entry *fhe = req->rq_p1;
623 	struct mtx *mtx;
624 
625 	/*
626 	 * This may be called for reqs that didn't go through
627 	 * fha_assign (e.g. extra NULL ops used for RPCSEC_GSS.
628 	 */
629 	if (!fhe)
630 		return;
631 
632 	mtx = fhe->mtx;
633 	mtx_lock(mtx);
634 	fha_hash_entry_add_op(fhe, req->rq_p2, -1);
635 	thread->st_p2--;
636 	KASSERT(thread->st_p2 >= 0, ("Negative request count %d on %p",
637 	    thread->st_p2, thread));
638 	if (thread->st_p2 == 0) {
639 		fha_hash_entry_remove_thread(fhe, thread);
640 		if (0 == fhe->num_rw + fhe->num_exclusive)
641 			fha_hash_entry_remove(fhe);
642 	}
643 	mtx_unlock(mtx);
644 }
645 
646 static int
647 fhenew_stats_sysctl(SYSCTL_HANDLER_ARGS)
648 {
649 	struct fha_params *softc = &fhanew_softc;
650 	int error, i;
651 	struct sbuf sb;
652 	struct fha_hash_entry *fhe;
653 	bool_t first, hfirst;
654 	SVCTHREAD *thread;
655 
656 	sbuf_new(&sb, NULL, 65536, SBUF_FIXEDLEN);
657 
658 	if (!*softc->pool) {
659 		sbuf_printf(&sb, "NFSD not running\n");
660 		goto out;
661 	}
662 
663 	for (i = 0; i < FHA_HASH_SIZE; i++)
664 		if (!LIST_EMPTY(&softc->fha_hash[i].list))
665 			break;
666 
667 	if (i == FHA_HASH_SIZE) {
668 		sbuf_printf(&sb, "No file handle entries.\n");
669 		goto out;
670 	}
671 
672 	hfirst = TRUE;
673 	for (; i < FHA_HASH_SIZE; i++) {
674 		mtx_lock(&softc->fha_hash[i].mtx);
675 		if (LIST_EMPTY(&softc->fha_hash[i].list)) {
676 			mtx_unlock(&softc->fha_hash[i].mtx);
677 			continue;
678 		}
679 		sbuf_printf(&sb, "%shash %d: {\n", hfirst ? "" : ", ", i);
680 		first = TRUE;
681 		LIST_FOREACH(fhe, &softc->fha_hash[i].list, link) {
682 			sbuf_printf(&sb, "%sfhe %p: {\n", first ? "  " : ", ",
683 			    fhe);
684 			sbuf_printf(&sb, "    fh: %ju\n", (uintmax_t) fhe->fh);
685 			sbuf_printf(&sb, "    num_rw/exclusive: %d/%d\n",
686 			    fhe->num_rw, fhe->num_exclusive);
687 			sbuf_printf(&sb, "    num_threads: %d\n",
688 			    fhe->num_threads);
689 
690 			LIST_FOREACH(thread, &fhe->threads, st_alink) {
691 				sbuf_printf(&sb, "      thread %p offset %ju "
692 				    "reqs %d\n", thread,
693 				    thread->st_p3, thread->st_p2);
694 			}
695 
696 			sbuf_printf(&sb, "  }");
697 			first = FALSE;
698 		}
699 		sbuf_printf(&sb, "\n}");
700 		mtx_unlock(&softc->fha_hash[i].mtx);
701 		hfirst = FALSE;
702 	}
703 
704  out:
705 	sbuf_trim(&sb);
706 	sbuf_finish(&sb);
707 	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
708 	sbuf_delete(&sb);
709 	return (error);
710 }
711