xref: /freebsd/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c (revision df58e8b1506f241670be86a560fb6e8432043aee)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
4  *  Copyright (C) 2007 The Regents of the University of California.
5  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
6  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
7  *  UCRL-CODE-235197
8  *
9  *  This file is part of the SPL, Solaris Porting Layer.
10  *
11  *  The SPL is free software; you can redistribute it and/or modify it
12  *  under the terms of the GNU General Public License as published by the
13  *  Free Software Foundation; either version 2 of the License, or (at your
14  *  option) any later version.
15  *
16  *  The SPL is distributed in the hope that it will be useful, but WITHOUT
17  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
19  *  for more details.
20  *
21  *  You should have received a copy of the GNU General Public License along
22  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
23  *
24  *  Solaris Porting Layer (SPL) Proc Implementation.
25  */
26 /*
27  * Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
28  */
29 
30 #include <sys/systeminfo.h>
31 #include <sys/kstat.h>
32 #include <sys/kmem.h>
33 #include <sys/kmem_cache.h>
34 #include <sys/vmem.h>
35 #include <sys/proc.h>
36 #include <linux/ctype.h>
37 #include <linux/kmod.h>
38 #include <linux/seq_file.h>
39 #include <linux/uaccess.h>
40 #include <linux/version.h>
41 #include "zfs_gitrev.h"
42 
43 #if defined(CONSTIFY_PLUGIN)
44 typedef struct ctl_table __no_const spl_ctl_table;
45 #else
46 typedef struct ctl_table spl_ctl_table;
47 #endif
48 
49 #ifdef HAVE_PROC_HANDLER_CTL_TABLE_CONST
50 #define	CONST_CTL_TABLE		const struct ctl_table
51 #else
52 #define	CONST_CTL_TABLE		struct ctl_table
53 #endif
54 
55 static unsigned long table_min = 0;
56 static unsigned long table_max = ~0;
57 
58 static struct ctl_table_header *spl_header = NULL;
59 #ifndef HAVE_REGISTER_SYSCTL_TABLE
60 static struct ctl_table_header *spl_kmem = NULL;
61 static struct ctl_table_header *spl_kstat = NULL;
62 #endif
63 static struct proc_dir_entry *proc_spl = NULL;
64 static struct proc_dir_entry *proc_spl_kmem = NULL;
65 static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
66 struct proc_dir_entry *proc_spl_kstat = NULL;
67 
#ifdef DEBUG_KMEM
/*
 * sysctl handler for the DEBUG_KMEM byte counters (kmem_used).
 *
 * The table's .data points at an atomic64_t; a stack copy of the table
 * ("dummy") is redirected at a plain unsigned long so the generic
 * proc_doulongvec_minmax() helper can format it for userspace.
 *
 * Writes are silently consumed (the entry is read-only, mode 0444):
 * *ppos is advanced past the input so the write appears to succeed
 * without changing anything.
 */
static int
proc_domemused(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = 0;
	unsigned long val;
	spl_ctl_table dummy = *table;

	dummy.data = &val;
	/*
	 * NOTE(review): dummy.proc_handler is set but never invoked here --
	 * proc_doulongvec_minmax() is called directly below.  Looks like a
	 * harmless leftover; confirm before removing.
	 */
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &table_min;
	dummy.extra2 = &table_max;

	if (write) {
		/* Read-only entry: swallow the write by advancing the offset. */
		*ppos += *lenp;
	} else {
		/* Snapshot the atomic counter, then let the helper format it. */
		val = atomic64_read((atomic64_t *)table->data);
		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
	}

	return (rc);
}
#endif /* DEBUG_KMEM */
92 
/*
 * sysctl handler for the slab_kvmem_{total,alloc,max} entries.
 *
 * The table's .data is not a pointer at all: it carries an encoded mask
 * (e.g. KMC_KVMEM | KMC_TOTAL) selecting both which caches to include
 * and which per-cache statistic to sum.  The summed value is reported
 * through a redirected stack copy of the table via
 * proc_doulongvec_minmax(), mirroring proc_domemused() above.
 *
 * Writes are silently consumed (entries are mode 0444).
 */
static int
proc_doslab(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = 0;
	unsigned long val = 0, mask;
	spl_ctl_table dummy = *table;
	spl_kmem_cache_t *skc = NULL;

	dummy.data = &val;
	/*
	 * NOTE(review): as in proc_domemused(), this handler assignment is
	 * never used -- proc_doulongvec_minmax() is called directly.
	 */
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &table_min;
	dummy.extra2 = &table_max;

	if (write) {
		/* Read-only entry: swallow the write by advancing the offset. */
		*ppos += *lenp;
	} else {
		/* Hold the cache-list lock while walking and summing. */
		down_read(&spl_kmem_cache_sem);
		mask = (unsigned long)table->data;

		list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {

			/* Only use slabs of the correct kmem/vmem type */
			if (!(skc->skc_flags & mask))
				continue;

			/* Sum the specified field for selected slabs */
			switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
			case KMC_TOTAL:
				val += skc->skc_slab_size * skc->skc_slab_total;
				break;
			case KMC_ALLOC:
				val += skc->skc_obj_size * skc->skc_obj_alloc;
				break;
			case KMC_MAX:
				val += skc->skc_obj_size * skc->skc_obj_max;
				break;
			}
		}

		up_read(&spl_kmem_cache_sem);
		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
	}

	return (rc);
}
139 
/*
 * sysctl handler for kernel.spl.hostid.
 *
 * The hostid is exposed as a bare hex string (no leading "0x").  Reads
 * format zone_get_hostid(NULL) into a local buffer and hand it to
 * proc_dostring() through a redirected table copy.  Writes go the other
 * way: proc_dostring() fills the buffer from userspace and the result
 * is parsed as base-16 into spl_hostid.
 *
 * Returns 0 on success, -EINVAL if the written string contains no hex
 * digits at all.
 */
static int
proc_dohostid(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	char *end, str[32];
	unsigned long hid;
	spl_ctl_table dummy = *table;

	/* Redirect the table at our local string buffer. */
	dummy.data = str;
	dummy.maxlen = sizeof (str) - 1;

	if (!write)
		snprintf(str, sizeof (str), "%lx",
		    (unsigned long) zone_get_hostid(NULL));

	/* always returns 0 */
	proc_dostring(&dummy, write, buffer, lenp, ppos);

	if (write) {
		/*
		 * We can't use proc_doulongvec_minmax() in the write
		 * case here because hostid, while a hex value, has no
		 * leading 0x, which confuses the helper function.
		 */

		hid = simple_strtoul(str, &end, 16);
		if (str == end)
			return (-EINVAL);
		/*
		 * NOTE(review): trailing non-hex characters after a valid
		 * prefix are silently ignored by simple_strtoul(); confirm
		 * that is the intended acceptance behavior.
		 */
		spl_hostid = hid;
	}

	return (0);
}
173 
/*
 * Emit the two-line column header for /proc/spl/kmem/slab: a banner row
 * grouping the columns (cache / slab / object / emergency) followed by
 * the individual column titles.
 */
static void
slab_seq_show_headers(struct seq_file *f)
{
	seq_printf(f,
	    "--------------------- cache ----------"
	    "---------------------------------------------  "
	    "----- slab ------  "
	    "---- object -----  "
	    "--- emergency ---\n"
	    "name                                  "
	    "  flags      size     alloc slabsize  objsize  "
	    "total alloc   max  "
	    "total alloc   max  "
	    "dlock alloc   max\n");
}
190 
/*
 * seq_file show callback: print one row of slab statistics for the
 * cache passed in "p" (set up by slab_seq_start()/slab_seq_next()).
 *
 * Two row formats exist:
 *   - KMC_SLAB caches (backed by a native Linux kmem cache) only track
 *     active object counts here; most columns are printed as "-".
 *   - SPL-managed caches print the full slab/object/emergency columns.
 *
 * skc_lock is held across the statistic reads so a row is internally
 * consistent.  Always returns 0.
 */
static int
slab_seq_show(struct seq_file *f, void *p)
{
	spl_kmem_cache_t *skc = p;

	ASSERT(skc->skc_magic == SKC_MAGIC);

	if (skc->skc_flags & KMC_SLAB) {
		/*
		 * This cache is backed by a generic Linux kmem cache which
		 * has its own accounting. For these caches we only track
		 * the number of active allocated objects that exist within
		 * the underlying Linux slabs. For the overall statistics of
		 * the underlying Linux cache please refer to /proc/slabinfo.
		 */
		spin_lock(&skc->skc_lock);
		uint64_t objs_allocated =
		    percpu_counter_sum(&skc->skc_linux_alloc);
		seq_printf(f, "%-36s  ", skc->skc_name);
		seq_printf(f, "0x%05lx %9s %9lu %8s %8u  "
		    "%5s %5s %5s  %5s %5lu %5s  %5s %5s %5s\n",
		    (long unsigned)skc->skc_flags,
		    "-",
		    (long unsigned)(skc->skc_obj_size * objs_allocated),
		    "-",
		    (unsigned)skc->skc_obj_size,
		    "-", "-", "-", "-",
		    (long unsigned)objs_allocated,
		    "-", "-", "-", "-");
		spin_unlock(&skc->skc_lock);
		return (0);
	}

	/* SPL-managed cache: report the full statistics row. */
	spin_lock(&skc->skc_lock);
	seq_printf(f, "%-36s  ", skc->skc_name);
	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u  "
	    "%5lu %5lu %5lu  %5lu %5lu %5lu  %5lu %5lu %5lu\n",
	    (long unsigned)skc->skc_flags,
	    (long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
	    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
	    (unsigned)skc->skc_slab_size,
	    (unsigned)skc->skc_obj_size,
	    (long unsigned)skc->skc_slab_total,
	    (long unsigned)skc->skc_slab_alloc,
	    (long unsigned)skc->skc_slab_max,
	    (long unsigned)skc->skc_obj_total,
	    (long unsigned)skc->skc_obj_alloc,
	    (long unsigned)skc->skc_obj_max,
	    (long unsigned)skc->skc_obj_deadlock,
	    (long unsigned)skc->skc_obj_emergency,
	    (long unsigned)skc->skc_obj_emergency_max);
	spin_unlock(&skc->skc_lock);
	return (0);
}
245 
246 static void *
slab_seq_start(struct seq_file * f,loff_t * pos)247 slab_seq_start(struct seq_file *f, loff_t *pos)
248 {
249 	struct list_head *p;
250 	loff_t n = *pos;
251 
252 	down_read(&spl_kmem_cache_sem);
253 	if (!n)
254 		slab_seq_show_headers(f);
255 
256 	p = spl_kmem_cache_list.next;
257 	while (n--) {
258 		p = p->next;
259 		if (p == &spl_kmem_cache_list)
260 			return (NULL);
261 	}
262 
263 	return (list_entry(p, spl_kmem_cache_t, skc_list));
264 }
265 
266 static void *
slab_seq_next(struct seq_file * f,void * p,loff_t * pos)267 slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
268 {
269 	spl_kmem_cache_t *skc = p;
270 
271 	++*pos;
272 	return ((skc->skc_list.next == &spl_kmem_cache_list) ?
273 	    NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list));
274 }
275 
/*
 * seq_file stop callback: drop the cache-list read lock taken in
 * slab_seq_start().
 */
static void
slab_seq_stop(struct seq_file *f, void *v)
{
	up_read(&spl_kmem_cache_sem);
}
281 
282 static const struct seq_operations slab_seq_ops = {
283 	.show  = slab_seq_show,
284 	.start = slab_seq_start,
285 	.next  = slab_seq_next,
286 	.stop  = slab_seq_stop,
287 };
288 
289 static int
proc_slab_open(struct inode * inode,struct file * filp)290 proc_slab_open(struct inode *inode, struct file *filp)
291 {
292 	return (seq_open(filp, &slab_seq_ops));
293 }
294 
/*
 * File operations for /proc/spl/kmem/slab.  Kernels that provide
 * struct proc_ops (HAVE_PROC_OPS_STRUCT) use the proc_-prefixed member
 * names; older kernels use the struct file_operations names.  Both arms
 * wire up the same seq_file helpers.
 */
static const kstat_proc_op_t proc_slab_operations = {
#ifdef HAVE_PROC_OPS_STRUCT
	.proc_open	= proc_slab_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= seq_release,
#else
	.open		= proc_slab_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
#endif
};
308 
/*
 * sysctl entries under kernel.spl.kmem.  All entries are read-only
 * (0444).  The slab_kvmem_* entries smuggle their selection mask
 * through .data (see proc_doslab()); they do not point at real storage.
 */
static struct ctl_table spl_kmem_table[] = {
#ifdef DEBUG_KMEM
	{
		.procname	= "kmem_used",
		.data		= &kmem_alloc_used,	/* atomic64_t, see proc_domemused() */
		.maxlen		= sizeof (atomic64_t),
		.mode		= 0444,
		.proc_handler	= &proc_domemused,
	},
	{
		.procname	= "kmem_max",
		/*
		 * NOTE(review): maxlen is sizeof (uint64_t) but
		 * proc_doulongvec_minmax() operates on unsigned long; the
		 * two differ on 32-bit kernels -- confirm kmem_alloc_max's
		 * declared type matches.
		 */
		.data		= &kmem_alloc_max,
		.maxlen		= sizeof (uint64_t),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.mode		= 0444,
		.proc_handler	= &proc_doulongvec_minmax,
	},
#endif /* DEBUG_KMEM */
	{
		.procname	= "slab_kvmem_total",
		.data		= (void *)(KMC_KVMEM | KMC_TOTAL),
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.mode		= 0444,
		.proc_handler	= &proc_doslab,
	},
	{
		.procname	= "slab_kvmem_alloc",
		.data		= (void *)(KMC_KVMEM | KMC_ALLOC),
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.mode		= 0444,
		.proc_handler	= &proc_doslab,
	},
	{
		.procname	= "slab_kvmem_max",
		.data		= (void *)(KMC_KVMEM | KMC_MAX),
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.mode		= 0444,
		.proc_handler	= &proc_doslab,
	},
	{},	/* terminating sentinel, see spl_proc_register_sysctl() */
};
357 
/*
 * kernel.spl.kstat holds no static entries; registering this
 * sentinel-only table just creates the sysctl directory node.
 */
static struct ctl_table spl_kstat_table[] = {
	{},
};
361 
/*
 * Top-level kernel.spl sysctl entries: the build's git revision
 * (read-only) and the tunable hostid.  On kernels with the legacy
 * register_sysctl_table() API the kmem/kstat sub-tables are attached
 * here as .child entries; newer kernels register them by path instead
 * (see spl_proc_init()).
 */
static struct ctl_table spl_table[] = {
	/*
	 * NB No .strategy entries have been provided since
	 * sysctl(8) prefers to go via /proc for portability.
	 */
	{
		.procname	= "gitrev",
		.data		= (char *)ZFS_META_GITREV,
		.maxlen		= sizeof (ZFS_META_GITREV),
		.mode		= 0444,
		.proc_handler	= &proc_dostring,
	},
	{
		.procname	= "hostid",
		.data		= &spl_hostid,
		.maxlen		= sizeof (unsigned long),
		.mode		= 0644,
		.proc_handler	= &proc_dohostid,
	},
#ifdef HAVE_REGISTER_SYSCTL_TABLE
	{
		.procname	= "kmem",
		.mode		= 0555,
		.child		= spl_kmem_table,
	},
	{
		.procname	= "kstat",
		.mode		= 0555,
		.child		= spl_kstat_table,
	},
#endif
	{},	/* terminating sentinel */
};
395 
#ifdef HAVE_REGISTER_SYSCTL_TABLE
/*
 * Legacy register_sysctl_table() API: the mount point must be described
 * as nested tables, so wrap spl_table under "spl" and that under
 * "kernel" to land at kernel.spl.
 */
static struct ctl_table spl_dir[] = {
	{
		.procname	= "spl",
		.mode		= 0555,
		.child		= spl_table,
	},
	{}
};

static struct ctl_table spl_root[] = {
	{
		.procname	= "kernel",
		.mode		= 0555,
		.child		= spl_dir,
	},
	{}
};
#endif
415 
/*
 * Tear down everything spl_proc_init() created: the /proc/spl tree
 * (leaves first) and the registered sysctl headers.  Also used as the
 * error-unwind path for a partially completed spl_proc_init(), so each
 * sysctl header is NULLed after unregistering.
 *
 * NOTE(review): on the init error path some of the proc entries may
 * never have been created; confirm remove_proc_entry() of a missing
 * name is acceptable here.
 */
static void spl_proc_cleanup(void)
{
	remove_proc_entry("kstat", proc_spl);
	remove_proc_entry("slab", proc_spl_kmem);
	remove_proc_entry("kmem", proc_spl);
	remove_proc_entry("spl", NULL);

#ifndef HAVE_REGISTER_SYSCTL_TABLE
	if (spl_kstat) {
		unregister_sysctl_table(spl_kstat);
		spl_kstat = NULL;
	}
	if (spl_kmem) {
		unregister_sysctl_table(spl_kmem);
		spl_kmem = NULL;
	}
#endif
	if (spl_header) {
		unregister_sysctl_table(spl_header);
		spl_header = NULL;
	}
}
438 
439 #ifndef HAVE_REGISTER_SYSCTL_TABLE
440 
441 /*
442  * Traditionally, struct ctl_table arrays have been terminated by an "empty"
443  * sentinel element (specifically, one with .procname == NULL).
444  *
445  * Linux 6.6 began migrating away from this, adding register_sysctl_sz() so
446  * that callers could provide the size directly, and redefining
447  * register_sysctl() to just call register_sysctl_sz() with the array size. It
448  * retained support for the terminating element so that existing callers would
449  * continue to work.
450  *
451  * Linux 6.11 removed support for the terminating element, instead interpreting
452  * it as a real malformed element, and rejecting it.
453  *
 * In order to continue supporting older kernels, we retain the terminating
455  * sentinel element for our sysctl tables, but instead detect availability of
456  * register_sysctl_sz(). If it exists, we pass it the array size -1, stopping
457  * the kernel from trying to process the terminator. For pre-6.6 kernels that
458  * don't have register_sysctl_sz(), we just use register_sysctl(), which can
459  * handle the terminating element as it always has.
460  */
461 #ifdef HAVE_REGISTER_SYSCTL_SZ
462 #define	spl_proc_register_sysctl(p, t)	\
463 	register_sysctl_sz(p, t, ARRAY_SIZE(t)-1)
464 #else
465 #define	spl_proc_register_sysctl(p, t)	\
466 	register_sysctl(p, t)
467 #endif
468 #endif
469 
/*
 * Create the SPL's sysctl (kernel.spl.*) and procfs (/proc/spl/*)
 * namespaces at module load.
 *
 * sysctl registration takes one of two forms: the legacy
 * register_sysctl_table() with the nested spl_root hierarchy, or the
 * path-based spl_proc_register_sysctl() wrapper which registers
 * kernel/spl, kernel/spl/kmem and kernel/spl/kstat individually.
 *
 * Returns 0 on success or -EUNATCH on any registration failure, in
 * which case everything created so far is torn down via
 * spl_proc_cleanup().
 */
int
spl_proc_init(void)
{
	int rc = 0;

#ifdef HAVE_REGISTER_SYSCTL_TABLE
	spl_header = register_sysctl_table(spl_root);
	if (spl_header == NULL)
		return (-EUNATCH);
#else
	spl_header = spl_proc_register_sysctl("kernel/spl", spl_table);
	if (spl_header == NULL)
		return (-EUNATCH);

	spl_kmem = spl_proc_register_sysctl("kernel/spl/kmem", spl_kmem_table);
	if (spl_kmem == NULL) {
		rc = -EUNATCH;
		goto out;
	}
	spl_kstat = spl_proc_register_sysctl("kernel/spl/kstat",
	    spl_kstat_table);
	if (spl_kstat == NULL) {
		rc = -EUNATCH;
		goto out;
	}
#endif

	/* /proc/spl directory tree. */
	proc_spl = proc_mkdir("spl", NULL);
	if (proc_spl == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
	if (proc_spl_kmem == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	/* /proc/spl/kmem/slab, served by the seq_file iterator above. */
	proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
	    &proc_slab_operations, NULL);
	if (proc_spl_kmem_slab == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	/* /proc/spl/kstat, populated later by the kstat subsystem. */
	proc_spl_kstat = proc_mkdir("kstat", proc_spl);
	if (proc_spl_kstat == NULL) {
		rc = -EUNATCH;
		goto out;
	}
out:
	if (rc)
		spl_proc_cleanup();

	return (rc);
}
527 
/* Module-unload hook: tear down all sysctl and procfs registrations. */
void
spl_proc_fini(void)
{
	spl_proc_cleanup();
}
533