xref: /freebsd/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*
2  *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3  *  Copyright (C) 2007 The Regents of the University of California.
4  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
6  *  UCRL-CODE-235197
7  *
8  *  This file is part of the SPL, Solaris Porting Layer.
9  *
10  *  The SPL is free software; you can redistribute it and/or modify it
11  *  under the terms of the GNU General Public License as published by the
12  *  Free Software Foundation; either version 2 of the License, or (at your
13  *  option) any later version.
14  *
15  *  The SPL is distributed in the hope that it will be useful, but WITHOUT
16  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18  *  for more details.
19  *
20  *  You should have received a copy of the GNU General Public License along
21  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
22  *
23  *  Solaris Porting Layer (SPL) Proc Implementation.
24  */
25 /*
26  * Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
27  */
28 
29 #include <sys/systeminfo.h>
30 #include <sys/kstat.h>
31 #include <sys/kmem.h>
32 #include <sys/kmem_cache.h>
33 #include <sys/vmem.h>
34 #include <sys/proc.h>
35 #include <linux/ctype.h>
36 #include <linux/kmod.h>
37 #include <linux/seq_file.h>
38 #include <linux/uaccess.h>
39 #include <linux/version.h>
40 #include "zfs_gitrev.h"
41 
/*
 * Local alias for struct ctl_table used for the on-stack copies made by the
 * handlers below.  When CONSTIFY_PLUGIN is defined the alias carries
 * __no_const so those copies remain writable.
 */
#ifdef CONSTIFY_PLUGIN
typedef struct ctl_table __no_const spl_ctl_table;
#else
typedef struct ctl_table spl_ctl_table;
#endif

/*
 * Type of the table argument taken by our proc handlers.
 * NOTE(review): HAVE_PROC_HANDLER_CTL_TABLE_CONST is presumably set by the
 * build when the kernel's proc_handler prototype takes a const table.
 */
#ifdef HAVE_PROC_HANDLER_CTL_TABLE_CONST
#define	CONST_CTL_TABLE		const struct ctl_table
#else
#define	CONST_CTL_TABLE		struct ctl_table
#endif
53 
54 static unsigned long table_min = 0;
55 static unsigned long table_max = ~0;
56 
57 static struct ctl_table_header *spl_header = NULL;
58 #ifndef HAVE_REGISTER_SYSCTL_TABLE
59 static struct ctl_table_header *spl_kmem = NULL;
60 static struct ctl_table_header *spl_kstat = NULL;
61 #endif
62 static struct proc_dir_entry *proc_spl = NULL;
63 static struct proc_dir_entry *proc_spl_kmem = NULL;
64 static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
65 struct proc_dir_entry *proc_spl_kstat = NULL;
66 
#ifdef DEBUG_KMEM
/*
 * Sysctl handler that reports an atomic memory-usage counter.
 *
 * table->data points at the counter (atomic64_t when HAVE_ATOMIC64_T is
 * defined, atomic_t otherwise).  On read the counter is sampled into a
 * local unsigned long and formatted by proc_doulongvec_minmax() through a
 * stack copy of the table.  On write the input is skipped by advancing
 * *ppos and nothing is stored.
 *
 * Returns 0 for writes, otherwise the result of proc_doulongvec_minmax().
 */
static int
proc_domemused(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned long used;
	spl_ctl_table dummy = *table;

	/* Point the copy at our snapshot rather than at the atomic. */
	dummy.data = &used;
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &table_min;
	dummy.extra2 = &table_max;

	if (write) {
		*ppos += *lenp;
		return (0);
	}

#ifdef HAVE_ATOMIC64_T
	used = atomic64_read((atomic64_t *)table->data);
#else
	used = atomic_read((atomic_t *)table->data);
#endif /* HAVE_ATOMIC64_T */

	return (proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos));
}
#endif /* DEBUG_KMEM */
95 
96 static int
97 proc_doslab(CONST_CTL_TABLE *table, int write,
98     void __user *buffer, size_t *lenp, loff_t *ppos)
99 {
100 	int rc = 0;
101 	unsigned long val = 0, mask;
102 	spl_ctl_table dummy = *table;
103 	spl_kmem_cache_t *skc = NULL;
104 
105 	dummy.data = &val;
106 	dummy.proc_handler = &proc_dointvec;
107 	dummy.extra1 = &table_min;
108 	dummy.extra2 = &table_max;
109 
110 	if (write) {
111 		*ppos += *lenp;
112 	} else {
113 		down_read(&spl_kmem_cache_sem);
114 		mask = (unsigned long)table->data;
115 
116 		list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
117 
118 			/* Only use slabs of the correct kmem/vmem type */
119 			if (!(skc->skc_flags & mask))
120 				continue;
121 
122 			/* Sum the specified field for selected slabs */
123 			switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
124 			case KMC_TOTAL:
125 				val += skc->skc_slab_size * skc->skc_slab_total;
126 				break;
127 			case KMC_ALLOC:
128 				val += skc->skc_obj_size * skc->skc_obj_alloc;
129 				break;
130 			case KMC_MAX:
131 				val += skc->skc_obj_size * skc->skc_obj_max;
132 				break;
133 			}
134 		}
135 
136 		up_read(&spl_kmem_cache_sem);
137 		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
138 	}
139 
140 	return (rc);
141 }
142 
143 static int
144 proc_dohostid(CONST_CTL_TABLE *table, int write,
145     void __user *buffer, size_t *lenp, loff_t *ppos)
146 {
147 	char *end, str[32];
148 	unsigned long hid;
149 	spl_ctl_table dummy = *table;
150 
151 	dummy.data = str;
152 	dummy.maxlen = sizeof (str) - 1;
153 
154 	if (!write)
155 		snprintf(str, sizeof (str), "%lx",
156 		    (unsigned long) zone_get_hostid(NULL));
157 
158 	/* always returns 0 */
159 	proc_dostring(&dummy, write, buffer, lenp, ppos);
160 
161 	if (write) {
162 		/*
163 		 * We can't use proc_doulongvec_minmax() in the write
164 		 * case here because hostid, while a hex value, has no
165 		 * leading 0x, which confuses the helper function.
166 		 */
167 
168 		hid = simple_strtoul(str, &end, 16);
169 		if (str == end)
170 			return (-EINVAL);
171 		spl_hostid = hid;
172 	}
173 
174 	return (0);
175 }
176 
/*
 * Emit the two header lines of the slab listing: a banner naming the
 * column groups (cache, slab, object, emergency) followed by the
 * per-column titles.
 */
static void
slab_seq_show_headers(struct seq_file *f)
{
	seq_printf(f,
	    /* group banner */
	    "--------------------- cache ----------"
	    "---------------------------------------------  "
	    "----- slab ------  "
	    "---- object -----  "
	    "--- emergency ---\n"
	    /* column titles */
	    "name                                  "
	    "  flags      size     alloc slabsize  objsize  "
	    "total alloc   max  "
	    "total alloc   max  "
	    "dlock alloc   max\n");
}
193 
194 static int
195 slab_seq_show(struct seq_file *f, void *p)
196 {
197 	spl_kmem_cache_t *skc = p;
198 
199 	ASSERT(skc->skc_magic == SKC_MAGIC);
200 
201 	if (skc->skc_flags & KMC_SLAB) {
202 		/*
203 		 * This cache is backed by a generic Linux kmem cache which
204 		 * has its own accounting. For these caches we only track
205 		 * the number of active allocated objects that exist within
206 		 * the underlying Linux slabs. For the overall statistics of
207 		 * the underlying Linux cache please refer to /proc/slabinfo.
208 		 */
209 		spin_lock(&skc->skc_lock);
210 		uint64_t objs_allocated =
211 		    percpu_counter_sum(&skc->skc_linux_alloc);
212 		seq_printf(f, "%-36s  ", skc->skc_name);
213 		seq_printf(f, "0x%05lx %9s %9lu %8s %8u  "
214 		    "%5s %5s %5s  %5s %5lu %5s  %5s %5s %5s\n",
215 		    (long unsigned)skc->skc_flags,
216 		    "-",
217 		    (long unsigned)(skc->skc_obj_size * objs_allocated),
218 		    "-",
219 		    (unsigned)skc->skc_obj_size,
220 		    "-", "-", "-", "-",
221 		    (long unsigned)objs_allocated,
222 		    "-", "-", "-", "-");
223 		spin_unlock(&skc->skc_lock);
224 		return (0);
225 	}
226 
227 	spin_lock(&skc->skc_lock);
228 	seq_printf(f, "%-36s  ", skc->skc_name);
229 	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u  "
230 	    "%5lu %5lu %5lu  %5lu %5lu %5lu  %5lu %5lu %5lu\n",
231 	    (long unsigned)skc->skc_flags,
232 	    (long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
233 	    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
234 	    (unsigned)skc->skc_slab_size,
235 	    (unsigned)skc->skc_obj_size,
236 	    (long unsigned)skc->skc_slab_total,
237 	    (long unsigned)skc->skc_slab_alloc,
238 	    (long unsigned)skc->skc_slab_max,
239 	    (long unsigned)skc->skc_obj_total,
240 	    (long unsigned)skc->skc_obj_alloc,
241 	    (long unsigned)skc->skc_obj_max,
242 	    (long unsigned)skc->skc_obj_deadlock,
243 	    (long unsigned)skc->skc_obj_emergency,
244 	    (long unsigned)skc->skc_obj_emergency_max);
245 	spin_unlock(&skc->skc_lock);
246 	return (0);
247 }
248 
249 static void *
250 slab_seq_start(struct seq_file *f, loff_t *pos)
251 {
252 	struct list_head *p;
253 	loff_t n = *pos;
254 
255 	down_read(&spl_kmem_cache_sem);
256 	if (!n)
257 		slab_seq_show_headers(f);
258 
259 	p = spl_kmem_cache_list.next;
260 	while (n--) {
261 		p = p->next;
262 		if (p == &spl_kmem_cache_list)
263 			return (NULL);
264 	}
265 
266 	return (list_entry(p, spl_kmem_cache_t, skc_list));
267 }
268 
269 static void *
270 slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
271 {
272 	spl_kmem_cache_t *skc = p;
273 
274 	++*pos;
275 	return ((skc->skc_list.next == &spl_kmem_cache_list) ?
276 	    NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list));
277 }
278 
279 static void
280 slab_seq_stop(struct seq_file *f, void *v)
281 {
282 	up_read(&spl_kmem_cache_sem);
283 }
284 
285 static const struct seq_operations slab_seq_ops = {
286 	.show  = slab_seq_show,
287 	.start = slab_seq_start,
288 	.next  = slab_seq_next,
289 	.stop  = slab_seq_stop,
290 };
291 
292 static int
293 proc_slab_open(struct inode *inode, struct file *filp)
294 {
295 	return (seq_open(filp, &slab_seq_ops));
296 }
297 
298 static const kstat_proc_op_t proc_slab_operations = {
299 #ifdef HAVE_PROC_OPS_STRUCT
300 	.proc_open	= proc_slab_open,
301 	.proc_read	= seq_read,
302 	.proc_lseek	= seq_lseek,
303 	.proc_release	= seq_release,
304 #else
305 	.open		= proc_slab_open,
306 	.read		= seq_read,
307 	.llseek		= seq_lseek,
308 	.release	= seq_release,
309 #endif
310 };
311 
312 static struct ctl_table spl_kmem_table[] = {
313 #ifdef DEBUG_KMEM
314 	{
315 		.procname	= "kmem_used",
316 		.data		= &kmem_alloc_used,
317 #ifdef HAVE_ATOMIC64_T
318 		.maxlen		= sizeof (atomic64_t),
319 #else
320 		.maxlen		= sizeof (atomic_t),
321 #endif /* HAVE_ATOMIC64_T */
322 		.mode		= 0444,
323 		.proc_handler	= &proc_domemused,
324 	},
325 	{
326 		.procname	= "kmem_max",
327 		.data		= &kmem_alloc_max,
328 		.maxlen		= sizeof (unsigned long),
329 		.extra1		= &table_min,
330 		.extra2		= &table_max,
331 		.mode		= 0444,
332 		.proc_handler	= &proc_doulongvec_minmax,
333 	},
334 #endif /* DEBUG_KMEM */
335 	{
336 		.procname	= "slab_kvmem_total",
337 		.data		= (void *)(KMC_KVMEM | KMC_TOTAL),
338 		.maxlen		= sizeof (unsigned long),
339 		.extra1		= &table_min,
340 		.extra2		= &table_max,
341 		.mode		= 0444,
342 		.proc_handler	= &proc_doslab,
343 	},
344 	{
345 		.procname	= "slab_kvmem_alloc",
346 		.data		= (void *)(KMC_KVMEM | KMC_ALLOC),
347 		.maxlen		= sizeof (unsigned long),
348 		.extra1		= &table_min,
349 		.extra2		= &table_max,
350 		.mode		= 0444,
351 		.proc_handler	= &proc_doslab,
352 	},
353 	{
354 		.procname	= "slab_kvmem_max",
355 		.data		= (void *)(KMC_KVMEM | KMC_MAX),
356 		.maxlen		= sizeof (unsigned long),
357 		.extra1		= &table_min,
358 		.extra2		= &table_max,
359 		.mode		= 0444,
360 		.proc_handler	= &proc_doslab,
361 	},
362 	{},
363 };
364 
365 static struct ctl_table spl_kstat_table[] = {
366 	{},
367 };
368 
369 static struct ctl_table spl_table[] = {
370 	/*
371 	 * NB No .strategy entries have been provided since
372 	 * sysctl(8) prefers to go via /proc for portability.
373 	 */
374 	{
375 		.procname	= "gitrev",
376 		.data		= (char *)ZFS_META_GITREV,
377 		.maxlen		= sizeof (ZFS_META_GITREV),
378 		.mode		= 0444,
379 		.proc_handler	= &proc_dostring,
380 	},
381 	{
382 		.procname	= "hostid",
383 		.data		= &spl_hostid,
384 		.maxlen		= sizeof (unsigned long),
385 		.mode		= 0644,
386 		.proc_handler	= &proc_dohostid,
387 	},
388 #ifdef HAVE_REGISTER_SYSCTL_TABLE
389 	{
390 		.procname	= "kmem",
391 		.mode		= 0555,
392 		.child		= spl_kmem_table,
393 	},
394 	{
395 		.procname	= "kstat",
396 		.mode		= 0555,
397 		.child		= spl_kstat_table,
398 	},
399 #endif
400 	{},
401 };
402 
#ifdef HAVE_REGISTER_SYSCTL_TABLE
/*
 * Directory chain handed to register_sysctl_table(): spl_root holds the
 * "kernel" directory, whose child spl_dir holds the "spl" directory, whose
 * child is spl_table.
 */
static struct ctl_table spl_dir[] = {
	{
		.procname	= "spl",
		.child		= spl_table,
		.mode		= 0555,
	},
	{}
};

static struct ctl_table spl_root[] = {
	{
		.procname	= "kernel",
		.child		= spl_dir,
		.mode		= 0555,
	},
	{}
};
#endif
422 
423 static void spl_proc_cleanup(void)
424 {
425 	remove_proc_entry("kstat", proc_spl);
426 	remove_proc_entry("slab", proc_spl_kmem);
427 	remove_proc_entry("kmem", proc_spl);
428 	remove_proc_entry("spl", NULL);
429 
430 #ifndef HAVE_REGISTER_SYSCTL_TABLE
431 	if (spl_kstat) {
432 		unregister_sysctl_table(spl_kstat);
433 		spl_kstat = NULL;
434 	}
435 	if (spl_kmem) {
436 		unregister_sysctl_table(spl_kmem);
437 		spl_kmem = NULL;
438 	}
439 #endif
440 	if (spl_header) {
441 		unregister_sysctl_table(spl_header);
442 		spl_header = NULL;
443 	}
444 }
445 
#ifndef HAVE_REGISTER_SYSCTL_TABLE

/*
 * Historically a struct ctl_table array was terminated by an "empty"
 * sentinel element, i.e. one whose .procname is NULL.
 *
 * Linux 6.6 started moving away from that convention.  It introduced
 * register_sysctl_sz(), which takes the element count explicitly, and
 * redefined register_sysctl() to call register_sysctl_sz() with the array
 * size.  The sentinel was still honoured so that existing callers kept
 * working.
 *
 * Linux 6.11 dropped sentinel support altogether: a trailing empty element
 * is now treated as a real, malformed entry and is rejected.
 *
 * To continue supporting older kernels we keep the sentinel in our tables
 * and detect whether register_sysctl_sz() is available.  If it is, we pass
 * the array size minus one so the kernel never looks at the terminator.
 * On pre-6.6 kernels, which lack register_sysctl_sz(), we fall back to
 * register_sysctl(), which handles the terminator as it always has.
 */
#ifdef HAVE_REGISTER_SYSCTL_SZ
#define	spl_proc_register_sysctl(p, t)	\
	register_sysctl_sz(p, t, ARRAY_SIZE(t) - 1)
#else
#define	spl_proc_register_sysctl(p, t)	\
	register_sysctl(p, t)
#endif
#endif
476 
477 int
478 spl_proc_init(void)
479 {
480 	int rc = 0;
481 
482 #ifdef HAVE_REGISTER_SYSCTL_TABLE
483 	spl_header = register_sysctl_table(spl_root);
484 	if (spl_header == NULL)
485 		return (-EUNATCH);
486 #else
487 	spl_header = spl_proc_register_sysctl("kernel/spl", spl_table);
488 	if (spl_header == NULL)
489 		return (-EUNATCH);
490 
491 	spl_kmem = spl_proc_register_sysctl("kernel/spl/kmem", spl_kmem_table);
492 	if (spl_kmem == NULL) {
493 		rc = -EUNATCH;
494 		goto out;
495 	}
496 	spl_kstat = spl_proc_register_sysctl("kernel/spl/kstat",
497 	    spl_kstat_table);
498 	if (spl_kstat == NULL) {
499 		rc = -EUNATCH;
500 		goto out;
501 	}
502 #endif
503 
504 	proc_spl = proc_mkdir("spl", NULL);
505 	if (proc_spl == NULL) {
506 		rc = -EUNATCH;
507 		goto out;
508 	}
509 
510 	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
511 	if (proc_spl_kmem == NULL) {
512 		rc = -EUNATCH;
513 		goto out;
514 	}
515 
516 	proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
517 	    &proc_slab_operations, NULL);
518 	if (proc_spl_kmem_slab == NULL) {
519 		rc = -EUNATCH;
520 		goto out;
521 	}
522 
523 	proc_spl_kstat = proc_mkdir("kstat", proc_spl);
524 	if (proc_spl_kstat == NULL) {
525 		rc = -EUNATCH;
526 		goto out;
527 	}
528 out:
529 	if (rc)
530 		spl_proc_cleanup();
531 
532 	return (rc);
533 }
534 
/*
 * Counterpart of spl_proc_init(): hands off to spl_proc_cleanup() to
 * release the proc entries and sysctl registrations.
 */
void
spl_proc_fini(void)
{
	spl_proc_cleanup();
}
540