xref: /freebsd/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
4  *  Copyright (C) 2007 The Regents of the University of California.
5  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
6  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
7  *  UCRL-CODE-235197
8  *
9  *  This file is part of the SPL, Solaris Porting Layer.
10  *
11  *  The SPL is free software; you can redistribute it and/or modify it
12  *  under the terms of the GNU General Public License as published by the
13  *  Free Software Foundation; either version 2 of the License, or (at your
14  *  option) any later version.
15  *
16  *  The SPL is distributed in the hope that it will be useful, but WITHOUT
17  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
19  *  for more details.
20  *
21  *  You should have received a copy of the GNU General Public License along
22  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
23  *
24  *  Solaris Porting Layer (SPL) Proc Implementation.
25  */
26 /*
27  * Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
28  */
29 
30 #include <sys/systeminfo.h>
31 #include <sys/kstat.h>
32 #include <sys/kmem.h>
33 #include <sys/kmem_cache.h>
34 #include <sys/vmem.h>
35 #include <sys/proc.h>
36 #include <linux/ctype.h>
37 #include <linux/kmod.h>
38 #include <linux/seq_file.h>
39 #include <linux/uaccess.h>
40 #include <linux/version.h>
41 #include "zfs_gitrev.h"
42 
43 #if defined(CONSTIFY_PLUGIN)
44 typedef struct ctl_table __no_const spl_ctl_table;
45 #else
46 typedef struct ctl_table spl_ctl_table;
47 #endif
48 
49 #ifdef HAVE_PROC_HANDLER_CTL_TABLE_CONST
50 #define	CONST_CTL_TABLE		const struct ctl_table
51 #else
52 #define	CONST_CTL_TABLE		struct ctl_table
53 #endif
54 
55 static unsigned long table_min = 0;
56 static unsigned long table_max = ~0;
57 
58 static struct ctl_table_header *spl_header = NULL;
59 #ifndef HAVE_REGISTER_SYSCTL_TABLE
60 static struct ctl_table_header *spl_kmem = NULL;
61 static struct ctl_table_header *spl_kstat = NULL;
62 #endif
63 static struct proc_dir_entry *proc_spl = NULL;
64 static struct proc_dir_entry *proc_spl_kmem = NULL;
65 static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
66 struct proc_dir_entry *proc_spl_kstat = NULL;
67 
#ifdef DEBUG_KMEM
/*
 * sysctl handler for kernel/spl/kmem/kmem_used.  table->data points at
 * the kmem_alloc_used counter (atomic64_t when available, atomic_t
 * otherwise); reads snapshot it into a plain unsigned long and report
 * it through proc_doulongvec_minmax().  The entry is registered 0444,
 * so the write path should be unreachable; it just consumes the input
 * by advancing *ppos.
 */
static int
proc_domemused(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = 0;
	unsigned long val;
	/* Shadow table redirecting .data to the local snapshot. */
	spl_ctl_table dummy = *table;

	dummy.data = &val;
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &table_min;
	dummy.extra2 = &table_max;

	if (write) {
		*ppos += *lenp;
	} else {
#ifdef HAVE_ATOMIC64_T
		val = atomic64_read((atomic64_t *)table->data);
#else
		val = atomic_read((atomic_t *)table->data);
#endif /* HAVE_ATOMIC64_T */
		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
	}

	return (rc);
}
#endif /* DEBUG_KMEM */
96 
/*
 * sysctl handler for the kernel/spl/kmem/slab_kvmem_* entries.
 * table->data is not a pointer here: it encodes a KMC_* flag mask (a
 * cache-type bit plus exactly one of KMC_TOTAL/KMC_ALLOC/KMC_MAX, see
 * spl_kmem_table).  Reads walk spl_kmem_cache_list under the read lock
 * and sum the selected statistic over all caches of the matching type.
 * The entries are registered 0444; the write path just consumes input.
 */
static int
proc_doslab(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = 0;
	unsigned long val = 0, mask;
	/* Shadow table redirecting .data to the computed sum. */
	spl_ctl_table dummy = *table;
	spl_kmem_cache_t *skc = NULL;

	dummy.data = &val;
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &table_min;
	dummy.extra2 = &table_max;

	if (write) {
		*ppos += *lenp;
	} else {
		down_read(&spl_kmem_cache_sem);
		mask = (unsigned long)table->data;

		list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {

			/* Only use slabs of the correct kmem/vmem type */
			if (!(skc->skc_flags & mask))
				continue;

			/* Sum the specified field for selected slabs */
			switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
			case KMC_TOTAL:
				val += skc->skc_slab_size * skc->skc_slab_total;
				break;
			case KMC_ALLOC:
				val += skc->skc_obj_size * skc->skc_obj_alloc;
				break;
			case KMC_MAX:
				val += skc->skc_obj_size * skc->skc_obj_max;
				break;
			}
		}

		up_read(&spl_kmem_cache_sem);
		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
	}

	return (rc);
}
143 
/*
 * sysctl handler for kernel/spl/hostid (mode 0644).  Reads format the
 * current hostid as bare hex (no leading 0x); writes parse a hex
 * string and store the value in spl_hostid.  Returns 0 on success or
 * -EINVAL if the written string contains no hex digits at all.
 */
static int
proc_dohostid(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	char *end, str[32];
	unsigned long hid;
	/* Shadow table redirecting string I/O to the local buffer. */
	spl_ctl_table dummy = *table;

	dummy.data = str;
	dummy.maxlen = sizeof (str) - 1;

	if (!write)
		snprintf(str, sizeof (str), "%lx",
		    (unsigned long) zone_get_hostid(NULL));

	/* always returns 0 */
	proc_dostring(&dummy, write, buffer, lenp, ppos);

	if (write) {
		/*
		 * We can't use proc_doulongvec_minmax() in the write
		 * case here because hostid, while a hex value, has no
		 * leading 0x, which confuses the helper function.
		 */

		hid = simple_strtoul(str, &end, 16);
		if (str == end)
			return (-EINVAL);
		spl_hostid = hid;
	}

	return (0);
}
177 
/*
 * Emit the two column-header lines for /proc/spl/kmem/slab.  The
 * strings are constant (no format specifiers), so use seq_puts()
 * rather than seq_printf(), per kernel convention (checkpatch flags
 * seq_printf on constant strings).  Output is byte-identical.
 */
static void
slab_seq_show_headers(struct seq_file *f)
{
	seq_puts(f,
	    "--------------------- cache ----------"
	    "---------------------------------------------  "
	    "----- slab ------  "
	    "---- object -----  "
	    "--- emergency ---\n");
	seq_puts(f,
	    "name                                  "
	    "  flags      size     alloc slabsize  objsize  "
	    "total alloc   max  "
	    "total alloc   max  "
	    "dlock alloc   max\n");
}
194 
/*
 * seq_file show callback: print one statistics row for the cache in
 * 'p', under skc_lock.  Caches backed by a native Linux slab
 * (KMC_SLAB) only track per-cache allocated objects here; most of
 * their columns are printed as "-".  SPL-managed caches print the
 * full slab/object/emergency counter set.
 */
static int
slab_seq_show(struct seq_file *f, void *p)
{
	spl_kmem_cache_t *skc = p;

	ASSERT(skc->skc_magic == SKC_MAGIC);

	if (skc->skc_flags & KMC_SLAB) {
		/*
		 * This cache is backed by a generic Linux kmem cache which
		 * has its own accounting. For these caches we only track
		 * the number of active allocated objects that exist within
		 * the underlying Linux slabs. For the overall statistics of
		 * the underlying Linux cache please refer to /proc/slabinfo.
		 */
		spin_lock(&skc->skc_lock);
		uint64_t objs_allocated =
		    percpu_counter_sum(&skc->skc_linux_alloc);
		seq_printf(f, "%-36s  ", skc->skc_name);
		seq_printf(f, "0x%05lx %9s %9lu %8s %8u  "
		    "%5s %5s %5s  %5s %5lu %5s  %5s %5s %5s\n",
		    (long unsigned)skc->skc_flags,
		    "-",
		    (long unsigned)(skc->skc_obj_size * objs_allocated),
		    "-",
		    (unsigned)skc->skc_obj_size,
		    "-", "-", "-", "-",
		    (long unsigned)objs_allocated,
		    "-", "-", "-", "-");
		spin_unlock(&skc->skc_lock);
		return (0);
	}

	spin_lock(&skc->skc_lock);
	seq_printf(f, "%-36s  ", skc->skc_name);
	/* Columns match slab_seq_show_headers(): sizes, then counters. */
	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u  "
	    "%5lu %5lu %5lu  %5lu %5lu %5lu  %5lu %5lu %5lu\n",
	    (long unsigned)skc->skc_flags,
	    (long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
	    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
	    (unsigned)skc->skc_slab_size,
	    (unsigned)skc->skc_obj_size,
	    (long unsigned)skc->skc_slab_total,
	    (long unsigned)skc->skc_slab_alloc,
	    (long unsigned)skc->skc_slab_max,
	    (long unsigned)skc->skc_obj_total,
	    (long unsigned)skc->skc_obj_alloc,
	    (long unsigned)skc->skc_obj_max,
	    (long unsigned)skc->skc_obj_deadlock,
	    (long unsigned)skc->skc_obj_emergency,
	    (long unsigned)skc->skc_obj_emergency_max);
	spin_unlock(&skc->skc_lock);
	return (0);
}
249 
250 static void *
slab_seq_start(struct seq_file * f,loff_t * pos)251 slab_seq_start(struct seq_file *f, loff_t *pos)
252 {
253 	struct list_head *p;
254 	loff_t n = *pos;
255 
256 	down_read(&spl_kmem_cache_sem);
257 	if (!n)
258 		slab_seq_show_headers(f);
259 
260 	p = spl_kmem_cache_list.next;
261 	while (n--) {
262 		p = p->next;
263 		if (p == &spl_kmem_cache_list)
264 			return (NULL);
265 	}
266 
267 	return (list_entry(p, spl_kmem_cache_t, skc_list));
268 }
269 
270 static void *
slab_seq_next(struct seq_file * f,void * p,loff_t * pos)271 slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
272 {
273 	spl_kmem_cache_t *skc = p;
274 
275 	++*pos;
276 	return ((skc->skc_list.next == &spl_kmem_cache_list) ?
277 	    NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list));
278 }
279 
/* seq_file stop callback: drop the lock taken in slab_seq_start(). */
static void
slab_seq_stop(struct seq_file *f, void *v)
{
	up_read(&spl_kmem_cache_sem);
}
285 
/* Iterator callbacks for the /proc/spl/kmem/slab seq_file. */
static const struct seq_operations slab_seq_ops = {
	.show  = slab_seq_show,
	.start = slab_seq_start,
	.next  = slab_seq_next,
	.stop  = slab_seq_stop,
};
292 
/* open() for /proc/spl/kmem/slab: attach the slab seq_file iterator. */
static int
proc_slab_open(struct inode *inode, struct file *filp)
{
	return (seq_open(filp, &slab_seq_ops));
}
298 
/*
 * File operations for /proc/spl/kmem/slab.  Newer kernels take a
 * struct proc_ops (HAVE_PROC_OPS_STRUCT); older ones use the classic
 * file_operations field names.  Both variants wire the same seq_file
 * helpers.
 */
static const kstat_proc_op_t proc_slab_operations = {
#ifdef HAVE_PROC_OPS_STRUCT
	.proc_open	= proc_slab_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= seq_release,
#else
	.open		= proc_slab_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
#endif
};
312 
/*
 * kernel/spl/kmem sysctl entries.  The slab_kvmem_* entries abuse
 * .data to carry a KMC_* flag mask (decoded by proc_doslab()) rather
 * than a pointer; all entries here are read-only (0444).  Terminated
 * by an empty sentinel element (see the registration notes below).
 */
static struct ctl_table spl_kmem_table[] = {
#ifdef DEBUG_KMEM
	{
		.procname	= "kmem_used",
		.data		= &kmem_alloc_used,
#ifdef HAVE_ATOMIC64_T
		.maxlen		= sizeof (atomic64_t),
#else
		.maxlen		= sizeof (atomic_t),
#endif /* HAVE_ATOMIC64_T */
		.mode		= 0444,
		.proc_handler	= &proc_domemused,
	},
	{
		.procname	= "kmem_max",
		.data		= &kmem_alloc_max,
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.mode		= 0444,
		.proc_handler	= &proc_doulongvec_minmax,
	},
#endif /* DEBUG_KMEM */
	{
		.procname	= "slab_kvmem_total",
		.data		= (void *)(KMC_KVMEM | KMC_TOTAL),
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.mode		= 0444,
		.proc_handler	= &proc_doslab,
	},
	{
		.procname	= "slab_kvmem_alloc",
		.data		= (void *)(KMC_KVMEM | KMC_ALLOC),
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.mode		= 0444,
		.proc_handler	= &proc_doslab,
	},
	{
		.procname	= "slab_kvmem_max",
		.data		= (void *)(KMC_KVMEM | KMC_MAX),
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.mode		= 0444,
		.proc_handler	= &proc_doslab,
	},
	{},
};
365 
/*
 * kernel/spl/kstat: intentionally contains only the sentinel.
 * NOTE(review): presumably registered just so the kstat directory
 * exists for entries created elsewhere — confirm against the kstat
 * implementation.
 */
static struct ctl_table spl_kstat_table[] = {
	{},
};
369 
/*
 * Top-level kernel/spl sysctl entries.  On kernels with the legacy
 * register_sysctl_table() API the kmem/kstat subdirectories are
 * attached here via .child; otherwise they are registered separately
 * by path in spl_proc_init().
 */
static struct ctl_table spl_table[] = {
	/*
	 * NB No .strategy entries have been provided since
	 * sysctl(8) prefers to go via /proc for portability.
	 */
	{
		.procname	= "gitrev",
		.data		= (char *)ZFS_META_GITREV,
		.maxlen		= sizeof (ZFS_META_GITREV),
		.mode		= 0444,
		.proc_handler	= &proc_dostring,
	},
	{
		.procname	= "hostid",
		.data		= &spl_hostid,
		.maxlen		= sizeof (unsigned long),
		.mode		= 0644,
		.proc_handler	= &proc_dohostid,
	},
#ifdef HAVE_REGISTER_SYSCTL_TABLE
	{
		.procname	= "kmem",
		.mode		= 0555,
		.child		= spl_kmem_table,
	},
	{
		.procname	= "kstat",
		.mode		= 0555,
		.child		= spl_kstat_table,
	},
#endif
	{},
};
403 
#ifdef HAVE_REGISTER_SYSCTL_TABLE
/*
 * Legacy-API wrappers: register_sysctl_table() needs the full path
 * expressed as nested .child tables, rooted at "kernel".
 */
static struct ctl_table spl_dir[] = {
	{
		.procname	= "spl",
		.mode		= 0555,
		.child		= spl_table,
	},
	{}
};

static struct ctl_table spl_root[] = {
	{
		.procname	= "kernel",
		.mode		= 0555,
		.child		= spl_dir,
	},
	{}
};
#endif
423 
spl_proc_cleanup(void)424 static void spl_proc_cleanup(void)
425 {
426 	remove_proc_entry("kstat", proc_spl);
427 	remove_proc_entry("slab", proc_spl_kmem);
428 	remove_proc_entry("kmem", proc_spl);
429 	remove_proc_entry("spl", NULL);
430 
431 #ifndef HAVE_REGISTER_SYSCTL_TABLE
432 	if (spl_kstat) {
433 		unregister_sysctl_table(spl_kstat);
434 		spl_kstat = NULL;
435 	}
436 	if (spl_kmem) {
437 		unregister_sysctl_table(spl_kmem);
438 		spl_kmem = NULL;
439 	}
440 #endif
441 	if (spl_header) {
442 		unregister_sysctl_table(spl_header);
443 		spl_header = NULL;
444 	}
445 }
446 
447 #ifndef HAVE_REGISTER_SYSCTL_TABLE
448 
449 /*
450  * Traditionally, struct ctl_table arrays have been terminated by an "empty"
451  * sentinel element (specifically, one with .procname == NULL).
452  *
453  * Linux 6.6 began migrating away from this, adding register_sysctl_sz() so
454  * that callers could provide the size directly, and redefining
455  * register_sysctl() to just call register_sysctl_sz() with the array size. It
456  * retained support for the terminating element so that existing callers would
457  * continue to work.
458  *
459  * Linux 6.11 removed support for the terminating element, instead interpreting
460  * it as a real malformed element, and rejecting it.
461  *
 *  In order to continue supporting older kernels, we retain the terminating
463  * sentinel element for our sysctl tables, but instead detect availability of
464  * register_sysctl_sz(). If it exists, we pass it the array size -1, stopping
465  * the kernel from trying to process the terminator. For pre-6.6 kernels that
466  * don't have register_sysctl_sz(), we just use register_sysctl(), which can
467  * handle the terminating element as it always has.
468  */
469 #ifdef HAVE_REGISTER_SYSCTL_SZ
470 #define	spl_proc_register_sysctl(p, t)	\
471 	register_sysctl_sz(p, t, ARRAY_SIZE(t)-1)
472 #else
473 #define	spl_proc_register_sysctl(p, t)	\
474 	register_sysctl(p, t)
475 #endif
476 #endif
477 
/*
 * Register the SPL sysctl tables (kernel/spl, plus kmem/kstat
 * subdirectories on non-legacy kernels) and build the /proc/spl tree:
 * /proc/spl/kmem, /proc/spl/kmem/slab and /proc/spl/kstat.  Returns 0
 * on success or -EUNATCH on any failure, after tearing down whatever
 * was already created via spl_proc_cleanup().
 */
int
spl_proc_init(void)
{
	int rc = 0;

#ifdef HAVE_REGISTER_SYSCTL_TABLE
	/* Legacy API: one call registers the whole nested tree. */
	spl_header = register_sysctl_table(spl_root);
	if (spl_header == NULL)
		return (-EUNATCH);
#else
	/* Modern API: register each directory level by path string. */
	spl_header = spl_proc_register_sysctl("kernel/spl", spl_table);
	if (spl_header == NULL)
		return (-EUNATCH);

	spl_kmem = spl_proc_register_sysctl("kernel/spl/kmem", spl_kmem_table);
	if (spl_kmem == NULL) {
		rc = -EUNATCH;
		goto out;
	}
	spl_kstat = spl_proc_register_sysctl("kernel/spl/kstat",
	    spl_kstat_table);
	if (spl_kstat == NULL) {
		rc = -EUNATCH;
		goto out;
	}
#endif

	proc_spl = proc_mkdir("spl", NULL);
	if (proc_spl == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
	if (proc_spl_kmem == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
	    &proc_slab_operations, NULL);
	if (proc_spl_kmem_slab == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kstat = proc_mkdir("kstat", proc_spl);
	if (proc_spl_kstat == NULL) {
		rc = -EUNATCH;
		goto out;
	}
out:
	if (rc)
		spl_proc_cleanup();

	return (rc);
}
535 
/* Module-unload hook: remove all SPL proc and sysctl entries. */
void
spl_proc_fini(void)
{
	spl_proc_cleanup();
}
541