1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
4 * Copyright (C) 2007 The Regents of the University of California.
5 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
6 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
7 * UCRL-CODE-235197
8 *
9 * This file is part of the SPL, Solaris Porting Layer.
10 *
11 * The SPL is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
15 *
16 * The SPL is distributed in the hope that it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
22 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
23 *
24 * Solaris Porting Layer (SPL) Proc Implementation.
25 */
26 /*
27 * Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
28 */
29
30 #include <sys/systeminfo.h>
31 #include <sys/kstat.h>
32 #include <sys/kmem.h>
33 #include <sys/kmem_cache.h>
34 #include <sys/vmem.h>
35 #include <sys/proc.h>
36 #include <linux/ctype.h>
37 #include <linux/kmod.h>
38 #include <linux/seq_file.h>
39 #include <linux/uaccess.h>
40 #include <linux/version.h>
41 #include "zfs_gitrev.h"
42
/*
 * Writable alias for struct ctl_table.  With the grsecurity constify
 * plugin active, struct ctl_table is implicitly const, so __no_const is
 * required to permit the on-stack modifiable copies used by the proc
 * handlers below.
 */
#if defined(CONSTIFY_PLUGIN)
typedef struct ctl_table __no_const spl_ctl_table;
#else
typedef struct ctl_table spl_ctl_table;
#endif

/* Newer kernels pass the ctl_table to proc handlers as const. */
#ifdef HAVE_PROC_HANDLER_CTL_TABLE_CONST
#define	CONST_CTL_TABLE	const struct ctl_table
#else
#define	CONST_CTL_TABLE	struct ctl_table
#endif

/* Shared min/max bounds handed to proc_doulongvec_minmax(). */
static unsigned long table_min = 0;
static unsigned long table_max = ~0;

/* Registered sysctl headers, released in spl_proc_cleanup(). */
static struct ctl_table_header *spl_header = NULL;
#ifndef HAVE_REGISTER_SYSCTL_TABLE
/* On the path-based API each subdirectory is registered separately. */
static struct ctl_table_header *spl_kmem = NULL;
static struct ctl_table_header *spl_kstat = NULL;
#endif
/* /proc/spl directory tree created by spl_proc_init(). */
static struct proc_dir_entry *proc_spl = NULL;
static struct proc_dir_entry *proc_spl_kmem = NULL;
static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
/* Non-static: the kstat code hangs its entries off this directory. */
struct proc_dir_entry *proc_spl_kstat = NULL;
67
68 #ifdef DEBUG_KMEM
69 static int
proc_domemused(CONST_CTL_TABLE * table,int write,void __user * buffer,size_t * lenp,loff_t * ppos)70 proc_domemused(CONST_CTL_TABLE *table, int write,
71 void __user *buffer, size_t *lenp, loff_t *ppos)
72 {
73 int rc = 0;
74 unsigned long val;
75 spl_ctl_table dummy = *table;
76
77 dummy.data = &val;
78 dummy.proc_handler = &proc_dointvec;
79 dummy.extra1 = &table_min;
80 dummy.extra2 = &table_max;
81
82 if (write) {
83 *ppos += *lenp;
84 } else {
85 #ifdef HAVE_ATOMIC64_T
86 val = atomic64_read((atomic64_t *)table->data);
87 #else
88 val = atomic_read((atomic_t *)table->data);
89 #endif /* HAVE_ATOMIC64_T */
90 rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
91 }
92
93 return (rc);
94 }
95 #endif /* DEBUG_KMEM */
96
97 static int
proc_doslab(CONST_CTL_TABLE * table,int write,void __user * buffer,size_t * lenp,loff_t * ppos)98 proc_doslab(CONST_CTL_TABLE *table, int write,
99 void __user *buffer, size_t *lenp, loff_t *ppos)
100 {
101 int rc = 0;
102 unsigned long val = 0, mask;
103 spl_ctl_table dummy = *table;
104 spl_kmem_cache_t *skc = NULL;
105
106 dummy.data = &val;
107 dummy.proc_handler = &proc_dointvec;
108 dummy.extra1 = &table_min;
109 dummy.extra2 = &table_max;
110
111 if (write) {
112 *ppos += *lenp;
113 } else {
114 down_read(&spl_kmem_cache_sem);
115 mask = (unsigned long)table->data;
116
117 list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
118
119 /* Only use slabs of the correct kmem/vmem type */
120 if (!(skc->skc_flags & mask))
121 continue;
122
123 /* Sum the specified field for selected slabs */
124 switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
125 case KMC_TOTAL:
126 val += skc->skc_slab_size * skc->skc_slab_total;
127 break;
128 case KMC_ALLOC:
129 val += skc->skc_obj_size * skc->skc_obj_alloc;
130 break;
131 case KMC_MAX:
132 val += skc->skc_obj_size * skc->skc_obj_max;
133 break;
134 }
135 }
136
137 up_read(&spl_kmem_cache_sem);
138 rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
139 }
140
141 return (rc);
142 }
143
144 static int
proc_dohostid(CONST_CTL_TABLE * table,int write,void __user * buffer,size_t * lenp,loff_t * ppos)145 proc_dohostid(CONST_CTL_TABLE *table, int write,
146 void __user *buffer, size_t *lenp, loff_t *ppos)
147 {
148 char *end, str[32];
149 unsigned long hid;
150 spl_ctl_table dummy = *table;
151
152 dummy.data = str;
153 dummy.maxlen = sizeof (str) - 1;
154
155 if (!write)
156 snprintf(str, sizeof (str), "%lx",
157 (unsigned long) zone_get_hostid(NULL));
158
159 /* always returns 0 */
160 proc_dostring(&dummy, write, buffer, lenp, ppos);
161
162 if (write) {
163 /*
164 * We can't use proc_doulongvec_minmax() in the write
165 * case here because hostid, while a hex value, has no
166 * leading 0x, which confuses the helper function.
167 */
168
169 hid = simple_strtoul(str, &end, 16);
170 if (str == end)
171 return (-EINVAL);
172 spl_hostid = hid;
173 }
174
175 return (0);
176 }
177
/*
 * Print the two header rows of /proc/spl/kmem/slab.  Use seq_puts()
 * rather than seq_printf(): there are no format arguments, and
 * checkpatch.pl recommends seq_puts() for constant strings since it
 * avoids format-string parsing entirely.  Output is byte-identical.
 */
static void
slab_seq_show_headers(struct seq_file *f)
{
	seq_puts(f,
	    "--------------------- cache ----------"
	    "--------------------------------------------- "
	    "----- slab ------ "
	    "---- object ----- "
	    "--- emergency ---\n");
	seq_puts(f,
	    "name "
	    " flags size alloc slabsize objsize "
	    "total alloc max "
	    "total alloc max "
	    "dlock alloc max\n");
}
194
/*
 * seq_file show callback: emit one row of /proc/spl/kmem/slab for the
 * given cache.  Caches backed directly by the Linux slab (KMC_SLAB)
 * only track the count of allocated objects, so most columns are
 * printed as "-" for them.
 */
static int
slab_seq_show(struct seq_file *f, void *p)
{
	spl_kmem_cache_t *skc = p;

	ASSERT(skc->skc_magic == SKC_MAGIC);

	if (skc->skc_flags & KMC_SLAB) {
		/*
		 * This cache is backed by a generic Linux kmem cache which
		 * has its own accounting. For these caches we only track
		 * the number of active allocated objects that exist within
		 * the underlying Linux slabs. For the overall statistics of
		 * the underlying Linux cache please refer to /proc/slabinfo.
		 */
		spin_lock(&skc->skc_lock);
		uint64_t objs_allocated =
		    percpu_counter_sum(&skc->skc_linux_alloc);
		seq_printf(f, "%-36s ", skc->skc_name);
		/* alloc is derived: object size times allocated objects. */
		seq_printf(f, "0x%05lx %9s %9lu %8s %8u "
		    "%5s %5s %5s %5s %5lu %5s %5s %5s %5s\n",
		    (long unsigned)skc->skc_flags,
		    "-",
		    (long unsigned)(skc->skc_obj_size * objs_allocated),
		    "-",
		    (unsigned)skc->skc_obj_size,
		    "-", "-", "-", "-",
		    (long unsigned)objs_allocated,
		    "-", "-", "-", "-");
		spin_unlock(&skc->skc_lock);
		return (0);
	}

	/* SPL-managed cache: full slab/object/emergency statistics. */
	spin_lock(&skc->skc_lock);
	seq_printf(f, "%-36s ", skc->skc_name);
	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u "
	    "%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n",
	    (long unsigned)skc->skc_flags,
	    (long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
	    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
	    (unsigned)skc->skc_slab_size,
	    (unsigned)skc->skc_obj_size,
	    (long unsigned)skc->skc_slab_total,
	    (long unsigned)skc->skc_slab_alloc,
	    (long unsigned)skc->skc_slab_max,
	    (long unsigned)skc->skc_obj_total,
	    (long unsigned)skc->skc_obj_alloc,
	    (long unsigned)skc->skc_obj_max,
	    (long unsigned)skc->skc_obj_deadlock,
	    (long unsigned)skc->skc_obj_emergency,
	    (long unsigned)skc->skc_obj_emergency_max);
	spin_unlock(&skc->skc_lock);
	return (0);
}
249
250 static void *
slab_seq_start(struct seq_file * f,loff_t * pos)251 slab_seq_start(struct seq_file *f, loff_t *pos)
252 {
253 struct list_head *p;
254 loff_t n = *pos;
255
256 down_read(&spl_kmem_cache_sem);
257 if (!n)
258 slab_seq_show_headers(f);
259
260 p = spl_kmem_cache_list.next;
261 while (n--) {
262 p = p->next;
263 if (p == &spl_kmem_cache_list)
264 return (NULL);
265 }
266
267 return (list_entry(p, spl_kmem_cache_t, skc_list));
268 }
269
270 static void *
slab_seq_next(struct seq_file * f,void * p,loff_t * pos)271 slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
272 {
273 spl_kmem_cache_t *skc = p;
274
275 ++*pos;
276 return ((skc->skc_list.next == &spl_kmem_cache_list) ?
277 NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list));
278 }
279
/* seq_file stop callback: drop the lock taken in slab_seq_start(). */
static void
slab_seq_stop(struct seq_file *f, void *v)
{
	up_read(&spl_kmem_cache_sem);
}
285
/* Iterator callbacks backing /proc/spl/kmem/slab. */
static const struct seq_operations slab_seq_ops = {
	.show = slab_seq_show,
	.start = slab_seq_start,
	.next = slab_seq_next,
	.stop = slab_seq_stop,
};
292
/* open() for /proc/spl/kmem/slab: attach the slab seq_file iterator. */
static int
proc_slab_open(struct inode *inode, struct file *filp)
{
	return (seq_open(filp, &slab_seq_ops));
}
298
/*
 * File operations for /proc/spl/kmem/slab.  Kernels providing struct
 * proc_ops use the .proc_* members; older kernels fall back to the
 * classic file_operations names.
 */
static const kstat_proc_op_t proc_slab_operations = {
#ifdef HAVE_PROC_OPS_STRUCT
	.proc_open = proc_slab_open,
	.proc_read = seq_read,
	.proc_lseek = seq_lseek,
	.proc_release = seq_release,
#else
	.open = proc_slab_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
#endif
};
312
/*
 * Entries under kernel.spl.kmem (/proc/sys/kernel/spl/kmem).  The
 * slab_kvmem_* entries abuse .data to carry a flag mask rather than a
 * pointer; proc_doslab() decodes it.  The array keeps a terminating
 * sentinel for older kernels (see the comment above
 * spl_proc_register_sysctl() below for the details).
 */
static struct ctl_table spl_kmem_table[] = {
#ifdef DEBUG_KMEM
	{
		/* Current tracked kmem allocation, read-only. */
		.procname = "kmem_used",
		.data = &kmem_alloc_used,
#ifdef HAVE_ATOMIC64_T
		.maxlen = sizeof (atomic64_t),
#else
		.maxlen = sizeof (atomic_t),
#endif /* HAVE_ATOMIC64_T */
		.mode = 0444,
		.proc_handler = &proc_domemused,
	},
	{
		/* High-water mark of tracked kmem allocation, read-only. */
		.procname = "kmem_max",
		.data = &kmem_alloc_max,
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doulongvec_minmax,
	},
#endif /* DEBUG_KMEM */
	{
		/* Total bytes backing kvmem slabs. */
		.procname = "slab_kvmem_total",
		.data = (void *)(KMC_KVMEM | KMC_TOTAL),
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		/* Bytes currently allocated from kvmem slabs. */
		.procname = "slab_kvmem_alloc",
		.data = (void *)(KMC_KVMEM | KMC_ALLOC),
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		/* High-water mark of kvmem slab allocation. */
		.procname = "slab_kvmem_max",
		.data = (void *)(KMC_KVMEM | KMC_MAX),
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{},
};
365
/*
 * Entries under kernel.spl.kstat.  Currently only the terminating
 * sentinel: the directory exists so kstat consumers have a place to
 * register, while the /proc entries themselves live in /proc/spl/kstat.
 */
static struct ctl_table spl_kstat_table[] = {
	{},
};
369
/* Top-level entries under kernel.spl (/proc/sys/kernel/spl). */
static struct ctl_table spl_table[] = {
	/*
	 * NB No .strategy entries have been provided since
	 * sysctl(8) prefers to go via /proc for portability.
	 */
	{
		/* Git revision this module was built from, read-only. */
		.procname = "gitrev",
		.data = (char *)ZFS_META_GITREV,
		.maxlen = sizeof (ZFS_META_GITREV),
		.mode = 0444,
		.proc_handler = &proc_dostring,
	},
	{
		/* System hostid, read/write as a bare hex string. */
		.procname = "hostid",
		.data = &spl_hostid,
		.maxlen = sizeof (unsigned long),
		.mode = 0644,
		.proc_handler = &proc_dohostid,
	},
#ifdef HAVE_REGISTER_SYSCTL_TABLE
	/* Legacy API: subdirectories are linked in via .child. */
	{
		.procname = "kmem",
		.mode = 0555,
		.child = spl_kmem_table,
	},
	{
		.procname = "kstat",
		.mode = 0555,
		.child = spl_kstat_table,
	},
#endif
	{},
};
403
#ifdef HAVE_REGISTER_SYSCTL_TABLE
/*
 * Legacy register_sysctl_table() requires the full path to be spelled
 * out as nested .child tables: kernel -> spl -> entries.
 */
static struct ctl_table spl_dir[] = {
	{
		.procname = "spl",
		.mode = 0555,
		.child = spl_table,
	},
	{}
};

/* Root of the legacy table: anchors "spl" under "kernel". */
static struct ctl_table spl_root[] = {
	{
		.procname = "kernel",
		.mode = 0555,
		.child = spl_dir,
	},
	{}
};
#endif
423
/*
 * Tear down everything spl_proc_init() created, children before
 * parents.  The sysctl headers are only unregistered when non-NULL, so
 * this is safe to call from a partially completed init.
 *
 * NOTE(review): the remove_proc_entry() calls are unconditional even
 * when init failed before the corresponding entry was created — confirm
 * the resulting kernel warning for a missing entry is acceptable here.
 */
static void spl_proc_cleanup(void)
{
	remove_proc_entry("kstat", proc_spl);
	remove_proc_entry("slab", proc_spl_kmem);
	remove_proc_entry("kmem", proc_spl);
	remove_proc_entry("spl", NULL);

#ifndef HAVE_REGISTER_SYSCTL_TABLE
	if (spl_kstat) {
		unregister_sysctl_table(spl_kstat);
		spl_kstat = NULL;
	}
	if (spl_kmem) {
		unregister_sysctl_table(spl_kmem);
		spl_kmem = NULL;
	}
#endif
	if (spl_header) {
		unregister_sysctl_table(spl_header);
		spl_header = NULL;
	}
}
446
#ifndef HAVE_REGISTER_SYSCTL_TABLE

/*
 * Traditionally, struct ctl_table arrays have been terminated by an "empty"
 * sentinel element (specifically, one with .procname == NULL).
 *
 * Linux 6.6 began migrating away from this, adding register_sysctl_sz() so
 * that callers could provide the size directly, and redefining
 * register_sysctl() to just call register_sysctl_sz() with the array size. It
 * retained support for the terminating element so that existing callers would
 * continue to work.
 *
 * Linux 6.11 removed support for the terminating element, instead interpreting
 * it as a real malformed element, and rejecting it.
 *
 * In order to continue supporting older kernels, we retain the terminating
 * sentinel element for our sysctl tables, but instead detect availability of
 * register_sysctl_sz(). If it exists, we pass it the array size -1, stopping
 * the kernel from trying to process the terminator. For pre-6.6 kernels that
 * don't have register_sysctl_sz(), we just use register_sysctl(), which can
 * handle the terminating element as it always has.
 */
#ifdef HAVE_REGISTER_SYSCTL_SZ
#define	spl_proc_register_sysctl(p, t)	\
	register_sysctl_sz(p, t, ARRAY_SIZE(t)-1)
#else
#define	spl_proc_register_sysctl(p, t)	\
	register_sysctl(p, t)
#endif
#endif
477
/*
 * Register the SPL sysctl tables (kernel.spl.*) and build the /proc/spl
 * directory tree.  On any failure, everything created so far is torn
 * down via spl_proc_cleanup() before returning.
 *
 * Returns 0 on success, -EUNATCH if any registration fails.
 */
int
spl_proc_init(void)
{
	int rc = 0;

#ifdef HAVE_REGISTER_SYSCTL_TABLE
	/* Legacy API: one call registers the whole kernel->spl tree. */
	spl_header = register_sysctl_table(spl_root);
	if (spl_header == NULL)
		return (-EUNATCH);
#else
	/* Path-based API: register each directory separately. */
	spl_header = spl_proc_register_sysctl("kernel/spl", spl_table);
	if (spl_header == NULL)
		return (-EUNATCH);

	spl_kmem = spl_proc_register_sysctl("kernel/spl/kmem", spl_kmem_table);
	if (spl_kmem == NULL) {
		rc = -EUNATCH;
		goto out;
	}
	spl_kstat = spl_proc_register_sysctl("kernel/spl/kstat",
	    spl_kstat_table);
	if (spl_kstat == NULL) {
		rc = -EUNATCH;
		goto out;
	}
#endif

	/* /proc/spl and its subdirectories. */
	proc_spl = proc_mkdir("spl", NULL);
	if (proc_spl == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
	if (proc_spl_kmem == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	/* /proc/spl/kmem/slab: per-cache statistics via seq_file. */
	proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
	    &proc_slab_operations, NULL);
	if (proc_spl_kmem_slab == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	/* /proc/spl/kstat: populated later by the kstat code. */
	proc_spl_kstat = proc_mkdir("kstat", proc_spl);
	if (proc_spl_kstat == NULL) {
		rc = -EUNATCH;
		goto out;
	}
out:
	if (rc)
		spl_proc_cleanup();

	return (rc);
}
535
/* Module-unload hook: remove all SPL proc and sysctl entries. */
void
spl_proc_fini(void)
{
	spl_proc_cleanup();
}
541