1 /*
2 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3 * Copyright (C) 2007 The Regents of the University of California.
4 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
6 * UCRL-CODE-235197
7 *
8 * This file is part of the SPL, Solaris Porting Layer.
9 *
10 * The SPL is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; either version 2 of the License, or (at your
13 * option) any later version.
14 *
15 * The SPL is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
22 *
23 * Solaris Porting Layer (SPL) Proc Implementation.
24 */
25 /*
26 * Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
27 */
28
29 #include <sys/systeminfo.h>
30 #include <sys/kstat.h>
31 #include <sys/kmem.h>
32 #include <sys/kmem_cache.h>
33 #include <sys/vmem.h>
34 #include <sys/proc.h>
35 #include <linux/ctype.h>
36 #include <linux/kmod.h>
37 #include <linux/seq_file.h>
38 #include <linux/uaccess.h>
39 #include <linux/version.h>
40 #include "zfs_gitrev.h"
41
#if defined(CONSTIFY_PLUGIN)
/*
 * The PaX/grsecurity constify plugin treats ctl_table as write-once;
 * local working copies need __no_const so they can be modified.
 */
typedef struct ctl_table __no_const spl_ctl_table;
#else
typedef struct ctl_table spl_ctl_table;
#endif

/*
 * Newer kernels declare the proc_handler table argument const (detected
 * at configure time); use whichever qualification the kernel expects.
 */
#ifdef HAVE_PROC_HANDLER_CTL_TABLE_CONST
#define CONST_CTL_TABLE const struct ctl_table
#else
#define CONST_CTL_TABLE struct ctl_table
#endif

/* Range clamps handed to proc_doulongvec_minmax(): full unsigned range. */
static unsigned long table_min = 0;
static unsigned long table_max = ~0;

/* sysctl registration handles, released in spl_proc_cleanup(). */
static struct ctl_table_header *spl_header = NULL;
#ifndef HAVE_REGISTER_SYSCTL_TABLE
static struct ctl_table_header *spl_kmem = NULL;
static struct ctl_table_header *spl_kstat = NULL;
#endif
/* /proc/spl tree; proc_spl_kstat is non-static, shared with kstat code. */
static struct proc_dir_entry *proc_spl = NULL;
static struct proc_dir_entry *proc_spl_kmem = NULL;
static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
struct proc_dir_entry *proc_spl_kstat = NULL;
66
67 #ifdef DEBUG_KMEM
68 static int
proc_domemused(CONST_CTL_TABLE * table,int write,void __user * buffer,size_t * lenp,loff_t * ppos)69 proc_domemused(CONST_CTL_TABLE *table, int write,
70 void __user *buffer, size_t *lenp, loff_t *ppos)
71 {
72 int rc = 0;
73 unsigned long val;
74 spl_ctl_table dummy = *table;
75
76 dummy.data = &val;
77 dummy.proc_handler = &proc_dointvec;
78 dummy.extra1 = &table_min;
79 dummy.extra2 = &table_max;
80
81 if (write) {
82 *ppos += *lenp;
83 } else {
84 #ifdef HAVE_ATOMIC64_T
85 val = atomic64_read((atomic64_t *)table->data);
86 #else
87 val = atomic_read((atomic_t *)table->data);
88 #endif /* HAVE_ATOMIC64_T */
89 rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
90 }
91
92 return (rc);
93 }
94 #endif /* DEBUG_KMEM */
95
96 static int
proc_doslab(CONST_CTL_TABLE * table,int write,void __user * buffer,size_t * lenp,loff_t * ppos)97 proc_doslab(CONST_CTL_TABLE *table, int write,
98 void __user *buffer, size_t *lenp, loff_t *ppos)
99 {
100 int rc = 0;
101 unsigned long val = 0, mask;
102 spl_ctl_table dummy = *table;
103 spl_kmem_cache_t *skc = NULL;
104
105 dummy.data = &val;
106 dummy.proc_handler = &proc_dointvec;
107 dummy.extra1 = &table_min;
108 dummy.extra2 = &table_max;
109
110 if (write) {
111 *ppos += *lenp;
112 } else {
113 down_read(&spl_kmem_cache_sem);
114 mask = (unsigned long)table->data;
115
116 list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
117
118 /* Only use slabs of the correct kmem/vmem type */
119 if (!(skc->skc_flags & mask))
120 continue;
121
122 /* Sum the specified field for selected slabs */
123 switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
124 case KMC_TOTAL:
125 val += skc->skc_slab_size * skc->skc_slab_total;
126 break;
127 case KMC_ALLOC:
128 val += skc->skc_obj_size * skc->skc_obj_alloc;
129 break;
130 case KMC_MAX:
131 val += skc->skc_obj_size * skc->skc_obj_max;
132 break;
133 }
134 }
135
136 up_read(&spl_kmem_cache_sem);
137 rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
138 }
139
140 return (rc);
141 }
142
143 static int
proc_dohostid(CONST_CTL_TABLE * table,int write,void __user * buffer,size_t * lenp,loff_t * ppos)144 proc_dohostid(CONST_CTL_TABLE *table, int write,
145 void __user *buffer, size_t *lenp, loff_t *ppos)
146 {
147 char *end, str[32];
148 unsigned long hid;
149 spl_ctl_table dummy = *table;
150
151 dummy.data = str;
152 dummy.maxlen = sizeof (str) - 1;
153
154 if (!write)
155 snprintf(str, sizeof (str), "%lx",
156 (unsigned long) zone_get_hostid(NULL));
157
158 /* always returns 0 */
159 proc_dostring(&dummy, write, buffer, lenp, ppos);
160
161 if (write) {
162 /*
163 * We can't use proc_doulongvec_minmax() in the write
164 * case here because hostid, while a hex value, has no
165 * leading 0x, which confuses the helper function.
166 */
167
168 hid = simple_strtoul(str, &end, 16);
169 if (str == end)
170 return (-EINVAL);
171 spl_hostid = hid;
172 }
173
174 return (0);
175 }
176
/*
 * Print the two banner rows for /proc/spl/kmem/slab: the column-group
 * ruler, then the per-column labels matching slab_seq_show()'s output.
 * NOTE(review): literal spacing here must stay aligned with the format
 * strings in slab_seq_show() -- confirm before editing either.
 */
static void
slab_seq_show_headers(struct seq_file *f)
{
	seq_printf(f,
	    "--------------------- cache ----------"
	    "--------------------------------------------- "
	    "----- slab ------ "
	    "---- object ----- "
	    "--- emergency ---\n");
	seq_printf(f,
	    "name "
	    " flags size alloc slabsize objsize "
	    "total alloc max "
	    "total alloc max "
	    "dlock alloc max\n");
}
193
/*
 * seq_file ->show callback: emit one statistics row for a single SPL
 * kmem cache, holding its skc_lock while the counters are sampled.
 * Returns 0 (row emitted) in both branches.
 */
static int
slab_seq_show(struct seq_file *f, void *p)
{
	spl_kmem_cache_t *skc = p;

	ASSERT(skc->skc_magic == SKC_MAGIC);

	if (skc->skc_flags & KMC_SLAB) {
		/*
		 * This cache is backed by a generic Linux kmem cache which
		 * has its own accounting. For these caches we only track
		 * the number of active allocated objects that exist within
		 * the underlying Linux slabs. For the overall statistics of
		 * the underlying Linux cache please refer to /proc/slabinfo.
		 */
		spin_lock(&skc->skc_lock);
		uint64_t objs_allocated =
		    percpu_counter_sum(&skc->skc_linux_alloc);
		/* Columns untracked for Linux-backed caches print "-". */
		seq_printf(f, "%-36s ", skc->skc_name);
		seq_printf(f, "0x%05lx %9s %9lu %8s %8u "
		    "%5s %5s %5s %5s %5lu %5s %5s %5s %5s\n",
		    (long unsigned)skc->skc_flags,
		    "-",
		    (long unsigned)(skc->skc_obj_size * objs_allocated),
		    "-",
		    (unsigned)skc->skc_obj_size,
		    "-", "-", "-", "-",
		    (long unsigned)objs_allocated,
		    "-", "-", "-", "-");
		spin_unlock(&skc->skc_lock);
		return (0);
	}

	/* SPL-managed cache: dump slab, object and emergency counters. */
	spin_lock(&skc->skc_lock);
	seq_printf(f, "%-36s ", skc->skc_name);
	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u "
	    "%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n",
	    (long unsigned)skc->skc_flags,
	    (long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
	    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
	    (unsigned)skc->skc_slab_size,
	    (unsigned)skc->skc_obj_size,
	    (long unsigned)skc->skc_slab_total,
	    (long unsigned)skc->skc_slab_alloc,
	    (long unsigned)skc->skc_slab_max,
	    (long unsigned)skc->skc_obj_total,
	    (long unsigned)skc->skc_obj_alloc,
	    (long unsigned)skc->skc_obj_max,
	    (long unsigned)skc->skc_obj_deadlock,
	    (long unsigned)skc->skc_obj_emergency,
	    (long unsigned)skc->skc_obj_emergency_max);
	spin_unlock(&skc->skc_lock);
	return (0);
}
248
249 static void *
slab_seq_start(struct seq_file * f,loff_t * pos)250 slab_seq_start(struct seq_file *f, loff_t *pos)
251 {
252 struct list_head *p;
253 loff_t n = *pos;
254
255 down_read(&spl_kmem_cache_sem);
256 if (!n)
257 slab_seq_show_headers(f);
258
259 p = spl_kmem_cache_list.next;
260 while (n--) {
261 p = p->next;
262 if (p == &spl_kmem_cache_list)
263 return (NULL);
264 }
265
266 return (list_entry(p, spl_kmem_cache_t, skc_list));
267 }
268
269 static void *
slab_seq_next(struct seq_file * f,void * p,loff_t * pos)270 slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
271 {
272 spl_kmem_cache_t *skc = p;
273
274 ++*pos;
275 return ((skc->skc_list.next == &spl_kmem_cache_list) ?
276 NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list));
277 }
278
/* seq_file ->stop callback: drop the lock taken in slab_seq_start(). */
static void
slab_seq_stop(struct seq_file *f, void *v)
{
	up_read(&spl_kmem_cache_sem);
}
284
/* seq_file iterator callbacks backing /proc/spl/kmem/slab. */
static const struct seq_operations slab_seq_ops = {
	.show = slab_seq_show,
	.start = slab_seq_start,
	.next = slab_seq_next,
	.stop = slab_seq_stop,
};
291
/* ->open handler for /proc/spl/kmem/slab: attach the seq_file iterator. */
static int
proc_slab_open(struct inode *inode, struct file *filp)
{
	return (seq_open(filp, &slab_seq_ops));
}
297
/*
 * File operations for /proc/spl/kmem/slab.  Newer kernels take a
 * struct proc_ops (proc_* field names); older kernels take a classic
 * file_operations layout -- kstat_proc_op_t abstracts the difference.
 */
static const kstat_proc_op_t proc_slab_operations = {
#ifdef HAVE_PROC_OPS_STRUCT
	.proc_open = proc_slab_open,
	.proc_read = seq_read,
	.proc_lseek = seq_lseek,
	.proc_release = seq_release,
#else
	.open = proc_slab_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
#endif
};
311
/*
 * /proc/sys/kernel/spl/kmem/* entries.  Note the slab_kvmem_* entries
 * repurpose .data to carry a KMC_* selection mask (not a pointer),
 * which proc_doslab() decodes to pick the cache type and statistic.
 */
static struct ctl_table spl_kmem_table[] = {
#ifdef DEBUG_KMEM
	/* Current and high-water kmem usage, only tracked in debug builds. */
	{
		.procname = "kmem_used",
		.data = &kmem_alloc_used,
#ifdef HAVE_ATOMIC64_T
		.maxlen = sizeof (atomic64_t),
#else
		.maxlen = sizeof (atomic_t),
#endif /* HAVE_ATOMIC64_T */
		.mode = 0444,
		.proc_handler = &proc_domemused,
	},
	{
		.procname = "kmem_max",
		.data = &kmem_alloc_max,
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doulongvec_minmax,
	},
#endif /* DEBUG_KMEM */
	{
		.procname = "slab_kvmem_total",
		.data = (void *)(KMC_KVMEM | KMC_TOTAL),
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		.procname = "slab_kvmem_alloc",
		.data = (void *)(KMC_KVMEM | KMC_ALLOC),
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		.procname = "slab_kvmem_max",
		.data = (void *)(KMC_KVMEM | KMC_MAX),
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{},
};
364
/*
 * Intentionally empty: registering this table only creates the
 * kernel/spl/kstat sysctl directory for entries added elsewhere.
 */
static struct ctl_table spl_kstat_table[] = {
	{},
};
368
/* Top-level /proc/sys/kernel/spl/* entries. */
static struct ctl_table spl_table[] = {
	/*
	 * NB No .strategy entries have been provided since
	 * sysctl(8) prefers to go via /proc for portability.
	 */
	/* Read-only git revision string baked in at build time. */
	{
		.procname = "gitrev",
		.data = (char *)ZFS_META_GITREV,
		.maxlen = sizeof (ZFS_META_GITREV),
		.mode = 0444,
		.proc_handler = &proc_dostring,
	},
	/* Writable hostid; proc_dohostid() parses it as bare hex. */
	{
		.procname = "hostid",
		.data = &spl_hostid,
		.maxlen = sizeof (unsigned long),
		.mode = 0644,
		.proc_handler = &proc_dohostid,
	},
#ifdef HAVE_REGISTER_SYSCTL_TABLE
	/*
	 * Kernels with register_sysctl_table() attach the sub-directory
	 * tables directly via .child; newer kernels register them by
	 * path in spl_proc_init() instead.
	 */
	{
		.procname = "kmem",
		.mode = 0555,
		.child = spl_kmem_table,
	},
	{
		.procname = "kstat",
		.mode = 0555,
		.child = spl_kstat_table,
	},
#endif
	{},
};
402
403 #ifdef HAVE_REGISTER_SYSCTL_TABLE
/*
 * Legacy register_sysctl_table() wrappers: nest spl_table under
 * kernel/spl so the single registration in spl_proc_init() creates
 * the whole /proc/sys/kernel/spl tree.
 */
static struct ctl_table spl_dir[] = {
	{
		.procname = "spl",
		.mode = 0555,
		.child = spl_table,
	},
	{}
};

static struct ctl_table spl_root[] = {
	{
		.procname = "kernel",
		.mode = 0555,
		.child = spl_dir,
	},
	{}
};
421 #endif
422
/*
 * Remove every /proc and sysctl entry created by spl_proc_init().
 * Also invoked on the init error path, where some /proc entries may
 * not exist yet; the sysctl headers are NULL-checked, and the
 * remove_proc_entry() calls assume the kernel tolerates removing
 * names that were never created (NOTE(review): confirm on target
 * kernels).
 */
static void spl_proc_cleanup(void)
{
	/* Tear down /proc/spl children before the directory itself. */
	remove_proc_entry("kstat", proc_spl);
	remove_proc_entry("slab", proc_spl_kmem);
	remove_proc_entry("kmem", proc_spl);
	remove_proc_entry("spl", NULL);

#ifndef HAVE_REGISTER_SYSCTL_TABLE
	if (spl_kstat) {
		unregister_sysctl_table(spl_kstat);
		spl_kstat = NULL;
	}
	if (spl_kmem) {
		unregister_sysctl_table(spl_kmem);
		spl_kmem = NULL;
	}
#endif
	if (spl_header) {
		unregister_sysctl_table(spl_header);
		spl_header = NULL;
	}
}
445
446 #ifndef HAVE_REGISTER_SYSCTL_TABLE
447
448 /*
449 * Traditionally, struct ctl_table arrays have been terminated by an "empty"
450 * sentinel element (specifically, one with .procname == NULL).
451 *
452 * Linux 6.6 began migrating away from this, adding register_sysctl_sz() so
453 * that callers could provide the size directly, and redefining
454 * register_sysctl() to just call register_sysctl_sz() with the array size. It
455 * retained support for the terminating element so that existing callers would
456 * continue to work.
457 *
458 * Linux 6.11 removed support for the terminating element, instead interpreting
459 * it as a real malformed element, and rejecting it.
460 *
 * In order to continue supporting older kernels, we retain the terminating
462 * sentinel element for our sysctl tables, but instead detect availability of
463 * register_sysctl_sz(). If it exists, we pass it the array size -1, stopping
464 * the kernel from trying to process the terminator. For pre-6.6 kernels that
465 * don't have register_sysctl_sz(), we just use register_sysctl(), which can
466 * handle the terminating element as it always has.
467 */
#ifdef HAVE_REGISTER_SYSCTL_SZ
/* Pass the size minus the sentinel element; see the comment above. */
#define spl_proc_register_sysctl(p, t) \
	register_sysctl_sz(p, t, ARRAY_SIZE(t)-1)
#else
#define spl_proc_register_sysctl(p, t) \
	register_sysctl(p, t)
#endif
475 #endif
476
/*
 * Create the SPL sysctl tree (/proc/sys/kernel/spl/...) and the
 * /proc/spl directory hierarchy.  Returns 0 on success or -EUNATCH
 * on any registration failure, after unwinding everything already
 * created via spl_proc_cleanup().
 */
int
spl_proc_init(void)
{
	int rc = 0;

#ifdef HAVE_REGISTER_SYSCTL_TABLE
	/* Legacy kernels: one nested registration covers the whole tree. */
	spl_header = register_sysctl_table(spl_root);
	if (spl_header == NULL)
		return (-EUNATCH);
#else
	/* Modern kernels: register each directory level by path. */
	spl_header = spl_proc_register_sysctl("kernel/spl", spl_table);
	if (spl_header == NULL)
		return (-EUNATCH);

	spl_kmem = spl_proc_register_sysctl("kernel/spl/kmem", spl_kmem_table);
	if (spl_kmem == NULL) {
		rc = -EUNATCH;
		goto out;
	}
	spl_kstat = spl_proc_register_sysctl("kernel/spl/kstat",
	    spl_kstat_table);
	if (spl_kstat == NULL) {
		rc = -EUNATCH;
		goto out;
	}
#endif

	/* /proc/spl and its subtree. */
	proc_spl = proc_mkdir("spl", NULL);
	if (proc_spl == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
	if (proc_spl_kmem == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	/* /proc/spl/kmem/slab is served by the seq_file iterator above. */
	proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
	    &proc_slab_operations, NULL);
	if (proc_spl_kmem_slab == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kstat = proc_mkdir("kstat", proc_spl);
	if (proc_spl_kstat == NULL) {
		rc = -EUNATCH;
		goto out;
	}
out:
	if (rc)
		spl_proc_cleanup();

	return (rc);
}
534
/* Module-unload hook: tear down all SPL /proc and sysctl entries. */
void
spl_proc_fini(void)
{
	spl_proc_cleanup();
}
540