// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 * Copyright (C) 2007 The Regents of the University of California.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 * UCRL-CODE-235197
 *
 * This file is part of the SPL, Solaris Porting Layer.
 *
 * The SPL is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * The SPL is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
 *
 * Solaris Porting Layer (SPL) Proc Implementation.
 */
/*
 * Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
 */

#include <sys/systeminfo.h>
#include <sys/kstat.h>
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/vmem.h>
#include <sys/proc.h>
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/version.h>
#include "zfs_gitrev.h"

/*
 * Under the grsecurity "constify" GCC plugin, struct ctl_table is forced
 * read-only.  The handlers below build a writable stack copy of the table
 * (patching .data to point at a local) before delegating to the generic
 * proc_* helpers, so that copy must be declared __no_const when the plugin
 * is active.
 */
#if defined(CONSTIFY_PLUGIN)
typedef struct ctl_table __no_const spl_ctl_table;
#else
typedef struct ctl_table spl_ctl_table;
#endif

/*
 * Newer kernels declare the proc_handler callback as taking a
 * 'const struct ctl_table *'; use whichever signature this kernel's
 * headers expect (feature-detected at configure time).
 */
#ifdef HAVE_PROC_HANDLER_CTL_TABLE_CONST
#define	CONST_CTL_TABLE	const struct ctl_table
#else
#define	CONST_CTL_TABLE	struct ctl_table
#endif

/* Range limits handed to proc_doulongvec_minmax(): effectively unbounded. */
static unsigned long table_min = 0;
static unsigned long table_max = ~0;

/* sysctl registration handle for the kernel.spl tree. */
static struct ctl_table_header *spl_header = NULL;
#ifndef HAVE_REGISTER_SYSCTL_TABLE
/*
 * Without register_sysctl_table() (and its .child linkage) each
 * subdirectory must be registered, and later unregistered, separately.
 */
static struct ctl_table_header *spl_kmem = NULL;
static struct ctl_table_header *spl_kstat = NULL;
#endif
/* /proc/spl directory tree created by spl_proc_init(). */
static struct proc_dir_entry *proc_spl = NULL;
static struct proc_dir_entry *proc_spl_kmem = NULL;
static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
/*
 * Deliberately non-static: presumably the kstat implementation hangs its
 * entries under /proc/spl/kstat via this handle — TODO confirm against
 * the kstat code.
 */
struct proc_dir_entry *proc_spl_kstat = NULL;

#ifdef DEBUG_KMEM
/*
 * Read-only sysctl handler exposing an atomic kmem byte counter
 * (table->data points at the atomic).  The value is snapshotted into a
 * plain unsigned long and formatted via proc_doulongvec_minmax() through
 * a writable stack copy of the table.
 */
static int
proc_domemused(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = 0;
	unsigned long val;
	spl_ctl_table dummy = *table;

	dummy.data = &val;
	/*
	 * NOTE(review): .proc_handler on the copy appears unused — the
	 * formatting below calls proc_doulongvec_minmax() directly.
	 */
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &table_min;
	dummy.extra2 = &table_max;

	if (write) {
		/* Entry is read-only (0444); consume the write silently. */
		*ppos += *lenp;
	} else {
#ifdef HAVE_ATOMIC64_T
		val = atomic64_read((atomic64_t *)table->data);
#else
		val = atomic_read((atomic_t *)table->data);
#endif /* HAVE_ATOMIC64_T */
		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
	}

	return (rc);
}
#endif /* DEBUG_KMEM */

/*
 * Read-only sysctl handler summing one statistic over all registered SPL
 * kmem caches.  table->data is not a pointer here: it encodes a mask of a
 * cache-type flag (e.g. KMC_KVMEM) selecting which caches to include, plus
 * exactly one of KMC_TOTAL/KMC_ALLOC/KMC_MAX selecting the statistic.
 * The cache list is walked under spl_kmem_cache_sem (read side).
 */
static int
proc_doslab(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = 0;
	unsigned long val = 0, mask;
	spl_ctl_table dummy = *table;
	spl_kmem_cache_t *skc = NULL;

	dummy.data = &val;
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &table_min;
	dummy.extra2 = &table_max;

	if (write) {
		/* Entry is read-only (0444); consume the write silently. */
		*ppos += *lenp;
	} else {
		down_read(&spl_kmem_cache_sem);
		mask = (unsigned long)table->data;

		list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {

			/* Only use slabs of the correct kmem/vmem type */
			if (!(skc->skc_flags & mask))
				continue;

			/* Sum the specified field for selected slabs */
			switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
			case KMC_TOTAL:
				val += skc->skc_slab_size * skc->skc_slab_total;
				break;
			case KMC_ALLOC:
				val += skc->skc_obj_size * skc->skc_obj_alloc;
				break;
			case KMC_MAX:
				val += skc->skc_obj_size * skc->skc_obj_max;
				break;
			}
		}

		up_read(&spl_kmem_cache_sem);
		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
	}

	return (rc);
}

/*
 * Read/write sysctl handler for the host id.  The value is presented and
 * accepted as bare hex (no "0x" prefix), so reads format it with "%lx"
 * and writes round-trip through proc_dostring() into a local buffer and
 * parse it back with base 16.  Returns 0 on success, -EINVAL if a write
 * contains no parseable hex digits.
 */
static int
proc_dohostid(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	char *end, str[32];
	unsigned long hid;
	spl_ctl_table dummy = *table;

	dummy.data = str;
	dummy.maxlen = sizeof (str) - 1;

	if (!write)
		snprintf(str, sizeof (str), "%lx",
		    (unsigned long) zone_get_hostid(NULL));

	/* always returns 0 */
	proc_dostring(&dummy, write, buffer, lenp, ppos);

	if (write) {
		/*
		 * We can't use proc_doulongvec_minmax() in the write
		 * case here because hostid, while a hex value, has no
		 * leading 0x, which confuses the helper function.
		 */

		hid = simple_strtoul(str, &end, 16);
		if (str == end)
			return (-EINVAL);
		spl_hostid = hid;
	}

	return (0);
}

/*
 * Emit the two-row column header for /proc/spl/kmem/slab.
 *
 * NOTE(review): runs of spaces inside these literals look collapsed by
 * whitespace-mangling during extraction; the original almost certainly
 * padded columns to align with the %-36s/%9lu/... formats used in
 * slab_seq_show() — confirm against upstream before relying on layout.
 */
static void
slab_seq_show_headers(struct seq_file *f)
{
	seq_printf(f,
	    "--------------------- cache ----------"
	    "--------------------------------------------- "
	    "----- slab ------ "
	    "---- object ----- "
	    "--- emergency ---\n");
	seq_printf(f,
	    "name "
	    " flags size alloc slabsize objsize "
	    "total alloc max "
	    "total alloc max "
	    "dlock alloc max\n");
}

/*
 * seq_file .show callback: print one row of statistics for a single SPL
 * kmem cache, under that cache's spin lock.  KMC_SLAB-backed caches only
 * track allocated-object counts here (columns that don't apply print "-");
 * full per-slab accounting for them lives in /proc/slabinfo.
 */
static int
slab_seq_show(struct seq_file *f, void *p)
{
	spl_kmem_cache_t *skc = p;

	ASSERT(skc->skc_magic == SKC_MAGIC);

	if (skc->skc_flags & KMC_SLAB) {
		/*
		 * This cache is backed by a generic Linux kmem cache which
		 * has its own accounting. For these caches we only track
		 * the number of active allocated objects that exist within
		 * the underlying Linux slabs. For the overall statistics of
		 * the underlying Linux cache please refer to /proc/slabinfo.
		 */
		spin_lock(&skc->skc_lock);
		uint64_t objs_allocated =
		    percpu_counter_sum(&skc->skc_linux_alloc);
		seq_printf(f, "%-36s ", skc->skc_name);
		seq_printf(f, "0x%05lx %9s %9lu %8s %8u "
		    "%5s %5s %5s %5s %5lu %5s %5s %5s %5s\n",
		    (long unsigned)skc->skc_flags,
		    "-",
		    (long unsigned)(skc->skc_obj_size * objs_allocated),
		    "-",
		    (unsigned)skc->skc_obj_size,
		    "-", "-", "-", "-",
		    (long unsigned)objs_allocated,
		    "-", "-", "-", "-");
		spin_unlock(&skc->skc_lock);
		return (0);
	}

	spin_lock(&skc->skc_lock);
	seq_printf(f, "%-36s ", skc->skc_name);
	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u "
	    "%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n",
	    (long unsigned)skc->skc_flags,
	    (long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
	    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
	    (unsigned)skc->skc_slab_size,
	    (unsigned)skc->skc_obj_size,
	    (long unsigned)skc->skc_slab_total,
	    (long unsigned)skc->skc_slab_alloc,
	    (long unsigned)skc->skc_slab_max,
	    (long unsigned)skc->skc_obj_total,
	    (long unsigned)skc->skc_obj_alloc,
	    (long unsigned)skc->skc_obj_max,
	    (long unsigned)skc->skc_obj_deadlock,
	    (long unsigned)skc->skc_obj_emergency,
	    (long unsigned)skc->skc_obj_emergency_max);
	spin_unlock(&skc->skc_lock);
	return (0);
}

/*
 * seq_file .start callback: take spl_kmem_cache_sem (read side) for the
 * whole traversal — released in slab_seq_stop() — print the header on the
 * first call (*pos == 0), and walk the list forward to the *pos'th cache.
 * Returns NULL when *pos is past the end of the list.
 */
static void *
slab_seq_start(struct seq_file *f, loff_t *pos)
{
	struct list_head *p;
	loff_t n = *pos;

	down_read(&spl_kmem_cache_sem);
	if (!n)
		slab_seq_show_headers(f);

	p = spl_kmem_cache_list.next;
	while (n--) {
		p = p->next;
		if (p == &spl_kmem_cache_list)
			return (NULL);
	}

	return (list_entry(p, spl_kmem_cache_t, skc_list));
}

/*
 * seq_file .next callback: advance to the following cache, or NULL at the
 * end of the list (the list head marks the end).
 */
static void *
slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
{
	spl_kmem_cache_t *skc = p;

	++*pos;
	return ((skc->skc_list.next == &spl_kmem_cache_list) ?
	    NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list));
}

/* seq_file .stop callback: drop the semaphore taken in slab_seq_start(). */
static void
slab_seq_stop(struct seq_file *f, void *v)
{
	up_read(&spl_kmem_cache_sem);
}

static const struct seq_operations slab_seq_ops = {
	.show = slab_seq_show,
	.start = slab_seq_start,
	.next = slab_seq_next,
	.stop = slab_seq_stop,
};

/* open() for /proc/spl/kmem/slab: hook up the seq_file iterator above. */
static int
proc_slab_open(struct inode *inode, struct file *filp)
{
	return (seq_open(filp, &slab_seq_ops));
}

/*
 * File operations for /proc/spl/kmem/slab; kernels differ on whether
 * procfs wants struct proc_ops or struct file_operations field names.
 */
static const kstat_proc_op_t proc_slab_operations = {
#ifdef HAVE_PROC_OPS_STRUCT
	.proc_open = proc_slab_open,
	.proc_read = seq_read,
	.proc_lseek = seq_lseek,
	.proc_release = seq_release,
#else
	.open = proc_slab_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
#endif
};

/*
 * kernel.spl.kmem sysctl entries.  The slab_kvmem_* entries abuse .data
 * to carry the KMC_* selection mask decoded by proc_doslab() above.
 */
static struct ctl_table spl_kmem_table[] = {
#ifdef DEBUG_KMEM
	{
		.procname = "kmem_used",
		.data = &kmem_alloc_used,
#ifdef HAVE_ATOMIC64_T
		.maxlen = sizeof (atomic64_t),
#else
		.maxlen = sizeof (atomic_t),
#endif /* HAVE_ATOMIC64_T */
		.mode = 0444,
		.proc_handler = &proc_domemused,
	},
	{
		.procname = "kmem_max",
		.data = &kmem_alloc_max,
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doulongvec_minmax,
	},
#endif /* DEBUG_KMEM */
	{
		.procname = "slab_kvmem_total",
		.data = (void *)(KMC_KVMEM | KMC_TOTAL),
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		.procname = "slab_kvmem_alloc",
		.data = (void *)(KMC_KVMEM | KMC_ALLOC),
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		.procname = "slab_kvmem_max",
		.data = (void *)(KMC_KVMEM | KMC_MAX),
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{},
};

/*
 * kernel.spl.kstat is an (empty) directory placeholder; actual kstat
 * entries live under /proc/spl/kstat instead.
 */
static struct ctl_table spl_kstat_table[] = {
	{},
};

/* Top-level kernel.spl sysctl entries. */
static struct ctl_table spl_table[] = {
	/*
	 * NB No .strategy entries have been provided since
	 * sysctl(8) prefers to go via /proc for portability.
	 */
	{
		.procname = "gitrev",
		.data = (char *)ZFS_META_GITREV,
		.maxlen = sizeof (ZFS_META_GITREV),
		.mode = 0444,
		.proc_handler = &proc_dostring,
	},
	{
		.procname = "hostid",
		.data = &spl_hostid,
		.maxlen = sizeof (unsigned long),
		.mode = 0644,
		.proc_handler = &proc_dohostid,
	},
#ifdef HAVE_REGISTER_SYSCTL_TABLE
	/* Old API: subdirectories are linked in via .child. */
	{
		.procname = "kmem",
		.mode = 0555,
		.child = spl_kmem_table,
	},
	{
		.procname = "kstat",
		.mode = 0555,
		.child = spl_kstat_table,
	},
#endif
	{},
};

#ifdef HAVE_REGISTER_SYSCTL_TABLE
/*
 * With the old register_sysctl_table() API the full path must be spelled
 * out as nested .child tables rooted at "kernel".
 */
static struct ctl_table spl_dir[] = {
	{
		.procname = "spl",
		.mode = 0555,
		.child = spl_table,
	},
	{}
};

static struct ctl_table spl_root[] = {
	{
		.procname = "kernel",
		.mode = 0555,
		.child = spl_dir,
	},
	{}
};
#endif

/*
 * Tear down everything spl_proc_init() created, in reverse order.  Also
 * used for error unwinding during init, so each handle is checked and
 * NULLed before unregistering.  NOTE(review): the remove_proc_entry()
 * calls run unconditionally even when the corresponding proc_mkdir()
 * never happened — relying on the kernel tolerating removal of missing
 * entries; confirm this is intentional.
 */
static void spl_proc_cleanup(void)
{
	remove_proc_entry("kstat", proc_spl);
	remove_proc_entry("slab", proc_spl_kmem);
	remove_proc_entry("kmem", proc_spl);
	remove_proc_entry("spl", NULL);

#ifndef HAVE_REGISTER_SYSCTL_TABLE
	if (spl_kstat) {
		unregister_sysctl_table(spl_kstat);
		spl_kstat = NULL;
	}
	if (spl_kmem) {
		unregister_sysctl_table(spl_kmem);
		spl_kmem = NULL;
	}
#endif
	if (spl_header) {
		unregister_sysctl_table(spl_header);
		spl_header = NULL;
	}
}

#ifndef HAVE_REGISTER_SYSCTL_TABLE

/*
 * Traditionally, struct ctl_table arrays have been terminated by an "empty"
 * sentinel element (specifically, one with .procname == NULL).
 *
 * Linux 6.6 began migrating away from this, adding register_sysctl_sz() so
 * that callers could provide the size directly, and redefining
 * register_sysctl() to just call register_sysctl_sz() with the array size. It
 * retained support for the terminating element so that existing callers would
 * continue to work.
 *
 * Linux 6.11 removed support for the terminating element, instead interpreting
 * it as a real malformed element, and rejecting it.
 *
 * In order to continue support older kernels, we retain the terminating
 * sentinel element for our sysctl tables, but instead detect availability of
 * register_sysctl_sz(). If it exists, we pass it the array size -1, stopping
 * the kernel from trying to process the terminator. For pre-6.6 kernels that
 * don't have register_sysctl_sz(), we just use register_sysctl(), which can
 * handle the terminating element as it always has.
 */
#ifdef HAVE_REGISTER_SYSCTL_SZ
#define	spl_proc_register_sysctl(p, t)	\
	register_sysctl_sz(p, t, ARRAY_SIZE(t)-1)
#else
#define	spl_proc_register_sysctl(p, t)	\
	register_sysctl(p, t)
#endif
#endif

/*
 * Register the SPL sysctl tree (kernel.spl[.kmem,.kstat]) and build the
 * /proc/spl directory tree (/proc/spl/kmem/slab, /proc/spl/kstat).
 * Returns 0 on success or -EUNATCH on any registration failure, in which
 * case everything already created is unwound via spl_proc_cleanup().
 */
int
spl_proc_init(void)
{
	int rc = 0;

#ifdef HAVE_REGISTER_SYSCTL_TABLE
	spl_header = register_sysctl_table(spl_root);
	if (spl_header == NULL)
		return (-EUNATCH);
#else
	spl_header = spl_proc_register_sysctl("kernel/spl", spl_table);
	if (spl_header == NULL)
		return (-EUNATCH);

	spl_kmem = spl_proc_register_sysctl("kernel/spl/kmem", spl_kmem_table);
	if (spl_kmem == NULL) {
		rc = -EUNATCH;
		goto out;
	}
	spl_kstat = spl_proc_register_sysctl("kernel/spl/kstat",
	    spl_kstat_table);
	if (spl_kstat == NULL) {
		rc = -EUNATCH;
		goto out;
	}
#endif

	proc_spl = proc_mkdir("spl", NULL);
	if (proc_spl == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
	if (proc_spl_kmem == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
	    &proc_slab_operations, NULL);
	if (proc_spl_kmem_slab == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kstat = proc_mkdir("kstat", proc_spl);
	if (proc_spl_kstat == NULL) {
		rc = -EUNATCH;
		goto out;
	}
out:
	if (rc)
		spl_proc_cleanup();

	return (rc);
}

/* Module-exit counterpart of spl_proc_init(). */
void
spl_proc_fini(void)
{
	spl_proc_cleanup();
}