1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. 4 * Copyright (C) 2007 The Regents of the University of California. 5 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 6 * Written by Brian Behlendorf <behlendorf1@llnl.gov>. 7 * UCRL-CODE-235197 8 * 9 * This file is part of the SPL, Solaris Porting Layer. 10 * 11 * The SPL is free software; you can redistribute it and/or modify it 12 * under the terms of the GNU General Public License as published by the 13 * Free Software Foundation; either version 2 of the License, or (at your 14 * option) any later version. 15 * 16 * The SPL is distributed in the hope that it will be useful, but WITHOUT 17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 * for more details. 20 * 21 * You should have received a copy of the GNU General Public License along 22 * with the SPL. If not, see <http://www.gnu.org/licenses/>. 23 * 24 * Solaris Porting Layer (SPL) Proc Implementation. 25 */ 26 /* 27 * Copyright (c) 2024, Rob Norris <robn@despairlabs.com> 28 */ 29 30 #include <sys/systeminfo.h> 31 #include <sys/kstat.h> 32 #include <sys/kmem.h> 33 #include <sys/kmem_cache.h> 34 #include <sys/vmem.h> 35 #include <sys/proc.h> 36 #include <linux/ctype.h> 37 #include <linux/kmod.h> 38 #include <linux/seq_file.h> 39 #include <linux/uaccess.h> 40 #include <linux/version.h> 41 #include "zfs_gitrev.h" 42 43 #if defined(CONSTIFY_PLUGIN) 44 typedef struct ctl_table __no_const spl_ctl_table; 45 #else 46 typedef struct ctl_table spl_ctl_table; 47 #endif 48 49 #ifdef HAVE_PROC_HANDLER_CTL_TABLE_CONST 50 #define CONST_CTL_TABLE const struct ctl_table 51 #else 52 #define CONST_CTL_TABLE struct ctl_table 53 #endif 54 55 static unsigned long table_min = 0; 56 static unsigned long table_max = ~0; 57 58 static struct ctl_table_header *spl_header = NULL; 59 #ifndef HAVE_REGISTER_SYSCTL_TABLE 60 static struct ctl_table_header *spl_kmem = NULL; 61 static struct ctl_table_header *spl_kstat = NULL; 62 #endif 63 static struct proc_dir_entry *proc_spl = NULL; 64 static struct proc_dir_entry *proc_spl_kmem = NULL; 65 static struct proc_dir_entry *proc_spl_kmem_slab = NULL; 66 struct proc_dir_entry *proc_spl_kstat = NULL; 67 68 #ifdef DEBUG_KMEM 69 static int 70 proc_domemused(CONST_CTL_TABLE *table, int write, 71 void __user *buffer, size_t *lenp, loff_t *ppos) 72 { 73 int rc = 0; 74 unsigned long val; 75 spl_ctl_table dummy = *table; 76 77 dummy.data = &val; 78 dummy.proc_handler = &proc_dointvec; 79 dummy.extra1 = &table_min; 80 dummy.extra2 = &table_max; 81 82 if (write) { 83 *ppos += *lenp; 84 } else { 85 val = atomic64_read((atomic64_t *)table->data); 86 rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos); 87 } 88 89 return (rc); 90 } 91 #endif /* DEBUG_KMEM */ 92 93 static int 94 proc_doslab(CONST_CTL_TABLE *table, int write, 95 void __user *buffer, size_t *lenp, loff_t *ppos) 96 { 97 int rc = 0; 98 unsigned long val = 0, mask; 99 spl_ctl_table dummy = *table; 100 spl_kmem_cache_t *skc = NULL; 101 102 dummy.data = &val; 103 dummy.proc_handler = &proc_dointvec; 104 dummy.extra1 = &table_min; 105 dummy.extra2 = &table_max; 106 107 if (write) { 108 *ppos += *lenp; 109 } else { 110 down_read(&spl_kmem_cache_sem); 111 mask = (unsigned long)table->data; 112 113 list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) { 114 115 /* Only use slabs of the correct kmem/vmem type */ 116 if (!(skc->skc_flags & mask)) 117 continue; 118 119 /* Sum the specified field for selected slabs */ 120 switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) { 121 case KMC_TOTAL: 122 val += skc->skc_slab_size * skc->skc_slab_total; 123 break; 124 case KMC_ALLOC: 125 val += skc->skc_obj_size * skc->skc_obj_alloc; 126 break; 127 case KMC_MAX: 128 val += skc->skc_obj_size * skc->skc_obj_max; 129 break; 130 } 131 } 132 133 up_read(&spl_kmem_cache_sem); 134 rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos); 135 } 136 137 return (rc); 138 } 139 140 static int 141 proc_dohostid(CONST_CTL_TABLE *table, int write, 142 void __user *buffer, size_t *lenp, loff_t *ppos) 143 { 144 char *end, str[32]; 145 unsigned long hid; 146 spl_ctl_table dummy = *table; 147 148 dummy.data = str; 149 dummy.maxlen = sizeof (str) - 1; 150 151 if (!write) 152 snprintf(str, sizeof (str), "%lx", 153 (unsigned long) zone_get_hostid(NULL)); 154 155 /* always returns 0 */ 156 proc_dostring(&dummy, write, buffer, lenp, ppos); 157 158 if (write) { 159 /* 160 * We can't use proc_doulongvec_minmax() in the write 161 * case here because hostid, while a hex value, has no 162 * leading 0x, which confuses the helper function. 163 */ 164 165 hid = simple_strtoul(str, &end, 16); 166 if (str == end) 167 return (-EINVAL); 168 spl_hostid = hid; 169 } 170 171 return (0); 172 } 173 174 static void 175 slab_seq_show_headers(struct seq_file *f) 176 { 177 seq_printf(f, 178 "--------------------- cache ----------" 179 "--------------------------------------------- " 180 "----- slab ------ " 181 "---- object ----- " 182 "--- emergency ---\n"); 183 seq_printf(f, 184 "name " 185 " flags size alloc slabsize objsize " 186 "total alloc max " 187 "total alloc max " 188 "dlock alloc max\n"); 189 } 190 191 static int 192 slab_seq_show(struct seq_file *f, void *p) 193 { 194 spl_kmem_cache_t *skc = p; 195 196 ASSERT(skc->skc_magic == SKC_MAGIC); 197 198 if (skc->skc_flags & KMC_SLAB) { 199 /* 200 * This cache is backed by a generic Linux kmem cache which 201 * has its own accounting. For these caches we only track 202 * the number of active allocated objects that exist within 203 * the underlying Linux slabs. For the overall statistics of 204 * the underlying Linux cache please refer to /proc/slabinfo. 205 */ 206 spin_lock(&skc->skc_lock); 207 uint64_t objs_allocated = 208 percpu_counter_sum(&skc->skc_linux_alloc); 209 seq_printf(f, "%-36s ", skc->skc_name); 210 seq_printf(f, "0x%05lx %9s %9lu %8s %8u " 211 "%5s %5s %5s %5s %5lu %5s %5s %5s %5s\n", 212 (long unsigned)skc->skc_flags, 213 "-", 214 (long unsigned)(skc->skc_obj_size * objs_allocated), 215 "-", 216 (unsigned)skc->skc_obj_size, 217 "-", "-", "-", "-", 218 (long unsigned)objs_allocated, 219 "-", "-", "-", "-"); 220 spin_unlock(&skc->skc_lock); 221 return (0); 222 } 223 224 spin_lock(&skc->skc_lock); 225 seq_printf(f, "%-36s ", skc->skc_name); 226 seq_printf(f, "0x%05lx %9lu %9lu %8u %8u " 227 "%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n", 228 (long unsigned)skc->skc_flags, 229 (long unsigned)(skc->skc_slab_size * skc->skc_slab_total), 230 (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc), 231 (unsigned)skc->skc_slab_size, 232 (unsigned)skc->skc_obj_size, 233 (long unsigned)skc->skc_slab_total, 234 (long unsigned)skc->skc_slab_alloc, 235 (long unsigned)skc->skc_slab_max, 236 (long unsigned)skc->skc_obj_total, 237 (long unsigned)skc->skc_obj_alloc, 238 (long unsigned)skc->skc_obj_max, 239 (long unsigned)skc->skc_obj_deadlock, 240 (long unsigned)skc->skc_obj_emergency, 241 (long unsigned)skc->skc_obj_emergency_max); 242 spin_unlock(&skc->skc_lock); 243 return (0); 244 } 245 246 static void * 247 slab_seq_start(struct seq_file *f, loff_t *pos) 248 { 249 struct list_head *p; 250 loff_t n = *pos; 251 252 down_read(&spl_kmem_cache_sem); 253 if (!n) 254 slab_seq_show_headers(f); 255 256 p = spl_kmem_cache_list.next; 257 while (n--) { 258 p = p->next; 259 if (p == &spl_kmem_cache_list) 260 return (NULL); 261 } 262 263 return (list_entry(p, spl_kmem_cache_t, skc_list)); 264 } 265 266 static void * 267 slab_seq_next(struct seq_file *f, void *p, loff_t *pos) 268 { 269 spl_kmem_cache_t *skc = p; 270 271 ++*pos; 272 return ((skc->skc_list.next == &spl_kmem_cache_list) ? 273 NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list)); 274 } 275 276 static void 277 slab_seq_stop(struct seq_file *f, void *v) 278 { 279 up_read(&spl_kmem_cache_sem); 280 } 281 282 static const struct seq_operations slab_seq_ops = { 283 .show = slab_seq_show, 284 .start = slab_seq_start, 285 .next = slab_seq_next, 286 .stop = slab_seq_stop, 287 }; 288 289 static int 290 proc_slab_open(struct inode *inode, struct file *filp) 291 { 292 return (seq_open(filp, &slab_seq_ops)); 293 } 294 295 static const kstat_proc_op_t proc_slab_operations = { 296 #ifdef HAVE_PROC_OPS_STRUCT 297 .proc_open = proc_slab_open, 298 .proc_read = seq_read, 299 .proc_lseek = seq_lseek, 300 .proc_release = seq_release, 301 #else 302 .open = proc_slab_open, 303 .read = seq_read, 304 .llseek = seq_lseek, 305 .release = seq_release, 306 #endif 307 }; 308 309 static struct ctl_table spl_kmem_table[] = { 310 #ifdef DEBUG_KMEM 311 { 312 .procname = "kmem_used", 313 .data = &kmem_alloc_used, 314 .maxlen = sizeof (atomic64_t), 315 .mode = 0444, 316 .proc_handler = &proc_domemused, 317 }, 318 { 319 .procname = "kmem_max", 320 .data = &kmem_alloc_max, 321 .maxlen = sizeof (uint64_t), 322 .extra1 = &table_min, 323 .extra2 = &table_max, 324 .mode = 0444, 325 .proc_handler = &proc_doulongvec_minmax, 326 }, 327 #endif /* DEBUG_KMEM */ 328 { 329 .procname = "slab_kvmem_total", 330 .data = (void *)(KMC_KVMEM | KMC_TOTAL), 331 .maxlen = sizeof (unsigned long), 332 .extra1 = &table_min, 333 .extra2 = &table_max, 334 .mode = 0444, 335 .proc_handler = &proc_doslab, 336 }, 337 { 338 .procname = "slab_kvmem_alloc", 339 .data = (void *)(KMC_KVMEM | KMC_ALLOC), 340 .maxlen = sizeof (unsigned long), 341 .extra1 = &table_min, 342 .extra2 = &table_max, 343 .mode = 0444, 344 .proc_handler = &proc_doslab, 345 }, 346 { 347 .procname = "slab_kvmem_max", 348 .data = (void *)(KMC_KVMEM | KMC_MAX), 349 .maxlen = sizeof (unsigned long), 350 .extra1 = &table_min, 351 .extra2 = &table_max, 352 .mode = 0444, 353 .proc_handler = &proc_doslab, 354 }, 355 {}, 356 }; 357 358 static struct ctl_table spl_kstat_table[] = { 359 {}, 360 }; 361 362 static struct ctl_table spl_table[] = { 363 /* 364 * NB No .strategy entries have been provided since 365 * sysctl(8) prefers to go via /proc for portability. 366 */ 367 { 368 .procname = "gitrev", 369 .data = (char *)ZFS_META_GITREV, 370 .maxlen = sizeof (ZFS_META_GITREV), 371 .mode = 0444, 372 .proc_handler = &proc_dostring, 373 }, 374 { 375 .procname = "hostid", 376 .data = &spl_hostid, 377 .maxlen = sizeof (unsigned long), 378 .mode = 0644, 379 .proc_handler = &proc_dohostid, 380 }, 381 #ifdef HAVE_REGISTER_SYSCTL_TABLE 382 { 383 .procname = "kmem", 384 .mode = 0555, 385 .child = spl_kmem_table, 386 }, 387 { 388 .procname = "kstat", 389 .mode = 0555, 390 .child = spl_kstat_table, 391 }, 392 #endif 393 {}, 394 }; 395 396 #ifdef HAVE_REGISTER_SYSCTL_TABLE 397 static struct ctl_table spl_dir[] = { 398 { 399 .procname = "spl", 400 .mode = 0555, 401 .child = spl_table, 402 }, 403 {} 404 }; 405 406 static struct ctl_table spl_root[] = { 407 { 408 .procname = "kernel", 409 .mode = 0555, 410 .child = spl_dir, 411 }, 412 {} 413 }; 414 #endif 415 416 static void spl_proc_cleanup(void) 417 { 418 remove_proc_entry("kstat", proc_spl); 419 remove_proc_entry("slab", proc_spl_kmem); 420 remove_proc_entry("kmem", proc_spl); 421 remove_proc_entry("spl", NULL); 422 423 #ifndef HAVE_REGISTER_SYSCTL_TABLE 424 if (spl_kstat) { 425 unregister_sysctl_table(spl_kstat); 426 spl_kstat = NULL; 427 } 428 if (spl_kmem) { 429 unregister_sysctl_table(spl_kmem); 430 spl_kmem = NULL; 431 } 432 #endif 433 if (spl_header) { 434 unregister_sysctl_table(spl_header); 435 spl_header = NULL; 436 } 437 } 438 439 #ifndef HAVE_REGISTER_SYSCTL_TABLE 440 441 /* 442 * Traditionally, struct ctl_table arrays have been terminated by an "empty" 443 * sentinel element (specifically, one with .procname == NULL). 444 * 445 * Linux 6.6 began migrating away from this, adding register_sysctl_sz() so 446 * that callers could provide the size directly, and redefining 447 * register_sysctl() to just call register_sysctl_sz() with the array size. It 448 * retained support for the terminating element so that existing callers would 449 * continue to work. 450 * 451 * Linux 6.11 removed support for the terminating element, instead interpreting 452 * it as a real malformed element, and rejecting it. 453 * 454 * In order to continue support older kernels, we retain the terminating 455 * sentinel element for our sysctl tables, but instead detect availability of 456 * register_sysctl_sz(). If it exists, we pass it the array size -1, stopping 457 * the kernel from trying to process the terminator. For pre-6.6 kernels that 458 * don't have register_sysctl_sz(), we just use register_sysctl(), which can 459 * handle the terminating element as it always has. 460 */ 461 #ifdef HAVE_REGISTER_SYSCTL_SZ 462 #define spl_proc_register_sysctl(p, t) \ 463 register_sysctl_sz(p, t, ARRAY_SIZE(t)-1) 464 #else 465 #define spl_proc_register_sysctl(p, t) \ 466 register_sysctl(p, t) 467 #endif 468 #endif 469 470 int 471 spl_proc_init(void) 472 { 473 int rc = 0; 474 475 #ifdef HAVE_REGISTER_SYSCTL_TABLE 476 spl_header = register_sysctl_table(spl_root); 477 if (spl_header == NULL) 478 return (-EUNATCH); 479 #else 480 spl_header = spl_proc_register_sysctl("kernel/spl", spl_table); 481 if (spl_header == NULL) 482 return (-EUNATCH); 483 484 spl_kmem = spl_proc_register_sysctl("kernel/spl/kmem", spl_kmem_table); 485 if (spl_kmem == NULL) { 486 rc = -EUNATCH; 487 goto out; 488 } 489 spl_kstat = spl_proc_register_sysctl("kernel/spl/kstat", 490 spl_kstat_table); 491 if (spl_kstat == NULL) { 492 rc = -EUNATCH; 493 goto out; 494 } 495 #endif 496 497 proc_spl = proc_mkdir("spl", NULL); 498 if (proc_spl == NULL) { 499 rc = -EUNATCH; 500 goto out; 501 } 502 503 proc_spl_kmem = proc_mkdir("kmem", proc_spl); 504 if (proc_spl_kmem == NULL) { 505 rc = -EUNATCH; 506 goto out; 507 } 508 509 proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem, 510 &proc_slab_operations, NULL); 511 if (proc_spl_kmem_slab == NULL) { 512 rc = -EUNATCH; 513 goto out; 514 } 515 516 proc_spl_kstat = proc_mkdir("kstat", proc_spl); 517 if (proc_spl_kstat == NULL) { 518 rc = -EUNATCH; 519 goto out; 520 } 521 out: 522 if (rc) 523 spl_proc_cleanup(); 524 525 return (rc); 526 } 527 528 void 529 spl_proc_fini(void) 530 { 531 spl_proc_cleanup(); 532 } 533