1 /* 2 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. 3 * Copyright (C) 2007 The Regents of the University of California. 4 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 5 * Written by Brian Behlendorf <behlendorf1@llnl.gov>. 6 * UCRL-CODE-235197 7 * 8 * This file is part of the SPL, Solaris Porting Layer. 9 * 10 * The SPL is free software; you can redistribute it and/or modify it 11 * under the terms of the GNU General Public License as published by the 12 * Free Software Foundation; either version 2 of the License, or (at your 13 * option) any later version. 14 * 15 * The SPL is distributed in the hope that it will be useful, but WITHOUT 16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 18 * for more details. 19 * 20 * You should have received a copy of the GNU General Public License along 21 * with the SPL. If not, see <http://www.gnu.org/licenses/>. 22 * 23 * Solaris Porting Layer (SPL) Proc Implementation. 
 */
/*
 * Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
 */

#include <sys/systeminfo.h>
#include <sys/kstat.h>
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/vmem.h>
#include <sys/proc.h>
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/version.h>
#include "zfs_gitrev.h"

/*
 * Under the PaX/grsecurity "constify" GCC plugin, struct ctl_table is made
 * read-only.  spl_ctl_table opts back out (__no_const) so the proc handlers
 * below can take a local, modifiable copy of the table they are handed.
 */
#if defined(CONSTIFY_PLUGIN)
typedef struct ctl_table __no_const spl_ctl_table;
#else
typedef struct ctl_table spl_ctl_table;
#endif

/*
 * Newer kernels pass the ctl_table to proc handlers as const; spell the
 * parameter type accordingly so the handler signatures match either way.
 */
#ifdef HAVE_PROC_HANDLER_CTL_TABLE_CONST
#define	CONST_CTL_TABLE	const struct ctl_table
#else
#define	CONST_CTL_TABLE	struct ctl_table
#endif

/*
 * Bounds handed to proc_doulongvec_minmax() via extra1/extra2: the full
 * unsigned long range, i.e. effectively no clamping.
 */
static unsigned long table_min = 0;
static unsigned long table_max = ~0;

/* sysctl registration handles and /proc/spl directory entries. */
static struct ctl_table_header *spl_header = NULL;
#ifndef HAVE_REGISTER_SYSCTL_TABLE
static struct ctl_table_header *spl_kmem = NULL;
static struct ctl_table_header *spl_kstat = NULL;
#endif
static struct proc_dir_entry *proc_spl = NULL;
static struct proc_dir_entry *proc_spl_kmem = NULL;
static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
struct proc_dir_entry *proc_spl_kstat = NULL;	/* shared with spl-kstat.c */

#ifdef DEBUG_KMEM
/*
 * Read-only sysctl handler exposing a kmem byte counter (atomic64_t, or
 * atomic_t on kernels without it) as an unsigned long.  Writes are consumed
 * without effect (*ppos is advanced so the write appears to succeed).
 */
static int
proc_domemused(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = 0;
	unsigned long val;
	/* Local copy so we can redirect .data at our snapshot of the counter */
	spl_ctl_table dummy = *table;

	dummy.data = &val;
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &table_min;
	dummy.extra2 = &table_max;

	if (write) {
		*ppos += *lenp;
	} else {
#ifdef HAVE_ATOMIC64_T
		val = atomic64_read((atomic64_t *)table->data);
#else
		val = atomic_read((atomic_t *)table->data);
#endif /* HAVE_ATOMIC64_T */
		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
	}

	return (rc);
}
#endif /* DEBUG_KMEM */

/*
 * Read-only sysctl handler summing one statistic over all registered SPL
 * kmem caches.  table->data is not a pointer here: it carries a flag mask
 * selecting both the cache type (e.g. KMC_KVMEM) and the statistic
 * (KMC_TOTAL, KMC_ALLOC or KMC_MAX).  Writes are consumed without effect.
 */
static int
proc_doslab(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = 0;
	unsigned long val = 0, mask;
	spl_ctl_table dummy = *table;
	spl_kmem_cache_t *skc = NULL;

	dummy.data = &val;
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &table_min;
	dummy.extra2 = &table_max;

	if (write) {
		*ppos += *lenp;
	} else {
		/* Hold the cache-list lock across the whole traversal */
		down_read(&spl_kmem_cache_sem);
		mask = (unsigned long)table->data;

		list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {

			/* Only use slabs of the correct kmem/vmem type */
			if (!(skc->skc_flags & mask))
				continue;

			/* Sum the specified field for selected slabs */
			switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
			case KMC_TOTAL:
				val += skc->skc_slab_size * skc->skc_slab_total;
				break;
			case KMC_ALLOC:
				val += skc->skc_obj_size * skc->skc_obj_alloc;
				break;
			case KMC_MAX:
				val += skc->skc_obj_size * skc->skc_obj_max;
				break;
			}
		}

		up_read(&spl_kmem_cache_sem);
		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
	}

	return (rc);
}

/*
 * Read/write sysctl handler for spl_hostid.  The value is presented and
 * parsed as bare hex (no "0x" prefix), so it round-trips through a string
 * buffer rather than the numeric helpers.
 */
static int
proc_dohostid(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	char *end, str[32];
	unsigned long hid;
	spl_ctl_table dummy = *table;

	dummy.data = str;
	dummy.maxlen = sizeof (str) - 1;

	if (!write)
		snprintf(str, sizeof (str), "%lx",
		    (unsigned long) zone_get_hostid(NULL));

	/* always returns 0 */
	proc_dostring(&dummy, write, buffer, lenp, ppos);

	if (write) {
		/*
		 * We can't use proc_doulongvec_minmax() in the write
		 * case here because hostid, while a hex value, has no
		 * leading 0x, which confuses the helper function.
		 */

		hid = simple_strtoul(str, &end, 16);
		if (str == end)
			return (-EINVAL);
		spl_hostid = hid;
	}

	return (0);
}

/*
 * Emit the two-row column header for /proc/spl/kmem/slab.  The widths
 * line up with the per-cache rows printed by slab_seq_show().
 */
static void
slab_seq_show_headers(struct seq_file *f)
{
	seq_printf(f,
	    "--------------------- cache ----------"
	    "---------------------------------------------  "
	    "----- slab ------  "
	    "---- object -----  "
	    "--- emergency ---\n");
	seq_printf(f,
	    "name                                  "
	    "  flags      size     alloc slabsize  objsize  "
	    "total alloc   max  "
	    "total alloc   max  "
	    "dlock alloc   max\n");
}

/*
 * seq_file .show callback: print one row of statistics for a single SPL
 * kmem cache, under that cache's skc_lock.
 */
static int
slab_seq_show(struct seq_file *f, void *p)
{
	spl_kmem_cache_t *skc = p;

	ASSERT(skc->skc_magic == SKC_MAGIC);

	if (skc->skc_flags & KMC_SLAB) {
		/*
		 * This cache is backed by a generic Linux kmem cache which
		 * has its own accounting. For these caches we only track
		 * the number of active allocated objects that exist within
		 * the underlying Linux slabs. For the overall statistics of
		 * the underlying Linux cache please refer to /proc/slabinfo.
		 */
		spin_lock(&skc->skc_lock);
		uint64_t objs_allocated =
		    percpu_counter_sum(&skc->skc_linux_alloc);
		seq_printf(f, "%-36s  ", skc->skc_name);
		seq_printf(f, "0x%05lx %9s %9lu %8s %8u  "
		    "%5s %5s %5s  %5s %5lu %5s  %5s %5s %5s\n",
		    (long unsigned)skc->skc_flags,
		    "-",
		    (long unsigned)(skc->skc_obj_size * objs_allocated),
		    "-",
		    (unsigned)skc->skc_obj_size,
		    "-", "-", "-", "-",
		    (long unsigned)objs_allocated,
		    "-", "-", "-", "-");
		spin_unlock(&skc->skc_lock);
		return (0);
	}

	spin_lock(&skc->skc_lock);
	seq_printf(f, "%-36s  ", skc->skc_name);
	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u  "
	    "%5lu %5lu %5lu  %5lu %5lu %5lu  %5lu %5lu %5lu\n",
	    (long unsigned)skc->skc_flags,
	    (long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
	    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
	    (unsigned)skc->skc_slab_size,
	    (unsigned)skc->skc_obj_size,
	    (long unsigned)skc->skc_slab_total,
	    (long unsigned)skc->skc_slab_alloc,
	    (long unsigned)skc->skc_slab_max,
	    (long unsigned)skc->skc_obj_total,
	    (long unsigned)skc->skc_obj_alloc,
	    (long unsigned)skc->skc_obj_max,
	    (long unsigned)skc->skc_obj_deadlock,
	    (long unsigned)skc->skc_obj_emergency,
	    (long unsigned)skc->skc_obj_emergency_max);
	spin_unlock(&skc->skc_lock);
	return (0);
}

/*
 * seq_file .start callback: take the cache-list read lock (released in
 * slab_seq_stop()), print the header on the first call, then walk to the
 * *pos'th cache.  Returns NULL when *pos is past the end of the list.
 */
static void *
slab_seq_start(struct seq_file *f, loff_t *pos)
{
	struct list_head *p;
	loff_t n = *pos;

	down_read(&spl_kmem_cache_sem);
	if (!n)
		slab_seq_show_headers(f);

	p = spl_kmem_cache_list.next;
	while (n--) {
		p = p->next;
		if (p == &spl_kmem_cache_list)
			return (NULL);
	}

	return (list_entry(p, spl_kmem_cache_t, skc_list));
}

/* seq_file .next callback: advance to the next cache, or NULL at the end. */
static void *
slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
{
	spl_kmem_cache_t *skc = p;

	++*pos;
	return ((skc->skc_list.next == &spl_kmem_cache_list) ?
	    NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list));
}

/* seq_file .stop callback: drop the lock taken in slab_seq_start(). */
static void
slab_seq_stop(struct seq_file *f, void *v)
{
	up_read(&spl_kmem_cache_sem);
}

static const struct seq_operations slab_seq_ops = {
	.show = slab_seq_show,
	.start = slab_seq_start,
	.next = slab_seq_next,
	.stop = slab_seq_stop,
};

static int
proc_slab_open(struct inode *inode, struct file *filp)
{
	return (seq_open(filp, &slab_seq_ops));
}

/*
 * File ops for /proc/spl/kmem/slab.  Kernels 5.6+ use struct proc_ops
 * (proc_* members); older kernels use struct file_operations.
 */
static const kstat_proc_op_t proc_slab_operations = {
#ifdef HAVE_PROC_OPS_STRUCT
	.proc_open = proc_slab_open,
	.proc_read = seq_read,
	.proc_lseek = seq_lseek,
	.proc_release = seq_release,
#else
	.open = proc_slab_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
#endif
};

/*
 * kernel.spl.kmem.* sysctls.  Note the slab_kvmem_* entries encode their
 * selector flag mask directly in .data (see proc_doslab()).
 */
static struct ctl_table spl_kmem_table[] = {
#ifdef DEBUG_KMEM
	{
		.procname = "kmem_used",
		.data = &kmem_alloc_used,
#ifdef HAVE_ATOMIC64_T
		.maxlen = sizeof (atomic64_t),
#else
		.maxlen = sizeof (atomic_t),
#endif /* HAVE_ATOMIC64_T */
		.mode = 0444,
		.proc_handler = &proc_domemused,
	},
	{
		.procname = "kmem_max",
		.data = &kmem_alloc_max,
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doulongvec_minmax,
	},
#endif /* DEBUG_KMEM */
	{
		.procname = "slab_kvmem_total",
		.data = (void *)(KMC_KVMEM | KMC_TOTAL),
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		.procname = "slab_kvmem_alloc",
		.data = (void *)(KMC_KVMEM | KMC_ALLOC),
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		.procname = "slab_kvmem_max",
		.data = (void *)(KMC_KVMEM | KMC_MAX),
		.maxlen = sizeof (unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{},
};

/* kernel.spl.kstat is an (empty) directory; entries are added elsewhere. */
static struct ctl_table spl_kstat_table[] = {
	{},
};

static struct ctl_table spl_table[] = {
	/*
	 * NB No .strategy entries have been provided since
	 * sysctl(8) prefers to go via /proc for portability.
	 */
	{
		.procname = "gitrev",
		.data = (char *)ZFS_META_GITREV,
		.maxlen = sizeof (ZFS_META_GITREV),
		.mode = 0444,
		.proc_handler = &proc_dostring,
	},
	{
		.procname = "hostid",
		.data = &spl_hostid,
		.maxlen = sizeof (unsigned long),
		.mode = 0644,
		.proc_handler = &proc_dohostid,
	},
#ifdef HAVE_REGISTER_SYSCTL_TABLE
	/* With the old API, subdirectories hang off .child pointers. */
	{
		.procname = "kmem",
		.mode = 0555,
		.child = spl_kmem_table,
	},
	{
		.procname = "kstat",
		.mode = 0555,
		.child = spl_kstat_table,
	},
#endif
	{},
};

#ifdef HAVE_REGISTER_SYSCTL_TABLE
/* Old-API scaffolding: kernel -> spl -> { gitrev, hostid, kmem, kstat }. */
static struct ctl_table spl_dir[] = {
	{
		.procname = "spl",
		.mode = 0555,
		.child = spl_table,
	},
	{}
};

static struct ctl_table spl_root[] = {
	{
		.procname = "kernel",
		.mode = 0555,
		.child = spl_dir,
	},
	{}
};
#endif

/*
 * Tear down everything spl_proc_init() set up, in reverse order.  Shared
 * by spl_proc_init()'s error path (where some entries may not exist yet)
 * and by spl_proc_fini().
 */
static void spl_proc_cleanup(void)
{
	remove_proc_entry("kstat", proc_spl);
	remove_proc_entry("slab", proc_spl_kmem);
	remove_proc_entry("kmem", proc_spl);
	remove_proc_entry("spl", NULL);

#ifndef HAVE_REGISTER_SYSCTL_TABLE
	if (spl_kstat) {
		unregister_sysctl_table(spl_kstat);
		spl_kstat = NULL;
	}
	if (spl_kmem) {
		unregister_sysctl_table(spl_kmem);
		spl_kmem = NULL;
	}
#endif
	if (spl_header) {
		unregister_sysctl_table(spl_header);
		spl_header = NULL;
	}
}

#ifndef HAVE_REGISTER_SYSCTL_TABLE

/*
 * Traditionally, struct ctl_table arrays have been terminated by an "empty"
 * sentinel element (specifically, one with .procname == NULL).
 *
 * Linux 6.6 began migrating away from this, adding register_sysctl_sz() so
 * that callers could provide the size directly, and redefining
 * register_sysctl() to just call register_sysctl_sz() with the array size. It
 * retained support for the terminating element so that existing callers would
 * continue to work.
 *
 * Linux 6.11 removed support for the terminating element, instead interpreting
 * it as a real malformed element, and rejecting it.
 *
 * In order to continue to support older kernels, we retain the terminating
 * sentinel element for our sysctl tables, but instead detect availability of
 * register_sysctl_sz(). If it exists, we pass it the array size -1, stopping
 * the kernel from trying to process the terminator. For pre-6.6 kernels that
 * don't have register_sysctl_sz(), we just use register_sysctl(), which can
 * handle the terminating element as it always has.
 */
#ifdef HAVE_REGISTER_SYSCTL_SZ
#define	spl_proc_register_sysctl(p, t) \
	register_sysctl_sz(p, t, ARRAY_SIZE(t)-1)
#else
#define	spl_proc_register_sysctl(p, t) \
	register_sysctl(p, t)
#endif
#endif

/*
 * Register the kernel.spl.* sysctls and create the /proc/spl tree
 * (/proc/spl/kmem/slab, /proc/spl/kstat).  Returns 0 on success or
 * -EUNATCH on any registration failure, after undoing partial setup.
 */
int
spl_proc_init(void)
{
	int rc = 0;

#ifdef HAVE_REGISTER_SYSCTL_TABLE
	spl_header = register_sysctl_table(spl_root);
	if (spl_header == NULL)
		return (-EUNATCH);
#else
	spl_header = spl_proc_register_sysctl("kernel/spl", spl_table);
	if (spl_header == NULL)
		return (-EUNATCH);

	spl_kmem = spl_proc_register_sysctl("kernel/spl/kmem", spl_kmem_table);
	if (spl_kmem == NULL) {
		rc = -EUNATCH;
		goto out;
	}
	spl_kstat = spl_proc_register_sysctl("kernel/spl/kstat",
	    spl_kstat_table);
	if (spl_kstat == NULL) {
		rc = -EUNATCH;
		goto out;
	}
#endif

	proc_spl = proc_mkdir("spl", NULL);
	if (proc_spl == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
	if (proc_spl_kmem == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
	    &proc_slab_operations, NULL);
	if (proc_spl_kmem_slab == NULL) {
		rc = -EUNATCH;
		goto out;
	}

	proc_spl_kstat = proc_mkdir("kstat", proc_spl);
	if (proc_spl_kstat == NULL) {
		rc = -EUNATCH;
		goto out;
	}
out:
	if (rc)
		spl_proc_cleanup();

	return (rc);
}

/* Module-unload counterpart of spl_proc_init(). */
void
spl_proc_fini(void)
{
	spl_proc_cleanup();
}