1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2005-2006 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #define _WANT_FREEBSD_BITSET 32 33 #include <sys/param.h> 34 #include <sys/counter.h> 35 #include <sys/cpuset.h> 36 #include <sys/sysctl.h> 37 38 #include <vm/uma.h> 39 #include <vm/uma_int.h> 40 41 #include <err.h> 42 #include <errno.h> 43 #include <kvm.h> 44 #include <nlist.h> 45 #include <stddef.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <unistd.h> 50 51 #include "memstat.h" 52 #include "memstat_internal.h" 53 54 static struct nlist namelist[] = { 55 #define X_UMA_KEGS 0 56 { .n_name = "_uma_kegs" }, 57 #define X_MP_MAXID 1 58 { .n_name = "_mp_maxid" }, 59 #define X_ALL_CPUS 2 60 { .n_name = "_all_cpus" }, 61 #define X_VM_NDOMAINS 3 62 { .n_name = "_vm_ndomains" }, 63 { .n_name = "" }, 64 }; 65 66 /* 67 * Extract uma(9) statistics from the running kernel, and store all memory 68 * type information in the passed list. For each type, check the list for an 69 * existing entry with the right name/allocator -- if present, update that 70 * entry. Otherwise, add a new entry. On error, the entire list will be 71 * cleared, as entries will be in an inconsistent state. 72 * 73 * To reduce the level of work for a list that starts empty, we keep around a 74 * hint as to whether it was empty when we began, so we can avoid searching 75 * the list for entries to update. Updates are O(n^2) due to searching for 76 * each entry before adding it. 77 */ 78 int 79 memstat_sysctl_uma(struct memory_type_list *list, int flags) 80 { 81 struct uma_stream_header *ushp; 82 struct uma_type_header *uthp; 83 struct uma_percpu_stat *upsp; 84 struct memory_type *mtp; 85 int count, hint_dontsearch, i, j, maxcpus, maxid; 86 char *buffer, *p; 87 size_t size; 88 89 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 90 91 /* 92 * Query the number of CPUs, number of malloc types so that we can 93 * guess an initial buffer size. We loop until we succeed or really 94 * fail. Note that the value of maxcpus we query using sysctl is not 95 * the version we use when processing the real data -- that is read 96 * from the header. 97 */ 98 retry: 99 size = sizeof(maxid); 100 if (sysctlbyname("kern.smp.maxid", &maxid, &size, NULL, 0) < 0) { 101 if (errno == EACCES || errno == EPERM) 102 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 103 else 104 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 105 return (-1); 106 } 107 if (size != sizeof(maxid)) { 108 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 109 return (-1); 110 } 111 112 size = sizeof(count); 113 if (sysctlbyname("vm.zone_count", &count, &size, NULL, 0) < 0) { 114 if (errno == EACCES || errno == EPERM) 115 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 116 else 117 list->mtl_error = MEMSTAT_ERROR_VERSION; 118 return (-1); 119 } 120 if (size != sizeof(count)) { 121 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 122 return (-1); 123 } 124 125 size = sizeof(*uthp) + count * (sizeof(*uthp) + sizeof(*upsp) * 126 (maxid + 1)); 127 128 buffer = malloc(size); 129 if (buffer == NULL) { 130 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 131 return (-1); 132 } 133 134 if (sysctlbyname("vm.zone_stats", buffer, &size, NULL, 0) < 0) { 135 /* 136 * XXXRW: ENOMEM is an ambiguous return, we should bound the 137 * number of loops, perhaps. 138 */ 139 if (errno == ENOMEM) { 140 free(buffer); 141 goto retry; 142 } 143 if (errno == EACCES || errno == EPERM) 144 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 145 else 146 list->mtl_error = MEMSTAT_ERROR_VERSION; 147 free(buffer); 148 return (-1); 149 } 150 151 if (size == 0) { 152 free(buffer); 153 return (0); 154 } 155 156 if (size < sizeof(*ushp)) { 157 list->mtl_error = MEMSTAT_ERROR_VERSION; 158 free(buffer); 159 return (-1); 160 } 161 p = buffer; 162 ushp = (struct uma_stream_header *)p; 163 p += sizeof(*ushp); 164 165 if (ushp->ush_version != UMA_STREAM_VERSION) { 166 list->mtl_error = MEMSTAT_ERROR_VERSION; 167 free(buffer); 168 return (-1); 169 } 170 171 /* 172 * For the remainder of this function, we are quite trusting about 173 * the layout of structures and sizes, since we've determined we have 174 * a matching version and acceptable CPU count. 175 */ 176 maxcpus = ushp->ush_maxcpus; 177 count = ushp->ush_count; 178 for (i = 0; i < count; i++) { 179 uthp = (struct uma_type_header *)p; 180 p += sizeof(*uthp); 181 182 if (hint_dontsearch == 0) { 183 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 184 uthp->uth_name); 185 } else 186 mtp = NULL; 187 if (mtp == NULL) 188 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 189 uthp->uth_name, maxid + 1); 190 if (mtp == NULL) { 191 _memstat_mtl_empty(list); 192 free(buffer); 193 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 194 return (-1); 195 } 196 197 /* 198 * Reset the statistics on a current node. 199 */ 200 _memstat_mt_reset_stats(mtp, maxid + 1); 201 202 mtp->mt_numallocs = uthp->uth_allocs; 203 mtp->mt_numfrees = uthp->uth_frees; 204 mtp->mt_failures = uthp->uth_fails; 205 mtp->mt_sleeps = uthp->uth_sleeps; 206 mtp->mt_xdomain = uthp->uth_xdomain; 207 208 for (j = 0; j < maxcpus; j++) { 209 upsp = (struct uma_percpu_stat *)p; 210 p += sizeof(*upsp); 211 212 mtp->mt_percpu_cache[j].mtp_free = 213 upsp->ups_cache_free; 214 mtp->mt_free += upsp->ups_cache_free; 215 mtp->mt_numallocs += upsp->ups_allocs; 216 mtp->mt_numfrees += upsp->ups_frees; 217 } 218 219 /* 220 * Values for uth_allocs and uth_frees frees are snap. 221 * It may happen that kernel reports that number of frees 222 * is greater than number of allocs. See counter(9) for 223 * details. 224 */ 225 if (mtp->mt_numallocs < mtp->mt_numfrees) 226 mtp->mt_numallocs = mtp->mt_numfrees; 227 228 mtp->mt_size = uthp->uth_size; 229 mtp->mt_rsize = uthp->uth_rsize; 230 mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size; 231 mtp->mt_memfreed = mtp->mt_numfrees * uthp->uth_size; 232 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 233 mtp->mt_countlimit = uthp->uth_limit; 234 mtp->mt_byteslimit = uthp->uth_limit * uthp->uth_size; 235 236 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 237 mtp->mt_zonefree = uthp->uth_zone_free; 238 239 /* 240 * UMA secondary zones share a keg with the primary zone. To 241 * avoid double-reporting of free items, report keg free 242 * items only in the primary zone. 243 */ 244 if (!(uthp->uth_zone_flags & UTH_ZONE_SECONDARY)) { 245 mtp->mt_kegfree = uthp->uth_keg_free; 246 mtp->mt_free += mtp->mt_kegfree; 247 } 248 mtp->mt_free += mtp->mt_zonefree; 249 } 250 251 free(buffer); 252 253 return (0); 254 } 255 256 static int 257 kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size, 258 size_t offset) 259 { 260 ssize_t ret; 261 262 ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address, 263 size); 264 if (ret < 0) 265 return (MEMSTAT_ERROR_KVM); 266 if ((size_t)ret != size) 267 return (MEMSTAT_ERROR_KVM_SHORTREAD); 268 return (0); 269 } 270 271 static int 272 kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen) 273 { 274 ssize_t ret; 275 int i; 276 277 for (i = 0; i < buflen; i++) { 278 ret = kvm_read(kvm, (unsigned long)kvm_pointer + i, 279 &(buffer[i]), sizeof(char)); 280 if (ret < 0) 281 return (MEMSTAT_ERROR_KVM); 282 if ((size_t)ret != sizeof(char)) 283 return (MEMSTAT_ERROR_KVM_SHORTREAD); 284 if (buffer[i] == '\0') 285 return (0); 286 } 287 /* Truncate. */ 288 buffer[i-1] = '\0'; 289 return (0); 290 } 291 292 static int 293 kread_symbol(kvm_t *kvm, int index, void *address, size_t size, 294 size_t offset) 295 { 296 ssize_t ret; 297 298 ret = kvm_read(kvm, namelist[index].n_value + offset, address, size); 299 if (ret < 0) 300 return (MEMSTAT_ERROR_KVM); 301 if ((size_t)ret != size) 302 return (MEMSTAT_ERROR_KVM_SHORTREAD); 303 return (0); 304 } 305 306 /* 307 * memstat_kvm_uma() is similar to memstat_sysctl_uma(), only it extracts 308 * UMA(9) statistics from a kernel core/memory file. 309 */ 310 int 311 memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) 312 { 313 LIST_HEAD(, uma_keg) uma_kegs; 314 struct memory_type *mtp; 315 struct uma_zone_domain uzd; 316 struct uma_domain ukd; 317 struct uma_bucket *ubp, ub; 318 struct uma_cache *ucp, *ucp_array; 319 struct uma_zone *uzp, uz; 320 struct uma_keg *kzp, kz; 321 uint64_t kegfree; 322 int hint_dontsearch, i, mp_maxid, ndomains, ret; 323 char name[MEMTYPE_MAXNAME]; 324 cpuset_t all_cpus; 325 long cpusetsize; 326 kvm_t *kvm; 327 328 kvm = (kvm_t *)kvm_handle; 329 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 330 if (kvm_nlist(kvm, namelist) != 0) { 331 list->mtl_error = MEMSTAT_ERROR_KVM; 332 return (-1); 333 } 334 if (namelist[X_UMA_KEGS].n_type == 0 || 335 namelist[X_UMA_KEGS].n_value == 0) { 336 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 337 return (-1); 338 } 339 ret = kread_symbol(kvm, X_MP_MAXID, &mp_maxid, sizeof(mp_maxid), 0); 340 if (ret != 0) { 341 list->mtl_error = ret; 342 return (-1); 343 } 344 ret = kread_symbol(kvm, X_VM_NDOMAINS, &ndomains, 345 sizeof(ndomains), 0); 346 if (ret != 0) { 347 list->mtl_error = ret; 348 return (-1); 349 } 350 ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0); 351 if (ret != 0) { 352 list->mtl_error = ret; 353 return (-1); 354 } 355 cpusetsize = sysconf(_SC_CPUSET_SIZE); 356 if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { 357 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 358 return (-1); 359 } 360 CPU_ZERO(&all_cpus); 361 ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0); 362 if (ret != 0) { 363 list->mtl_error = ret; 364 return (-1); 365 } 366 ucp_array = malloc(sizeof(struct uma_cache) * (mp_maxid + 1)); 367 if (ucp_array == NULL) { 368 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 369 return (-1); 370 } 371 for (kzp = LIST_FIRST(&uma_kegs); kzp != NULL; kzp = 372 LIST_NEXT(&kz, uk_link)) { 373 ret = kread(kvm, kzp, &kz, sizeof(kz), 0); 374 if (ret != 0) { 375 free(ucp_array); 376 _memstat_mtl_empty(list); 377 list->mtl_error = ret; 378 return (-1); 379 } 380 for (uzp = LIST_FIRST(&kz.uk_zones); uzp != NULL; uzp = 381 LIST_NEXT(&uz, uz_link)) { 382 ret = kread(kvm, uzp, &uz, sizeof(uz), 0); 383 if (ret != 0) { 384 free(ucp_array); 385 _memstat_mtl_empty(list); 386 list->mtl_error = ret; 387 return (-1); 388 } 389 ret = kread(kvm, uzp, ucp_array, 390 sizeof(struct uma_cache) * (mp_maxid + 1), 391 offsetof(struct uma_zone, uz_cpu[0])); 392 if (ret != 0) { 393 free(ucp_array); 394 _memstat_mtl_empty(list); 395 list->mtl_error = ret; 396 return (-1); 397 } 398 ret = kread_string(kvm, uz.uz_name, name, 399 MEMTYPE_MAXNAME); 400 if (ret != 0) { 401 free(ucp_array); 402 _memstat_mtl_empty(list); 403 list->mtl_error = ret; 404 return (-1); 405 } 406 if (hint_dontsearch == 0) { 407 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 408 name); 409 } else 410 mtp = NULL; 411 if (mtp == NULL) 412 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 413 name, mp_maxid + 1); 414 if (mtp == NULL) { 415 free(ucp_array); 416 _memstat_mtl_empty(list); 417 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 418 return (-1); 419 } 420 /* 421 * Reset the statistics on a current node. 422 */ 423 _memstat_mt_reset_stats(mtp, mp_maxid + 1); 424 mtp->mt_numallocs = kvm_counter_u64_fetch(kvm, 425 (unsigned long )uz.uz_allocs); 426 mtp->mt_numfrees = kvm_counter_u64_fetch(kvm, 427 (unsigned long )uz.uz_frees); 428 mtp->mt_failures = kvm_counter_u64_fetch(kvm, 429 (unsigned long )uz.uz_fails); 430 mtp->mt_xdomain = kvm_counter_u64_fetch(kvm, 431 (unsigned long )uz.uz_xdomain); 432 mtp->mt_sleeps = uz.uz_sleeps; 433 /* See comment above in memstat_sysctl_uma(). */ 434 if (mtp->mt_numallocs < mtp->mt_numfrees) 435 mtp->mt_numallocs = mtp->mt_numfrees; 436 437 if (kz.uk_flags & UMA_ZFLAG_INTERNAL) 438 goto skip_percpu; 439 for (i = 0; i < mp_maxid + 1; i++) { 440 if (!CPU_ISSET(i, &all_cpus)) 441 continue; 442 ucp = &ucp_array[i]; 443 mtp->mt_numallocs += ucp->uc_allocs; 444 mtp->mt_numfrees += ucp->uc_frees; 445 446 mtp->mt_free += ucp->uc_allocbucket.ucb_cnt; 447 mtp->mt_free += ucp->uc_freebucket.ucb_cnt; 448 mtp->mt_free += ucp->uc_crossbucket.ucb_cnt; 449 } 450 skip_percpu: 451 mtp->mt_size = kz.uk_size; 452 mtp->mt_rsize = kz.uk_rsize; 453 mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size; 454 mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size; 455 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 456 mtp->mt_countlimit = uz.uz_max_items; 457 mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size; 458 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 459 for (i = 0; i < ndomains; i++) { 460 ret = kread(kvm, ZDOM_GET(uzp, i), &uzd, 461 sizeof(uzd), 0); 462 if (ret != 0) 463 continue; 464 for (ubp = 465 STAILQ_FIRST(&uzd.uzd_buckets); 466 ubp != NULL; 467 ubp = STAILQ_NEXT(&ub, ub_link)) { 468 ret = kread(kvm, ubp, &ub, 469 sizeof(ub), 0); 470 if (ret != 0) 471 continue; 472 mtp->mt_zonefree += ub.ub_cnt; 473 } 474 } 475 if (!((kz.uk_flags & UMA_ZONE_SECONDARY) && 476 LIST_FIRST(&kz.uk_zones) != uzp)) { 477 kegfree = 0; 478 for (i = 0; i < ndomains; i++) { 479 ret = kread(kvm, &kzp->uk_domain[i], 480 &ukd, sizeof(ukd), 0); 481 if (ret != 0) 482 kegfree += ukd.ud_free_items; 483 } 484 mtp->mt_kegfree = kegfree; 485 mtp->mt_free += mtp->mt_kegfree; 486 } 487 mtp->mt_free += mtp->mt_zonefree; 488 } 489 } 490 free(ucp_array); 491 return (0); 492 } 493