1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2005-2006 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/counter.h> 33 #include <sys/cpuset.h> 34 #include <sys/sysctl.h> 35 36 #include <vm/uma.h> 37 #include <vm/uma_int.h> 38 39 #include <err.h> 40 #include <errno.h> 41 #include <kvm.h> 42 #include <nlist.h> 43 #include <stddef.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 49 #include "memstat.h" 50 #include "memstat_internal.h" 51 52 static struct nlist namelist[] = { 53 #define X_UMA_KEGS 0 54 { .n_name = "_uma_kegs" }, 55 #define X_MP_MAXID 1 56 { .n_name = "_mp_maxid" }, 57 #define X_ALL_CPUS 2 58 { .n_name = "_all_cpus" }, 59 #define X_VM_NDOMAINS 3 60 { .n_name = "_vm_ndomains" }, 61 { .n_name = "" }, 62 }; 63 64 /* 65 * Extract uma(9) statistics from the running kernel, and store all memory 66 * type information in the passed list. For each type, check the list for an 67 * existing entry with the right name/allocator -- if present, update that 68 * entry. Otherwise, add a new entry. On error, the entire list will be 69 * cleared, as entries will be in an inconsistent state. 70 * 71 * To reduce the level of work for a list that starts empty, we keep around a 72 * hint as to whether it was empty when we began, so we can avoid searching 73 * the list for entries to update. Updates are O(n^2) due to searching for 74 * each entry before adding it. 75 */ 76 int 77 memstat_sysctl_uma(struct memory_type_list *list, int flags) 78 { 79 struct uma_stream_header *ushp; 80 struct uma_type_header *uthp; 81 struct uma_percpu_stat *upsp; 82 struct memory_type *mtp; 83 int count, hint_dontsearch, i, j, maxcpus, maxid; 84 char *buffer, *p; 85 size_t size; 86 87 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 88 89 /* 90 * Query the number of CPUs, number of malloc types so that we can 91 * guess an initial buffer size. We loop until we succeed or really 92 * fail. Note that the value of maxcpus we query using sysctl is not 93 * the version we use when processing the real data -- that is read 94 * from the header. 95 */ 96 retry: 97 size = sizeof(maxid); 98 if (sysctlbyname("kern.smp.maxid", &maxid, &size, NULL, 0) < 0) { 99 if (errno == EACCES || errno == EPERM) 100 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 101 else 102 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 103 return (-1); 104 } 105 if (size != sizeof(maxid)) { 106 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 107 return (-1); 108 } 109 110 size = sizeof(count); 111 if (sysctlbyname("vm.zone_count", &count, &size, NULL, 0) < 0) { 112 if (errno == EACCES || errno == EPERM) 113 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 114 else 115 list->mtl_error = MEMSTAT_ERROR_VERSION; 116 return (-1); 117 } 118 if (size != sizeof(count)) { 119 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 120 return (-1); 121 } 122 123 size = sizeof(*uthp) + count * (sizeof(*uthp) + sizeof(*upsp) * 124 (maxid + 1)); 125 126 buffer = malloc(size); 127 if (buffer == NULL) { 128 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 129 return (-1); 130 } 131 132 if (sysctlbyname("vm.zone_stats", buffer, &size, NULL, 0) < 0) { 133 /* 134 * XXXRW: ENOMEM is an ambiguous return, we should bound the 135 * number of loops, perhaps. 136 */ 137 if (errno == ENOMEM) { 138 free(buffer); 139 goto retry; 140 } 141 if (errno == EACCES || errno == EPERM) 142 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 143 else 144 list->mtl_error = MEMSTAT_ERROR_VERSION; 145 free(buffer); 146 return (-1); 147 } 148 149 if (size == 0) { 150 free(buffer); 151 return (0); 152 } 153 154 if (size < sizeof(*ushp)) { 155 list->mtl_error = MEMSTAT_ERROR_VERSION; 156 free(buffer); 157 return (-1); 158 } 159 p = buffer; 160 ushp = (struct uma_stream_header *)p; 161 p += sizeof(*ushp); 162 163 if (ushp->ush_version != UMA_STREAM_VERSION) { 164 list->mtl_error = MEMSTAT_ERROR_VERSION; 165 free(buffer); 166 return (-1); 167 } 168 169 /* 170 * For the remainder of this function, we are quite trusting about 171 * the layout of structures and sizes, since we've determined we have 172 * a matching version and acceptable CPU count. 173 */ 174 maxcpus = ushp->ush_maxcpus; 175 count = ushp->ush_count; 176 for (i = 0; i < count; i++) { 177 uthp = (struct uma_type_header *)p; 178 p += sizeof(*uthp); 179 180 if (hint_dontsearch == 0) { 181 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 182 uthp->uth_name); 183 } else 184 mtp = NULL; 185 if (mtp == NULL) 186 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 187 uthp->uth_name, maxid + 1); 188 if (mtp == NULL) { 189 _memstat_mtl_empty(list); 190 free(buffer); 191 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 192 return (-1); 193 } 194 195 /* 196 * Reset the statistics on a current node. 197 */ 198 _memstat_mt_reset_stats(mtp, maxid + 1); 199 200 mtp->mt_numallocs = uthp->uth_allocs; 201 mtp->mt_numfrees = uthp->uth_frees; 202 mtp->mt_failures = uthp->uth_fails; 203 mtp->mt_sleeps = uthp->uth_sleeps; 204 mtp->mt_xdomain = uthp->uth_xdomain; 205 206 for (j = 0; j < maxcpus; j++) { 207 upsp = (struct uma_percpu_stat *)p; 208 p += sizeof(*upsp); 209 210 mtp->mt_percpu_cache[j].mtp_free = 211 upsp->ups_cache_free; 212 mtp->mt_free += upsp->ups_cache_free; 213 mtp->mt_numallocs += upsp->ups_allocs; 214 mtp->mt_numfrees += upsp->ups_frees; 215 } 216 217 /* 218 * Values for uth_allocs and uth_frees frees are snap. 219 * It may happen that kernel reports that number of frees 220 * is greater than number of allocs. See counter(9) for 221 * details. 222 */ 223 if (mtp->mt_numallocs < mtp->mt_numfrees) 224 mtp->mt_numallocs = mtp->mt_numfrees; 225 226 mtp->mt_size = uthp->uth_size; 227 mtp->mt_rsize = uthp->uth_rsize; 228 mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size; 229 mtp->mt_memfreed = mtp->mt_numfrees * uthp->uth_size; 230 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 231 mtp->mt_countlimit = uthp->uth_limit; 232 mtp->mt_byteslimit = uthp->uth_limit * uthp->uth_size; 233 234 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 235 mtp->mt_zonefree = uthp->uth_zone_free; 236 237 /* 238 * UMA secondary zones share a keg with the primary zone. To 239 * avoid double-reporting of free items, report keg free 240 * items only in the primary zone. 241 */ 242 if (!(uthp->uth_zone_flags & UTH_ZONE_SECONDARY)) { 243 mtp->mt_kegfree = uthp->uth_keg_free; 244 mtp->mt_free += mtp->mt_kegfree; 245 } 246 mtp->mt_free += mtp->mt_zonefree; 247 } 248 249 free(buffer); 250 251 return (0); 252 } 253 254 static int 255 kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size, 256 size_t offset) 257 { 258 ssize_t ret; 259 260 ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address, 261 size); 262 if (ret < 0) 263 return (MEMSTAT_ERROR_KVM); 264 if ((size_t)ret != size) 265 return (MEMSTAT_ERROR_KVM_SHORTREAD); 266 return (0); 267 } 268 269 static int 270 kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen) 271 { 272 ssize_t ret; 273 int i; 274 275 for (i = 0; i < buflen; i++) { 276 ret = kvm_read(kvm, (unsigned long)kvm_pointer + i, 277 &(buffer[i]), sizeof(char)); 278 if (ret < 0) 279 return (MEMSTAT_ERROR_KVM); 280 if ((size_t)ret != sizeof(char)) 281 return (MEMSTAT_ERROR_KVM_SHORTREAD); 282 if (buffer[i] == '\0') 283 return (0); 284 } 285 /* Truncate. */ 286 buffer[i-1] = '\0'; 287 return (0); 288 } 289 290 static int 291 kread_symbol(kvm_t *kvm, int index, void *address, size_t size, 292 size_t offset) 293 { 294 ssize_t ret; 295 296 ret = kvm_read(kvm, namelist[index].n_value + offset, address, size); 297 if (ret < 0) 298 return (MEMSTAT_ERROR_KVM); 299 if ((size_t)ret != size) 300 return (MEMSTAT_ERROR_KVM_SHORTREAD); 301 return (0); 302 } 303 304 /* 305 * memstat_kvm_uma() is similar to memstat_sysctl_uma(), only it extracts 306 * UMA(9) statistics from a kernel core/memory file. 307 */ 308 int 309 memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) 310 { 311 LIST_HEAD(, uma_keg) uma_kegs; 312 struct memory_type *mtp; 313 struct uma_zone_domain uzd; 314 struct uma_bucket *ubp, ub; 315 struct uma_cache *ucp, *ucp_array; 316 struct uma_zone *uzp, uz; 317 struct uma_keg *kzp, kz; 318 int hint_dontsearch, i, mp_maxid, ndomains, ret; 319 char name[MEMTYPE_MAXNAME]; 320 cpuset_t all_cpus; 321 long cpusetsize; 322 kvm_t *kvm; 323 324 kvm = (kvm_t *)kvm_handle; 325 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 326 if (kvm_nlist(kvm, namelist) != 0) { 327 list->mtl_error = MEMSTAT_ERROR_KVM; 328 return (-1); 329 } 330 if (namelist[X_UMA_KEGS].n_type == 0 || 331 namelist[X_UMA_KEGS].n_value == 0) { 332 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 333 return (-1); 334 } 335 ret = kread_symbol(kvm, X_MP_MAXID, &mp_maxid, sizeof(mp_maxid), 0); 336 if (ret != 0) { 337 list->mtl_error = ret; 338 return (-1); 339 } 340 ret = kread_symbol(kvm, X_VM_NDOMAINS, &ndomains, 341 sizeof(ndomains), 0); 342 if (ret != 0) { 343 list->mtl_error = ret; 344 return (-1); 345 } 346 ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0); 347 if (ret != 0) { 348 list->mtl_error = ret; 349 return (-1); 350 } 351 cpusetsize = sysconf(_SC_CPUSET_SIZE); 352 if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { 353 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 354 return (-1); 355 } 356 CPU_ZERO(&all_cpus); 357 ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0); 358 if (ret != 0) { 359 list->mtl_error = ret; 360 return (-1); 361 } 362 ucp_array = malloc(sizeof(struct uma_cache) * (mp_maxid + 1)); 363 if (ucp_array == NULL) { 364 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 365 return (-1); 366 } 367 for (kzp = LIST_FIRST(&uma_kegs); kzp != NULL; kzp = 368 LIST_NEXT(&kz, uk_link)) { 369 ret = kread(kvm, kzp, &kz, sizeof(kz), 0); 370 if (ret != 0) { 371 free(ucp_array); 372 _memstat_mtl_empty(list); 373 list->mtl_error = ret; 374 return (-1); 375 } 376 for (uzp = LIST_FIRST(&kz.uk_zones); uzp != NULL; uzp = 377 LIST_NEXT(&uz, uz_link)) { 378 ret = kread(kvm, uzp, &uz, sizeof(uz), 0); 379 if (ret != 0) { 380 free(ucp_array); 381 _memstat_mtl_empty(list); 382 list->mtl_error = ret; 383 return (-1); 384 } 385 ret = kread(kvm, uzp, ucp_array, 386 sizeof(struct uma_cache) * (mp_maxid + 1), 387 offsetof(struct uma_zone, uz_cpu[0])); 388 if (ret != 0) { 389 free(ucp_array); 390 _memstat_mtl_empty(list); 391 list->mtl_error = ret; 392 return (-1); 393 } 394 ret = kread_string(kvm, uz.uz_name, name, 395 MEMTYPE_MAXNAME); 396 if (ret != 0) { 397 free(ucp_array); 398 _memstat_mtl_empty(list); 399 list->mtl_error = ret; 400 return (-1); 401 } 402 if (hint_dontsearch == 0) { 403 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 404 name); 405 } else 406 mtp = NULL; 407 if (mtp == NULL) 408 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 409 name, mp_maxid + 1); 410 if (mtp == NULL) { 411 free(ucp_array); 412 _memstat_mtl_empty(list); 413 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 414 return (-1); 415 } 416 /* 417 * Reset the statistics on a current node. 418 */ 419 _memstat_mt_reset_stats(mtp, mp_maxid + 1); 420 mtp->mt_numallocs = kvm_counter_u64_fetch(kvm, 421 (unsigned long )uz.uz_allocs); 422 mtp->mt_numfrees = kvm_counter_u64_fetch(kvm, 423 (unsigned long )uz.uz_frees); 424 mtp->mt_failures = kvm_counter_u64_fetch(kvm, 425 (unsigned long )uz.uz_fails); 426 mtp->mt_sleeps = uz.uz_sleeps; 427 /* See comment above in memstat_sysctl_uma(). */ 428 if (mtp->mt_numallocs < mtp->mt_numfrees) 429 mtp->mt_numallocs = mtp->mt_numfrees; 430 431 mtp->mt_xdomain = uz.uz_xdomain; 432 if (kz.uk_flags & UMA_ZFLAG_INTERNAL) 433 goto skip_percpu; 434 for (i = 0; i < mp_maxid + 1; i++) { 435 if (!CPU_ISSET(i, &all_cpus)) 436 continue; 437 ucp = &ucp_array[i]; 438 mtp->mt_numallocs += ucp->uc_allocs; 439 mtp->mt_numfrees += ucp->uc_frees; 440 441 if (ucp->uc_allocbucket != NULL) { 442 ret = kread(kvm, ucp->uc_allocbucket, 443 &ub, sizeof(ub), 0); 444 if (ret != 0) { 445 free(ucp_array); 446 _memstat_mtl_empty(list); 447 list->mtl_error = ret; 448 return (-1); 449 } 450 mtp->mt_free += ub.ub_cnt; 451 } 452 if (ucp->uc_freebucket != NULL) { 453 ret = kread(kvm, ucp->uc_freebucket, 454 &ub, sizeof(ub), 0); 455 if (ret != 0) { 456 free(ucp_array); 457 _memstat_mtl_empty(list); 458 list->mtl_error = ret; 459 return (-1); 460 } 461 mtp->mt_free += ub.ub_cnt; 462 } 463 } 464 skip_percpu: 465 mtp->mt_size = kz.uk_size; 466 mtp->mt_rsize = kz.uk_rsize; 467 mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size; 468 mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size; 469 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 470 mtp->mt_countlimit = uz.uz_max_items; 471 mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size; 472 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 473 for (i = 0; i < ndomains; i++) { 474 ret = kread(kvm, &uz.uz_domain[i], &uzd, 475 sizeof(uzd), 0); 476 for (ubp = 477 TAILQ_FIRST(&uzd.uzd_buckets); 478 ubp != NULL; 479 ubp = TAILQ_NEXT(&ub, ub_link)) { 480 ret = kread(kvm, ubp, &ub, 481 sizeof(ub), 0); 482 mtp->mt_zonefree += ub.ub_cnt; 483 } 484 } 485 if (!((kz.uk_flags & UMA_ZONE_SECONDARY) && 486 LIST_FIRST(&kz.uk_zones) != uzp)) { 487 mtp->mt_kegfree = kz.uk_free; 488 mtp->mt_free += mtp->mt_kegfree; 489 } 490 mtp->mt_free += mtp->mt_zonefree; 491 } 492 } 493 free(ucp_array); 494 return (0); 495 } 496