/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2005-2006 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>

#include <vm/uma.h>
#include <vm/uma_int.h>

#include <err.h>
#include <errno.h>
#include <kvm.h>
#include <nlist.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "memstat.h"
#include "memstat_internal.h"

static struct nlist namelist[] = {
#define	X_UMA_KEGS	0
	{ .n_name = "_uma_kegs" },
#define	X_MP_MAXID	1
	{ .n_name = "_mp_maxid" },
#define	X_ALL_CPUS	2
	{ .n_name = "_all_cpus" },
#define	X_VM_NDOMAINS	3
	{ .n_name = "_vm_ndomains" },
	{ .n_name = "" },
};

/*
 * Extract uma(9) statistics from the running kernel, and store all memory
 * type information in the passed list.  For each type, check the list for an
 * existing entry with the right name/allocator -- if present, update that
 * entry.  Otherwise, add a new entry.  On error, the entire list will be
 * cleared, as entries will be in an inconsistent state.
 *
 * To reduce the level of work for a list that starts empty, we keep around a
 * hint as to whether it was empty when we began, so we can avoid searching
 * the list for entries to update.  Updates are O(n^2) due to searching for
 * each entry before adding it.
 */
int
memstat_sysctl_uma(struct memory_type_list *list, int flags)
{
	struct uma_stream_header *ushp;
	struct uma_type_header *uthp;
	struct uma_percpu_stat *upsp;
	struct memory_type *mtp;
	int count, hint_dontsearch, i, j, maxcpus, maxid;
	char *buffer, *p;
	size_t size;

	hint_dontsearch = LIST_EMPTY(&list->mtl_list);

	/*
	 * Query the number of CPUs and the number of zones so that we can
	 * guess an initial buffer size.  We loop until we succeed or really
	 * fail.  Note that the value of maxcpus we query using sysctl is not
	 * the version we use when processing the real data -- that is read
	 * from the header.
	 */
retry:
	size = sizeof(maxid);
	if (sysctlbyname("kern.smp.maxid", &maxid, &size, NULL, 0) < 0) {
		if (errno == EACCES || errno == EPERM)
			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
		else
			list->mtl_error = MEMSTAT_ERROR_DATAERROR;
		return (-1);
	}
	if (size != sizeof(maxid)) {
		list->mtl_error = MEMSTAT_ERROR_DATAERROR;
		return (-1);
	}

	size = sizeof(count);
	if (sysctlbyname("vm.zone_count", &count, &size, NULL, 0) < 0) {
		if (errno == EACCES || errno == EPERM)
			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
		else
			list->mtl_error = MEMSTAT_ERROR_VERSION;
		return (-1);
	}
	if (size != sizeof(count)) {
		list->mtl_error = MEMSTAT_ERROR_DATAERROR;
		return (-1);
	}

	size = sizeof(*uthp) + count * (sizeof(*uthp) + sizeof(*upsp) *
	    (maxid + 1));

	buffer = malloc(size);
	if (buffer == NULL) {
		list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
		return (-1);
	}

	if (sysctlbyname("vm.zone_stats", buffer, &size, NULL, 0) < 0) {
		/*
		 * XXXRW: ENOMEM is an ambiguous return; perhaps we should
		 * bound the number of retries.
		 */
		if (errno == ENOMEM) {
			free(buffer);
			goto retry;
		}
		if (errno == EACCES || errno == EPERM)
			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
		else
			list->mtl_error = MEMSTAT_ERROR_VERSION;
		free(buffer);
		return (-1);
	}

	if (size == 0) {
		free(buffer);
		return (0);
	}

	if (size < sizeof(*ushp)) {
		list->mtl_error = MEMSTAT_ERROR_VERSION;
		free(buffer);
		return (-1);
	}
	p = buffer;
	ushp = (struct uma_stream_header *)p;
	p += sizeof(*ushp);

	if (ushp->ush_version != UMA_STREAM_VERSION) {
		list->mtl_error = MEMSTAT_ERROR_VERSION;
		free(buffer);
		return (-1);
	}

	/*
	 * For the remainder of this function, we are quite trusting about
	 * the layout of structures and sizes, since we've determined we have
	 * a matching version and acceptable CPU count.
	 */
	maxcpus = ushp->ush_maxcpus;
	count = ushp->ush_count;
	for (i = 0; i < count; i++) {
		uthp = (struct uma_type_header *)p;
		p += sizeof(*uthp);

		if (hint_dontsearch == 0) {
			mtp = memstat_mtl_find(list, ALLOCATOR_UMA,
			    uthp->uth_name);
		} else
			mtp = NULL;
		if (mtp == NULL)
			mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA,
			    uthp->uth_name, maxid + 1);
		if (mtp == NULL) {
			_memstat_mtl_empty(list);
			free(buffer);
			list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
			return (-1);
		}

		/*
		 * Reset the statistics on the current node.
		 */
		_memstat_mt_reset_stats(mtp, maxid + 1);

		mtp->mt_numallocs = uthp->uth_allocs;
		mtp->mt_numfrees = uthp->uth_frees;
		mtp->mt_failures = uthp->uth_fails;
		mtp->mt_sleeps = uthp->uth_sleeps;

		for (j = 0; j < maxcpus; j++) {
			upsp = (struct uma_percpu_stat *)p;
			p += sizeof(*upsp);

			mtp->mt_percpu_cache[j].mtp_free =
			    upsp->ups_cache_free;
			mtp->mt_free += upsp->ups_cache_free;
			mtp->mt_numallocs += upsp->ups_allocs;
			mtp->mt_numfrees += upsp->ups_frees;
		}

		mtp->mt_size = uthp->uth_size;
		mtp->mt_rsize = uthp->uth_rsize;
		mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size;
		mtp->mt_memfreed = mtp->mt_numfrees * uthp->uth_size;
		mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
		mtp->mt_countlimit = uthp->uth_limit;
		mtp->mt_byteslimit = uthp->uth_limit * uthp->uth_size;

		mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
		mtp->mt_zonefree = uthp->uth_zone_free;

		/*
		 * UMA secondary zones share a keg with the primary zone.  To
		 * avoid double-reporting of free items, report keg free
		 * items only in the primary zone.
		 */
		if (!(uthp->uth_zone_flags & UTH_ZONE_SECONDARY)) {
			mtp->mt_kegfree = uthp->uth_keg_free;
			mtp->mt_free += mtp->mt_kegfree;
		}
		mtp->mt_free += mtp->mt_zonefree;
	}

	free(buffer);

	return (0);
}

static int
kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size,
    size_t offset)
{
	ssize_t ret;

	ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address,
	    size);
	if (ret < 0)
		return (MEMSTAT_ERROR_KVM);
	if ((size_t)ret != size)
		return (MEMSTAT_ERROR_KVM_SHORTREAD);
	return (0);
}

static int
kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen)
{
	ssize_t ret;
	int i;

	for (i = 0; i < buflen; i++) {
		ret = kvm_read(kvm, (unsigned long)kvm_pointer + i,
		    &(buffer[i]), sizeof(char));
		if (ret < 0)
			return (MEMSTAT_ERROR_KVM);
		if ((size_t)ret != sizeof(char))
			return (MEMSTAT_ERROR_KVM_SHORTREAD);
		if (buffer[i] == '\0')
			return (0);
	}
	/* Truncate. */
	buffer[i - 1] = '\0';
	return (0);
}

static int
kread_symbol(kvm_t *kvm, int index, void *address, size_t size,
    size_t offset)
{
	ssize_t ret;

	ret = kvm_read(kvm, namelist[index].n_value + offset, address, size);
	if (ret < 0)
		return (MEMSTAT_ERROR_KVM);
	if ((size_t)ret != size)
		return (MEMSTAT_ERROR_KVM_SHORTREAD);
	return (0);
}
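
/*
 * Example (illustrative sketch, not built as part of the library): how a
 * memstat(3) consumer might call memstat_sysctl_uma() above and walk the
 * resulting list.  The accessors used here (memstat_mtl_alloc(),
 * memstat_mtl_first(), memstat_mtl_next(), memstat_get_name(),
 * memstat_get_count(), memstat_get_free(), memstat_mtl_geterror() and
 * memstat_strerror()) are assumed to match the declarations in memstat.h;
 * the guard macro and function name are placeholders.
 */
#ifdef LIBMEMSTAT_UMA_EXAMPLE
#include <stdint.h>

static void
print_uma_zones(void)
{
	struct memory_type_list *mtlp;
	struct memory_type *mtp;

	/* Allocate an empty list and fill it from the live kernel. */
	mtlp = memstat_mtl_alloc();
	if (mtlp == NULL)
		err(1, "memstat_mtl_alloc");
	if (memstat_sysctl_uma(mtlp, 0) < 0)
		errx(1, "memstat_sysctl_uma: %s",
		    memstat_strerror(memstat_mtl_geterror(mtlp)));

	/* Walk the list of UMA zones and print per-zone usage. */
	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
	    mtp = memstat_mtl_next(mtp))
		printf("%-24s in use: %ju free: %ju\n",
		    memstat_get_name(mtp),
		    (uintmax_t)memstat_get_count(mtp),
		    (uintmax_t)memstat_get_free(mtp));
	memstat_mtl_free(mtlp);
}
#endif /* LIBMEMSTAT_UMA_EXAMPLE */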

/*
 * memstat_kvm_uma() is similar to memstat_sysctl_uma(), except that it
 * extracts UMA(9) statistics from a kernel core/memory file.
 */
int
memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
{
	LIST_HEAD(, uma_keg) uma_kegs;
	struct memory_type *mtp;
	struct uma_zone_domain uzd;
	struct uma_bucket *ubp, ub;
	struct uma_cache *ucp, *ucp_array;
	struct uma_zone *uzp, uz;
	struct uma_keg *kzp, kz;
	int hint_dontsearch, i, mp_maxid, ndomains, ret;
	char name[MEMTYPE_MAXNAME];
	cpuset_t all_cpus;
	long cpusetsize;
	kvm_t *kvm;

	kvm = (kvm_t *)kvm_handle;
	hint_dontsearch = LIST_EMPTY(&list->mtl_list);
	if (kvm_nlist(kvm, namelist) != 0) {
		list->mtl_error = MEMSTAT_ERROR_KVM;
		return (-1);
	}
	if (namelist[X_UMA_KEGS].n_type == 0 ||
	    namelist[X_UMA_KEGS].n_value == 0) {
		list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
		return (-1);
	}
	ret = kread_symbol(kvm, X_MP_MAXID, &mp_maxid, sizeof(mp_maxid), 0);
	if (ret != 0) {
		list->mtl_error = ret;
		return (-1);
	}
	ret = kread_symbol(kvm, X_VM_NDOMAINS, &ndomains,
	    sizeof(ndomains), 0);
	if (ret != 0) {
		list->mtl_error = ret;
		return (-1);
	}
	ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0);
	if (ret != 0) {
		list->mtl_error = ret;
		return (-1);
	}
	cpusetsize = sysconf(_SC_CPUSET_SIZE);
	if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) {
		list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
		return (-1);
	}
	CPU_ZERO(&all_cpus);
	ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0);
	if (ret != 0) {
		list->mtl_error = ret;
		return (-1);
	}
	ucp_array = malloc(sizeof(struct uma_cache) * (mp_maxid + 1));
	if (ucp_array == NULL) {
		list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
		return (-1);
	}

	/*
	 * Walk the kernel's list of kegs and, for each keg, its list of
	 * zones, copying each structure out of the core with kread().
	 */
	for (kzp = LIST_FIRST(&uma_kegs); kzp != NULL; kzp =
	    LIST_NEXT(&kz, uk_link)) {
		ret = kread(kvm, kzp, &kz, sizeof(kz), 0);
		if (ret != 0) {
			free(ucp_array);
			_memstat_mtl_empty(list);
			list->mtl_error = ret;
			return (-1);
		}
		for (uzp = LIST_FIRST(&kz.uk_zones); uzp != NULL; uzp =
		    LIST_NEXT(&uz, uz_link)) {
			ret = kread(kvm, uzp, &uz, sizeof(uz), 0);
			if (ret != 0) {
				free(ucp_array);
				_memstat_mtl_empty(list);
				list->mtl_error = ret;
				return (-1);
			}
			/* Copy out the zone's per-CPU caches from uz_cpu[]. */
			ret = kread(kvm, uzp, ucp_array,
			    sizeof(struct uma_cache) * (mp_maxid + 1),
			    offsetof(struct uma_zone, uz_cpu[0]));
			if (ret != 0) {
				free(ucp_array);
				_memstat_mtl_empty(list);
				list->mtl_error = ret;
				return (-1);
			}
			ret = kread_string(kvm, uz.uz_name, name,
			    MEMTYPE_MAXNAME);
			if (ret != 0) {
				free(ucp_array);
				_memstat_mtl_empty(list);
				list->mtl_error = ret;
				return (-1);
			}
			if (hint_dontsearch == 0) {
				mtp = memstat_mtl_find(list, ALLOCATOR_UMA,
				    name);
			} else
				mtp = NULL;
			if (mtp == NULL)
				mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA,
				    name, mp_maxid + 1);
			if (mtp == NULL) {
				free(ucp_array);
				_memstat_mtl_empty(list);
				list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
				return (-1);
			}

			/*
			 * Reset the statistics on the current node.
			 */
			_memstat_mt_reset_stats(mtp, mp_maxid + 1);
			mtp->mt_numallocs = uz.uz_allocs;
			mtp->mt_numfrees = uz.uz_frees;
			mtp->mt_failures = uz.uz_fails;
			mtp->mt_sleeps = uz.uz_sleeps;

			/*
			 * Skip the per-CPU cache accounting for internal
			 * zones.
			 */
			if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
				goto skip_percpu;
			for (i = 0; i < mp_maxid + 1; i++) {
				if (!CPU_ISSET(i, &all_cpus))
					continue;
				ucp = &ucp_array[i];
				mtp->mt_numallocs += ucp->uc_allocs;
				mtp->mt_numfrees += ucp->uc_frees;

				if (ucp->uc_allocbucket != NULL) {
					ret = kread(kvm, ucp->uc_allocbucket,
					    &ub, sizeof(ub), 0);
					if (ret != 0) {
						free(ucp_array);
						_memstat_mtl_empty(list);
						list->mtl_error = ret;
						return (-1);
					}
					mtp->mt_free += ub.ub_cnt;
				}
				if (ucp->uc_freebucket != NULL) {
					ret = kread(kvm, ucp->uc_freebucket,
					    &ub, sizeof(ub), 0);
					if (ret != 0) {
						free(ucp_array);
						_memstat_mtl_empty(list);
						list->mtl_error = ret;
						return (-1);
					}
					mtp->mt_free += ub.ub_cnt;
				}
			}
skip_percpu:
			mtp->mt_size = kz.uk_size;
			mtp->mt_rsize = kz.uk_rsize;
			mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size;
			mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size;
			mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
			if (kz.uk_ppera > 1)
				mtp->mt_countlimit = kz.uk_maxpages /
				    kz.uk_ipers;
			else
				mtp->mt_countlimit = kz.uk_maxpages *
				    kz.uk_ipers;
			mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size;
			mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;

			/*
			 * Sum free items held in buckets on each domain's
			 * bucket list.  Note that uz_domain[i] is addressed
			 * relative to the kernel pointer uzp, not the local
			 * copy.
			 */
			for (i = 0; i < ndomains; i++) {
				ret = kread(kvm, &uzp->uz_domain[i], &uzd,
				    sizeof(uzd), 0);
				if (ret != 0) {
					free(ucp_array);
					_memstat_mtl_empty(list);
					list->mtl_error = ret;
					return (-1);
				}
				for (ubp =
				    LIST_FIRST(&uzd.uzd_buckets);
				    ubp != NULL;
				    ubp = LIST_NEXT(&ub, ub_link)) {
					ret = kread(kvm, ubp, &ub,
					    sizeof(ub), 0);
					if (ret != 0) {
						free(ucp_array);
						_memstat_mtl_empty(list);
						list->mtl_error = ret;
						return (-1);
					}
					mtp->mt_zonefree += ub.ub_cnt;
				}
			}
			if (!((kz.uk_flags & UMA_ZONE_SECONDARY) &&
			    LIST_FIRST(&kz.uk_zones) != uzp)) {
				mtp->mt_kegfree = kz.uk_free;
				mtp->mt_free += mtp->mt_kegfree;
			}
			mtp->mt_free += mtp->mt_zonefree;
		}
	}
	free(ucp_array);
	return (0);
}
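
/*
 * Example (illustrative sketch, not built as part of the library): reading
 * the same UMA statistics from a kernel and core file via memstat_kvm_uma()
 * above, which takes the kvm(3) handle as a void pointer.  kvm_openfiles(),
 * kvm_close() and the memstat(3) accessors used here are assumed to match
 * the installed headers; the guard macro, function name, and kernel/core
 * path arguments are placeholders.
 */
#ifdef LIBMEMSTAT_UMA_EXAMPLE
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>

static void
print_uma_zones_kvm(const char *kernel, const char *core)
{
	char errbuf[_POSIX2_LINE_MAX];
	struct memory_type_list *mtlp;
	struct memory_type *mtp;
	kvm_t *kvm;

	/* Open the kernel image and core file read-only. */
	kvm = kvm_openfiles(kernel, core, NULL, O_RDONLY, errbuf);
	if (kvm == NULL)
		errx(1, "kvm_openfiles: %s", errbuf);
	mtlp = memstat_mtl_alloc();
	if (mtlp == NULL)
		err(1, "memstat_mtl_alloc");
	if (memstat_kvm_uma(mtlp, kvm) < 0)
		errx(1, "memstat_kvm_uma: %s",
		    memstat_strerror(memstat_mtl_geterror(mtlp)));

	/* Walk the list of UMA zones and print per-zone usage. */
	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
	    mtp = memstat_mtl_next(mtp))
		printf("%-24s in use: %ju free: %ju\n",
		    memstat_get_name(mtp),
		    (uintmax_t)memstat_get_count(mtp),
		    (uintmax_t)memstat_get_free(mtp));
	memstat_mtl_free(mtlp);
	kvm_close(kvm);
}
#endif /* LIBMEMSTAT_UMA_EXAMPLE */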