1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2005-2006 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/cpuset.h> 33 #include <sys/sysctl.h> 34 35 #include <vm/uma.h> 36 #include <vm/uma_int.h> 37 38 #include <err.h> 39 #include <errno.h> 40 #include <kvm.h> 41 #include <nlist.h> 42 #include <stddef.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <string.h> 46 #include <unistd.h> 47 48 #include "memstat.h" 49 #include "memstat_internal.h" 50 51 static struct nlist namelist[] = { 52 #define X_UMA_KEGS 0 53 { .n_name = "_uma_kegs" }, 54 #define X_MP_MAXID 1 55 { .n_name = "_mp_maxid" }, 56 #define X_ALL_CPUS 2 57 { .n_name = "_all_cpus" }, 58 { .n_name = "" }, 59 }; 60 61 /* 62 * Extract uma(9) statistics from the running kernel, and store all memory 63 * type information in the passed list. For each type, check the list for an 64 * existing entry with the right name/allocator -- if present, update that 65 * entry. Otherwise, add a new entry. On error, the entire list will be 66 * cleared, as entries will be in an inconsistent state. 67 * 68 * To reduce the level of work for a list that starts empty, we keep around a 69 * hint as to whether it was empty when we began, so we can avoid searching 70 * the list for entries to update. Updates are O(n^2) due to searching for 71 * each entry before adding it. 72 */ 73 int 74 memstat_sysctl_uma(struct memory_type_list *list, int flags) 75 { 76 struct uma_stream_header *ushp; 77 struct uma_type_header *uthp; 78 struct uma_percpu_stat *upsp; 79 struct memory_type *mtp; 80 int count, hint_dontsearch, i, j, maxcpus, maxid; 81 char *buffer, *p; 82 size_t size; 83 84 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 85 86 /* 87 * Query the number of CPUs, number of malloc types so that we can 88 * guess an initial buffer size. We loop until we succeed or really 89 * fail. Note that the value of maxcpus we query using sysctl is not 90 * the version we use when processing the real data -- that is read 91 * from the header. 92 */ 93 retry: 94 size = sizeof(maxid); 95 if (sysctlbyname("kern.smp.maxid", &maxid, &size, NULL, 0) < 0) { 96 if (errno == EACCES || errno == EPERM) 97 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 98 else 99 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 100 return (-1); 101 } 102 if (size != sizeof(maxid)) { 103 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 104 return (-1); 105 } 106 107 size = sizeof(count); 108 if (sysctlbyname("vm.zone_count", &count, &size, NULL, 0) < 0) { 109 if (errno == EACCES || errno == EPERM) 110 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 111 else 112 list->mtl_error = MEMSTAT_ERROR_VERSION; 113 return (-1); 114 } 115 if (size != sizeof(count)) { 116 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 117 return (-1); 118 } 119 120 size = sizeof(*uthp) + count * (sizeof(*uthp) + sizeof(*upsp) * 121 (maxid + 1)); 122 123 buffer = malloc(size); 124 if (buffer == NULL) { 125 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 126 return (-1); 127 } 128 129 if (sysctlbyname("vm.zone_stats", buffer, &size, NULL, 0) < 0) { 130 /* 131 * XXXRW: ENOMEM is an ambiguous return, we should bound the 132 * number of loops, perhaps. 133 */ 134 if (errno == ENOMEM) { 135 free(buffer); 136 goto retry; 137 } 138 if (errno == EACCES || errno == EPERM) 139 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 140 else 141 list->mtl_error = MEMSTAT_ERROR_VERSION; 142 free(buffer); 143 return (-1); 144 } 145 146 if (size == 0) { 147 free(buffer); 148 return (0); 149 } 150 151 if (size < sizeof(*ushp)) { 152 list->mtl_error = MEMSTAT_ERROR_VERSION; 153 free(buffer); 154 return (-1); 155 } 156 p = buffer; 157 ushp = (struct uma_stream_header *)p; 158 p += sizeof(*ushp); 159 160 if (ushp->ush_version != UMA_STREAM_VERSION) { 161 list->mtl_error = MEMSTAT_ERROR_VERSION; 162 free(buffer); 163 return (-1); 164 } 165 166 /* 167 * For the remainder of this function, we are quite trusting about 168 * the layout of structures and sizes, since we've determined we have 169 * a matching version and acceptable CPU count. 170 */ 171 maxcpus = ushp->ush_maxcpus; 172 count = ushp->ush_count; 173 for (i = 0; i < count; i++) { 174 uthp = (struct uma_type_header *)p; 175 p += sizeof(*uthp); 176 177 if (hint_dontsearch == 0) { 178 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 179 uthp->uth_name); 180 } else 181 mtp = NULL; 182 if (mtp == NULL) 183 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 184 uthp->uth_name, maxid + 1); 185 if (mtp == NULL) { 186 _memstat_mtl_empty(list); 187 free(buffer); 188 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 189 return (-1); 190 } 191 192 /* 193 * Reset the statistics on a current node. 194 */ 195 _memstat_mt_reset_stats(mtp, maxid + 1); 196 197 mtp->mt_numallocs = uthp->uth_allocs; 198 mtp->mt_numfrees = uthp->uth_frees; 199 mtp->mt_failures = uthp->uth_fails; 200 mtp->mt_sleeps = uthp->uth_sleeps; 201 202 for (j = 0; j < maxcpus; j++) { 203 upsp = (struct uma_percpu_stat *)p; 204 p += sizeof(*upsp); 205 206 mtp->mt_percpu_cache[j].mtp_free = 207 upsp->ups_cache_free; 208 mtp->mt_free += upsp->ups_cache_free; 209 mtp->mt_numallocs += upsp->ups_allocs; 210 mtp->mt_numfrees += upsp->ups_frees; 211 } 212 213 mtp->mt_size = uthp->uth_size; 214 mtp->mt_rsize = uthp->uth_rsize; 215 mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size; 216 mtp->mt_memfreed = mtp->mt_numfrees * uthp->uth_size; 217 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 218 mtp->mt_countlimit = uthp->uth_limit; 219 mtp->mt_byteslimit = uthp->uth_limit * uthp->uth_size; 220 221 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 222 mtp->mt_zonefree = uthp->uth_zone_free; 223 224 /* 225 * UMA secondary zones share a keg with the primary zone. To 226 * avoid double-reporting of free items, report keg free 227 * items only in the primary zone. 228 */ 229 if (!(uthp->uth_zone_flags & UTH_ZONE_SECONDARY)) { 230 mtp->mt_kegfree = uthp->uth_keg_free; 231 mtp->mt_free += mtp->mt_kegfree; 232 } 233 mtp->mt_free += mtp->mt_zonefree; 234 } 235 236 free(buffer); 237 238 return (0); 239 } 240 241 static int 242 kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size, 243 size_t offset) 244 { 245 ssize_t ret; 246 247 ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address, 248 size); 249 if (ret < 0) 250 return (MEMSTAT_ERROR_KVM); 251 if ((size_t)ret != size) 252 return (MEMSTAT_ERROR_KVM_SHORTREAD); 253 return (0); 254 } 255 256 static int 257 kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen) 258 { 259 ssize_t ret; 260 int i; 261 262 for (i = 0; i < buflen; i++) { 263 ret = kvm_read(kvm, (unsigned long)kvm_pointer + i, 264 &(buffer[i]), sizeof(char)); 265 if (ret < 0) 266 return (MEMSTAT_ERROR_KVM); 267 if ((size_t)ret != sizeof(char)) 268 return (MEMSTAT_ERROR_KVM_SHORTREAD); 269 if (buffer[i] == '\0') 270 return (0); 271 } 272 /* Truncate. */ 273 buffer[i-1] = '\0'; 274 return (0); 275 } 276 277 static int 278 kread_symbol(kvm_t *kvm, int index, void *address, size_t size, 279 size_t offset) 280 { 281 ssize_t ret; 282 283 ret = kvm_read(kvm, namelist[index].n_value + offset, address, size); 284 if (ret < 0) 285 return (MEMSTAT_ERROR_KVM); 286 if ((size_t)ret != size) 287 return (MEMSTAT_ERROR_KVM_SHORTREAD); 288 return (0); 289 } 290 291 /* 292 * memstat_kvm_uma() is similar to memstat_sysctl_uma(), only it extracts 293 * UMA(9) statistics from a kernel core/memory file. 294 */ 295 int 296 memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) 297 { 298 LIST_HEAD(, uma_keg) uma_kegs; 299 struct memory_type *mtp; 300 struct uma_bucket *ubp, ub; 301 struct uma_cache *ucp, *ucp_array; 302 struct uma_zone *uzp, uz; 303 struct uma_keg *kzp, kz; 304 int hint_dontsearch, i, mp_maxid, ret; 305 char name[MEMTYPE_MAXNAME]; 306 cpuset_t all_cpus; 307 long cpusetsize; 308 kvm_t *kvm; 309 310 kvm = (kvm_t *)kvm_handle; 311 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 312 if (kvm_nlist(kvm, namelist) != 0) { 313 list->mtl_error = MEMSTAT_ERROR_KVM; 314 return (-1); 315 } 316 if (namelist[X_UMA_KEGS].n_type == 0 || 317 namelist[X_UMA_KEGS].n_value == 0) { 318 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 319 return (-1); 320 } 321 ret = kread_symbol(kvm, X_MP_MAXID, &mp_maxid, sizeof(mp_maxid), 0); 322 if (ret != 0) { 323 list->mtl_error = ret; 324 return (-1); 325 } 326 ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0); 327 if (ret != 0) { 328 list->mtl_error = ret; 329 return (-1); 330 } 331 cpusetsize = sysconf(_SC_CPUSET_SIZE); 332 if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { 333 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 334 return (-1); 335 } 336 CPU_ZERO(&all_cpus); 337 ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0); 338 if (ret != 0) { 339 list->mtl_error = ret; 340 return (-1); 341 } 342 ucp_array = malloc(sizeof(struct uma_cache) * (mp_maxid + 1)); 343 if (ucp_array == NULL) { 344 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 345 return (-1); 346 } 347 for (kzp = LIST_FIRST(&uma_kegs); kzp != NULL; kzp = 348 LIST_NEXT(&kz, uk_link)) { 349 ret = kread(kvm, kzp, &kz, sizeof(kz), 0); 350 if (ret != 0) { 351 free(ucp_array); 352 _memstat_mtl_empty(list); 353 list->mtl_error = ret; 354 return (-1); 355 } 356 for (uzp = LIST_FIRST(&kz.uk_zones); uzp != NULL; uzp = 357 LIST_NEXT(&uz, uz_link)) { 358 ret = kread(kvm, uzp, &uz, sizeof(uz), 0); 359 if (ret != 0) { 360 free(ucp_array); 361 _memstat_mtl_empty(list); 362 list->mtl_error = ret; 363 return (-1); 364 } 365 ret = kread(kvm, uzp, ucp_array, 366 sizeof(struct uma_cache) * (mp_maxid + 1), 367 offsetof(struct uma_zone, uz_cpu[0])); 368 if (ret != 0) { 369 free(ucp_array); 370 _memstat_mtl_empty(list); 371 list->mtl_error = ret; 372 return (-1); 373 } 374 ret = kread_string(kvm, uz.uz_name, name, 375 MEMTYPE_MAXNAME); 376 if (ret != 0) { 377 free(ucp_array); 378 _memstat_mtl_empty(list); 379 list->mtl_error = ret; 380 return (-1); 381 } 382 if (hint_dontsearch == 0) { 383 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 384 name); 385 } else 386 mtp = NULL; 387 if (mtp == NULL) 388 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 389 name, mp_maxid + 1); 390 if (mtp == NULL) { 391 free(ucp_array); 392 _memstat_mtl_empty(list); 393 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 394 return (-1); 395 } 396 /* 397 * Reset the statistics on a current node. 398 */ 399 _memstat_mt_reset_stats(mtp, mp_maxid + 1); 400 mtp->mt_numallocs = uz.uz_allocs; 401 mtp->mt_numfrees = uz.uz_frees; 402 mtp->mt_failures = uz.uz_fails; 403 mtp->mt_sleeps = uz.uz_sleeps; 404 if (kz.uk_flags & UMA_ZFLAG_INTERNAL) 405 goto skip_percpu; 406 for (i = 0; i < mp_maxid + 1; i++) { 407 if (!CPU_ISSET(i, &all_cpus)) 408 continue; 409 ucp = &ucp_array[i]; 410 mtp->mt_numallocs += ucp->uc_allocs; 411 mtp->mt_numfrees += ucp->uc_frees; 412 413 if (ucp->uc_allocbucket != NULL) { 414 ret = kread(kvm, ucp->uc_allocbucket, 415 &ub, sizeof(ub), 0); 416 if (ret != 0) { 417 free(ucp_array); 418 _memstat_mtl_empty(list); 419 list->mtl_error = ret; 420 return (-1); 421 } 422 mtp->mt_free += ub.ub_cnt; 423 } 424 if (ucp->uc_freebucket != NULL) { 425 ret = kread(kvm, ucp->uc_freebucket, 426 &ub, sizeof(ub), 0); 427 if (ret != 0) { 428 free(ucp_array); 429 _memstat_mtl_empty(list); 430 list->mtl_error = ret; 431 return (-1); 432 } 433 mtp->mt_free += ub.ub_cnt; 434 } 435 } 436 skip_percpu: 437 mtp->mt_size = kz.uk_size; 438 mtp->mt_rsize = kz.uk_rsize; 439 mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size; 440 mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size; 441 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 442 if (kz.uk_ppera > 1) 443 mtp->mt_countlimit = kz.uk_maxpages / 444 kz.uk_ipers; 445 else 446 mtp->mt_countlimit = kz.uk_maxpages * 447 kz.uk_ipers; 448 mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size; 449 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 450 for (ubp = LIST_FIRST(&uz.uz_buckets); ubp != 451 NULL; ubp = LIST_NEXT(&ub, ub_link)) { 452 ret = kread(kvm, ubp, &ub, sizeof(ub), 0); 453 mtp->mt_zonefree += ub.ub_cnt; 454 } 455 if (!((kz.uk_flags & UMA_ZONE_SECONDARY) && 456 LIST_FIRST(&kz.uk_zones) != uzp)) { 457 mtp->mt_kegfree = kz.uk_free; 458 mtp->mt_free += mtp->mt_kegfree; 459 } 460 mtp->mt_free += mtp->mt_zonefree; 461 } 462 } 463 free(ucp_array); 464 return (0); 465 } 466