/*-
 * Copyright (c) 2005-2006 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/param.h> 30 #include <sys/cpuset.h> 31 #include <sys/sysctl.h> 32 33 #include <vm/uma.h> 34 #include <vm/uma_int.h> 35 36 #include <err.h> 37 #include <errno.h> 38 #include <kvm.h> 39 #include <nlist.h> 40 #include <stddef.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <unistd.h> 45 46 #include "memstat.h" 47 #include "memstat_internal.h" 48 49 static struct nlist namelist[] = { 50 #define X_UMA_KEGS 0 51 { .n_name = "_uma_kegs" }, 52 #define X_MP_MAXID 1 53 { .n_name = "_mp_maxid" }, 54 #define X_ALL_CPUS 2 55 { .n_name = "_all_cpus" }, 56 { .n_name = "" }, 57 }; 58 59 /* 60 * Extract uma(9) statistics from the running kernel, and store all memory 61 * type information in the passed list. For each type, check the list for an 62 * existing entry with the right name/allocator -- if present, update that 63 * entry. Otherwise, add a new entry. On error, the entire list will be 64 * cleared, as entries will be in an inconsistent state. 65 * 66 * To reduce the level of work for a list that starts empty, we keep around a 67 * hint as to whether it was empty when we began, so we can avoid searching 68 * the list for entries to update. Updates are O(n^2) due to searching for 69 * each entry before adding it. 70 */ 71 int 72 memstat_sysctl_uma(struct memory_type_list *list, int flags) 73 { 74 struct uma_stream_header *ushp; 75 struct uma_type_header *uthp; 76 struct uma_percpu_stat *upsp; 77 struct memory_type *mtp; 78 int count, hint_dontsearch, i, j, maxcpus, maxid; 79 char *buffer, *p; 80 size_t size; 81 82 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 83 84 /* 85 * Query the number of CPUs, number of malloc types so that we can 86 * guess an initial buffer size. We loop until we succeed or really 87 * fail. Note that the value of maxcpus we query using sysctl is not 88 * the version we use when processing the real data -- that is read 89 * from the header. 
90 */ 91 retry: 92 size = sizeof(maxid); 93 if (sysctlbyname("kern.smp.maxid", &maxid, &size, NULL, 0) < 0) { 94 if (errno == EACCES || errno == EPERM) 95 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 96 else 97 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 98 return (-1); 99 } 100 if (size != sizeof(maxid)) { 101 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 102 return (-1); 103 } 104 105 size = sizeof(count); 106 if (sysctlbyname("vm.zone_count", &count, &size, NULL, 0) < 0) { 107 if (errno == EACCES || errno == EPERM) 108 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 109 else 110 list->mtl_error = MEMSTAT_ERROR_VERSION; 111 return (-1); 112 } 113 if (size != sizeof(count)) { 114 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 115 return (-1); 116 } 117 118 size = sizeof(*uthp) + count * (sizeof(*uthp) + sizeof(*upsp) * 119 (maxid + 1)); 120 121 buffer = malloc(size); 122 if (buffer == NULL) { 123 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 124 return (-1); 125 } 126 127 if (sysctlbyname("vm.zone_stats", buffer, &size, NULL, 0) < 0) { 128 /* 129 * XXXRW: ENOMEM is an ambiguous return, we should bound the 130 * number of loops, perhaps. 
131 */ 132 if (errno == ENOMEM) { 133 free(buffer); 134 goto retry; 135 } 136 if (errno == EACCES || errno == EPERM) 137 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 138 else 139 list->mtl_error = MEMSTAT_ERROR_VERSION; 140 free(buffer); 141 return (-1); 142 } 143 144 if (size == 0) { 145 free(buffer); 146 return (0); 147 } 148 149 if (size < sizeof(*ushp)) { 150 list->mtl_error = MEMSTAT_ERROR_VERSION; 151 free(buffer); 152 return (-1); 153 } 154 p = buffer; 155 ushp = (struct uma_stream_header *)p; 156 p += sizeof(*ushp); 157 158 if (ushp->ush_version != UMA_STREAM_VERSION) { 159 list->mtl_error = MEMSTAT_ERROR_VERSION; 160 free(buffer); 161 return (-1); 162 } 163 164 /* 165 * For the remainder of this function, we are quite trusting about 166 * the layout of structures and sizes, since we've determined we have 167 * a matching version and acceptable CPU count. 168 */ 169 maxcpus = ushp->ush_maxcpus; 170 count = ushp->ush_count; 171 for (i = 0; i < count; i++) { 172 uthp = (struct uma_type_header *)p; 173 p += sizeof(*uthp); 174 175 if (hint_dontsearch == 0) { 176 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 177 uthp->uth_name); 178 } else 179 mtp = NULL; 180 if (mtp == NULL) 181 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 182 uthp->uth_name, maxid + 1); 183 if (mtp == NULL) { 184 _memstat_mtl_empty(list); 185 free(buffer); 186 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 187 return (-1); 188 } 189 190 /* 191 * Reset the statistics on a current node. 
192 */ 193 _memstat_mt_reset_stats(mtp, maxid + 1); 194 195 mtp->mt_numallocs = uthp->uth_allocs; 196 mtp->mt_numfrees = uthp->uth_frees; 197 mtp->mt_failures = uthp->uth_fails; 198 mtp->mt_sleeps = uthp->uth_sleeps; 199 200 for (j = 0; j < maxcpus; j++) { 201 upsp = (struct uma_percpu_stat *)p; 202 p += sizeof(*upsp); 203 204 mtp->mt_percpu_cache[j].mtp_free = 205 upsp->ups_cache_free; 206 mtp->mt_free += upsp->ups_cache_free; 207 mtp->mt_numallocs += upsp->ups_allocs; 208 mtp->mt_numfrees += upsp->ups_frees; 209 } 210 211 mtp->mt_size = uthp->uth_size; 212 mtp->mt_rsize = uthp->uth_rsize; 213 mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size; 214 mtp->mt_memfreed = mtp->mt_numfrees * uthp->uth_size; 215 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 216 mtp->mt_countlimit = uthp->uth_limit; 217 mtp->mt_byteslimit = uthp->uth_limit * uthp->uth_size; 218 219 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 220 mtp->mt_zonefree = uthp->uth_zone_free; 221 222 /* 223 * UMA secondary zones share a keg with the primary zone. To 224 * avoid double-reporting of free items, report keg free 225 * items only in the primary zone. 
226 */ 227 if (!(uthp->uth_zone_flags & UTH_ZONE_SECONDARY)) { 228 mtp->mt_kegfree = uthp->uth_keg_free; 229 mtp->mt_free += mtp->mt_kegfree; 230 } 231 mtp->mt_free += mtp->mt_zonefree; 232 } 233 234 free(buffer); 235 236 return (0); 237 } 238 239 static int 240 kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size, 241 size_t offset) 242 { 243 ssize_t ret; 244 245 ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address, 246 size); 247 if (ret < 0) 248 return (MEMSTAT_ERROR_KVM); 249 if ((size_t)ret != size) 250 return (MEMSTAT_ERROR_KVM_SHORTREAD); 251 return (0); 252 } 253 254 static int 255 kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen) 256 { 257 ssize_t ret; 258 int i; 259 260 for (i = 0; i < buflen; i++) { 261 ret = kvm_read(kvm, (unsigned long)kvm_pointer + i, 262 &(buffer[i]), sizeof(char)); 263 if (ret < 0) 264 return (MEMSTAT_ERROR_KVM); 265 if ((size_t)ret != sizeof(char)) 266 return (MEMSTAT_ERROR_KVM_SHORTREAD); 267 if (buffer[i] == '\0') 268 return (0); 269 } 270 /* Truncate. */ 271 buffer[i-1] = '\0'; 272 return (0); 273 } 274 275 static int 276 kread_symbol(kvm_t *kvm, int index, void *address, size_t size, 277 size_t offset) 278 { 279 ssize_t ret; 280 281 ret = kvm_read(kvm, namelist[index].n_value + offset, address, size); 282 if (ret < 0) 283 return (MEMSTAT_ERROR_KVM); 284 if ((size_t)ret != size) 285 return (MEMSTAT_ERROR_KVM_SHORTREAD); 286 return (0); 287 } 288 289 /* 290 * memstat_kvm_uma() is similar to memstat_sysctl_uma(), only it extracts 291 * UMA(9) statistics from a kernel core/memory file. 
292 */ 293 int 294 memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) 295 { 296 LIST_HEAD(, uma_keg) uma_kegs; 297 struct memory_type *mtp; 298 struct uma_bucket *ubp, ub; 299 struct uma_cache *ucp, *ucp_array; 300 struct uma_zone *uzp, uz; 301 struct uma_keg *kzp, kz; 302 int hint_dontsearch, i, mp_maxid, ret; 303 char name[MEMTYPE_MAXNAME]; 304 cpuset_t all_cpus; 305 long cpusetsize; 306 kvm_t *kvm; 307 308 kvm = (kvm_t *)kvm_handle; 309 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 310 if (kvm_nlist(kvm, namelist) != 0) { 311 list->mtl_error = MEMSTAT_ERROR_KVM; 312 return (-1); 313 } 314 if (namelist[X_UMA_KEGS].n_type == 0 || 315 namelist[X_UMA_KEGS].n_value == 0) { 316 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 317 return (-1); 318 } 319 ret = kread_symbol(kvm, X_MP_MAXID, &mp_maxid, sizeof(mp_maxid), 0); 320 if (ret != 0) { 321 list->mtl_error = ret; 322 return (-1); 323 } 324 ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0); 325 if (ret != 0) { 326 list->mtl_error = ret; 327 return (-1); 328 } 329 cpusetsize = sysconf(_SC_CPUSET_SIZE); 330 if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { 331 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 332 return (-1); 333 } 334 CPU_ZERO(&all_cpus); 335 ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0); 336 if (ret != 0) { 337 list->mtl_error = ret; 338 return (-1); 339 } 340 ucp_array = malloc(sizeof(struct uma_cache) * (mp_maxid + 1)); 341 if (ucp_array == NULL) { 342 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 343 return (-1); 344 } 345 for (kzp = LIST_FIRST(&uma_kegs); kzp != NULL; kzp = 346 LIST_NEXT(&kz, uk_link)) { 347 ret = kread(kvm, kzp, &kz, sizeof(kz), 0); 348 if (ret != 0) { 349 free(ucp_array); 350 _memstat_mtl_empty(list); 351 list->mtl_error = ret; 352 return (-1); 353 } 354 for (uzp = LIST_FIRST(&kz.uk_zones); uzp != NULL; uzp = 355 LIST_NEXT(&uz, uz_link)) { 356 ret = kread(kvm, uzp, &uz, sizeof(uz), 0); 357 if (ret != 0) { 358 
free(ucp_array); 359 _memstat_mtl_empty(list); 360 list->mtl_error = ret; 361 return (-1); 362 } 363 ret = kread(kvm, uzp, ucp_array, 364 sizeof(struct uma_cache) * (mp_maxid + 1), 365 offsetof(struct uma_zone, uz_cpu[0])); 366 if (ret != 0) { 367 free(ucp_array); 368 _memstat_mtl_empty(list); 369 list->mtl_error = ret; 370 return (-1); 371 } 372 ret = kread_string(kvm, uz.uz_name, name, 373 MEMTYPE_MAXNAME); 374 if (ret != 0) { 375 free(ucp_array); 376 _memstat_mtl_empty(list); 377 list->mtl_error = ret; 378 return (-1); 379 } 380 if (hint_dontsearch == 0) { 381 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 382 name); 383 } else 384 mtp = NULL; 385 if (mtp == NULL) 386 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 387 name, mp_maxid + 1); 388 if (mtp == NULL) { 389 free(ucp_array); 390 _memstat_mtl_empty(list); 391 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 392 return (-1); 393 } 394 /* 395 * Reset the statistics on a current node. 396 */ 397 _memstat_mt_reset_stats(mtp, mp_maxid + 1); 398 mtp->mt_numallocs = uz.uz_allocs; 399 mtp->mt_numfrees = uz.uz_frees; 400 mtp->mt_failures = uz.uz_fails; 401 mtp->mt_sleeps = uz.uz_sleeps; 402 if (kz.uk_flags & UMA_ZFLAG_INTERNAL) 403 goto skip_percpu; 404 for (i = 0; i < mp_maxid + 1; i++) { 405 if (!CPU_ISSET(i, &all_cpus)) 406 continue; 407 ucp = &ucp_array[i]; 408 mtp->mt_numallocs += ucp->uc_allocs; 409 mtp->mt_numfrees += ucp->uc_frees; 410 411 if (ucp->uc_allocbucket != NULL) { 412 ret = kread(kvm, ucp->uc_allocbucket, 413 &ub, sizeof(ub), 0); 414 if (ret != 0) { 415 free(ucp_array); 416 _memstat_mtl_empty(list); 417 list->mtl_error = ret; 418 return (-1); 419 } 420 mtp->mt_free += ub.ub_cnt; 421 } 422 if (ucp->uc_freebucket != NULL) { 423 ret = kread(kvm, ucp->uc_freebucket, 424 &ub, sizeof(ub), 0); 425 if (ret != 0) { 426 free(ucp_array); 427 _memstat_mtl_empty(list); 428 list->mtl_error = ret; 429 return (-1); 430 } 431 mtp->mt_free += ub.ub_cnt; 432 } 433 } 434 skip_percpu: 435 mtp->mt_size = kz.uk_size; 436 
mtp->mt_rsize = kz.uk_rsize; 437 mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size; 438 mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size; 439 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 440 if (kz.uk_ppera > 1) 441 mtp->mt_countlimit = kz.uk_maxpages / 442 kz.uk_ipers; 443 else 444 mtp->mt_countlimit = kz.uk_maxpages * 445 kz.uk_ipers; 446 mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size; 447 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 448 for (ubp = LIST_FIRST(&uz.uz_buckets); ubp != 449 NULL; ubp = LIST_NEXT(&ub, ub_link)) { 450 ret = kread(kvm, ubp, &ub, sizeof(ub), 0); 451 mtp->mt_zonefree += ub.ub_cnt; 452 } 453 if (!((kz.uk_flags & UMA_ZONE_SECONDARY) && 454 LIST_FIRST(&kz.uk_zones) != uzp)) { 455 mtp->mt_kegfree = kz.uk_free; 456 mtp->mt_free += mtp->mt_kegfree; 457 } 458 mtp->mt_free += mtp->mt_zonefree; 459 } 460 } 461 free(ucp_array); 462 return (0); 463 } 464