1 /*- 2 * Copyright (c) 2005-2006 Robert N. M. Watson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/param.h> 30 #include <sys/cpuset.h> 31 #include <sys/sysctl.h> 32 33 #include <vm/vm.h> 34 #include <vm/vm_page.h> 35 36 #include <vm/uma.h> 37 #include <vm/uma_int.h> 38 39 #include <err.h> 40 #include <errno.h> 41 #include <kvm.h> 42 #include <nlist.h> 43 #include <stddef.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 49 #include "memstat.h" 50 #include "memstat_internal.h" 51 52 static struct nlist namelist[] = { 53 #define X_UMA_KEGS 0 54 { .n_name = "_uma_kegs" }, 55 #define X_MP_MAXID 1 56 { .n_name = "_mp_maxid" }, 57 #define X_ALL_CPUS 2 58 { .n_name = "_all_cpus" }, 59 { .n_name = "" }, 60 }; 61 62 /* 63 * Extract uma(9) statistics from the running kernel, and store all memory 64 * type information in the passed list. For each type, check the list for an 65 * existing entry with the right name/allocator -- if present, update that 66 * entry. Otherwise, add a new entry. On error, the entire list will be 67 * cleared, as entries will be in an inconsistent state. 68 * 69 * To reduce the level of work for a list that starts empty, we keep around a 70 * hint as to whether it was empty when we began, so we can avoid searching 71 * the list for entries to update. Updates are O(n^2) due to searching for 72 * each entry before adding it. 73 */ 74 int 75 memstat_sysctl_uma(struct memory_type_list *list, int flags) 76 { 77 struct uma_stream_header *ushp; 78 struct uma_type_header *uthp; 79 struct uma_percpu_stat *upsp; 80 struct memory_type *mtp; 81 int count, hint_dontsearch, i, j, maxcpus, maxid; 82 char *buffer, *p; 83 size_t size; 84 85 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 86 87 /* 88 * Query the number of CPUs, number of malloc types so that we can 89 * guess an initial buffer size. We loop until we succeed or really 90 * fail. Note that the value of maxcpus we query using sysctl is not 91 * the version we use when processing the real data -- that is read 92 * from the header. 93 */ 94 retry: 95 size = sizeof(maxid); 96 if (sysctlbyname("kern.smp.maxid", &maxid, &size, NULL, 0) < 0) { 97 if (errno == EACCES || errno == EPERM) 98 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 99 else 100 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 101 return (-1); 102 } 103 if (size != sizeof(maxid)) { 104 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 105 return (-1); 106 } 107 108 size = sizeof(count); 109 if (sysctlbyname("vm.zone_count", &count, &size, NULL, 0) < 0) { 110 if (errno == EACCES || errno == EPERM) 111 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 112 else 113 list->mtl_error = MEMSTAT_ERROR_VERSION; 114 return (-1); 115 } 116 if (size != sizeof(count)) { 117 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 118 return (-1); 119 } 120 121 size = sizeof(*uthp) + count * (sizeof(*uthp) + sizeof(*upsp) * 122 (maxid + 1)); 123 124 buffer = malloc(size); 125 if (buffer == NULL) { 126 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 127 return (-1); 128 } 129 130 if (sysctlbyname("vm.zone_stats", buffer, &size, NULL, 0) < 0) { 131 /* 132 * XXXRW: ENOMEM is an ambiguous return, we should bound the 133 * number of loops, perhaps. 134 */ 135 if (errno == ENOMEM) { 136 free(buffer); 137 goto retry; 138 } 139 if (errno == EACCES || errno == EPERM) 140 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 141 else 142 list->mtl_error = MEMSTAT_ERROR_VERSION; 143 free(buffer); 144 return (-1); 145 } 146 147 if (size == 0) { 148 free(buffer); 149 return (0); 150 } 151 152 if (size < sizeof(*ushp)) { 153 list->mtl_error = MEMSTAT_ERROR_VERSION; 154 free(buffer); 155 return (-1); 156 } 157 p = buffer; 158 ushp = (struct uma_stream_header *)p; 159 p += sizeof(*ushp); 160 161 if (ushp->ush_version != UMA_STREAM_VERSION) { 162 list->mtl_error = MEMSTAT_ERROR_VERSION; 163 free(buffer); 164 return (-1); 165 } 166 167 /* 168 * For the remainder of this function, we are quite trusting about 169 * the layout of structures and sizes, since we've determined we have 170 * a matching version and acceptable CPU count. 171 */ 172 maxcpus = ushp->ush_maxcpus; 173 count = ushp->ush_count; 174 for (i = 0; i < count; i++) { 175 uthp = (struct uma_type_header *)p; 176 p += sizeof(*uthp); 177 178 if (hint_dontsearch == 0) { 179 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 180 uthp->uth_name); 181 } else 182 mtp = NULL; 183 if (mtp == NULL) 184 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 185 uthp->uth_name, maxid + 1); 186 if (mtp == NULL) { 187 _memstat_mtl_empty(list); 188 free(buffer); 189 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 190 return (-1); 191 } 192 193 /* 194 * Reset the statistics on a current node. 195 */ 196 _memstat_mt_reset_stats(mtp, maxid + 1); 197 198 mtp->mt_numallocs = uthp->uth_allocs; 199 mtp->mt_numfrees = uthp->uth_frees; 200 mtp->mt_failures = uthp->uth_fails; 201 mtp->mt_sleeps = uthp->uth_sleeps; 202 203 for (j = 0; j < maxcpus; j++) { 204 upsp = (struct uma_percpu_stat *)p; 205 p += sizeof(*upsp); 206 207 mtp->mt_percpu_cache[j].mtp_free = 208 upsp->ups_cache_free; 209 mtp->mt_free += upsp->ups_cache_free; 210 mtp->mt_numallocs += upsp->ups_allocs; 211 mtp->mt_numfrees += upsp->ups_frees; 212 } 213 214 mtp->mt_size = uthp->uth_size; 215 mtp->mt_rsize = uthp->uth_rsize; 216 mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size; 217 mtp->mt_memfreed = mtp->mt_numfrees * uthp->uth_size; 218 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 219 mtp->mt_countlimit = uthp->uth_limit; 220 mtp->mt_byteslimit = uthp->uth_limit * uthp->uth_size; 221 222 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 223 mtp->mt_zonefree = uthp->uth_zone_free; 224 225 /* 226 * UMA secondary zones share a keg with the primary zone. To 227 * avoid double-reporting of free items, report keg free 228 * items only in the primary zone. 229 */ 230 if (!(uthp->uth_zone_flags & UTH_ZONE_SECONDARY)) { 231 mtp->mt_kegfree = uthp->uth_keg_free; 232 mtp->mt_free += mtp->mt_kegfree; 233 } 234 mtp->mt_free += mtp->mt_zonefree; 235 } 236 237 free(buffer); 238 239 return (0); 240 } 241 242 static int 243 kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size, 244 size_t offset) 245 { 246 ssize_t ret; 247 248 ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address, 249 size); 250 if (ret < 0) 251 return (MEMSTAT_ERROR_KVM); 252 if ((size_t)ret != size) 253 return (MEMSTAT_ERROR_KVM_SHORTREAD); 254 return (0); 255 } 256 257 static int 258 kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen) 259 { 260 ssize_t ret; 261 int i; 262 263 for (i = 0; i < buflen; i++) { 264 ret = kvm_read(kvm, (unsigned long)kvm_pointer + i, 265 &(buffer[i]), sizeof(char)); 266 if (ret < 0) 267 return (MEMSTAT_ERROR_KVM); 268 if ((size_t)ret != sizeof(char)) 269 return (MEMSTAT_ERROR_KVM_SHORTREAD); 270 if (buffer[i] == '\0') 271 return (0); 272 } 273 /* Truncate. */ 274 buffer[i-1] = '\0'; 275 return (0); 276 } 277 278 static int 279 kread_symbol(kvm_t *kvm, int index, void *address, size_t size, 280 size_t offset) 281 { 282 ssize_t ret; 283 284 ret = kvm_read(kvm, namelist[index].n_value + offset, address, size); 285 if (ret < 0) 286 return (MEMSTAT_ERROR_KVM); 287 if ((size_t)ret != size) 288 return (MEMSTAT_ERROR_KVM_SHORTREAD); 289 return (0); 290 } 291 292 /* 293 * memstat_kvm_uma() is similar to memstat_sysctl_uma(), only it extracts 294 * UMA(9) statistics from a kernel core/memory file. 295 */ 296 int 297 memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) 298 { 299 LIST_HEAD(, uma_keg) uma_kegs; 300 struct memory_type *mtp; 301 struct uma_bucket *ubp, ub; 302 struct uma_cache *ucp, *ucp_array; 303 struct uma_zone *uzp, uz; 304 struct uma_keg *kzp, kz; 305 int hint_dontsearch, i, mp_maxid, ret; 306 char name[MEMTYPE_MAXNAME]; 307 cpuset_t all_cpus; 308 long cpusetsize; 309 kvm_t *kvm; 310 311 kvm = (kvm_t *)kvm_handle; 312 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 313 if (kvm_nlist(kvm, namelist) != 0) { 314 list->mtl_error = MEMSTAT_ERROR_KVM; 315 return (-1); 316 } 317 if (namelist[X_UMA_KEGS].n_type == 0 || 318 namelist[X_UMA_KEGS].n_value == 0) { 319 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 320 return (-1); 321 } 322 ret = kread_symbol(kvm, X_MP_MAXID, &mp_maxid, sizeof(mp_maxid), 0); 323 if (ret != 0) { 324 list->mtl_error = ret; 325 return (-1); 326 } 327 ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0); 328 if (ret != 0) { 329 list->mtl_error = ret; 330 return (-1); 331 } 332 cpusetsize = sysconf(_SC_CPUSET_SIZE); 333 if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { 334 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 335 return (-1); 336 } 337 CPU_ZERO(&all_cpus); 338 ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0); 339 if (ret != 0) { 340 list->mtl_error = ret; 341 return (-1); 342 } 343 ucp_array = malloc(sizeof(struct uma_cache) * (mp_maxid + 1)); 344 if (ucp_array == NULL) { 345 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 346 return (-1); 347 } 348 for (kzp = LIST_FIRST(&uma_kegs); kzp != NULL; kzp = 349 LIST_NEXT(&kz, uk_link)) { 350 ret = kread(kvm, kzp, &kz, sizeof(kz), 0); 351 if (ret != 0) { 352 free(ucp_array); 353 _memstat_mtl_empty(list); 354 list->mtl_error = ret; 355 return (-1); 356 } 357 for (uzp = LIST_FIRST(&kz.uk_zones); uzp != NULL; uzp = 358 LIST_NEXT(&uz, uz_link)) { 359 ret = kread(kvm, uzp, &uz, sizeof(uz), 0); 360 if (ret != 0) { 361 free(ucp_array); 362 _memstat_mtl_empty(list); 363 list->mtl_error = ret; 364 return (-1); 365 } 366 ret = kread(kvm, uzp, ucp_array, 367 sizeof(struct uma_cache) * (mp_maxid + 1), 368 offsetof(struct uma_zone, uz_cpu[0])); 369 if (ret != 0) { 370 free(ucp_array); 371 _memstat_mtl_empty(list); 372 list->mtl_error = ret; 373 return (-1); 374 } 375 ret = kread_string(kvm, uz.uz_name, name, 376 MEMTYPE_MAXNAME); 377 if (ret != 0) { 378 free(ucp_array); 379 _memstat_mtl_empty(list); 380 list->mtl_error = ret; 381 return (-1); 382 } 383 if (hint_dontsearch == 0) { 384 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 385 name); 386 } else 387 mtp = NULL; 388 if (mtp == NULL) 389 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 390 name, mp_maxid + 1); 391 if (mtp == NULL) { 392 free(ucp_array); 393 _memstat_mtl_empty(list); 394 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 395 return (-1); 396 } 397 /* 398 * Reset the statistics on a current node. 399 */ 400 _memstat_mt_reset_stats(mtp, mp_maxid + 1); 401 mtp->mt_numallocs = uz.uz_allocs; 402 mtp->mt_numfrees = uz.uz_frees; 403 mtp->mt_failures = uz.uz_fails; 404 mtp->mt_sleeps = uz.uz_sleeps; 405 if (kz.uk_flags & UMA_ZFLAG_INTERNAL) 406 goto skip_percpu; 407 for (i = 0; i < mp_maxid + 1; i++) { 408 if (!CPU_ISSET(i, &all_cpus)) 409 continue; 410 ucp = &ucp_array[i]; 411 mtp->mt_numallocs += ucp->uc_allocs; 412 mtp->mt_numfrees += ucp->uc_frees; 413 414 if (ucp->uc_allocbucket != NULL) { 415 ret = kread(kvm, ucp->uc_allocbucket, 416 &ub, sizeof(ub), 0); 417 if (ret != 0) { 418 free(ucp_array); 419 _memstat_mtl_empty(list); 420 list->mtl_error = ret; 421 return (-1); 422 } 423 mtp->mt_free += ub.ub_cnt; 424 } 425 if (ucp->uc_freebucket != NULL) { 426 ret = kread(kvm, ucp->uc_freebucket, 427 &ub, sizeof(ub), 0); 428 if (ret != 0) { 429 free(ucp_array); 430 _memstat_mtl_empty(list); 431 list->mtl_error = ret; 432 return (-1); 433 } 434 mtp->mt_free += ub.ub_cnt; 435 } 436 } 437 skip_percpu: 438 mtp->mt_size = kz.uk_size; 439 mtp->mt_rsize = kz.uk_rsize; 440 mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size; 441 mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size; 442 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 443 if (kz.uk_ppera > 1) 444 mtp->mt_countlimit = kz.uk_maxpages / 445 kz.uk_ipers; 446 else 447 mtp->mt_countlimit = kz.uk_maxpages * 448 kz.uk_ipers; 449 mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size; 450 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 451 for (ubp = LIST_FIRST(&uz.uz_buckets); ubp != 452 NULL; ubp = LIST_NEXT(&ub, ub_link)) { 453 ret = kread(kvm, ubp, &ub, sizeof(ub), 0); 454 mtp->mt_zonefree += ub.ub_cnt; 455 } 456 if (!((kz.uk_flags & UMA_ZONE_SECONDARY) && 457 LIST_FIRST(&kz.uk_zones) != uzp)) { 458 mtp->mt_kegfree = kz.uk_free; 459 mtp->mt_free += mtp->mt_kegfree; 460 } 461 mtp->mt_free += mtp->mt_zonefree; 462 } 463 } 464 free(ucp_array); 465 return (0); 466 } 467