/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2010 The FreeBSD Foundation
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/eventhandler.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/loginclass.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/umtxvar.h>
#include <machine/smp.h>

#ifdef RCTL
#include <sys/rctl.h>
#endif

#ifdef RACCT

FEATURE(racct, "Resource Accounting");

/*
 * Do not block processes that have their %cpu usage <= pcpu_threshold.
 */
static int pcpu_threshold = 1;
#ifdef RACCT_DEFAULT_TO_DISABLED
bool __read_frequently racct_enable = false;
#else
bool __read_frequently racct_enable = true;
#endif

SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Resource Accounting");
SYSCTL_BOOL(_kern_racct, OID_AUTO, enable, CTLFLAG_RDTUN, &racct_enable,
    0, "Enable RACCT/RCTL");
SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold,
    0, "Processes with higher %cpu usage than this value can be throttled.");

/*
 * How many seconds must pass before we start using the scheduler %cpu
 * calculations.  When a process starts, we compute its %cpu usage by
 * dividing its runtime by the process wall clock time.  After
 * RACCT_PCPU_SECS pass, we use the value provided by the scheduler.
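 *
 * For example (illustrative): the early estimate is computed in racctd()
 * and racct_proc_exit() as (1000000 * runtime * 100) / wallclock, with
 * both times in microseconds; 1.5 s of runtime over a 3 s lifetime gives
 * 50 * 1000000, i.e. 50% in the RACCT_IN_MILLIONS representation used
 * for RACCT_PCTCPU.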
 */
#define RACCT_PCPU_SECS		3

struct mtx racct_lock;
MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);

static uma_zone_t racct_zone;

static void racct_sub_racct(struct racct *dest, const struct racct *src);
static void racct_sub_cred_locked(struct ucred *cred, int resource,
		uint64_t amount);
static void racct_add_cred_locked(struct ucred *cred, int resource,
		uint64_t amount);

SDT_PROVIDER_DEFINE(racct);
SDT_PROBE_DEFINE3(racct, , rusage, add,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, add__failure,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, add__buf,
    "struct proc *", "const struct buf *", "int");
SDT_PROBE_DEFINE3(racct, , rusage, add__cred,
    "struct ucred *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, add__force,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, set,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, set__failure,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, set__force,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, sub,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, sub__cred,
    "struct ucred *", "int", "uint64_t");
SDT_PROBE_DEFINE1(racct, , racct, create,
    "struct racct *");
SDT_PROBE_DEFINE1(racct, , racct, destroy,
    "struct racct *");
SDT_PROBE_DEFINE2(racct, , racct, join,
    "struct racct *", "struct racct *");
SDT_PROBE_DEFINE2(racct, , racct, join__failure,
    "struct racct *", "struct racct *");
SDT_PROBE_DEFINE2(racct, , racct, leave,
    "struct racct *", "struct racct *");

int racct_types[] = {
	[RACCT_CPU] =
		RACCT_IN_MILLIONS,
	[RACCT_DATA] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_STACK] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_CORE] =
		RACCT_DENIABLE,
	[RACCT_RSS] =
		RACCT_RECLAIMABLE,
	[RACCT_MEMLOCK] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NPROC] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NOFILE] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_VMEM] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NPTS] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SWAP] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NTHR] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_MSGQQUEUED] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_MSGQSIZE] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NMSGQ] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEM] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEMOP] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NSHM] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SHMSIZE] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_WALLCLOCK] =
		RACCT_IN_MILLIONS,
	[RACCT_PCTCPU] =
		RACCT_DECAYING | RACCT_DENIABLE | RACCT_IN_MILLIONS,
	[RACCT_READBPS] =
		RACCT_DECAYING,
	[RACCT_WRITEBPS] =
		RACCT_DECAYING,
	[RACCT_READIOPS] =
		RACCT_DECAYING,
	[RACCT_WRITEIOPS] =
		RACCT_DECAYING };

static const fixpt_t RACCT_DECAY_FACTOR = 0.3 * FSCALE;

#ifdef SCHED_4BSD
/*
 * Contains intermediate values for %cpu calculations to avoid using floating
 * point in the kernel.
 * ccpu_exp[k] = FSCALE * (ccpu/FSCALE)^k = FSCALE * exp(-k/20)
 * It is needed only for the 4BSD scheduler, because in ULE the ccpu is equal
 * to zero, so the calculations are more straightforward.
 */
fixpt_t ccpu_exp[] = {
	[0] = FSCALE * 1,
	[1] = FSCALE * 0.95122942450071400909,
	[2] = FSCALE * 0.90483741803595957316,
	[3] = FSCALE * 0.86070797642505780722,
	[4] = FSCALE * 0.81873075307798185866,
	[5] = FSCALE * 0.77880078307140486824,
	[6] = FSCALE * 0.74081822068171786606,
	[7] = FSCALE * 0.70468808971871343435,
	[8] = FSCALE * 0.67032004603563930074,
	[9] = FSCALE * 0.63762815162177329314,
	[10] = FSCALE * 0.60653065971263342360,
	[11] = FSCALE * 0.57694981038048669531,
	[12] = FSCALE * 0.54881163609402643262,
	[13] = FSCALE * 0.52204577676101604789,
	[14] = FSCALE * 0.49658530379140951470,
	[15] = FSCALE * 0.47236655274101470713,
	[16] = FSCALE * 0.44932896411722159143,
	[17] = FSCALE * 0.42741493194872666992,
	[18] = FSCALE * 0.40656965974059911188,
	[19] = FSCALE * 0.38674102345450120691,
	[20] = FSCALE * 0.36787944117144232159,
	[21] = FSCALE * 0.34993774911115535467,
	[22] = FSCALE * 0.33287108369807955328,
	[23] = FSCALE * 0.31663676937905321821,
	[24] = FSCALE * 0.30119421191220209664,
	[25] = FSCALE * 0.28650479686019010032,
	[26] = FSCALE * 0.27253179303401260312,
	[27] = FSCALE * 0.25924026064589150757,
	[28] = FSCALE * 0.24659696394160647693,
	[29] = FSCALE * 0.23457028809379765313,
	[30] = FSCALE * 0.22313016014842982893,
	[31] = FSCALE * 0.21224797382674305771,
	[32] = FSCALE * 0.20189651799465540848,
	[33] = FSCALE * 0.19204990862075411423,
	[34] = FSCALE * 0.18268352405273465022,
	[35] = FSCALE * 0.17377394345044512668,
	[36] = FSCALE * 0.16529888822158653829,
	[37] = FSCALE * 0.15723716631362761621,
	[38] = FSCALE * 0.14956861922263505264,
	[39] = FSCALE * 0.14227407158651357185,
	[40] = FSCALE * 0.13533528323661269189,
	[41] = FSCALE * 0.12873490358780421886,
	[42] = FSCALE * 0.12245642825298191021,
	[43] = FSCALE * 0.11648415777349695786,
	[44] = FSCALE * 0.11080315836233388333,
	[45] = FSCALE * 0.10539922456186433678,
	[46] = FSCALE * 0.10025884372280373372,
	[47] = FSCALE * 0.09536916221554961888,
	[48] = FSCALE * 0.09071795328941250337,
	[49] = FSCALE * 0.08629358649937051097,
	[50] = FSCALE * 0.08208499862389879516,
	[51] = FSCALE * 0.07808166600115315231,
	[52] = FSCALE * 0.07427357821433388042,
	[53] = FSCALE * 0.07065121306042958674,
	[54] = FSCALE * 0.06720551273974976512,
	[55] = FSCALE * 0.06392786120670757270,
	[56] = FSCALE * 0.06081006262521796499,
	[57] = FSCALE * 0.05784432087483846296,
	[58] = FSCALE * 0.05502322005640722902,
	[59] = FSCALE * 0.05233970594843239308,
	[60] = FSCALE * 0.04978706836786394297,
	[61] = FSCALE * 0.04735892439114092119,
	[62] = FSCALE * 0.04504920239355780606,
	[63] = FSCALE * 0.04285212686704017991,
	[64] = FSCALE * 0.04076220397836621516,
	[65] = FSCALE * 0.03877420783172200988,
	[66] = FSCALE * 0.03688316740124000544,
	[67] = FSCALE * 0.03508435410084502588,
	[68] = FSCALE * 0.03337326996032607948,
	[69] = FSCALE * 0.03174563637806794323,
	[70] = FSCALE * 0.03019738342231850073,
	[71] = FSCALE * 0.02872463965423942912,
	[72] = FSCALE * 0.02732372244729256080,
	[73] = FSCALE * 0.02599112877875534358,
	[74] = FSCALE * 0.02472352647033939120,
	[75] = FSCALE * 0.02351774585600910823,
	[76] = FSCALE * 0.02237077185616559577,
	[77] = FSCALE * 0.02127973643837716938,
	[78] = FSCALE * 0.02024191144580438847,
	[79] = FSCALE * 0.01925470177538692429,
	[80] = FSCALE * 0.01831563888873418029,
	[81] = FSCALE * 0.01742237463949351138,
	[82] = FSCALE * 0.01657267540176124754,
	[83] = FSCALE * 0.01576441648485449082,
	[84] = FSCALE * 0.01499557682047770621,
	[85] = FSCALE * 0.01426423390899925527,
	[86] = FSCALE * 0.01356855901220093175,
	[87] = FSCALE * 0.01290681258047986886,
	[88] = FSCALE * 0.01227733990306844117,
	[89] = FSCALE * 0.01167856697039544521,
	[90] = FSCALE * 0.01110899653824230649,
	[91] = FSCALE * 0.01056720438385265337,
	[92] = FSCALE * 0.01005183574463358164,
	[93] = FSCALE * 0.00956160193054350793,
	[94] = FSCALE * 0.00909527710169581709,
	[95] = FSCALE * 0.00865169520312063417,
	[96] = FSCALE * 0.00822974704902002884,
	[97] = FSCALE * 0.00782837754922577143,
	[98] = FSCALE * 0.00744658307092434051,
	[99] = FSCALE * 0.00708340892905212004,
	[100] = FSCALE * 0.00673794699908546709,
	[101] = FSCALE * 0.00640933344625638184,
	[102] = FSCALE * 0.00609674656551563610,
	[103] = FSCALE * 0.00579940472684214321,
	[104] = FSCALE * 0.00551656442076077241,
	[105] = FSCALE * 0.00524751839918138427,
	[106] = FSCALE * 0.00499159390691021621,
	[107] = FSCALE * 0.00474815099941147558,
	[108] = FSCALE * 0.00451658094261266798,
	[109] = FSCALE * 0.00429630469075234057,
	[110] = FSCALE * 0.00408677143846406699,
};
#endif

#define CCPU_EXP_MAX	110

/*
 * This function is analogous to the getpcpu() function in the ps(1) command.
 * They should both calculate in the same way so that the racct %cpu
 * calculations are consistent with the values shown by the ps(1) tool.
 * The calculations are more complex in the 4BSD scheduler because of the
 * value of the ccpu variable.  In ULE it is defined to be zero, which saves
 * us some work.
 */
static uint64_t
racct_getpcpu(struct proc *p, u_int pcpu)
{
	u_int swtime;
#ifdef SCHED_4BSD
	fixpt_t pctcpu, pctcpu_next;
#endif
#ifdef SMP
	struct pcpu *pc;
	int found;
#endif
	fixpt_t p_pctcpu;
	struct thread *td;

	ASSERT_RACCT_ENABLED();

	/*
	 * If the process is swapped out, we count its %cpu usage as zero.
	 * This behaviour is consistent with the userland ps(1) tool.
	 */
	if ((p->p_flag & P_INMEM) == 0)
		return (0);
	swtime = (ticks - p->p_swtick) / hz;

	/*
	 * For short-lived processes, sched_pctcpu() returns small values
	 * even for cpu-intensive ones.  Therefore we use our own estimate
	 * in this case.
	 */
	if (swtime < RACCT_PCPU_SECS)
		return (pcpu);

	p_pctcpu = 0;
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td == PCPU_GET(idlethread))
			continue;
#ifdef SMP
		found = 0;
		STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
			if (td == pc->pc_idlethread) {
				found = 1;
				break;
			}
		}
		if (found)
			continue;
#endif
		thread_lock(td);
#ifdef SCHED_4BSD
		pctcpu = sched_pctcpu(td);
		/* Count also the yet unfinished second. */
		pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT;
		pctcpu_next += sched_pctcpu_delta(td);
		p_pctcpu += max(pctcpu, pctcpu_next);
#else
		/*
		 * In ULE the %cpu statistics are updated on every
		 * sched_pctcpu() call.  So special calculations to account
		 * for the latest (unfinished) second are not needed.
		 */
		p_pctcpu += sched_pctcpu(td);
#endif
		thread_unlock(td);
	}

#ifdef SCHED_4BSD
	if (swtime <= CCPU_EXP_MAX)
		return ((100 * (uint64_t)p_pctcpu * 1000000) /
		    (FSCALE - ccpu_exp[swtime]));
#endif

	return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE);
}

static void
racct_add_racct(struct racct *dest, const struct racct *src)
{
	int i;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

	/*
	 * Update resource usage in dest.
	 */
	for (i = 0; i <= RACCT_MAX; i++) {
		KASSERT(dest->r_resources[i] >= 0,
		    ("%s: resource %d propagation meltdown: dest < 0",
		    __func__, i));
		KASSERT(src->r_resources[i] >= 0,
		    ("%s: resource %d propagation meltdown: src < 0",
		    __func__, i));
		dest->r_resources[i] += src->r_resources[i];
	}
}

static void
racct_sub_racct(struct racct *dest, const struct racct *src)
{
	int i;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

	/*
	 * Update resource usage in dest.
	 */
	for (i = 0; i <= RACCT_MAX; i++) {
		if (!RACCT_IS_SLOPPY(i) && !RACCT_IS_DECAYING(i)) {
			KASSERT(dest->r_resources[i] >= 0,
			    ("%s: resource %d propagation meltdown: dest < 0",
			    __func__, i));
			KASSERT(src->r_resources[i] >= 0,
			    ("%s: resource %d propagation meltdown: src < 0",
			    __func__, i));
			KASSERT(src->r_resources[i] <= dest->r_resources[i],
			    ("%s: resource %d propagation meltdown: src > dest",
			    __func__, i));
		}
		if (RACCT_CAN_DROP(i)) {
			dest->r_resources[i] -= src->r_resources[i];
			if (dest->r_resources[i] < 0)
				dest->r_resources[i] = 0;
		}
	}
}

void
racct_create(struct racct **racctp)
{

	if (!racct_enable)
		return;

	SDT_PROBE1(racct, , racct, create, racctp);

	KASSERT(*racctp == NULL, ("racct already allocated"));

	*racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
}

static void
racct_destroy_locked(struct racct **racctp)
{
	struct racct *racct;
	int i;

	ASSERT_RACCT_ENABLED();

	SDT_PROBE1(racct, , racct, destroy, racctp);

	RACCT_LOCK_ASSERT();
	KASSERT(racctp != NULL, ("NULL racctp"));
	KASSERT(*racctp != NULL, ("NULL racct"));

	racct = *racctp;

	for (i = 0; i <= RACCT_MAX; i++) {
		if (RACCT_IS_SLOPPY(i))
			continue;
		if (!RACCT_IS_RECLAIMABLE(i))
			continue;
		KASSERT(racct->r_resources[i] == 0,
		    ("destroying non-empty racct: "
		    "%ju allocated for resource %d\n",
		    racct->r_resources[i], i));
	}
	uma_zfree(racct_zone, racct);
	*racctp = NULL;
}

void
racct_destroy(struct racct **racct)
{

	if (!racct_enable)
		return;

	RACCT_LOCK();
	racct_destroy_locked(racct);
	RACCT_UNLOCK();
}

/*
 * Increase consumption of 'resource' by 'amount' for 'racct',
 * but not its parents.  Unlike in other cases, 'amount' here
 * may be less than zero.
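 *
 * Callers decrease usage by passing a negated amount; for example,
 * racct_sub_cred_locked() below calls this with -amount for each
 * per-credential container.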
 */
static void
racct_adjust_resource(struct racct *racct, int resource,
    int64_t amount)
{

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();
	KASSERT(racct != NULL, ("NULL racct"));

	racct->r_resources[resource] += amount;
	if (racct->r_resources[resource] < 0) {
		KASSERT(RACCT_IS_SLOPPY(resource) || RACCT_IS_DECAYING(resource),
		    ("%s: resource %d usage < 0", __func__, resource));
		racct->r_resources[resource] = 0;
	}

	/*
	 * There are some cases where the racct %cpu resource would grow
	 * beyond 100% per core.  For example in racct_proc_exit() we add
	 * the process %cpu usage to the ucred racct containers.  If too
	 * many processes terminated in a short time span, the ucred %cpu
	 * resource could grow too much.  Also, the 4BSD scheduler sometimes
	 * returns more than 100% cpu usage for a thread.  So we set a sane
	 * boundary here to 100% * the maximum number of CPUs.
	 */
	if ((resource == RACCT_PCTCPU) &&
	    (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000 * (int64_t)MAXCPU))
		racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 * (int64_t)MAXCPU;
}

static int
racct_add_locked(struct proc *p, int resource, uint64_t amount, int force)
{
#ifdef RCTL
	int error;
#endif

	ASSERT_RACCT_ENABLED();

	/*
	 * We need proc lock to dereference p->p_ucred.
	 */
	PROC_LOCK_ASSERT(p, MA_OWNED);

#ifdef RCTL
	error = rctl_enforce(p, resource, amount);
	if (error && !force && RACCT_IS_DENIABLE(resource)) {
		SDT_PROBE3(racct, , rusage, add__failure, p, resource, amount);
		return (error);
	}
#endif
	racct_adjust_resource(p->p_racct, resource, amount);
	racct_add_cred_locked(p->p_ucred, resource, amount);

	return (0);
}

/*
 * Increase allocation of 'resource' by 'amount' for process 'p'.
 * Return 0 if it's below limits, or errno, if it's not.
 */
int
racct_add(struct proc *p, int resource, uint64_t amount)
{
	int error;

	if (!racct_enable)
		return (0);

	SDT_PROBE3(racct, , rusage, add, p, resource, amount);

	RACCT_LOCK();
	error = racct_add_locked(p, resource, amount, 0);
	RACCT_UNLOCK();
	return (error);
}

/*
 * Increase allocation of 'resource' by 'amount' for process 'p'.
 * Doesn't check for limits and never fails.
 */
void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{

	if (!racct_enable)
		return;

	SDT_PROBE3(racct, , rusage, add__force, p, resource, amount);

	RACCT_LOCK();
	racct_add_locked(p, resource, amount, 1);
	RACCT_UNLOCK();
}

static void
racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
{
	struct prison *pr;

	ASSERT_RACCT_ENABLED();

	racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
		racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource,
		    amount);
	racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, amount);
}

/*
 * Increase allocation of 'resource' by 'amount' for credential 'cred'.
 * Doesn't check for limits and never fails.
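 *
 * Illustrative use: subsystems that charge objects to a credential rather
 * than to a single process (RACCT_NPTS-style sloppy resources, for
 * instance) can call this on allocation and racct_sub_cred() on release,
 * since the object may outlive the allocating process.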
 */
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{

	if (!racct_enable)
		return;

	SDT_PROBE3(racct, , rusage, add__cred, cred, resource, amount);

	RACCT_LOCK();
	racct_add_cred_locked(cred, resource, amount);
	RACCT_UNLOCK();
}

/*
 * Account for disk IO resource consumption.  Checks for limits,
 * but never fails, due to disk limits being undeniable.
 */
void
racct_add_buf(struct proc *p, const struct buf *bp, int is_write)
{

	ASSERT_RACCT_ENABLED();
	PROC_LOCK_ASSERT(p, MA_OWNED);

	SDT_PROBE3(racct, , rusage, add__buf, p, bp, is_write);

	RACCT_LOCK();
	if (is_write) {
		racct_add_locked(curproc, RACCT_WRITEBPS, bp->b_bcount, 1);
		racct_add_locked(curproc, RACCT_WRITEIOPS, 1, 1);
	} else {
		racct_add_locked(curproc, RACCT_READBPS, bp->b_bcount, 1);
		racct_add_locked(curproc, RACCT_READIOPS, 1, 1);
	}
	RACCT_UNLOCK();
}

static int
racct_set_locked(struct proc *p, int resource, uint64_t amount, int force)
{
	int64_t old_amount, decayed_amount, diff_proc, diff_cred;
#ifdef RCTL
	int error;
#endif

	ASSERT_RACCT_ENABLED();

	/*
	 * We need proc lock to dereference p->p_ucred.
	 */
	PROC_LOCK_ASSERT(p, MA_OWNED);

	old_amount = p->p_racct->r_resources[resource];
	/*
	 * The diffs may be negative.
	 */
	diff_proc = amount - old_amount;
	if (resource == RACCT_PCTCPU) {
		/*
		 * Resources in per-credential racct containers may decay.
		 * If this is the case, we need to calculate the difference
		 * between the new amount and the proportional value of the
		 * old amount that has decayed in the ucred racct containers.
		 */
		decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE;
		diff_cred = amount - decayed_amount;
	} else
		diff_cred = diff_proc;
#ifdef notyet
	KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource),
	    ("%s: usage of non-droppable resource %d dropping", __func__,
	    resource));
#endif
#ifdef RCTL
	if (diff_proc > 0) {
		error = rctl_enforce(p, resource, diff_proc);
		if (error && !force && RACCT_IS_DENIABLE(resource)) {
			SDT_PROBE3(racct, , rusage, set__failure, p, resource,
			    amount);
			return (error);
		}
	}
#endif
	racct_adjust_resource(p->p_racct, resource, diff_proc);
	if (diff_cred > 0)
		racct_add_cred_locked(p->p_ucred, resource, diff_cred);
	else if (diff_cred < 0)
		racct_sub_cred_locked(p->p_ucred, resource, -diff_cred);

	return (0);
}

/*
 * Set allocation of 'resource' to 'amount' for process 'p'.
 * Return 0 if it's below limits, or errno, if it's not.
 *
 * Note that decreasing the allocation always returns 0,
 * even if it's above the limit.
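 *
 * For example (illustrative), a caller tracking address space size could
 * keep RACCT_VMEM current with racct_set(p, RACCT_VMEM, new_total), where
 * new_total is the updated mapping size; only an increase of the
 * allocation can fail against a limit.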
 */
int
racct_set_unlocked(struct proc *p, int resource, uint64_t amount)
{
	int error;

	ASSERT_RACCT_ENABLED();
	PROC_LOCK(p);
	error = racct_set(p, resource, amount);
	PROC_UNLOCK(p);
	return (error);
}

int
racct_set(struct proc *p, int resource, uint64_t amount)
{
	int error;

	if (!racct_enable)
		return (0);

	SDT_PROBE3(racct, , rusage, set__force, p, resource, amount);

	RACCT_LOCK();
	error = racct_set_locked(p, resource, amount, 0);
	RACCT_UNLOCK();
	return (error);
}

void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{

	if (!racct_enable)
		return;

	SDT_PROBE3(racct, , rusage, set, p, resource, amount);

	RACCT_LOCK();
	racct_set_locked(p, resource, amount, 1);
	RACCT_UNLOCK();
}

/*
 * Returns amount of 'resource' the process 'p' can keep allocated.
 * Allocating more than that would be denied, unless the resource
 * is marked undeniable.  Amount of already allocated resource does
 * not matter.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{
#ifdef RCTL
	uint64_t available;

	if (!racct_enable)
		return (UINT64_MAX);

	RACCT_LOCK();
	available = rctl_get_limit(p, resource);
	RACCT_UNLOCK();

	return (available);
#else

	return (UINT64_MAX);
#endif
}

/*
 * Returns amount of 'resource' the process 'p' can keep allocated.
 * Allocating more than that would be denied, unless the resource
 * is marked undeniable.  Amount of already allocated resource does
 * matter.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{
#ifdef RCTL
	uint64_t available;

	if (!racct_enable)
		return (UINT64_MAX);

	RACCT_LOCK();
	available = rctl_get_available(p, resource);
	RACCT_UNLOCK();

	return (available);
#else

	return (UINT64_MAX);
#endif
}

/*
 * Returns amount of the %cpu resource that process 'p' can add to its %cpu
 * utilization.  Adding more than that would lead to the process being
 * throttled.
 */
static int64_t
racct_pcpu_available(struct proc *p)
{
#ifdef RCTL
	uint64_t available;

	ASSERT_RACCT_ENABLED();

	RACCT_LOCK();
	available = rctl_pcpu_available(p);
	RACCT_UNLOCK();

	return (available);
#else

	return (INT64_MAX);
#endif
}

/*
 * Decrease allocation of 'resource' by 'amount' for process 'p'.
 */
void
racct_sub(struct proc *p, int resource, uint64_t amount)
{

	if (!racct_enable)
		return;

	SDT_PROBE3(racct, , rusage, sub, p, resource, amount);

	/*
	 * We need proc lock to dereference p->p_ucred.
	 */
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(RACCT_CAN_DROP(resource),
	    ("%s: called for non-droppable resource %d", __func__, resource));

	RACCT_LOCK();
	KASSERT(amount <= p->p_racct->r_resources[resource],
	    ("%s: freeing %ju of resource %d, which is more "
	    "than allocated %jd for %s (pid %d)", __func__, amount, resource,
	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));

	racct_adjust_resource(p->p_racct, resource, -amount);
	racct_sub_cred_locked(p->p_ucred, resource, amount);
	RACCT_UNLOCK();
}

static void
racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
{
	struct prison *pr;

	ASSERT_RACCT_ENABLED();

	racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
		racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource,
		    -amount);
	racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, -amount);
}

/*
 * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
 */
void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{

	if (!racct_enable)
		return;

	SDT_PROBE3(racct, , rusage, sub__cred, cred, resource, amount);

#ifdef notyet
	KASSERT(RACCT_CAN_DROP(resource),
	    ("%s: called for resource %d which can not drop", __func__,
	    resource));
#endif

	RACCT_LOCK();
	racct_sub_cred_locked(cred, resource, amount);
	RACCT_UNLOCK();
}

/*
 * Inherit resource usage information from the parent process.
 */
int
racct_proc_fork(struct proc *parent, struct proc *child)
{
	int i, error = 0;

	if (!racct_enable)
		return (0);

	/*
	 * Create racct for the child process.
	 */
	racct_create(&child->p_racct);

	PROC_LOCK(parent);
	PROC_LOCK(child);
	RACCT_LOCK();

#ifdef RCTL
	error = rctl_proc_fork(parent, child);
	if (error != 0)
		goto out;
#endif

	/* Init process cpu time. */
	child->p_prev_runtime = 0;
	child->p_throttled = 0;

	/*
	 * Inherit resource usage.
	 */
	for (i = 0; i <= RACCT_MAX; i++) {
		if (parent->p_racct->r_resources[i] == 0 ||
		    !RACCT_IS_INHERITABLE(i))
			continue;

		error = racct_set_locked(child, i,
		    parent->p_racct->r_resources[i], 0);
		if (error != 0)
			goto out;
	}

	error = racct_add_locked(child, RACCT_NPROC, 1, 0);
	error += racct_add_locked(child, RACCT_NTHR, 1, 0);

out:
	RACCT_UNLOCK();
	PROC_UNLOCK(child);
	PROC_UNLOCK(parent);

	if (error != 0)
		racct_proc_exit(child);

	return (error);
}

/*
 * Called at the end of fork1(), to handle rules that require the process
 * to be fully initialized.
 */
void
racct_proc_fork_done(struct proc *child)
{

	if (!racct_enable)
		return;

#ifdef RCTL
	PROC_LOCK(child);
	RACCT_LOCK();
	rctl_enforce(child, RACCT_NPROC, 0);
	rctl_enforce(child, RACCT_NTHR, 0);
	RACCT_UNLOCK();
	PROC_UNLOCK(child);
#endif
}

void
racct_proc_exit(struct proc *p)
{
	struct timeval wallclock;
	uint64_t pct_estimate, pct, runtime;
	int i;

	if (!racct_enable)
		return;

	PROC_LOCK(p);
	/*
	 * We don't need to calculate rux, proc_reap() has already done this.
	 */
	runtime = cputick2usec(p->p_rux.rux_runtime);
#ifdef notyet
	KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
#else
	if (runtime < p->p_prev_runtime)
		runtime = p->p_prev_runtime;
#endif
	microuptime(&wallclock);
	timevalsub(&wallclock, &p->p_stats->p_start);
	if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
		pct_estimate = (1000000 * runtime * 100) /
		    ((uint64_t)wallclock.tv_sec * 1000000 +
		    wallclock.tv_usec);
	} else
		pct_estimate = 0;
	pct = racct_getpcpu(p, pct_estimate);

	RACCT_LOCK();
	racct_set_locked(p, RACCT_CPU, runtime, 0);
	racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct);

	KASSERT(p->p_racct->r_resources[RACCT_RSS] == 0,
	    ("process reaped with %ju allocated for RSS\n",
	    p->p_racct->r_resources[RACCT_RSS]));
	for (i = 0; i <= RACCT_MAX; i++) {
		if (p->p_racct->r_resources[i] == 0)
			continue;
		if (!RACCT_IS_RECLAIMABLE(i))
			continue;
		racct_set_locked(p, i, 0, 0);
	}

#ifdef RCTL
	rctl_racct_release(p->p_racct);
#endif
	racct_destroy_locked(&p->p_racct);
	RACCT_UNLOCK();
	PROC_UNLOCK(p);
}

/*
 * Called after credentials change, to move resource utilisation
 * between raccts.
 */
void
racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
    struct ucred *newcred)
{
	struct uidinfo *olduip, *newuip;
	struct loginclass *oldlc, *newlc;
	struct prison *oldpr, *newpr, *pr;

	if (!racct_enable)
		return;

	PROC_LOCK_ASSERT(p, MA_OWNED);

	newuip = newcred->cr_ruidinfo;
	olduip = oldcred->cr_ruidinfo;
	newlc = newcred->cr_loginclass;
	oldlc = oldcred->cr_loginclass;
	newpr = newcred->cr_prison;
	oldpr = oldcred->cr_prison;

	RACCT_LOCK();
	if (newuip != olduip) {
		racct_sub_racct(olduip->ui_racct, p->p_racct);
		racct_add_racct(newuip->ui_racct, p->p_racct);
	}
	if (newlc != oldlc) {
		racct_sub_racct(oldlc->lc_racct, p->p_racct);
		racct_add_racct(newlc->lc_racct, p->p_racct);
	}
	if (newpr != oldpr) {
		for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
			racct_sub_racct(pr->pr_prison_racct->prr_racct,
			    p->p_racct);
		for (pr = newpr; pr != NULL; pr = pr->pr_parent)
			racct_add_racct(pr->pr_prison_racct->prr_racct,
			    p->p_racct);
	}
	RACCT_UNLOCK();
}

void
racct_move(struct racct *dest, struct racct *src)
{

	ASSERT_RACCT_ENABLED();

	RACCT_LOCK();
	racct_add_racct(dest, src);
	racct_sub_racct(src, src);
	RACCT_UNLOCK();
}

static void
ast_racct(struct thread *td, int tda __unused)
{
	struct proc *p;

	ASSERT_RACCT_ENABLED();

	p = td->td_proc;
	if (p->p_throttled == 0)
		return;

	PROC_LOCK(p);
	while (p->p_throttled != 0) {
		msleep(p->p_racct, &p->p_mtx, 0, "racct",
		    p->p_throttled < 0 ? 0 : p->p_throttled);
		if (p->p_throttled > 0)
			p->p_throttled = 0;
	}
	PROC_UNLOCK(p);
}

/*
 * Make the process sleep in userret() for 'timeout' ticks.  Setting
 * timeout to -1 makes it sleep until woken up by racct_proc_wakeup().
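 *
 * racctd() below passes a timeout of -1 when a process exceeds its %cpu
 * limit; finite timeouts are intended for transient throttling (e.g. by
 * RCTL for I/O rate limits).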
 */
void
racct_proc_throttle(struct proc *p, int timeout)
{
	struct thread *td;
#ifdef SMP
	int cpuid;
#endif

	KASSERT(timeout != 0, ("timeout %d", timeout));
	ASSERT_RACCT_ENABLED();
	PROC_LOCK_ASSERT(p, MA_OWNED);

	/*
	 * Do not block kernel processes.  Also do not block processes with
	 * low %cpu utilization to improve interactivity.
	 */
	if ((p->p_flag & (P_SYSTEM | P_KPROC)) != 0)
		return;

	if (p->p_throttled < 0 || (timeout > 0 && p->p_throttled > timeout))
		return;

	p->p_throttled = timeout;

	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		ast_sched_locked(td, TDA_RACCT);

		switch (TD_GET_STATE(td)) {
		case TDS_RUNQ:
			/*
			 * If the thread is on the scheduler run-queue, we can
			 * not just remove it from there.  So we set the flag
			 * TDA_SCHED for the thread, so that once it is
			 * running, it is taken off the cpu as soon as
			 * possible.
			 */
			ast_sched_locked(td, TDA_SCHED);
			break;
		case TDS_RUNNING:
			/*
			 * If the thread is running, we request a context
			 * switch for it by setting the TDA_SCHED flag.
			 */
			ast_sched_locked(td, TDA_SCHED);
#ifdef SMP
			cpuid = td->td_oncpu;
			if ((cpuid != NOCPU) && (td != curthread))
				ipi_cpu(cpuid, IPI_AST);
#endif
			break;
		default:
			break;
		}
		thread_unlock(td);
	}
}

static void
racct_proc_wakeup(struct proc *p)
{

	ASSERT_RACCT_ENABLED();

	PROC_LOCK_ASSERT(p, MA_OWNED);

	if (p->p_throttled != 0) {
		p->p_throttled = 0;
		wakeup(p->p_racct);
	}
}

static void
racct_decay_callback(struct racct *racct, void *dummy1, void *dummy2)
{
	int64_t r_old, r_new;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

#ifdef RCTL
	rctl_throttle_decay(racct, RACCT_READBPS);
	rctl_throttle_decay(racct, RACCT_WRITEBPS);
	rctl_throttle_decay(racct, RACCT_READIOPS);
	rctl_throttle_decay(racct, RACCT_WRITEIOPS);
#endif

	r_old = racct->r_resources[RACCT_PCTCPU];

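	/*
	 * Illustrative note: each pass scales the accumulated %cpu by
	 * RACCT_DECAY_FACTOR / FSCALE, i.e. 0.3, so e.g. a reading of 60%
	 * decays to 18% after a single pass of racctd(), which runs roughly
	 * once per second.
	 */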
	/* If there is nothing to decay, just exit. */
	if (r_old <= 0)
		return;

	r_new = r_old * RACCT_DECAY_FACTOR / FSCALE;
	racct->r_resources[RACCT_PCTCPU] = r_new;
}

static void
racct_decay_pre(void)
{

	RACCT_LOCK();
}

static void
racct_decay_post(void)
{

	RACCT_UNLOCK();
}

static void
racct_decay(void)
{

	ASSERT_RACCT_ENABLED();

	ui_racct_foreach(racct_decay_callback, racct_decay_pre,
	    racct_decay_post, NULL, NULL);
	loginclass_racct_foreach(racct_decay_callback, racct_decay_pre,
	    racct_decay_post, NULL, NULL);
	prison_racct_foreach(racct_decay_callback, racct_decay_pre,
	    racct_decay_post, NULL, NULL);
}

static void
racctd(void)
{
	struct thread *td;
	struct proc *p;
	struct timeval wallclock;
	uint64_t pct, pct_estimate, runtime;

	ASSERT_RACCT_ENABLED();

	for (;;) {
		racct_decay();

		sx_slock(&allproc_lock);

		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state != PRS_NORMAL) {
				if (p->p_state == PRS_ZOMBIE)
					racct_set(p, RACCT_PCTCPU, 0);
				PROC_UNLOCK(p);
				continue;
			}

			microuptime(&wallclock);
			timevalsub(&wallclock, &p->p_stats->p_start);
			PROC_STATLOCK(p);
			FOREACH_THREAD_IN_PROC(p, td)
				ruxagg(p, td);
			runtime = cputick2usec(p->p_rux.rux_runtime);
			PROC_STATUNLOCK(p);
#ifdef notyet
			KASSERT(runtime >= p->p_prev_runtime,
			    ("runtime < p_prev_runtime"));
#else
			if (runtime < p->p_prev_runtime)
				runtime = p->p_prev_runtime;
#endif
			p->p_prev_runtime = runtime;
			if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
				pct_estimate = (1000000 * runtime * 100) /
				    ((uint64_t)wallclock.tv_sec * 1000000 +
				    wallclock.tv_usec);
			} else
				pct_estimate = 0;
			pct = racct_getpcpu(p, pct_estimate);
			RACCT_LOCK();
#ifdef RCTL
			rctl_throttle_decay(p->p_racct, RACCT_READBPS);
			rctl_throttle_decay(p->p_racct, RACCT_WRITEBPS);
			rctl_throttle_decay(p->p_racct, RACCT_READIOPS);
			rctl_throttle_decay(p->p_racct, RACCT_WRITEIOPS);
#endif
			racct_set_locked(p, RACCT_PCTCPU, pct, 1);
			racct_set_locked(p, RACCT_CPU, runtime, 0);
			racct_set_locked(p, RACCT_WALLCLOCK,
			    (uint64_t)wallclock.tv_sec * 1000000 +
			    wallclock.tv_usec, 0);
			RACCT_UNLOCK();
			PROC_UNLOCK(p);
		}

		/*
		 * To ensure that processes are throttled in a fair way, we
		 * need to iterate over all processes again and check the
		 * limits for %cpu resource only after ucred racct containers
		 * have been properly filled.
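		 *
		 * A process is throttled only if its own %cpu usage exceeds
		 * pcpu_threshold, so that mostly idle processes stay
		 * responsive even when their ucred containers are over the
		 * limit.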
		 */
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state != PRS_NORMAL) {
				PROC_UNLOCK(p);
				continue;
			}

			if (racct_pcpu_available(p) <= 0) {
				if (p->p_racct->r_resources[RACCT_PCTCPU] >
				    pcpu_threshold)
					racct_proc_throttle(p, -1);
			} else if (p->p_throttled == -1) {
				racct_proc_wakeup(p);
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		pause("-", hz);
	}
}

static struct kproc_desc racctd_kp = {
	"racctd",
	racctd,
	NULL
};

static void
racctd_init(void)
{
	if (!racct_enable)
		return;

	kproc_start(&racctd_kp);
}
SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL);

static void
racct_init(void)
{
	if (!racct_enable)
		return;

	racct_zone = uma_zcreate("racct", sizeof(struct racct),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	ast_register(TDA_RACCT, ASTR_ASTF_REQUIRED, 0, ast_racct);

	/*
	 * XXX: Move this somewhere.
	 */
	prison0.pr_prison_racct = prison_racct_find("0");
}
SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);

#endif /* !RACCT */