1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2010 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
29 */ 30 31 #include <sys/cdefs.h> 32 #include "opt_sched.h" 33 34 #include <sys/param.h> 35 #include <sys/buf.h> 36 #include <sys/systm.h> 37 #include <sys/eventhandler.h> 38 #include <sys/jail.h> 39 #include <sys/kernel.h> 40 #include <sys/kthread.h> 41 #include <sys/lock.h> 42 #include <sys/loginclass.h> 43 #include <sys/malloc.h> 44 #include <sys/mutex.h> 45 #include <sys/proc.h> 46 #include <sys/racct.h> 47 #include <sys/resourcevar.h> 48 #include <sys/sbuf.h> 49 #include <sys/sched.h> 50 #include <sys/sdt.h> 51 #include <sys/smp.h> 52 #include <sys/sx.h> 53 #include <sys/sysctl.h> 54 #include <sys/sysproto.h> 55 #include <sys/umtxvar.h> 56 #include <machine/smp.h> 57 58 #ifdef RCTL 59 #include <sys/rctl.h> 60 #endif 61 62 #ifdef RACCT 63 64 FEATURE(racct, "Resource Accounting"); 65 66 /* 67 * Do not block processes that have their %cpu usage <= pcpu_threshold. 68 */ 69 static int pcpu_threshold = 1; 70 #ifdef RACCT_DEFAULT_TO_DISABLED 71 bool __read_frequently racct_enable = false; 72 #else 73 bool __read_frequently racct_enable = true; 74 #endif 75 76 SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 77 "Resource Accounting"); 78 SYSCTL_BOOL(_kern_racct, OID_AUTO, enable, CTLFLAG_RDTUN, &racct_enable, 79 0, "Enable RACCT/RCTL"); 80 SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold, 81 0, "Processes with higher %cpu usage than this value can be throttled."); 82 83 /* 84 * How many seconds it takes to use the scheduler %cpu calculations. When a 85 * process starts, we compute its %cpu usage by dividing its runtime by the 86 * process wall clock time. After RACCT_PCPU_SECS pass, we use the value 87 * provided by the scheduler. 
*/
#define	RACCT_PCPU_SECS		3

/*
 * File-wide mutex, set up at boot by MTX_SYSINIT as a default (MTX_DEF)
 * mutex.  Presumably this is what RACCT_LOCK()/RACCT_UNLOCK() operate on;
 * those macros are defined elsewhere -- confirm there.
 */
struct mtx racct_lock;
MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);

/* UMA zone that racct_create() allocates from and racct_destroy_locked()
 * frees to. */
static uma_zone_t racct_zone;

/* Forward declarations for helpers that are used before they are defined. */
static void racct_sub_racct(struct racct *dest, const struct racct *src);
static void racct_sub_cred_locked(struct ucred *cred, int resource,
		uint64_t amount);
static void racct_add_cred_locked(struct ucred *cred, int resource,
		uint64_t amount);

/*
 * DTrace SDT provider and probe definitions.  The string arguments name the
 * C type of each probe argument, so the values handed to the matching
 * SDT_PROBEn() call sites are expected to be of those types.
 */
SDT_PROVIDER_DEFINE(racct);
SDT_PROBE_DEFINE3(racct, , rusage, add,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, add__failure,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, add__buf,
    "struct proc *", "const struct buf *", "int");
SDT_PROBE_DEFINE3(racct, , rusage, add__cred,
    "struct ucred *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, add__force,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, set,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, set__failure,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, set__force,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, sub,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, sub__cred,
    "struct ucred *", "int", "uint64_t");
SDT_PROBE_DEFINE1(racct, , racct, create,
    "struct racct *");
SDT_PROBE_DEFINE1(racct, , racct, destroy,
    "struct racct *");
SDT_PROBE_DEFINE2(racct, , racct, join,
    "struct racct *", "struct racct *");
SDT_PROBE_DEFINE2(racct, , racct, join__failure,
    "struct racct *", "struct racct *");
SDT_PROBE_DEFINE2(racct, , racct, leave,
    "struct racct *", "struct racct *");

/*
 * Per-resource property flags, indexed by RACCT_* resource identifier.
 * Presumably these are what the RACCT_IS_*() and RACCT_CAN_DROP() predicates
 * used throughout this file consult; the predicates are defined elsewhere --
 * confirm there.
 */
int racct_types[] = {
	[RACCT_CPU] =
		RACCT_IN_MILLIONS,
	[RACCT_DATA] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_STACK] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_CORE] =
		RACCT_DENIABLE,
	[RACCT_RSS] =
		RACCT_RECLAIMABLE,
	[RACCT_MEMLOCK] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NPROC] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NOFILE] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_VMEM] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NPTS] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SWAP] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NTHR] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_MSGQQUEUED] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_MSGQSIZE] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NMSGQ] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEM] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEMOP] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NSHM] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SHMSIZE] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_WALLCLOCK] =
		RACCT_IN_MILLIONS,
	[RACCT_PCTCPU] =
		RACCT_DECAYING | RACCT_DENIABLE | RACCT_IN_MILLIONS,
	[RACCT_READBPS] =
		RACCT_DECAYING,
	[RACCT_WRITEBPS] =
		RACCT_DECAYING,
	[RACCT_READIOPS] =
		RACCT_DECAYING,
	[RACCT_WRITEIOPS] =
		RACCT_DECAYING };

/*
 * 0.3 expressed in FSCALE fixed point.  racct_set_locked() multiplies the
 * previous %cpu amount by this factor (and divides by FSCALE) to estimate
 * how much of that amount is still present in the per-credential containers.
 */
static const fixpt_t RACCT_DECAY_FACTOR = 0.3 * FSCALE;

#ifdef SCHED_4BSD
/*
 * Contains intermediate values for %cpu calculations to avoid using floating
 * point in the kernel.
 * ccpu_exp[k] = FSCALE * (ccpu/FSCALE)^k = FSCALE * exp(-k/20)
 * It is needed only for the 4BSD scheduler, because in ULE, the ccpu equals to
 * zero so the calculations are more straightforward.
195 */ 196 fixpt_t ccpu_exp[] = { 197 [0] = FSCALE * 1, 198 [1] = FSCALE * 0.95122942450071400909, 199 [2] = FSCALE * 0.90483741803595957316, 200 [3] = FSCALE * 0.86070797642505780722, 201 [4] = FSCALE * 0.81873075307798185866, 202 [5] = FSCALE * 0.77880078307140486824, 203 [6] = FSCALE * 0.74081822068171786606, 204 [7] = FSCALE * 0.70468808971871343435, 205 [8] = FSCALE * 0.67032004603563930074, 206 [9] = FSCALE * 0.63762815162177329314, 207 [10] = FSCALE * 0.60653065971263342360, 208 [11] = FSCALE * 0.57694981038048669531, 209 [12] = FSCALE * 0.54881163609402643262, 210 [13] = FSCALE * 0.52204577676101604789, 211 [14] = FSCALE * 0.49658530379140951470, 212 [15] = FSCALE * 0.47236655274101470713, 213 [16] = FSCALE * 0.44932896411722159143, 214 [17] = FSCALE * 0.42741493194872666992, 215 [18] = FSCALE * 0.40656965974059911188, 216 [19] = FSCALE * 0.38674102345450120691, 217 [20] = FSCALE * 0.36787944117144232159, 218 [21] = FSCALE * 0.34993774911115535467, 219 [22] = FSCALE * 0.33287108369807955328, 220 [23] = FSCALE * 0.31663676937905321821, 221 [24] = FSCALE * 0.30119421191220209664, 222 [25] = FSCALE * 0.28650479686019010032, 223 [26] = FSCALE * 0.27253179303401260312, 224 [27] = FSCALE * 0.25924026064589150757, 225 [28] = FSCALE * 0.24659696394160647693, 226 [29] = FSCALE * 0.23457028809379765313, 227 [30] = FSCALE * 0.22313016014842982893, 228 [31] = FSCALE * 0.21224797382674305771, 229 [32] = FSCALE * 0.20189651799465540848, 230 [33] = FSCALE * 0.19204990862075411423, 231 [34] = FSCALE * 0.18268352405273465022, 232 [35] = FSCALE * 0.17377394345044512668, 233 [36] = FSCALE * 0.16529888822158653829, 234 [37] = FSCALE * 0.15723716631362761621, 235 [38] = FSCALE * 0.14956861922263505264, 236 [39] = FSCALE * 0.14227407158651357185, 237 [40] = FSCALE * 0.13533528323661269189, 238 [41] = FSCALE * 0.12873490358780421886, 239 [42] = FSCALE * 0.12245642825298191021, 240 [43] = FSCALE * 0.11648415777349695786, 241 [44] = FSCALE * 0.11080315836233388333, 242 [45] = 
FSCALE * 0.10539922456186433678, 243 [46] = FSCALE * 0.10025884372280373372, 244 [47] = FSCALE * 0.09536916221554961888, 245 [48] = FSCALE * 0.09071795328941250337, 246 [49] = FSCALE * 0.08629358649937051097, 247 [50] = FSCALE * 0.08208499862389879516, 248 [51] = FSCALE * 0.07808166600115315231, 249 [52] = FSCALE * 0.07427357821433388042, 250 [53] = FSCALE * 0.07065121306042958674, 251 [54] = FSCALE * 0.06720551273974976512, 252 [55] = FSCALE * 0.06392786120670757270, 253 [56] = FSCALE * 0.06081006262521796499, 254 [57] = FSCALE * 0.05784432087483846296, 255 [58] = FSCALE * 0.05502322005640722902, 256 [59] = FSCALE * 0.05233970594843239308, 257 [60] = FSCALE * 0.04978706836786394297, 258 [61] = FSCALE * 0.04735892439114092119, 259 [62] = FSCALE * 0.04504920239355780606, 260 [63] = FSCALE * 0.04285212686704017991, 261 [64] = FSCALE * 0.04076220397836621516, 262 [65] = FSCALE * 0.03877420783172200988, 263 [66] = FSCALE * 0.03688316740124000544, 264 [67] = FSCALE * 0.03508435410084502588, 265 [68] = FSCALE * 0.03337326996032607948, 266 [69] = FSCALE * 0.03174563637806794323, 267 [70] = FSCALE * 0.03019738342231850073, 268 [71] = FSCALE * 0.02872463965423942912, 269 [72] = FSCALE * 0.02732372244729256080, 270 [73] = FSCALE * 0.02599112877875534358, 271 [74] = FSCALE * 0.02472352647033939120, 272 [75] = FSCALE * 0.02351774585600910823, 273 [76] = FSCALE * 0.02237077185616559577, 274 [77] = FSCALE * 0.02127973643837716938, 275 [78] = FSCALE * 0.02024191144580438847, 276 [79] = FSCALE * 0.01925470177538692429, 277 [80] = FSCALE * 0.01831563888873418029, 278 [81] = FSCALE * 0.01742237463949351138, 279 [82] = FSCALE * 0.01657267540176124754, 280 [83] = FSCALE * 0.01576441648485449082, 281 [84] = FSCALE * 0.01499557682047770621, 282 [85] = FSCALE * 0.01426423390899925527, 283 [86] = FSCALE * 0.01356855901220093175, 284 [87] = FSCALE * 0.01290681258047986886, 285 [88] = FSCALE * 0.01227733990306844117, 286 [89] = FSCALE * 0.01167856697039544521, 287 [90] = FSCALE * 
0.01110899653824230649, 288 [91] = FSCALE * 0.01056720438385265337, 289 [92] = FSCALE * 0.01005183574463358164, 290 [93] = FSCALE * 0.00956160193054350793, 291 [94] = FSCALE * 0.00909527710169581709, 292 [95] = FSCALE * 0.00865169520312063417, 293 [96] = FSCALE * 0.00822974704902002884, 294 [97] = FSCALE * 0.00782837754922577143, 295 [98] = FSCALE * 0.00744658307092434051, 296 [99] = FSCALE * 0.00708340892905212004, 297 [100] = FSCALE * 0.00673794699908546709, 298 [101] = FSCALE * 0.00640933344625638184, 299 [102] = FSCALE * 0.00609674656551563610, 300 [103] = FSCALE * 0.00579940472684214321, 301 [104] = FSCALE * 0.00551656442076077241, 302 [105] = FSCALE * 0.00524751839918138427, 303 [106] = FSCALE * 0.00499159390691021621, 304 [107] = FSCALE * 0.00474815099941147558, 305 [108] = FSCALE * 0.00451658094261266798, 306 [109] = FSCALE * 0.00429630469075234057, 307 [110] = FSCALE * 0.00408677143846406699, 308 }; 309 #endif 310 311 #define CCPU_EXP_MAX 110 312 313 /* 314 * This function is analogical to the getpcpu() function in the ps(1) command. 315 * They should both calculate in the same way so that the racct %cpu 316 * calculations are consistent with the values showed by the ps(1) tool. 317 * The calculations are more complex in the 4BSD scheduler because of the value 318 * of the ccpu variable. In ULE it is defined to be zero which saves us some 319 * work. 320 */ 321 static uint64_t 322 racct_getpcpu(struct proc *p, u_int pcpu) 323 { 324 u_int swtime; 325 #ifdef SCHED_4BSD 326 fixpt_t pctcpu, pctcpu_next; 327 #endif 328 #ifdef SMP 329 struct pcpu *pc; 330 int found; 331 #endif 332 fixpt_t p_pctcpu; 333 struct thread *td; 334 335 ASSERT_RACCT_ENABLED(); 336 337 /* 338 * If the process is swapped out, we count its %cpu usage as zero. 339 * This behaviour is consistent with the userland ps(1) tool. 
340 */ 341 if ((p->p_flag & P_INMEM) == 0) 342 return (0); 343 swtime = (ticks - p->p_swtick) / hz; 344 345 /* 346 * For short-lived processes, the sched_pctcpu() returns small 347 * values even for cpu intensive processes. Therefore we use 348 * our own estimate in this case. 349 */ 350 if (swtime < RACCT_PCPU_SECS) 351 return (pcpu); 352 353 p_pctcpu = 0; 354 FOREACH_THREAD_IN_PROC(p, td) { 355 if (td == PCPU_GET(idlethread)) 356 continue; 357 #ifdef SMP 358 found = 0; 359 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 360 if (td == pc->pc_idlethread) { 361 found = 1; 362 break; 363 } 364 } 365 if (found) 366 continue; 367 #endif 368 thread_lock(td); 369 #ifdef SCHED_4BSD 370 pctcpu = sched_pctcpu(td); 371 /* Count also the yet unfinished second. */ 372 pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT; 373 pctcpu_next += sched_pctcpu_delta(td); 374 p_pctcpu += max(pctcpu, pctcpu_next); 375 #else 376 /* 377 * In ULE the %cpu statistics are updated on every 378 * sched_pctcpu() call. So special calculations to 379 * account for the latest (unfinished) second are 380 * not needed. 381 */ 382 p_pctcpu += sched_pctcpu(td); 383 #endif 384 thread_unlock(td); 385 } 386 387 #ifdef SCHED_4BSD 388 if (swtime <= CCPU_EXP_MAX) 389 return ((100 * (uint64_t)p_pctcpu * 1000000) / 390 (FSCALE - ccpu_exp[swtime])); 391 #endif 392 393 return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE); 394 } 395 396 static void 397 racct_add_racct(struct racct *dest, const struct racct *src) 398 { 399 int i; 400 401 ASSERT_RACCT_ENABLED(); 402 RACCT_LOCK_ASSERT(); 403 404 /* 405 * Update resource usage in dest. 
406 */ 407 for (i = 0; i <= RACCT_MAX; i++) { 408 KASSERT(dest->r_resources[i] >= 0, 409 ("%s: resource %d propagation meltdown: dest < 0", 410 __func__, i)); 411 KASSERT(src->r_resources[i] >= 0, 412 ("%s: resource %d propagation meltdown: src < 0", 413 __func__, i)); 414 dest->r_resources[i] += src->r_resources[i]; 415 } 416 } 417 418 static void 419 racct_sub_racct(struct racct *dest, const struct racct *src) 420 { 421 int i; 422 423 ASSERT_RACCT_ENABLED(); 424 RACCT_LOCK_ASSERT(); 425 426 /* 427 * Update resource usage in dest. 428 */ 429 for (i = 0; i <= RACCT_MAX; i++) { 430 if (!RACCT_IS_SLOPPY(i) && !RACCT_IS_DECAYING(i)) { 431 KASSERT(dest->r_resources[i] >= 0, 432 ("%s: resource %d propagation meltdown: dest < 0", 433 __func__, i)); 434 KASSERT(src->r_resources[i] >= 0, 435 ("%s: resource %d propagation meltdown: src < 0", 436 __func__, i)); 437 KASSERT(src->r_resources[i] <= dest->r_resources[i], 438 ("%s: resource %d propagation meltdown: src > dest", 439 __func__, i)); 440 } 441 if (RACCT_CAN_DROP(i)) { 442 dest->r_resources[i] -= src->r_resources[i]; 443 if (dest->r_resources[i] < 0) 444 dest->r_resources[i] = 0; 445 } 446 } 447 } 448 449 void 450 racct_create(struct racct **racctp) 451 { 452 453 if (!racct_enable) 454 return; 455 456 SDT_PROBE1(racct, , racct, create, racctp); 457 458 KASSERT(*racctp == NULL, ("racct already allocated")); 459 460 *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO); 461 } 462 463 static void 464 racct_destroy_locked(struct racct **racctp) 465 { 466 struct racct *racct; 467 int i; 468 469 ASSERT_RACCT_ENABLED(); 470 471 SDT_PROBE1(racct, , racct, destroy, racctp); 472 473 RACCT_LOCK_ASSERT(); 474 KASSERT(racctp != NULL, ("NULL racctp")); 475 KASSERT(*racctp != NULL, ("NULL racct")); 476 477 racct = *racctp; 478 479 for (i = 0; i <= RACCT_MAX; i++) { 480 if (RACCT_IS_SLOPPY(i)) 481 continue; 482 if (!RACCT_IS_RECLAIMABLE(i)) 483 continue; 484 KASSERT(racct->r_resources[i] == 0, 485 ("destroying non-empty racct: " 486 
"%ju allocated for resource %d\n", 487 racct->r_resources[i], i)); 488 } 489 uma_zfree(racct_zone, racct); 490 *racctp = NULL; 491 } 492 493 void 494 racct_destroy(struct racct **racct) 495 { 496 497 if (!racct_enable) 498 return; 499 500 RACCT_LOCK(); 501 racct_destroy_locked(racct); 502 RACCT_UNLOCK(); 503 } 504 505 /* 506 * Increase consumption of 'resource' by 'amount' for 'racct', 507 * but not its parents. Differently from other cases, 'amount' here 508 * may be less than zero. 509 */ 510 static void 511 racct_adjust_resource(struct racct *racct, int resource, 512 int64_t amount) 513 { 514 515 ASSERT_RACCT_ENABLED(); 516 RACCT_LOCK_ASSERT(); 517 KASSERT(racct != NULL, ("NULL racct")); 518 519 racct->r_resources[resource] += amount; 520 if (racct->r_resources[resource] < 0) { 521 KASSERT(RACCT_IS_SLOPPY(resource) || RACCT_IS_DECAYING(resource), 522 ("%s: resource %d usage < 0", __func__, resource)); 523 racct->r_resources[resource] = 0; 524 } 525 526 /* 527 * There are some cases where the racct %cpu resource would grow 528 * beyond 100% per core. For example in racct_proc_exit() we add 529 * the process %cpu usage to the ucred racct containers. If too 530 * many processes terminated in a short time span, the ucred %cpu 531 * resource could grow too much. Also, the 4BSD scheduler sometimes 532 * returns for a thread more than 100% cpu usage. So we set a sane 533 * boundary here to 100% * the maximum number of CPUs. 534 */ 535 if ((resource == RACCT_PCTCPU) && 536 (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000 * (int64_t)MAXCPU)) 537 racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 * (int64_t)MAXCPU; 538 } 539 540 static int 541 racct_add_locked(struct proc *p, int resource, uint64_t amount, int force) 542 { 543 #ifdef RCTL 544 int error; 545 #endif 546 547 ASSERT_RACCT_ENABLED(); 548 549 /* 550 * We need proc lock to dereference p->p_ucred. 
551 */ 552 PROC_LOCK_ASSERT(p, MA_OWNED); 553 554 #ifdef RCTL 555 error = rctl_enforce(p, resource, amount); 556 if (error && !force && RACCT_IS_DENIABLE(resource)) { 557 SDT_PROBE3(racct, , rusage, add__failure, p, resource, amount); 558 return (error); 559 } 560 #endif 561 racct_adjust_resource(p->p_racct, resource, amount); 562 racct_add_cred_locked(p->p_ucred, resource, amount); 563 564 return (0); 565 } 566 567 /* 568 * Increase allocation of 'resource' by 'amount' for process 'p'. 569 * Return 0 if it's below limits, or errno, if it's not. 570 */ 571 int 572 racct_add(struct proc *p, int resource, uint64_t amount) 573 { 574 int error; 575 576 if (!racct_enable) 577 return (0); 578 579 SDT_PROBE3(racct, , rusage, add, p, resource, amount); 580 581 RACCT_LOCK(); 582 error = racct_add_locked(p, resource, amount, 0); 583 RACCT_UNLOCK(); 584 return (error); 585 } 586 587 /* 588 * Increase allocation of 'resource' by 'amount' for process 'p'. 589 * Doesn't check for limits and never fails. 590 */ 591 void 592 racct_add_force(struct proc *p, int resource, uint64_t amount) 593 { 594 595 if (!racct_enable) 596 return; 597 598 SDT_PROBE3(racct, , rusage, add__force, p, resource, amount); 599 600 RACCT_LOCK(); 601 racct_add_locked(p, resource, amount, 1); 602 RACCT_UNLOCK(); 603 } 604 605 static void 606 racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount) 607 { 608 struct prison *pr; 609 610 ASSERT_RACCT_ENABLED(); 611 612 racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, amount); 613 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 614 racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource, 615 amount); 616 racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, amount); 617 } 618 619 /* 620 * Increase allocation of 'resource' by 'amount' for credential 'cred'. 621 * Doesn't check for limits and never fails. 
622 */ 623 void 624 racct_add_cred(struct ucred *cred, int resource, uint64_t amount) 625 { 626 627 if (!racct_enable) 628 return; 629 630 SDT_PROBE3(racct, , rusage, add__cred, cred, resource, amount); 631 632 RACCT_LOCK(); 633 racct_add_cred_locked(cred, resource, amount); 634 RACCT_UNLOCK(); 635 } 636 637 /* 638 * Account for disk IO resource consumption. Checks for limits, 639 * but never fails, due to disk limits being undeniable. 640 */ 641 void 642 racct_add_buf(struct proc *p, const struct buf *bp, int is_write) 643 { 644 645 ASSERT_RACCT_ENABLED(); 646 PROC_LOCK_ASSERT(p, MA_OWNED); 647 648 SDT_PROBE3(racct, , rusage, add__buf, p, bp, is_write); 649 650 RACCT_LOCK(); 651 if (is_write) { 652 racct_add_locked(curproc, RACCT_WRITEBPS, bp->b_bcount, 1); 653 racct_add_locked(curproc, RACCT_WRITEIOPS, 1, 1); 654 } else { 655 racct_add_locked(curproc, RACCT_READBPS, bp->b_bcount, 1); 656 racct_add_locked(curproc, RACCT_READIOPS, 1, 1); 657 } 658 RACCT_UNLOCK(); 659 } 660 661 static int 662 racct_set_locked(struct proc *p, int resource, uint64_t amount, int force) 663 { 664 int64_t old_amount, decayed_amount, diff_proc, diff_cred; 665 #ifdef RCTL 666 int error; 667 #endif 668 669 ASSERT_RACCT_ENABLED(); 670 671 /* 672 * We need proc lock to dereference p->p_ucred. 673 */ 674 PROC_LOCK_ASSERT(p, MA_OWNED); 675 676 old_amount = p->p_racct->r_resources[resource]; 677 /* 678 * The diffs may be negative. 679 */ 680 diff_proc = amount - old_amount; 681 if (resource == RACCT_PCTCPU) { 682 /* 683 * Resources in per-credential racct containers may decay. 684 * If this is the case, we need to calculate the difference 685 * between the new amount and the proportional value of the 686 * old amount that has decayed in the ucred racct containers. 
687 */ 688 decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; 689 diff_cred = amount - decayed_amount; 690 } else 691 diff_cred = diff_proc; 692 #ifdef notyet 693 KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource), 694 ("%s: usage of non-droppable resource %d dropping", __func__, 695 resource)); 696 #endif 697 #ifdef RCTL 698 if (diff_proc > 0) { 699 error = rctl_enforce(p, resource, diff_proc); 700 if (error && !force && RACCT_IS_DENIABLE(resource)) { 701 SDT_PROBE3(racct, , rusage, set__failure, p, resource, 702 amount); 703 return (error); 704 } 705 } 706 #endif 707 racct_adjust_resource(p->p_racct, resource, diff_proc); 708 if (diff_cred > 0) 709 racct_add_cred_locked(p->p_ucred, resource, diff_cred); 710 else if (diff_cred < 0) 711 racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); 712 713 return (0); 714 } 715 716 /* 717 * Set allocation of 'resource' to 'amount' for process 'p'. 718 * Return 0 if it's below limits, or errno, if it's not. 719 * 720 * Note that decreasing the allocation always returns 0, 721 * even if it's above the limit. 
722 */ 723 int 724 racct_set_unlocked(struct proc *p, int resource, uint64_t amount) 725 { 726 int error; 727 728 ASSERT_RACCT_ENABLED(); 729 PROC_LOCK(p); 730 error = racct_set(p, resource, amount); 731 PROC_UNLOCK(p); 732 return (error); 733 } 734 735 int 736 racct_set(struct proc *p, int resource, uint64_t amount) 737 { 738 int error; 739 740 if (!racct_enable) 741 return (0); 742 743 SDT_PROBE3(racct, , rusage, set__force, p, resource, amount); 744 745 RACCT_LOCK(); 746 error = racct_set_locked(p, resource, amount, 0); 747 RACCT_UNLOCK(); 748 return (error); 749 } 750 751 void 752 racct_set_force(struct proc *p, int resource, uint64_t amount) 753 { 754 755 if (!racct_enable) 756 return; 757 758 SDT_PROBE3(racct, , rusage, set, p, resource, amount); 759 760 RACCT_LOCK(); 761 racct_set_locked(p, resource, amount, 1); 762 RACCT_UNLOCK(); 763 } 764 765 /* 766 * Returns amount of 'resource' the process 'p' can keep allocated. 767 * Allocating more than that would be denied, unless the resource 768 * is marked undeniable. Amount of already allocated resource does 769 * not matter. 770 */ 771 uint64_t 772 racct_get_limit(struct proc *p, int resource) 773 { 774 #ifdef RCTL 775 uint64_t available; 776 777 if (!racct_enable) 778 return (UINT64_MAX); 779 780 RACCT_LOCK(); 781 available = rctl_get_limit(p, resource); 782 RACCT_UNLOCK(); 783 784 return (available); 785 #else 786 787 return (UINT64_MAX); 788 #endif 789 } 790 791 /* 792 * Returns amount of 'resource' the process 'p' can keep allocated. 793 * Allocating more than that would be denied, unless the resource 794 * is marked undeniable. Amount of already allocated resource does 795 * matter. 
796 */ 797 uint64_t 798 racct_get_available(struct proc *p, int resource) 799 { 800 #ifdef RCTL 801 uint64_t available; 802 803 if (!racct_enable) 804 return (UINT64_MAX); 805 806 RACCT_LOCK(); 807 available = rctl_get_available(p, resource); 808 RACCT_UNLOCK(); 809 810 return (available); 811 #else 812 813 return (UINT64_MAX); 814 #endif 815 } 816 817 /* 818 * Returns amount of the %cpu resource that process 'p' can add to its %cpu 819 * utilization. Adding more than that would lead to the process being 820 * throttled. 821 */ 822 static int64_t 823 racct_pcpu_available(struct proc *p) 824 { 825 #ifdef RCTL 826 uint64_t available; 827 828 ASSERT_RACCT_ENABLED(); 829 830 RACCT_LOCK(); 831 available = rctl_pcpu_available(p); 832 RACCT_UNLOCK(); 833 834 return (available); 835 #else 836 837 return (INT64_MAX); 838 #endif 839 } 840 841 /* 842 * Decrease allocation of 'resource' by 'amount' for process 'p'. 843 */ 844 void 845 racct_sub(struct proc *p, int resource, uint64_t amount) 846 { 847 848 if (!racct_enable) 849 return; 850 851 SDT_PROBE3(racct, , rusage, sub, p, resource, amount); 852 853 /* 854 * We need proc lock to dereference p->p_ucred. 
855 */ 856 PROC_LOCK_ASSERT(p, MA_OWNED); 857 KASSERT(RACCT_CAN_DROP(resource), 858 ("%s: called for non-droppable resource %d", __func__, resource)); 859 860 RACCT_LOCK(); 861 KASSERT(amount <= p->p_racct->r_resources[resource], 862 ("%s: freeing %ju of resource %d, which is more " 863 "than allocated %jd for %s (pid %d)", __func__, amount, resource, 864 (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid)); 865 866 racct_adjust_resource(p->p_racct, resource, -amount); 867 racct_sub_cred_locked(p->p_ucred, resource, amount); 868 RACCT_UNLOCK(); 869 } 870 871 static void 872 racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) 873 { 874 struct prison *pr; 875 876 ASSERT_RACCT_ENABLED(); 877 878 racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, -amount); 879 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 880 racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource, 881 -amount); 882 racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, -amount); 883 } 884 885 /* 886 * Decrease allocation of 'resource' by 'amount' for credential 'cred'. 887 */ 888 void 889 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount) 890 { 891 892 if (!racct_enable) 893 return; 894 895 SDT_PROBE3(racct, , rusage, sub__cred, cred, resource, amount); 896 897 #ifdef notyet 898 KASSERT(RACCT_CAN_DROP(resource), 899 ("%s: called for resource %d which can not drop", __func__, 900 resource)); 901 #endif 902 903 RACCT_LOCK(); 904 racct_sub_cred_locked(cred, resource, amount); 905 RACCT_UNLOCK(); 906 } 907 908 /* 909 * Inherit resource usage information from the parent process. 910 */ 911 int 912 racct_proc_fork(struct proc *parent, struct proc *child) 913 { 914 int i, error = 0; 915 916 if (!racct_enable) 917 return (0); 918 919 /* 920 * Create racct for the child process. 
921 */ 922 racct_create(&child->p_racct); 923 924 PROC_LOCK(parent); 925 PROC_LOCK(child); 926 RACCT_LOCK(); 927 928 #ifdef RCTL 929 error = rctl_proc_fork(parent, child); 930 if (error != 0) 931 goto out; 932 #endif 933 934 /* Init process cpu time. */ 935 child->p_prev_runtime = 0; 936 child->p_throttled = 0; 937 938 /* 939 * Inherit resource usage. 940 */ 941 for (i = 0; i <= RACCT_MAX; i++) { 942 if (parent->p_racct->r_resources[i] == 0 || 943 !RACCT_IS_INHERITABLE(i)) 944 continue; 945 946 error = racct_set_locked(child, i, 947 parent->p_racct->r_resources[i], 0); 948 if (error != 0) 949 goto out; 950 } 951 952 error = racct_add_locked(child, RACCT_NPROC, 1, 0); 953 error += racct_add_locked(child, RACCT_NTHR, 1, 0); 954 955 out: 956 RACCT_UNLOCK(); 957 PROC_UNLOCK(child); 958 PROC_UNLOCK(parent); 959 960 if (error != 0) 961 racct_proc_exit(child); 962 963 return (error); 964 } 965 966 /* 967 * Called at the end of fork1(), to handle rules that require the process 968 * to be fully initialized. 969 */ 970 void 971 racct_proc_fork_done(struct proc *child) 972 { 973 974 if (!racct_enable) 975 return; 976 977 #ifdef RCTL 978 PROC_LOCK(child); 979 RACCT_LOCK(); 980 rctl_enforce(child, RACCT_NPROC, 0); 981 rctl_enforce(child, RACCT_NTHR, 0); 982 RACCT_UNLOCK(); 983 PROC_UNLOCK(child); 984 #endif 985 } 986 987 void 988 racct_proc_exit(struct proc *p) 989 { 990 struct timeval wallclock; 991 uint64_t pct_estimate, pct, runtime; 992 int i; 993 994 if (!racct_enable) 995 return; 996 997 PROC_LOCK(p); 998 /* 999 * We don't need to calculate rux, proc_reap() has already done this. 
1000 */ 1001 runtime = cputick2usec(p->p_rux.rux_runtime); 1002 #ifdef notyet 1003 KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); 1004 #else 1005 if (runtime < p->p_prev_runtime) 1006 runtime = p->p_prev_runtime; 1007 #endif 1008 microuptime(&wallclock); 1009 timevalsub(&wallclock, &p->p_stats->p_start); 1010 if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { 1011 pct_estimate = (1000000 * runtime * 100) / 1012 ((uint64_t)wallclock.tv_sec * 1000000 + 1013 wallclock.tv_usec); 1014 } else 1015 pct_estimate = 0; 1016 pct = racct_getpcpu(p, pct_estimate); 1017 1018 RACCT_LOCK(); 1019 racct_set_locked(p, RACCT_CPU, runtime, 0); 1020 racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct); 1021 1022 KASSERT(p->p_racct->r_resources[RACCT_RSS] == 0, 1023 ("process reaped with %ju allocated for RSS\n", 1024 p->p_racct->r_resources[RACCT_RSS])); 1025 for (i = 0; i <= RACCT_MAX; i++) { 1026 if (p->p_racct->r_resources[i] == 0) 1027 continue; 1028 if (!RACCT_IS_RECLAIMABLE(i)) 1029 continue; 1030 racct_set_locked(p, i, 0, 0); 1031 } 1032 1033 #ifdef RCTL 1034 rctl_racct_release(p->p_racct); 1035 #endif 1036 racct_destroy_locked(&p->p_racct); 1037 RACCT_UNLOCK(); 1038 PROC_UNLOCK(p); 1039 } 1040 1041 /* 1042 * Called after credentials change, to move resource utilisation 1043 * between raccts. 
1044 */ 1045 void 1046 racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, 1047 struct ucred *newcred) 1048 { 1049 struct uidinfo *olduip, *newuip; 1050 struct loginclass *oldlc, *newlc; 1051 struct prison *oldpr, *newpr, *pr; 1052 1053 if (!racct_enable) 1054 return; 1055 1056 PROC_LOCK_ASSERT(p, MA_OWNED); 1057 1058 newuip = newcred->cr_ruidinfo; 1059 olduip = oldcred->cr_ruidinfo; 1060 newlc = newcred->cr_loginclass; 1061 oldlc = oldcred->cr_loginclass; 1062 newpr = newcred->cr_prison; 1063 oldpr = oldcred->cr_prison; 1064 1065 RACCT_LOCK(); 1066 if (newuip != olduip) { 1067 racct_sub_racct(olduip->ui_racct, p->p_racct); 1068 racct_add_racct(newuip->ui_racct, p->p_racct); 1069 } 1070 if (newlc != oldlc) { 1071 racct_sub_racct(oldlc->lc_racct, p->p_racct); 1072 racct_add_racct(newlc->lc_racct, p->p_racct); 1073 } 1074 if (newpr != oldpr) { 1075 for (pr = oldpr; pr != NULL; pr = pr->pr_parent) 1076 racct_sub_racct(pr->pr_prison_racct->prr_racct, 1077 p->p_racct); 1078 for (pr = newpr; pr != NULL; pr = pr->pr_parent) 1079 racct_add_racct(pr->pr_prison_racct->prr_racct, 1080 p->p_racct); 1081 } 1082 RACCT_UNLOCK(); 1083 } 1084 1085 void 1086 racct_move(struct racct *dest, struct racct *src) 1087 { 1088 1089 ASSERT_RACCT_ENABLED(); 1090 1091 RACCT_LOCK(); 1092 racct_add_racct(dest, src); 1093 racct_sub_racct(src, src); 1094 RACCT_UNLOCK(); 1095 } 1096 1097 static void 1098 ast_racct(struct thread *td, int tda __unused) 1099 { 1100 struct proc *p; 1101 1102 ASSERT_RACCT_ENABLED(); 1103 1104 p = td->td_proc; 1105 if (p->p_throttled == 0) 1106 return; 1107 1108 PROC_LOCK(p); 1109 while (p->p_throttled != 0) { 1110 msleep(p->p_racct, &p->p_mtx, 0, "racct", 1111 p->p_throttled < 0 ? 0 : p->p_throttled); 1112 if (p->p_throttled > 0) 1113 p->p_throttled = 0; 1114 } 1115 PROC_UNLOCK(p); 1116 } 1117 1118 /* 1119 * Make the process sleep in userret() for 'timeout' ticks. Setting 1120 * timeout to -1 makes it sleep until woken up by racct_proc_wakeup(). 
1121 */ 1122 void 1123 racct_proc_throttle(struct proc *p, int timeout) 1124 { 1125 struct thread *td; 1126 #ifdef SMP 1127 int cpuid; 1128 #endif 1129 1130 KASSERT(timeout != 0, ("timeout %d", timeout)); 1131 ASSERT_RACCT_ENABLED(); 1132 PROC_LOCK_ASSERT(p, MA_OWNED); 1133 1134 /* 1135 * Do not block kernel processes. Also do not block processes with 1136 * low %cpu utilization to improve interactivity. 1137 */ 1138 if ((p->p_flag & (P_SYSTEM | P_KPROC)) != 0) 1139 return; 1140 1141 if (p->p_throttled < 0 || (timeout > 0 && p->p_throttled > timeout)) 1142 return; 1143 1144 p->p_throttled = timeout; 1145 1146 FOREACH_THREAD_IN_PROC(p, td) { 1147 thread_lock(td); 1148 ast_sched_locked(td, TDA_RACCT); 1149 1150 switch (TD_GET_STATE(td)) { 1151 case TDS_RUNQ: 1152 /* 1153 * If the thread is on the scheduler run-queue, we can 1154 * not just remove it from there. So we set the flag 1155 * TDA_SCHED for the thread, so that once it is 1156 * running, it is taken off the cpu as soon as possible. 1157 */ 1158 ast_sched_locked(td, TDA_SCHED); 1159 break; 1160 case TDS_RUNNING: 1161 /* 1162 * If the thread is running, we request a context 1163 * switch for it by setting the TDA_SCHED flag. 
1164 */ 1165 ast_sched_locked(td, TDA_SCHED); 1166 #ifdef SMP 1167 cpuid = td->td_oncpu; 1168 if ((cpuid != NOCPU) && (td != curthread)) 1169 ipi_cpu(cpuid, IPI_AST); 1170 #endif 1171 break; 1172 default: 1173 break; 1174 } 1175 thread_unlock(td); 1176 } 1177 } 1178 1179 static void 1180 racct_proc_wakeup(struct proc *p) 1181 { 1182 1183 ASSERT_RACCT_ENABLED(); 1184 1185 PROC_LOCK_ASSERT(p, MA_OWNED); 1186 1187 if (p->p_throttled != 0) { 1188 p->p_throttled = 0; 1189 wakeup(p->p_racct); 1190 } 1191 } 1192 1193 static void 1194 racct_decay_callback(struct racct *racct, void *dummy1, void *dummy2) 1195 { 1196 int64_t r_old, r_new; 1197 1198 ASSERT_RACCT_ENABLED(); 1199 RACCT_LOCK_ASSERT(); 1200 1201 #ifdef RCTL 1202 rctl_throttle_decay(racct, RACCT_READBPS); 1203 rctl_throttle_decay(racct, RACCT_WRITEBPS); 1204 rctl_throttle_decay(racct, RACCT_READIOPS); 1205 rctl_throttle_decay(racct, RACCT_WRITEIOPS); 1206 #endif 1207 1208 r_old = racct->r_resources[RACCT_PCTCPU]; 1209 1210 /* If there is nothing to decay, just exit. 
*/ 1211 if (r_old <= 0) 1212 return; 1213 1214 r_new = r_old * RACCT_DECAY_FACTOR / FSCALE; 1215 racct->r_resources[RACCT_PCTCPU] = r_new; 1216 } 1217 1218 static void 1219 racct_decay_pre(void) 1220 { 1221 1222 RACCT_LOCK(); 1223 } 1224 1225 static void 1226 racct_decay_post(void) 1227 { 1228 1229 RACCT_UNLOCK(); 1230 } 1231 1232 static void 1233 racct_decay(void) 1234 { 1235 1236 ASSERT_RACCT_ENABLED(); 1237 1238 ui_racct_foreach(racct_decay_callback, racct_decay_pre, 1239 racct_decay_post, NULL, NULL); 1240 loginclass_racct_foreach(racct_decay_callback, racct_decay_pre, 1241 racct_decay_post, NULL, NULL); 1242 prison_racct_foreach(racct_decay_callback, racct_decay_pre, 1243 racct_decay_post, NULL, NULL); 1244 } 1245 1246 static void 1247 racctd(void) 1248 { 1249 struct thread *td; 1250 struct proc *p; 1251 struct timeval wallclock; 1252 uint64_t pct, pct_estimate, runtime; 1253 1254 ASSERT_RACCT_ENABLED(); 1255 1256 for (;;) { 1257 racct_decay(); 1258 1259 sx_slock(&allproc_lock); 1260 1261 FOREACH_PROC_IN_SYSTEM(p) { 1262 PROC_LOCK(p); 1263 if (p->p_state != PRS_NORMAL) { 1264 if (p->p_state == PRS_ZOMBIE) 1265 racct_set(p, RACCT_PCTCPU, 0); 1266 PROC_UNLOCK(p); 1267 continue; 1268 } 1269 1270 microuptime(&wallclock); 1271 timevalsub(&wallclock, &p->p_stats->p_start); 1272 PROC_STATLOCK(p); 1273 FOREACH_THREAD_IN_PROC(p, td) 1274 ruxagg(p, td); 1275 runtime = cputick2usec(p->p_rux.rux_runtime); 1276 PROC_STATUNLOCK(p); 1277 #ifdef notyet 1278 KASSERT(runtime >= p->p_prev_runtime, 1279 ("runtime < p_prev_runtime")); 1280 #else 1281 if (runtime < p->p_prev_runtime) 1282 runtime = p->p_prev_runtime; 1283 #endif 1284 p->p_prev_runtime = runtime; 1285 if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { 1286 pct_estimate = (1000000 * runtime * 100) / 1287 ((uint64_t)wallclock.tv_sec * 1000000 + 1288 wallclock.tv_usec); 1289 } else 1290 pct_estimate = 0; 1291 pct = racct_getpcpu(p, pct_estimate); 1292 RACCT_LOCK(); 1293 #ifdef RCTL 1294 
rctl_throttle_decay(p->p_racct, RACCT_READBPS); 1295 rctl_throttle_decay(p->p_racct, RACCT_WRITEBPS); 1296 rctl_throttle_decay(p->p_racct, RACCT_READIOPS); 1297 rctl_throttle_decay(p->p_racct, RACCT_WRITEIOPS); 1298 #endif 1299 racct_set_locked(p, RACCT_PCTCPU, pct, 1); 1300 racct_set_locked(p, RACCT_CPU, runtime, 0); 1301 racct_set_locked(p, RACCT_WALLCLOCK, 1302 (uint64_t)wallclock.tv_sec * 1000000 + 1303 wallclock.tv_usec, 0); 1304 RACCT_UNLOCK(); 1305 PROC_UNLOCK(p); 1306 } 1307 1308 /* 1309 * To ensure that processes are throttled in a fair way, we need 1310 * to iterate over all processes again and check the limits 1311 * for %cpu resource only after ucred racct containers have been 1312 * properly filled. 1313 */ 1314 FOREACH_PROC_IN_SYSTEM(p) { 1315 PROC_LOCK(p); 1316 if (p->p_state != PRS_NORMAL) { 1317 PROC_UNLOCK(p); 1318 continue; 1319 } 1320 1321 if (racct_pcpu_available(p) <= 0) { 1322 if (p->p_racct->r_resources[RACCT_PCTCPU] > 1323 pcpu_threshold) 1324 racct_proc_throttle(p, -1); 1325 } else if (p->p_throttled == -1) { 1326 racct_proc_wakeup(p); 1327 } 1328 PROC_UNLOCK(p); 1329 } 1330 sx_sunlock(&allproc_lock); 1331 pause("-", hz); 1332 } 1333 } 1334 1335 static struct kproc_desc racctd_kp = { 1336 "racctd", 1337 racctd, 1338 NULL 1339 }; 1340 1341 static void 1342 racctd_init(void) 1343 { 1344 if (!racct_enable) 1345 return; 1346 1347 kproc_start(&racctd_kp); 1348 } 1349 SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL); 1350 1351 static void 1352 racct_init(void) 1353 { 1354 if (!racct_enable) 1355 return; 1356 1357 racct_zone = uma_zcreate("racct", sizeof(struct racct), 1358 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 1359 ast_register(TDA_RACCT, ASTR_ASTF_REQUIRED, 0, ast_racct); 1360 1361 /* 1362 * XXX: Move this somewhere. 1363 */ 1364 prison0.pr_prison_racct = prison_racct_find("0"); 1365 } 1366 SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL); 1367 1368 #endif /* !RACCT */ 1369