/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2010 The FreeBSD Foundation
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/eventhandler.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/loginclass.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/umtxvar.h>
#include <machine/smp.h>

#ifdef RCTL
#include <sys/rctl.h>
#endif

FEATURE(racct, "Resource Accounting");

/*
 * Do not block processes that have their %cpu usage <= pcpu_threshold.
 */
static int pcpu_threshold = 1;
#ifdef RACCT_DEFAULT_TO_DISABLED
bool __read_frequently racct_enable = false;
#else
bool __read_frequently racct_enable = true;
#endif

SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Resource Accounting");
SYSCTL_BOOL(_kern_racct, OID_AUTO, enable, CTLFLAG_RDTUN, &racct_enable,
    0, "Enable RACCT/RCTL");
SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold,
    0, "Processes with higher %cpu usage than this value can be throttled.");

/*
 * How many seconds it takes to use the scheduler %cpu calculations.  When a
 * process starts, we compute its %cpu usage by dividing its runtime by the
 * process wall clock time.  After RACCT_PCPU_SECS pass, we use the value
 * provided by the scheduler.
 */
#define RACCT_PCPU_SECS		3

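/*
 * Illustration (not part of the original sources): while a process has
 * been resident for fewer than RACCT_PCPU_SECS seconds, racct_getpcpu()
 * falls back to the caller-supplied estimate, which racctd() and
 * racct_proc_exit() compute as runtime divided by wall clock time,
 * scaled to the RACCT_IN_MILLIONS convention (percent * 10^6).  For
 * example, 1.5 s of CPU time over 3 s of wall clock time gives
 * (1000000 * 1500000 * 100) / 3000000 == 50000000, i.e. 50%.
 */
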
struct mtx racct_lock;
MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);

static uma_zone_t racct_zone;

static void racct_sub_racct(struct racct *dest, const struct racct *src);
static void racct_sub_cred_locked(struct ucred *cred, int resource,
		uint64_t amount);
static void racct_add_cred_locked(struct ucred *cred, int resource,
		uint64_t amount);

SDT_PROVIDER_DEFINE(racct);
SDT_PROBE_DEFINE3(racct, , rusage, add,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, add__failure,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, add__buf,
    "struct proc *", "const struct buf *", "int");
SDT_PROBE_DEFINE3(racct, , rusage, add__cred,
    "struct ucred *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, add__force,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, set,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, set__failure,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, set__force,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, sub,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, sub__cred,
    "struct ucred *", "int", "uint64_t");
SDT_PROBE_DEFINE1(racct, , racct, create,
    "struct racct *");
SDT_PROBE_DEFINE1(racct, , racct, destroy,
    "struct racct *");
SDT_PROBE_DEFINE2(racct, , racct, join,
    "struct racct *", "struct racct *");
SDT_PROBE_DEFINE2(racct, , racct, join__failure,
    "struct racct *", "struct racct *");
SDT_PROBE_DEFINE2(racct, , racct, leave,
    "struct racct *", "struct racct *");

int racct_types[] = {
	[RACCT_CPU] =
		RACCT_IN_MILLIONS,
	[RACCT_DATA] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_STACK] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_CORE] =
		RACCT_DENIABLE,
	[RACCT_RSS] =
		RACCT_RECLAIMABLE,
	[RACCT_MEMLOCK] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NPROC] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NOFILE] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_VMEM] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NPTS] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SWAP] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NTHR] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_MSGQQUEUED] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_MSGQSIZE] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NMSGQ] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEM] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEMOP] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NSHM] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SHMSIZE] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_WALLCLOCK] =
		RACCT_IN_MILLIONS,
	[RACCT_PCTCPU] =
		RACCT_DECAYING | RACCT_DENIABLE | RACCT_IN_MILLIONS,
	[RACCT_READBPS] =
		RACCT_DECAYING,
	[RACCT_WRITEBPS] =
		RACCT_DECAYING,
	[RACCT_READIOPS] =
		RACCT_DECAYING,
	[RACCT_WRITEIOPS] =
		RACCT_DECAYING };

static const fixpt_t RACCT_DECAY_FACTOR = 0.3 * FSCALE;

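/*
 * Illustration (not part of the original sources): since ccpu/FSCALE
 * equals exp(-1/20) in the 4BSD scheduler, the ccpu_exp[] entry for
 * k == 20 below is FSCALE * exp(-1), roughly FSCALE * 0.3679, which
 * matches its [20] initializer.  Index k corresponds to k seconds of
 * residency, which is how racct_getpcpu() indexes the table via swtime.
 */
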
#ifdef SCHED_4BSD
/*
 * Contains intermediate values for %cpu calculations to avoid using floating
 * point in the kernel.
 * ccpu_exp[k] = FSCALE * (ccpu/FSCALE)^k = FSCALE * exp(-k/20)
 * It is needed only for the 4BSD scheduler, because in ULE ccpu is zero,
 * so the calculations are more straightforward.
 */
fixpt_t ccpu_exp[] = {
	[0] = FSCALE * 1,
	[1] = FSCALE * 0.95122942450071400909,
	[2] = FSCALE * 0.90483741803595957316,
	[3] = FSCALE * 0.86070797642505780722,
	[4] = FSCALE * 0.81873075307798185866,
	[5] = FSCALE * 0.77880078307140486824,
	[6] = FSCALE * 0.74081822068171786606,
	[7] = FSCALE * 0.70468808971871343435,
	[8] = FSCALE * 0.67032004603563930074,
	[9] = FSCALE * 0.63762815162177329314,
	[10] = FSCALE * 0.60653065971263342360,
	[11] = FSCALE * 0.57694981038048669531,
	[12] = FSCALE * 0.54881163609402643262,
	[13] = FSCALE * 0.52204577676101604789,
	[14] = FSCALE * 0.49658530379140951470,
	[15] = FSCALE * 0.47236655274101470713,
	[16] = FSCALE * 0.44932896411722159143,
	[17] = FSCALE * 0.42741493194872666992,
	[18] = FSCALE * 0.40656965974059911188,
	[19] = FSCALE * 0.38674102345450120691,
	[20] = FSCALE * 0.36787944117144232159,
	[21] = FSCALE * 0.34993774911115535467,
	[22] = FSCALE * 0.33287108369807955328,
	[23] = FSCALE * 0.31663676937905321821,
	[24] = FSCALE * 0.30119421191220209664,
	[25] = FSCALE * 0.28650479686019010032,
	[26] = FSCALE * 0.27253179303401260312,
	[27] = FSCALE * 0.25924026064589150757,
	[28] = FSCALE * 0.24659696394160647693,
	[29] = FSCALE * 0.23457028809379765313,
	[30] = FSCALE * 0.22313016014842982893,
	[31] = FSCALE * 0.21224797382674305771,
	[32] = FSCALE * 0.20189651799465540848,
	[33] = FSCALE * 0.19204990862075411423,
	[34] = FSCALE * 0.18268352405273465022,
	[35] = FSCALE * 0.17377394345044512668,
	[36] = FSCALE * 0.16529888822158653829,
	[37] = FSCALE * 0.15723716631362761621,
	[38] = FSCALE * 0.14956861922263505264,
	[39] = FSCALE * 0.14227407158651357185,
	[40] = FSCALE * 0.13533528323661269189,
	[41] = FSCALE * 0.12873490358780421886,
	[42] = FSCALE * 0.12245642825298191021,
	[43] = FSCALE * 0.11648415777349695786,
	[44] = FSCALE * 0.11080315836233388333,
	[45] = FSCALE * 0.10539922456186433678,
	[46] = FSCALE * 0.10025884372280373372,
	[47] = FSCALE * 0.09536916221554961888,
	[48] = FSCALE * 0.09071795328941250337,
	[49] = FSCALE * 0.08629358649937051097,
	[50] = FSCALE * 0.08208499862389879516,
	[51] = FSCALE * 0.07808166600115315231,
	[52] = FSCALE * 0.07427357821433388042,
	[53] = FSCALE * 0.07065121306042958674,
	[54] = FSCALE * 0.06720551273974976512,
	[55] = FSCALE * 0.06392786120670757270,
	[56] = FSCALE * 0.06081006262521796499,
	[57] = FSCALE * 0.05784432087483846296,
	[58] = FSCALE * 0.05502322005640722902,
	[59] = FSCALE * 0.05233970594843239308,
	[60] = FSCALE * 0.04978706836786394297,
	[61] = FSCALE * 0.04735892439114092119,
	[62] = FSCALE * 0.04504920239355780606,
	[63] = FSCALE * 0.04285212686704017991,
	[64] = FSCALE * 0.04076220397836621516,
	[65] = FSCALE * 0.03877420783172200988,
	[66] = FSCALE * 0.03688316740124000544,
	[67] = FSCALE * 0.03508435410084502588,
	[68] = FSCALE * 0.03337326996032607948,
	[69] = FSCALE * 0.03174563637806794323,
	[70] = FSCALE * 0.03019738342231850073,
	[71] = FSCALE * 0.02872463965423942912,
	[72] = FSCALE * 0.02732372244729256080,
	[73] = FSCALE * 0.02599112877875534358,
	[74] = FSCALE * 0.02472352647033939120,
	[75] = FSCALE * 0.02351774585600910823,
	[76] = FSCALE * 0.02237077185616559577,
	[77] = FSCALE * 0.02127973643837716938,
	[78] = FSCALE * 0.02024191144580438847,
	[79] = FSCALE * 0.01925470177538692429,
	[80] = FSCALE * 0.01831563888873418029,
	[81] = FSCALE * 0.01742237463949351138,
	[82] = FSCALE * 0.01657267540176124754,
	[83] = FSCALE * 0.01576441648485449082,
	[84] = FSCALE * 0.01499557682047770621,
	[85] = FSCALE * 0.01426423390899925527,
	[86] = FSCALE * 0.01356855901220093175,
	[87] = FSCALE * 0.01290681258047986886,
	[88] = FSCALE * 0.01227733990306844117,
	[89] = FSCALE * 0.01167856697039544521,
	[90] = FSCALE * 0.01110899653824230649,
	[91] = FSCALE * 0.01056720438385265337,
	[92] = FSCALE * 0.01005183574463358164,
	[93] = FSCALE * 0.00956160193054350793,
	[94] = FSCALE * 0.00909527710169581709,
	[95] = FSCALE * 0.00865169520312063417,
	[96] = FSCALE * 0.00822974704902002884,
	[97] = FSCALE * 0.00782837754922577143,
	[98] = FSCALE * 0.00744658307092434051,
	[99] = FSCALE * 0.00708340892905212004,
	[100] = FSCALE * 0.00673794699908546709,
	[101] = FSCALE * 0.00640933344625638184,
	[102] = FSCALE * 0.00609674656551563610,
	[103] = FSCALE * 0.00579940472684214321,
	[104] = FSCALE * 0.00551656442076077241,
	[105] = FSCALE * 0.00524751839918138427,
	[106] = FSCALE * 0.00499159390691021621,
	[107] = FSCALE * 0.00474815099941147558,
	[108] = FSCALE * 0.00451658094261266798,
	[109] = FSCALE * 0.00429630469075234057,
	[110] = FSCALE * 0.00408677143846406699,
};
#endif

#define CCPU_EXP_MAX		110

/*
 * This function is analogous to the getpcpu() function in the ps(1) command.
 * They should both calculate in the same way so that the racct %cpu
 * calculations are consistent with the values shown by the ps(1) tool.
 * The calculations are more complex in the 4BSD scheduler because of the value
 * of the ccpu variable.  In ULE it is defined to be zero which saves us some
 * work.
 */
static uint64_t
racct_getpcpu(struct proc *p, u_int pcpu)
{
	u_int swtime;
#ifdef SCHED_4BSD
	fixpt_t pctcpu, pctcpu_next;
#endif
#ifdef SMP
	struct pcpu *pc;
	int found;
#endif
	fixpt_t p_pctcpu;
	struct thread *td;

	ASSERT_RACCT_ENABLED();

	/*
	 * If the process is swapped out, we count its %cpu usage as zero.
	 * This behaviour is consistent with the userland ps(1) tool.
	 */
	if ((p->p_flag & P_INMEM) == 0)
		return (0);
	swtime = (ticks - p->p_swtick) / hz;

	/*
	 * For short-lived processes, sched_pctcpu() returns small
	 * values even for cpu-intensive processes.  Therefore we use
	 * our own estimate in this case.
	 */
	if (swtime < RACCT_PCPU_SECS)
		return (pcpu);

	p_pctcpu = 0;
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td == PCPU_GET(idlethread))
			continue;
#ifdef SMP
		found = 0;
		STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
			if (td == pc->pc_idlethread) {
				found = 1;
				break;
			}
		}
		if (found)
			continue;
#endif
		thread_lock(td);
#ifdef SCHED_4BSD
		pctcpu = sched_pctcpu(td);
		/* Count also the yet unfinished second. */
		pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT;
		pctcpu_next += sched_pctcpu_delta(td);
		p_pctcpu += max(pctcpu, pctcpu_next);
#else
		/*
		 * In ULE the %cpu statistics are updated on every
		 * sched_pctcpu() call, so special calculations to
		 * account for the latest (unfinished) second are
		 * not needed.
		 */
		p_pctcpu += sched_pctcpu(td);
#endif
		thread_unlock(td);
	}

#ifdef SCHED_4BSD
	if (swtime <= CCPU_EXP_MAX)
		return ((100 * (uint64_t)p_pctcpu * 1000000) /
		    (FSCALE - ccpu_exp[swtime]));
#endif

	return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE);
}

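/*
 * Illustration (not part of the original sources): sched_pctcpu()
 * returns a fixed-point fraction of one cpu in the range [0, FSCALE].
 * With ULE, a thread using half a cpu gives p_pctcpu == FSCALE / 2, so
 * the function above returns (100 * (FSCALE / 2) * 1000000) / FSCALE ==
 * 50000000, i.e. 50% in the RACCT_IN_MILLIONS scale.  The 4BSD branch
 * divides by (FSCALE - ccpu_exp[swtime]) instead, to compensate for the
 * decaying average not having fully converged yet.
 */
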
static void
racct_add_racct(struct racct *dest, const struct racct *src)
{
	int i;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

	/*
	 * Update resource usage in dest.
	 */
	for (i = 0; i <= RACCT_MAX; i++) {
		KASSERT(dest->r_resources[i] >= 0,
		    ("%s: resource %d propagation meltdown: dest < 0",
		    __func__, i));
		KASSERT(src->r_resources[i] >= 0,
		    ("%s: resource %d propagation meltdown: src < 0",
		    __func__, i));
		dest->r_resources[i] += src->r_resources[i];
	}
}

static void
racct_sub_racct(struct racct *dest, const struct racct *src)
{
	int i;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

	/*
	 * Update resource usage in dest.
	 */
	for (i = 0; i <= RACCT_MAX; i++) {
		if (!RACCT_IS_SLOPPY(i) && !RACCT_IS_DECAYING(i)) {
			KASSERT(dest->r_resources[i] >= 0,
			    ("%s: resource %d propagation meltdown: dest < 0",
			    __func__, i));
			KASSERT(src->r_resources[i] >= 0,
			    ("%s: resource %d propagation meltdown: src < 0",
			    __func__, i));
			KASSERT(src->r_resources[i] <= dest->r_resources[i],
			    ("%s: resource %d propagation meltdown: src > dest",
			    __func__, i));
		}
		if (RACCT_CAN_DROP(i)) {
			dest->r_resources[i] -= src->r_resources[i];
			if (dest->r_resources[i] < 0)
				dest->r_resources[i] = 0;
		}
	}
}

void
racct_create(struct racct **racctp)
{

	if (!racct_enable)
		return;

	SDT_PROBE1(racct, , racct, create, racctp);

	KASSERT(*racctp == NULL, ("racct already allocated"));

	*racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
}

static void
racct_destroy_locked(struct racct **racctp)
{
	struct racct *racct;
	int i;

	ASSERT_RACCT_ENABLED();

	SDT_PROBE1(racct, , racct, destroy, racctp);

	RACCT_LOCK_ASSERT();
	KASSERT(racctp != NULL, ("NULL racctp"));
	KASSERT(*racctp != NULL, ("NULL racct"));

	racct = *racctp;

	for (i = 0; i <= RACCT_MAX; i++) {
		if (RACCT_IS_SLOPPY(i))
			continue;
		if (!RACCT_IS_RECLAIMABLE(i))
			continue;
		KASSERT(racct->r_resources[i] == 0,
		    ("destroying non-empty racct: "
		    "%ju allocated for resource %d\n",
		    racct->r_resources[i], i));
	}
	uma_zfree(racct_zone, racct);
	*racctp = NULL;
}

void
racct_destroy(struct racct **racct)
{

	if (!racct_enable)
		return;

	RACCT_LOCK();
	racct_destroy_locked(racct);
	RACCT_UNLOCK();
}

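/*
 * Illustration (not part of the original sources): resources flagged
 * RACCT_IN_MILLIONS, such as RACCT_PCTCPU, are stored as percent
 * multiplied by 10^6.  The cap applied in racct_adjust_resource() below,
 * 100 * 1000000 * MAXCPU, therefore corresponds to every cpu in the
 * machine being 100% busy; on a kernel built with MAXCPU == 64 that
 * upper bound would be 6400000000.
 */
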
/*
 * Increase consumption of 'resource' by 'amount' for 'racct',
 * but not its parents.  Unlike in other cases, 'amount' here
 * may be less than zero.
 */
static void
racct_adjust_resource(struct racct *racct, int resource,
    int64_t amount)
{

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();
	KASSERT(racct != NULL, ("NULL racct"));

	racct->r_resources[resource] += amount;
	if (racct->r_resources[resource] < 0) {
		KASSERT(RACCT_IS_SLOPPY(resource) || RACCT_IS_DECAYING(resource),
		    ("%s: resource %d usage < 0", __func__, resource));
		racct->r_resources[resource] = 0;
	}

	/*
	 * There are some cases where the racct %cpu resource would grow
	 * beyond 100% per core.  For example in racct_proc_exit() we add
	 * the process %cpu usage to the ucred racct containers.  If too
	 * many processes terminated in a short time span, the ucred %cpu
	 * resource could grow too much.  Also, the 4BSD scheduler sometimes
	 * returns for a thread more than 100% cpu usage.  So we set a sane
	 * boundary here to 100% * the maximum number of CPUs.
	 */
	if ((resource == RACCT_PCTCPU) &&
	    (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000 * (int64_t)MAXCPU))
		racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 * (int64_t)MAXCPU;
}

static int
racct_add_locked(struct proc *p, int resource, uint64_t amount, int force)
{
#ifdef RCTL
	int error;
#endif

	ASSERT_RACCT_ENABLED();

	/*
	 * We need proc lock to dereference p->p_ucred.
	 */
	PROC_LOCK_ASSERT(p, MA_OWNED);

#ifdef RCTL
	error = rctl_enforce(p, resource, amount);
	if (error && !force && RACCT_IS_DENIABLE(resource)) {
		SDT_PROBE3(racct, , rusage, add__failure, p, resource, amount);
		return (error);
	}
#endif
	racct_adjust_resource(p->p_racct, resource, amount);
	racct_add_cred_locked(p->p_ucred, resource, amount);

	return (0);
}

/*
 * Increase allocation of 'resource' by 'amount' for process 'p'.
 * Return 0 if it's below limits, or errno, if it's not.
 */
int
racct_add(struct proc *p, int resource, uint64_t amount)
{
	int error;

	if (!racct_enable)
		return (0);

	SDT_PROBE3(racct, , rusage, add, p, resource, amount);

	RACCT_LOCK();
	error = racct_add_locked(p, resource, amount, 0);
	RACCT_UNLOCK();
	return (error);
}

/*
 * Increase allocation of 'resource' by 'amount' for process 'p'.
 * Doesn't check for limits and never fails.
 */
void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{

	if (!racct_enable)
		return;

	SDT_PROBE3(racct, , rusage, add__force, p, resource, amount);

	RACCT_LOCK();
	racct_add_locked(p, resource, amount, 1);
	RACCT_UNLOCK();
}

static void
racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
{
	struct prison *pr;

	ASSERT_RACCT_ENABLED();

	racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
		racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource,
		    amount);
	racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, amount);
}

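/*
 * Illustration (not part of the original sources): a single
 * racct_add(p, ...) call therefore charges four kinds of containers:
 * the process racct, the real uid's uidinfo racct, the racct of every
 * prison from the process's jail up to prison0, and the login class
 * racct.  racct_sub_cred_locked() below undoes the same set of charges.
 */
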
/*
 * Increase allocation of 'resource' by 'amount' for credential 'cred'.
 * Doesn't check for limits and never fails.
 */
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{

	if (!racct_enable)
		return;

	SDT_PROBE3(racct, , rusage, add__cred, cred, resource, amount);

	RACCT_LOCK();
	racct_add_cred_locked(cred, resource, amount);
	RACCT_UNLOCK();
}

/*
 * Account for disk IO resource consumption.  Checks for limits,
 * but never fails, due to disk limits being undeniable.
 */
void
racct_add_buf(struct proc *p, const struct buf *bp, int is_write)
{

	ASSERT_RACCT_ENABLED();
	PROC_LOCK_ASSERT(p, MA_OWNED);

	SDT_PROBE3(racct, , rusage, add__buf, p, bp, is_write);

	RACCT_LOCK();
	if (is_write) {
		racct_add_locked(curproc, RACCT_WRITEBPS, bp->b_bcount, 1);
		racct_add_locked(curproc, RACCT_WRITEIOPS, 1, 1);
	} else {
		racct_add_locked(curproc, RACCT_READBPS, bp->b_bcount, 1);
		racct_add_locked(curproc, RACCT_READIOPS, 1, 1);
	}
	RACCT_UNLOCK();
}

static int
racct_set_locked(struct proc *p, int resource, uint64_t amount, int force)
{
	int64_t old_amount, decayed_amount, diff_proc, diff_cred;
#ifdef RCTL
	int error;
#endif

	ASSERT_RACCT_ENABLED();

	/*
	 * We need proc lock to dereference p->p_ucred.
	 */
	PROC_LOCK_ASSERT(p, MA_OWNED);

	old_amount = p->p_racct->r_resources[resource];
	/*
	 * The diffs may be negative.
	 */
	diff_proc = amount - old_amount;
	if (resource == RACCT_PCTCPU) {
		/*
		 * Resources in per-credential racct containers may decay.
		 * If this is the case, we need to calculate the difference
		 * between the new amount and the proportional value of the
		 * old amount that has decayed in the ucred racct containers.
		 */
		decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE;
		diff_cred = amount - decayed_amount;
	} else
		diff_cred = diff_proc;
#ifdef notyet
	KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource),
	    ("%s: usage of non-droppable resource %d dropping", __func__,
	     resource));
#endif
#ifdef RCTL
	if (diff_proc > 0) {
		error = rctl_enforce(p, resource, diff_proc);
		if (error && !force && RACCT_IS_DENIABLE(resource)) {
			SDT_PROBE3(racct, , rusage, set__failure, p, resource,
			    amount);
			return (error);
		}
	}
#endif
	racct_adjust_resource(p->p_racct, resource, diff_proc);
	if (diff_cred > 0)
		racct_add_cred_locked(p->p_ucred, resource, diff_cred);
	else if (diff_cred < 0)
		racct_sub_cred_locked(p->p_ucred, resource, -diff_cred);

	return (0);
}

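/*
 * Illustration (not part of the original sources): suppose the
 * per-process RACCT_PCTCPU value was 40% (40000000) and the new sample
 * is 30% (30000000).  The process container simply moves by
 * diff_proc == -10000000.  The per-credential containers, however,
 * have already been decayed by racctd to 40% * 0.3 == 12%, so they are
 * adjusted by diff_cred == 30000000 - 12000000 == 18000000 instead.
 */
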
/*
 * Set allocation of 'resource' to 'amount' for process 'p'.
 * Return 0 if it's below limits, or errno, if it's not.
 *
 * Note that decreasing the allocation always returns 0,
 * even if it's above the limit.
 */
int
racct_set_unlocked(struct proc *p, int resource, uint64_t amount)
{
	int error;

	ASSERT_RACCT_ENABLED();
	PROC_LOCK(p);
	error = racct_set(p, resource, amount);
	PROC_UNLOCK(p);
	return (error);
}

int
racct_set(struct proc *p, int resource, uint64_t amount)
{
	int error;

	if (!racct_enable)
		return (0);

	SDT_PROBE3(racct, , rusage, set__force, p, resource, amount);

	RACCT_LOCK();
	error = racct_set_locked(p, resource, amount, 0);
	RACCT_UNLOCK();
	return (error);
}

void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{

	if (!racct_enable)
		return;

	SDT_PROBE3(racct, , rusage, set, p, resource, amount);

	RACCT_LOCK();
	racct_set_locked(p, resource, amount, 1);
	RACCT_UNLOCK();
}

/*
 * Returns amount of 'resource' the process 'p' can keep allocated.
 * Allocating more than that would be denied, unless the resource
 * is marked undeniable.  Amount of already allocated resource does
 * not matter.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{
#ifdef RCTL
	uint64_t available;

	if (!racct_enable)
		return (UINT64_MAX);

	RACCT_LOCK();
	available = rctl_get_limit(p, resource);
	RACCT_UNLOCK();

	return (available);
#else

	return (UINT64_MAX);
#endif
}

/*
 * Returns amount of 'resource' the process 'p' can keep allocated.
 * Allocating more than that would be denied, unless the resource
 * is marked undeniable.  Amount of already allocated resource does
 * matter.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{
#ifdef RCTL
	uint64_t available;

	if (!racct_enable)
		return (UINT64_MAX);

	RACCT_LOCK();
	available = rctl_get_available(p, resource);
	RACCT_UNLOCK();

	return (available);
#else

	return (UINT64_MAX);
#endif
}

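/*
 * Illustration (not part of the original sources): with a hypothetical
 * rctl rule limiting a process to 100 open files while 40 are already
 * open, racct_get_limit(p, RACCT_NOFILE) reports 100, whereas
 * racct_get_available(p, RACCT_NOFILE) reports 60.  Without the RCTL
 * option both report UINT64_MAX, i.e. "no limit".
 */
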
/*
 * Returns amount of the %cpu resource that process 'p' can add to its %cpu
 * utilization.  Adding more than that would lead to the process being
 * throttled.
 */
static int64_t
racct_pcpu_available(struct proc *p)
{
#ifdef RCTL
	uint64_t available;

	ASSERT_RACCT_ENABLED();

	RACCT_LOCK();
	available = rctl_pcpu_available(p);
	RACCT_UNLOCK();

	return (available);
#else

	return (INT64_MAX);
#endif
}

/*
 * Decrease allocation of 'resource' by 'amount' for process 'p'.
 */
void
racct_sub(struct proc *p, int resource, uint64_t amount)
{

	if (!racct_enable)
		return;

	SDT_PROBE3(racct, , rusage, sub, p, resource, amount);

	/*
	 * We need proc lock to dereference p->p_ucred.
	 */
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(RACCT_CAN_DROP(resource),
	    ("%s: called for non-droppable resource %d", __func__, resource));

	RACCT_LOCK();
	KASSERT(amount <= p->p_racct->r_resources[resource],
	    ("%s: freeing %ju of resource %d, which is more "
	     "than allocated %jd for %s (pid %d)", __func__, amount, resource,
	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));

	racct_adjust_resource(p->p_racct, resource, -amount);
	racct_sub_cred_locked(p->p_ucred, resource, amount);
	RACCT_UNLOCK();
}

static void
racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
{
	struct prison *pr;

	ASSERT_RACCT_ENABLED();

	racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
		racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource,
		    -amount);
	racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, -amount);
}

/*
 * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
 */
void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{

	if (!racct_enable)
		return;

	SDT_PROBE3(racct, , rusage, sub__cred, cred, resource, amount);

#ifdef notyet
	KASSERT(RACCT_CAN_DROP(resource),
	    ("%s: called for resource %d which can not drop", __func__,
	     resource));
#endif

	RACCT_LOCK();
	racct_sub_cred_locked(cred, resource, amount);
	RACCT_UNLOCK();
}

/*
 * Inherit resource usage information from the parent process.
 */
int
racct_proc_fork(struct proc *parent, struct proc *child)
{
	int i, error = 0;

	if (!racct_enable)
		return (0);

	/*
	 * Create racct for the child process.
	 */
	racct_create(&child->p_racct);

	PROC_LOCK(parent);
	PROC_LOCK(child);
	RACCT_LOCK();

#ifdef RCTL
	error = rctl_proc_fork(parent, child);
	if (error != 0)
		goto out;
#endif

	/* Init process cpu time. */
	child->p_prev_runtime = 0;
	child->p_throttled = 0;

	/*
	 * Inherit resource usage.
	 */
	for (i = 0; i <= RACCT_MAX; i++) {
		if (parent->p_racct->r_resources[i] == 0 ||
		    !RACCT_IS_INHERITABLE(i))
			continue;

		error = racct_set_locked(child, i,
		    parent->p_racct->r_resources[i], 0);
		if (error != 0)
			goto out;
	}

	error = racct_add_locked(child, RACCT_NPROC, 1, 0);
	error += racct_add_locked(child, RACCT_NTHR, 1, 0);

out:
	RACCT_UNLOCK();
	PROC_UNLOCK(child);
	PROC_UNLOCK(parent);

	if (error != 0)
		racct_proc_exit(child);

	return (error);
}

/*
 * Called at the end of fork1(), to handle rules that require the process
 * to be fully initialized.
 */
void
racct_proc_fork_done(struct proc *child)
{

	if (!racct_enable)
		return;

#ifdef RCTL
	PROC_LOCK(child);
	RACCT_LOCK();
	rctl_enforce(child, RACCT_NPROC, 0);
	rctl_enforce(child, RACCT_NTHR, 0);
	RACCT_UNLOCK();
	PROC_UNLOCK(child);
#endif
}

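/*
 * Illustration (not part of the original sources): after a successful
 * fork the child starts out charged with copies of the parent's
 * RACCT_INHERITABLE resources (data, stack, open files, vmem, ...),
 * plus one RACCT_NPROC and one RACCT_NTHR of its own.  If rctl denies
 * any of these charges, racct_proc_fork() returns an error so the
 * caller can abort the fork.
 */
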
void
racct_proc_exit(struct proc *p)
{
	struct timeval wallclock;
	uint64_t pct_estimate, pct, runtime;
	int i;

	if (!racct_enable)
		return;

	PROC_LOCK(p);
	/*
	 * We don't need to calculate rux, proc_reap() has already done this.
	 */
	runtime = cputick2usec(p->p_rux.rux_runtime);
#ifdef notyet
	KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
#else
	if (runtime < p->p_prev_runtime)
		runtime = p->p_prev_runtime;
#endif
	microuptime(&wallclock);
	timevalsub(&wallclock, &p->p_stats->p_start);
	if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
		pct_estimate = (1000000 * runtime * 100) /
		    ((uint64_t)wallclock.tv_sec * 1000000 +
		    wallclock.tv_usec);
	} else
		pct_estimate = 0;
	pct = racct_getpcpu(p, pct_estimate);

	RACCT_LOCK();
	racct_set_locked(p, RACCT_CPU, runtime, 0);
	racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct);

	KASSERT(p->p_racct->r_resources[RACCT_RSS] == 0,
	    ("process reaped with %ju allocated for RSS\n",
	    p->p_racct->r_resources[RACCT_RSS]));
	for (i = 0; i <= RACCT_MAX; i++) {
		if (p->p_racct->r_resources[i] == 0)
			continue;
		if (!RACCT_IS_RECLAIMABLE(i))
			continue;
		racct_set_locked(p, i, 0, 0);
	}

#ifdef RCTL
	rctl_racct_release(p->p_racct);
#endif
	racct_destroy_locked(&p->p_racct);
	RACCT_UNLOCK();
	PROC_UNLOCK(p);
}

/*
 * Called after credentials change, to move resource utilisation
 * between raccts.
 */
void
racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
    struct ucred *newcred)
{
	struct uidinfo *olduip, *newuip;
	struct loginclass *oldlc, *newlc;
	struct prison *oldpr, *newpr, *pr;

	if (!racct_enable)
		return;

	PROC_LOCK_ASSERT(p, MA_OWNED);

	newuip = newcred->cr_ruidinfo;
	olduip = oldcred->cr_ruidinfo;
	newlc = newcred->cr_loginclass;
	oldlc = oldcred->cr_loginclass;
	newpr = newcred->cr_prison;
	oldpr = oldcred->cr_prison;

	RACCT_LOCK();
	if (newuip != olduip) {
		racct_sub_racct(olduip->ui_racct, p->p_racct);
		racct_add_racct(newuip->ui_racct, p->p_racct);
	}
	if (newlc != oldlc) {
		racct_sub_racct(oldlc->lc_racct, p->p_racct);
		racct_add_racct(newlc->lc_racct, p->p_racct);
	}
	if (newpr != oldpr) {
		for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
			racct_sub_racct(pr->pr_prison_racct->prr_racct,
			    p->p_racct);
		for (pr = newpr; pr != NULL; pr = pr->pr_parent)
			racct_add_racct(pr->pr_prison_racct->prr_racct,
			    p->p_racct);
	}
	RACCT_UNLOCK();
}

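/*
 * Illustration (not part of the original sources): when a process
 * changes credentials, e.g. via setuid(2) or by attaching to a jail,
 * its current usage is subtracted from the containers that no longer
 * apply (old uid, login class, prison chain) and added to the new ones,
 * so the per-uid, per-class and per-jail totals stay accurate.
 */
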
void
racct_move(struct racct *dest, struct racct *src)
{

	ASSERT_RACCT_ENABLED();

	RACCT_LOCK();
	racct_add_racct(dest, src);
	racct_sub_racct(src, src);
	RACCT_UNLOCK();
}

static void
ast_racct(struct thread *td, int tda __unused)
{
	struct proc *p;

	ASSERT_RACCT_ENABLED();

	p = td->td_proc;
	if (p->p_throttled == 0)
		return;

	PROC_LOCK(p);
	while (p->p_throttled != 0) {
		msleep(p->p_racct, &p->p_mtx, 0, "racct",
		    p->p_throttled < 0 ? 0 : p->p_throttled);
		if (p->p_throttled > 0)
			p->p_throttled = 0;
	}
	PROC_UNLOCK(p);
}

/*
 * Make the process sleep in userret() for 'timeout' ticks.  Setting
 * timeout to -1 makes it sleep until woken up by racct_proc_wakeup().
 */
void
racct_proc_throttle(struct proc *p, int timeout)
{
	struct thread *td;
#ifdef SMP
	int cpuid;
#endif

	KASSERT(timeout != 0, ("timeout %d", timeout));
	ASSERT_RACCT_ENABLED();
	PROC_LOCK_ASSERT(p, MA_OWNED);

	/*
	 * Do not block kernel processes.  Also do not block processes with
	 * low %cpu utilization to improve interactivity.
	 */
	if ((p->p_flag & (P_SYSTEM | P_KPROC)) != 0)
		return;

	if (p->p_throttled < 0 || (timeout > 0 && p->p_throttled > timeout))
		return;

	p->p_throttled = timeout;

	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		ast_sched_locked(td, TDA_RACCT);

		switch (TD_GET_STATE(td)) {
		case TDS_RUNQ:
			/*
			 * If the thread is on the scheduler run-queue, we can
			 * not just remove it from there.  So we set the flag
			 * TDA_SCHED for the thread, so that once it is
			 * running, it is taken off the cpu as soon as possible.
			 */
			ast_sched_locked(td, TDA_SCHED);
			break;
		case TDS_RUNNING:
			/*
			 * If the thread is running, we request a context
			 * switch for it by setting the TDA_SCHED flag.
			 */
			ast_sched_locked(td, TDA_SCHED);
#ifdef SMP
			cpuid = td->td_oncpu;
			if ((cpuid != NOCPU) && (td != curthread))
				ipi_cpu(cpuid, IPI_AST);
#endif
			break;
		default:
			break;
		}
		thread_unlock(td);
	}
}

static void
racct_proc_wakeup(struct proc *p)
{

	ASSERT_RACCT_ENABLED();

	PROC_LOCK_ASSERT(p, MA_OWNED);

	if (p->p_throttled != 0) {
		p->p_throttled = 0;
		wakeup(p->p_racct);
	}
}

static void
racct_decay_callback(struct racct *racct, void *dummy1, void *dummy2)
{
	int64_t r_old, r_new;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

#ifdef RCTL
	rctl_throttle_decay(racct, RACCT_READBPS);
	rctl_throttle_decay(racct, RACCT_WRITEBPS);
	rctl_throttle_decay(racct, RACCT_READIOPS);
	rctl_throttle_decay(racct, RACCT_WRITEIOPS);
#endif

	r_old = racct->r_resources[RACCT_PCTCPU];

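	/*
	 * Illustration (not part of the original sources): with
	 * RACCT_DECAY_FACTOR equal to 0.3 * FSCALE, a per-credential %cpu
	 * value of 30% (30000000) decays to 9% (9000000) on each pass of
	 * racctd(); racct_set_locked() then tops the container back up by
	 * diff_cred for processes that are still running.
	 */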
	/* If there is nothing to decay, just exit. */
	if (r_old <= 0)
		return;

	r_new = r_old * RACCT_DECAY_FACTOR / FSCALE;
	racct->r_resources[RACCT_PCTCPU] = r_new;
}

static void
racct_decay_pre(void)
{

	RACCT_LOCK();
}

static void
racct_decay_post(void)
{

	RACCT_UNLOCK();
}

static void
racct_decay(void)
{

	ASSERT_RACCT_ENABLED();

	ui_racct_foreach(racct_decay_callback, racct_decay_pre,
	    racct_decay_post, NULL, NULL);
	loginclass_racct_foreach(racct_decay_callback, racct_decay_pre,
	    racct_decay_post, NULL, NULL);
	prison_racct_foreach(racct_decay_callback, racct_decay_pre,
	    racct_decay_post, NULL, NULL);
}

static void
racctd(void)
{
	struct thread *td;
	struct proc *p;
	struct timeval wallclock;
	uint64_t pct, pct_estimate, runtime;

	ASSERT_RACCT_ENABLED();

	for (;;) {
		racct_decay();

		sx_slock(&allproc_lock);

		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state != PRS_NORMAL) {
				if (p->p_state == PRS_ZOMBIE)
					racct_set(p, RACCT_PCTCPU, 0);
				PROC_UNLOCK(p);
				continue;
			}

			microuptime(&wallclock);
			timevalsub(&wallclock, &p->p_stats->p_start);
			PROC_STATLOCK(p);
			FOREACH_THREAD_IN_PROC(p, td)
				ruxagg(p, td);
			runtime = cputick2usec(p->p_rux.rux_runtime);
			PROC_STATUNLOCK(p);
#ifdef notyet
			KASSERT(runtime >= p->p_prev_runtime,
			    ("runtime < p_prev_runtime"));
#else
			if (runtime < p->p_prev_runtime)
				runtime = p->p_prev_runtime;
#endif
			p->p_prev_runtime = runtime;
			if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
				pct_estimate = (1000000 * runtime * 100) /
				    ((uint64_t)wallclock.tv_sec * 1000000 +
				    wallclock.tv_usec);
			} else
				pct_estimate = 0;
			pct = racct_getpcpu(p, pct_estimate);
			RACCT_LOCK();
#ifdef RCTL
			rctl_throttle_decay(p->p_racct, RACCT_READBPS);
			rctl_throttle_decay(p->p_racct, RACCT_WRITEBPS);
			rctl_throttle_decay(p->p_racct, RACCT_READIOPS);
			rctl_throttle_decay(p->p_racct, RACCT_WRITEIOPS);
#endif
			racct_set_locked(p, RACCT_PCTCPU, pct, 1);
			racct_set_locked(p, RACCT_CPU, runtime, 0);
			racct_set_locked(p, RACCT_WALLCLOCK,
			    (uint64_t)wallclock.tv_sec * 1000000 +
			    wallclock.tv_usec, 0);
			RACCT_UNLOCK();
			PROC_UNLOCK(p);
		}

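		/*
		 * Illustration (not part of the original sources): assume an
		 * rctl rule caps a user at 50% cpu and one of their processes
		 * is currently using 75%.  On this pass racct_pcpu_available()
		 * for that process is negative, its RACCT_PCTCPU is above
		 * pcpu_threshold, and it gets throttled with a timeout of -1;
		 * once later passes see the decayed usage drop back under the
		 * limit, racct_proc_wakeup() releases it.
		 */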
		/*
		 * To ensure that processes are throttled in a fair way, we need
		 * to iterate over all processes again and check the limits
		 * for %cpu resource only after ucred racct containers have been
		 * properly filled.
		 */
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state != PRS_NORMAL) {
				PROC_UNLOCK(p);
				continue;
			}

			if (racct_pcpu_available(p) <= 0) {
				if (p->p_racct->r_resources[RACCT_PCTCPU] >
				    pcpu_threshold)
					racct_proc_throttle(p, -1);
			} else if (p->p_throttled == -1) {
				racct_proc_wakeup(p);
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		pause("-", hz);
	}
}

static struct kproc_desc racctd_kp = {
	"racctd",
	racctd,
	NULL
};

static void
racctd_init(void)
{
	if (!racct_enable)
		return;

	kproc_start(&racctd_kp);
}
SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL);

static void
racct_init(void)
{
	if (!racct_enable)
		return;

	racct_zone = uma_zcreate("racct", sizeof(struct racct),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	ast_register(TDA_RACCT, ASTR_ASTF_REQUIRED, 0, ast_racct);

	/*
	 * XXX: Move this somewhere.
	 */
	prison0.pr_prison_racct = prison_racct_find("0");
}
SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
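
/*
 * Usage sketch (not part of the original sources): a typical kernel
 * consumer brackets its allocations with the functions above, e.g.
 *
 *	PROC_LOCK(p);
 *	error = racct_add(p, RACCT_NOFILE, 1);
 *	PROC_UNLOCK(p);
 *	if (error != 0)
 *		return (EMFILE);
 *	...
 *	PROC_LOCK(p);
 *	racct_sub(p, RACCT_NOFILE, 1);
 *	PROC_UNLOCK(p);
 *
 * racct_add() can fail only for RACCT_DENIABLE resources when RCTL is
 * compiled in; with racct disabled it always returns 0.
 */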