1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2010 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
29 * 30 * $FreeBSD$ 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 #include <sys/param.h> 37 #include <sys/devctl.h> 38 #include <sys/malloc.h> 39 #include <sys/queue.h> 40 #include <sys/refcount.h> 41 #include <sys/jail.h> 42 #include <sys/kernel.h> 43 #include <sys/limits.h> 44 #include <sys/loginclass.h> 45 #include <sys/priv.h> 46 #include <sys/proc.h> 47 #include <sys/racct.h> 48 #include <sys/rctl.h> 49 #include <sys/resourcevar.h> 50 #include <sys/sx.h> 51 #include <sys/sysproto.h> 52 #include <sys/systm.h> 53 #include <sys/types.h> 54 #include <sys/eventhandler.h> 55 #include <sys/lock.h> 56 #include <sys/mutex.h> 57 #include <sys/rwlock.h> 58 #include <sys/sbuf.h> 59 #include <sys/taskqueue.h> 60 #include <sys/tree.h> 61 #include <vm/uma.h> 62 63 #ifdef RCTL 64 #ifndef RACCT 65 #error "The RCTL option requires the RACCT option" 66 #endif 67 68 FEATURE(rctl, "Resource Limits"); 69 70 #define HRF_DEFAULT 0 71 #define HRF_DONT_INHERIT 1 72 #define HRF_DONT_ACCUMULATE 2 73 74 #define RCTL_MAX_INBUFSIZE 4 * 1024 75 #define RCTL_MAX_OUTBUFSIZE 16 * 1024 * 1024 76 #define RCTL_LOG_BUFSIZE 128 77 78 #define RCTL_PCPU_SHIFT (10 * 1000000) 79 80 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE; 81 static int rctl_log_rate_limit = 10; 82 static int rctl_devctl_rate_limit = 10; 83 84 /* 85 * Values below are initialized in rctl_init(). 
86 */ 87 static int rctl_throttle_min = -1; 88 static int rctl_throttle_max = -1; 89 static int rctl_throttle_pct = -1; 90 static int rctl_throttle_pct2 = -1; 91 92 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS); 93 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS); 94 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS); 95 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS); 96 97 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 98 "Resource Limits"); 99 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN, 100 &rctl_maxbufsize, 0, "Maximum output buffer size"); 101 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW, 102 &rctl_log_rate_limit, 0, "Maximum number of log messages per second"); 103 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN, 104 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second"); 105 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min, 106 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 107 &rctl_throttle_min_sysctl, "IU", 108 "Shortest throttling duration, in hz"); 109 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min); 110 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max, 111 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 112 &rctl_throttle_max_sysctl, "IU", 113 "Longest throttling duration, in hz"); 114 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max); 115 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct, 116 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 117 &rctl_throttle_pct_sysctl, "IU", 118 "Throttling penalty for process consumption, in percent"); 119 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct); 120 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2, 121 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 122 &rctl_throttle_pct2_sysctl, "IU", 123 "Throttling penalty for container consumption, in percent"); 124 
TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2); 125 126 /* 127 * 'rctl_rule_link' connects a rule with every racct it's related to. 128 * For example, rule 'user:X:openfiles:deny=N/process' is linked 129 * with uidinfo for user X, and to each process of that user. 130 */ 131 struct rctl_rule_link { 132 LIST_ENTRY(rctl_rule_link) rrl_next; 133 struct rctl_rule *rrl_rule; 134 int rrl_exceeded; 135 }; 136 137 struct dict { 138 const char *d_name; 139 int d_value; 140 }; 141 142 static struct dict subjectnames[] = { 143 { "process", RCTL_SUBJECT_TYPE_PROCESS }, 144 { "user", RCTL_SUBJECT_TYPE_USER }, 145 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS }, 146 { "jail", RCTL_SUBJECT_TYPE_JAIL }, 147 { NULL, -1 }}; 148 149 static struct dict resourcenames[] = { 150 { "cputime", RACCT_CPU }, 151 { "datasize", RACCT_DATA }, 152 { "stacksize", RACCT_STACK }, 153 { "coredumpsize", RACCT_CORE }, 154 { "memoryuse", RACCT_RSS }, 155 { "memorylocked", RACCT_MEMLOCK }, 156 { "maxproc", RACCT_NPROC }, 157 { "openfiles", RACCT_NOFILE }, 158 { "vmemoryuse", RACCT_VMEM }, 159 { "pseudoterminals", RACCT_NPTS }, 160 { "swapuse", RACCT_SWAP }, 161 { "nthr", RACCT_NTHR }, 162 { "msgqqueued", RACCT_MSGQQUEUED }, 163 { "msgqsize", RACCT_MSGQSIZE }, 164 { "nmsgq", RACCT_NMSGQ }, 165 { "nsem", RACCT_NSEM }, 166 { "nsemop", RACCT_NSEMOP }, 167 { "nshm", RACCT_NSHM }, 168 { "shmsize", RACCT_SHMSIZE }, 169 { "wallclock", RACCT_WALLCLOCK }, 170 { "pcpu", RACCT_PCTCPU }, 171 { "readbps", RACCT_READBPS }, 172 { "writebps", RACCT_WRITEBPS }, 173 { "readiops", RACCT_READIOPS }, 174 { "writeiops", RACCT_WRITEIOPS }, 175 { NULL, -1 }}; 176 177 static struct dict actionnames[] = { 178 { "sighup", RCTL_ACTION_SIGHUP }, 179 { "sigint", RCTL_ACTION_SIGINT }, 180 { "sigquit", RCTL_ACTION_SIGQUIT }, 181 { "sigill", RCTL_ACTION_SIGILL }, 182 { "sigtrap", RCTL_ACTION_SIGTRAP }, 183 { "sigabrt", RCTL_ACTION_SIGABRT }, 184 { "sigemt", RCTL_ACTION_SIGEMT }, 185 { "sigfpe", RCTL_ACTION_SIGFPE }, 
186 { "sigkill", RCTL_ACTION_SIGKILL }, 187 { "sigbus", RCTL_ACTION_SIGBUS }, 188 { "sigsegv", RCTL_ACTION_SIGSEGV }, 189 { "sigsys", RCTL_ACTION_SIGSYS }, 190 { "sigpipe", RCTL_ACTION_SIGPIPE }, 191 { "sigalrm", RCTL_ACTION_SIGALRM }, 192 { "sigterm", RCTL_ACTION_SIGTERM }, 193 { "sigurg", RCTL_ACTION_SIGURG }, 194 { "sigstop", RCTL_ACTION_SIGSTOP }, 195 { "sigtstp", RCTL_ACTION_SIGTSTP }, 196 { "sigchld", RCTL_ACTION_SIGCHLD }, 197 { "sigttin", RCTL_ACTION_SIGTTIN }, 198 { "sigttou", RCTL_ACTION_SIGTTOU }, 199 { "sigio", RCTL_ACTION_SIGIO }, 200 { "sigxcpu", RCTL_ACTION_SIGXCPU }, 201 { "sigxfsz", RCTL_ACTION_SIGXFSZ }, 202 { "sigvtalrm", RCTL_ACTION_SIGVTALRM }, 203 { "sigprof", RCTL_ACTION_SIGPROF }, 204 { "sigwinch", RCTL_ACTION_SIGWINCH }, 205 { "siginfo", RCTL_ACTION_SIGINFO }, 206 { "sigusr1", RCTL_ACTION_SIGUSR1 }, 207 { "sigusr2", RCTL_ACTION_SIGUSR2 }, 208 { "sigthr", RCTL_ACTION_SIGTHR }, 209 { "deny", RCTL_ACTION_DENY }, 210 { "log", RCTL_ACTION_LOG }, 211 { "devctl", RCTL_ACTION_DEVCTL }, 212 { "throttle", RCTL_ACTION_THROTTLE }, 213 { NULL, -1 }}; 214 215 static void rctl_init(void); 216 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL); 217 218 static uma_zone_t rctl_rule_zone; 219 static uma_zone_t rctl_rule_link_zone; 220 221 static int rctl_rule_fully_specified(const struct rctl_rule *rule); 222 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule); 223 224 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits"); 225 226 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS) 227 { 228 int error, val = rctl_throttle_min; 229 230 error = sysctl_handle_int(oidp, &val, 0, req); 231 if (error || !req->newptr) 232 return (error); 233 if (val < 1 || val > rctl_throttle_max) 234 return (EINVAL); 235 236 RACCT_LOCK(); 237 rctl_throttle_min = val; 238 RACCT_UNLOCK(); 239 240 return (0); 241 } 242 243 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS) 244 { 245 int error, val = rctl_throttle_max; 246 247 error = 
sysctl_handle_int(oidp, &val, 0, req); 248 if (error || !req->newptr) 249 return (error); 250 if (val < rctl_throttle_min) 251 return (EINVAL); 252 253 RACCT_LOCK(); 254 rctl_throttle_max = val; 255 RACCT_UNLOCK(); 256 257 return (0); 258 } 259 260 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS) 261 { 262 int error, val = rctl_throttle_pct; 263 264 error = sysctl_handle_int(oidp, &val, 0, req); 265 if (error || !req->newptr) 266 return (error); 267 if (val < 0) 268 return (EINVAL); 269 270 RACCT_LOCK(); 271 rctl_throttle_pct = val; 272 RACCT_UNLOCK(); 273 274 return (0); 275 } 276 277 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS) 278 { 279 int error, val = rctl_throttle_pct2; 280 281 error = sysctl_handle_int(oidp, &val, 0, req); 282 if (error || !req->newptr) 283 return (error); 284 if (val < 0) 285 return (EINVAL); 286 287 RACCT_LOCK(); 288 rctl_throttle_pct2 = val; 289 RACCT_UNLOCK(); 290 291 return (0); 292 } 293 294 static const char * 295 rctl_subject_type_name(int subject) 296 { 297 int i; 298 299 for (i = 0; subjectnames[i].d_name != NULL; i++) { 300 if (subjectnames[i].d_value == subject) 301 return (subjectnames[i].d_name); 302 } 303 304 panic("rctl_subject_type_name: unknown subject type %d", subject); 305 } 306 307 static const char * 308 rctl_action_name(int action) 309 { 310 int i; 311 312 for (i = 0; actionnames[i].d_name != NULL; i++) { 313 if (actionnames[i].d_value == action) 314 return (actionnames[i].d_name); 315 } 316 317 panic("rctl_action_name: unknown action %d", action); 318 } 319 320 const char * 321 rctl_resource_name(int resource) 322 { 323 int i; 324 325 for (i = 0; resourcenames[i].d_name != NULL; i++) { 326 if (resourcenames[i].d_value == resource) 327 return (resourcenames[i].d_name); 328 } 329 330 panic("rctl_resource_name: unknown resource %d", resource); 331 } 332 333 static struct racct * 334 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule) 335 { 336 struct ucred *cred = p->p_ucred; 
337 338 ASSERT_RACCT_ENABLED(); 339 RACCT_LOCK_ASSERT(); 340 341 switch (rule->rr_per) { 342 case RCTL_SUBJECT_TYPE_PROCESS: 343 return (p->p_racct); 344 case RCTL_SUBJECT_TYPE_USER: 345 return (cred->cr_ruidinfo->ui_racct); 346 case RCTL_SUBJECT_TYPE_LOGINCLASS: 347 return (cred->cr_loginclass->lc_racct); 348 case RCTL_SUBJECT_TYPE_JAIL: 349 return (cred->cr_prison->pr_prison_racct->prr_racct); 350 default: 351 panic("%s: unknown per %d", __func__, rule->rr_per); 352 } 353 } 354 355 /* 356 * Return the amount of resource that can be allocated by 'p' before 357 * hitting 'rule'. 358 */ 359 static int64_t 360 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule) 361 { 362 const struct racct *racct; 363 int64_t available; 364 365 ASSERT_RACCT_ENABLED(); 366 RACCT_LOCK_ASSERT(); 367 368 racct = rctl_proc_rule_to_racct(p, rule); 369 available = rule->rr_amount - racct->r_resources[rule->rr_resource]; 370 371 return (available); 372 } 373 374 /* 375 * Called every second for proc, uidinfo, loginclass, and jail containers. 376 * If the limit isn't exceeded, it decreases the usage amount to zero. 377 * Otherwise, it decreases it by the value of the limit. This way 378 * resource consumption exceeding the limit "carries over" to the next 379 * period. 
380 */ 381 void 382 rctl_throttle_decay(struct racct *racct, int resource) 383 { 384 struct rctl_rule *rule; 385 struct rctl_rule_link *link; 386 int64_t minavailable; 387 388 ASSERT_RACCT_ENABLED(); 389 RACCT_LOCK_ASSERT(); 390 391 minavailable = INT64_MAX; 392 393 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 394 rule = link->rrl_rule; 395 396 if (rule->rr_resource != resource) 397 continue; 398 if (rule->rr_action != RCTL_ACTION_THROTTLE) 399 continue; 400 401 if (rule->rr_amount < minavailable) 402 minavailable = rule->rr_amount; 403 } 404 405 if (racct->r_resources[resource] < minavailable) { 406 racct->r_resources[resource] = 0; 407 } else { 408 /* 409 * Cap utilization counter at ten times the limit. Otherwise, 410 * if we changed the rule lowering the allowed amount, it could 411 * take unreasonably long time for the accumulated resource 412 * usage to drop. 413 */ 414 if (racct->r_resources[resource] > minavailable * 10) 415 racct->r_resources[resource] = minavailable * 10; 416 417 racct->r_resources[resource] -= minavailable; 418 } 419 } 420 421 /* 422 * Special version of rctl_get_available() for the %CPU resource. 423 * We slightly cheat here and return less than we normally would. 424 */ 425 int64_t 426 rctl_pcpu_available(const struct proc *p) { 427 struct rctl_rule *rule; 428 struct rctl_rule_link *link; 429 int64_t available, minavailable, limit; 430 431 ASSERT_RACCT_ENABLED(); 432 RACCT_LOCK_ASSERT(); 433 434 minavailable = INT64_MAX; 435 limit = 0; 436 437 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 438 rule = link->rrl_rule; 439 if (rule->rr_resource != RACCT_PCTCPU) 440 continue; 441 if (rule->rr_action != RCTL_ACTION_DENY) 442 continue; 443 available = rctl_available_resource(p, rule); 444 if (available < minavailable) { 445 minavailable = available; 446 limit = rule->rr_amount; 447 } 448 } 449 450 /* 451 * Return slightly less than actual value of the available 452 * %cpu resource. 
This makes %cpu throttling more aggressive 453 * and lets us act sooner than the limits are already exceeded. 454 */ 455 if (limit != 0) { 456 if (limit > 2 * RCTL_PCPU_SHIFT) 457 minavailable -= RCTL_PCPU_SHIFT; 458 else 459 minavailable -= (limit / 2); 460 } 461 462 return (minavailable); 463 } 464 465 static uint64_t 466 xadd(uint64_t a, uint64_t b) 467 { 468 uint64_t c; 469 470 c = a + b; 471 472 /* 473 * Detect overflow. 474 */ 475 if (c < a || c < b) 476 return (UINT64_MAX); 477 478 return (c); 479 } 480 481 static uint64_t 482 xmul(uint64_t a, uint64_t b) 483 { 484 485 if (b != 0 && a > UINT64_MAX / b) 486 return (UINT64_MAX); 487 488 return (a * b); 489 } 490 491 /* 492 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition 493 * to what it keeps allocated now. Returns non-zero if the allocation should 494 * be denied, 0 otherwise. 495 */ 496 int 497 rctl_enforce(struct proc *p, int resource, uint64_t amount) 498 { 499 static struct timeval log_lasttime, devctl_lasttime; 500 static int log_curtime = 0, devctl_curtime = 0; 501 struct rctl_rule *rule; 502 struct rctl_rule_link *link; 503 struct sbuf sb; 504 char *buf; 505 int64_t available; 506 uint64_t sleep_ms, sleep_ratio; 507 int should_deny = 0; 508 509 ASSERT_RACCT_ENABLED(); 510 RACCT_LOCK_ASSERT(); 511 512 /* 513 * There may be more than one matching rule; go through all of them. 514 * Denial should be done last, after logging and sending signals. 515 */ 516 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 517 rule = link->rrl_rule; 518 if (rule->rr_resource != resource) 519 continue; 520 521 available = rctl_available_resource(p, rule); 522 if (available >= (int64_t)amount) { 523 link->rrl_exceeded = 0; 524 continue; 525 } 526 527 switch (rule->rr_action) { 528 case RCTL_ACTION_DENY: 529 should_deny = 1; 530 continue; 531 case RCTL_ACTION_LOG: 532 /* 533 * If rrl_exceeded != 0, it means we've already 534 * logged a warning for this process. 
535 */ 536 if (link->rrl_exceeded != 0) 537 continue; 538 539 /* 540 * If the process state is not fully initialized yet, 541 * we can't access most of the required fields, e.g. 542 * p->p_comm. This happens when called from fork1(). 543 * Ignore this rule for now; it will be processed just 544 * after fork, when called from racct_proc_fork_done(). 545 */ 546 if (p->p_state != PRS_NORMAL) 547 continue; 548 549 if (!ppsratecheck(&log_lasttime, &log_curtime, 550 rctl_log_rate_limit)) 551 continue; 552 553 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 554 if (buf == NULL) { 555 printf("rctl_enforce: out of memory\n"); 556 continue; 557 } 558 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 559 rctl_rule_to_sbuf(&sb, rule); 560 sbuf_finish(&sb); 561 printf("rctl: rule \"%s\" matched by pid %d " 562 "(%s), uid %d, jail %s\n", sbuf_data(&sb), 563 p->p_pid, p->p_comm, p->p_ucred->cr_uid, 564 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 565 sbuf_delete(&sb); 566 free(buf, M_RCTL); 567 link->rrl_exceeded = 1; 568 continue; 569 case RCTL_ACTION_DEVCTL: 570 if (link->rrl_exceeded != 0) 571 continue; 572 573 if (p->p_state != PRS_NORMAL) 574 continue; 575 576 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime, 577 rctl_devctl_rate_limit)) 578 continue; 579 580 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 581 if (buf == NULL) { 582 printf("rctl_enforce: out of memory\n"); 583 continue; 584 } 585 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 586 sbuf_printf(&sb, "rule="); 587 rctl_rule_to_sbuf(&sb, rule); 588 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s", 589 p->p_pid, p->p_ucred->cr_ruid, 590 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 591 sbuf_finish(&sb); 592 devctl_notify("RCTL", "rule", "matched", 593 sbuf_data(&sb)); 594 sbuf_delete(&sb); 595 free(buf, M_RCTL); 596 link->rrl_exceeded = 1; 597 continue; 598 case RCTL_ACTION_THROTTLE: 599 if (p->p_state != PRS_NORMAL) 600 continue; 601 602 if (rule->rr_amount == 0) { 603 racct_proc_throttle(p, 
rctl_throttle_max); 604 continue; 605 } 606 607 /* 608 * Make the process sleep for a fraction of second 609 * proportional to the ratio of process' resource 610 * utilization compared to the limit. The point is 611 * to penalize resource hogs: processes that consume 612 * more of the available resources sleep for longer. 613 * 614 * We're trying to defer division until the very end, 615 * to minimize the rounding effects. The following 616 * calculation could have been written in a clearer 617 * way like this: 618 * 619 * sleep_ms = hz * p->p_racct->r_resources[resource] / 620 * rule->rr_amount; 621 * sleep_ms *= rctl_throttle_pct / 100; 622 * if (sleep_ms < rctl_throttle_min) 623 * sleep_ms = rctl_throttle_min; 624 * 625 */ 626 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]); 627 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100; 628 if (sleep_ms < rctl_throttle_min * rule->rr_amount) 629 sleep_ms = rctl_throttle_min * rule->rr_amount; 630 631 /* 632 * Multiply that by the ratio of the resource 633 * consumption for the container compared to the limit, 634 * squared. In other words, a process in a container 635 * that is two times over the limit will be throttled 636 * four times as much for hitting the same rule. The 637 * point is to penalize processes more if the container 638 * itself (eg certain UID or jail) is above the limit. 639 */ 640 if (available < 0) 641 sleep_ratio = -available / rule->rr_amount; 642 else 643 sleep_ratio = 0; 644 sleep_ratio = xmul(sleep_ratio, sleep_ratio); 645 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100; 646 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio)); 647 648 /* 649 * Finally the division. 
650 */ 651 sleep_ms /= rule->rr_amount; 652 653 if (sleep_ms > rctl_throttle_max) 654 sleep_ms = rctl_throttle_max; 655 #if 0 656 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n", 657 __func__, p->p_pid, p->p_comm, 658 p->p_racct->r_resources[resource], 659 rule->rr_amount, (uintmax_t)sleep_ms, 660 (uintmax_t)sleep_ratio, (intmax_t)available); 661 #endif 662 663 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n", 664 __func__, (uintmax_t)sleep_ms, rctl_throttle_min)); 665 racct_proc_throttle(p, sleep_ms); 666 continue; 667 default: 668 if (link->rrl_exceeded != 0) 669 continue; 670 671 if (p->p_state != PRS_NORMAL) 672 continue; 673 674 KASSERT(rule->rr_action > 0 && 675 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX, 676 ("rctl_enforce: unknown action %d", 677 rule->rr_action)); 678 679 /* 680 * We're using the fact that RCTL_ACTION_SIG* values 681 * are equal to their counterparts from sys/signal.h. 682 */ 683 kern_psignal(p, rule->rr_action); 684 link->rrl_exceeded = 1; 685 continue; 686 } 687 } 688 689 if (should_deny) { 690 /* 691 * Return fake error code; the caller should change it 692 * into one proper for the situation - EFSIZ, ENOMEM etc. 693 */ 694 return (EDOOFUS); 695 } 696 697 return (0); 698 } 699 700 uint64_t 701 rctl_get_limit(struct proc *p, int resource) 702 { 703 struct rctl_rule *rule; 704 struct rctl_rule_link *link; 705 uint64_t amount = UINT64_MAX; 706 707 ASSERT_RACCT_ENABLED(); 708 RACCT_LOCK_ASSERT(); 709 710 /* 711 * There may be more than one matching rule; go through all of them. 712 * Denial should be done last, after logging and sending signals. 
713 */ 714 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 715 rule = link->rrl_rule; 716 if (rule->rr_resource != resource) 717 continue; 718 if (rule->rr_action != RCTL_ACTION_DENY) 719 continue; 720 if (rule->rr_amount < amount) 721 amount = rule->rr_amount; 722 } 723 724 return (amount); 725 } 726 727 uint64_t 728 rctl_get_available(struct proc *p, int resource) 729 { 730 struct rctl_rule *rule; 731 struct rctl_rule_link *link; 732 int64_t available, minavailable, allocated; 733 734 minavailable = INT64_MAX; 735 736 ASSERT_RACCT_ENABLED(); 737 RACCT_LOCK_ASSERT(); 738 739 /* 740 * There may be more than one matching rule; go through all of them. 741 * Denial should be done last, after logging and sending signals. 742 */ 743 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 744 rule = link->rrl_rule; 745 if (rule->rr_resource != resource) 746 continue; 747 if (rule->rr_action != RCTL_ACTION_DENY) 748 continue; 749 available = rctl_available_resource(p, rule); 750 if (available < minavailable) 751 minavailable = available; 752 } 753 754 /* 755 * XXX: Think about this _hard_. 
756 */ 757 allocated = p->p_racct->r_resources[resource]; 758 if (minavailable < INT64_MAX - allocated) 759 minavailable += allocated; 760 if (minavailable < 0) 761 minavailable = 0; 762 763 return (minavailable); 764 } 765 766 static int 767 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter) 768 { 769 770 ASSERT_RACCT_ENABLED(); 771 772 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) { 773 if (rule->rr_subject_type != filter->rr_subject_type) 774 return (0); 775 776 switch (filter->rr_subject_type) { 777 case RCTL_SUBJECT_TYPE_PROCESS: 778 if (filter->rr_subject.rs_proc != NULL && 779 rule->rr_subject.rs_proc != 780 filter->rr_subject.rs_proc) 781 return (0); 782 break; 783 case RCTL_SUBJECT_TYPE_USER: 784 if (filter->rr_subject.rs_uip != NULL && 785 rule->rr_subject.rs_uip != 786 filter->rr_subject.rs_uip) 787 return (0); 788 break; 789 case RCTL_SUBJECT_TYPE_LOGINCLASS: 790 if (filter->rr_subject.rs_loginclass != NULL && 791 rule->rr_subject.rs_loginclass != 792 filter->rr_subject.rs_loginclass) 793 return (0); 794 break; 795 case RCTL_SUBJECT_TYPE_JAIL: 796 if (filter->rr_subject.rs_prison_racct != NULL && 797 rule->rr_subject.rs_prison_racct != 798 filter->rr_subject.rs_prison_racct) 799 return (0); 800 break; 801 default: 802 panic("rctl_rule_matches: unknown subject type %d", 803 filter->rr_subject_type); 804 } 805 } 806 807 if (filter->rr_resource != RACCT_UNDEFINED) { 808 if (rule->rr_resource != filter->rr_resource) 809 return (0); 810 } 811 812 if (filter->rr_action != RCTL_ACTION_UNDEFINED) { 813 if (rule->rr_action != filter->rr_action) 814 return (0); 815 } 816 817 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) { 818 if (rule->rr_amount != filter->rr_amount) 819 return (0); 820 } 821 822 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) { 823 if (rule->rr_per != filter->rr_per) 824 return (0); 825 } 826 827 return (1); 828 } 829 830 static int 831 str2value(const char *str, int *value, struct dict *table) 832 
{ 833 int i; 834 835 if (value == NULL) 836 return (EINVAL); 837 838 for (i = 0; table[i].d_name != NULL; i++) { 839 if (strcasecmp(table[i].d_name, str) == 0) { 840 *value = table[i].d_value; 841 return (0); 842 } 843 } 844 845 return (EINVAL); 846 } 847 848 static int 849 str2id(const char *str, id_t *value) 850 { 851 char *end; 852 853 if (str == NULL) 854 return (EINVAL); 855 856 *value = strtoul(str, &end, 10); 857 if ((size_t)(end - str) != strlen(str)) 858 return (EINVAL); 859 860 return (0); 861 } 862 863 static int 864 str2int64(const char *str, int64_t *value) 865 { 866 char *end; 867 868 if (str == NULL) 869 return (EINVAL); 870 871 *value = strtoul(str, &end, 10); 872 if ((size_t)(end - str) != strlen(str)) 873 return (EINVAL); 874 875 if (*value < 0) 876 return (ERANGE); 877 878 return (0); 879 } 880 881 /* 882 * Connect the rule to the racct, increasing refcount for the rule. 883 */ 884 static void 885 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule) 886 { 887 struct rctl_rule_link *link; 888 889 ASSERT_RACCT_ENABLED(); 890 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 891 892 rctl_rule_acquire(rule); 893 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 894 link->rrl_rule = rule; 895 link->rrl_exceeded = 0; 896 897 RACCT_LOCK(); 898 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 899 RACCT_UNLOCK(); 900 } 901 902 static int 903 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule) 904 { 905 struct rctl_rule_link *link; 906 907 ASSERT_RACCT_ENABLED(); 908 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 909 RACCT_LOCK_ASSERT(); 910 911 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT); 912 if (link == NULL) 913 return (ENOMEM); 914 rctl_rule_acquire(rule); 915 link->rrl_rule = rule; 916 link->rrl_exceeded = 0; 917 918 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 919 920 return (0); 921 } 922 923 /* 924 * Remove limits for a rules matching the filter 
and release 925 * the refcounts for the rules, possibly freeing them. Returns 926 * the number of limit structures removed. 927 */ 928 static int 929 rctl_racct_remove_rules(struct racct *racct, 930 const struct rctl_rule *filter) 931 { 932 struct rctl_rule_link *link, *linktmp; 933 int removed = 0; 934 935 ASSERT_RACCT_ENABLED(); 936 RACCT_LOCK_ASSERT(); 937 938 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) { 939 if (!rctl_rule_matches(link->rrl_rule, filter)) 940 continue; 941 942 LIST_REMOVE(link, rrl_next); 943 rctl_rule_release(link->rrl_rule); 944 uma_zfree(rctl_rule_link_zone, link); 945 removed++; 946 } 947 return (removed); 948 } 949 950 static void 951 rctl_rule_acquire_subject(struct rctl_rule *rule) 952 { 953 954 ASSERT_RACCT_ENABLED(); 955 956 switch (rule->rr_subject_type) { 957 case RCTL_SUBJECT_TYPE_UNDEFINED: 958 case RCTL_SUBJECT_TYPE_PROCESS: 959 break; 960 case RCTL_SUBJECT_TYPE_JAIL: 961 if (rule->rr_subject.rs_prison_racct != NULL) 962 prison_racct_hold(rule->rr_subject.rs_prison_racct); 963 break; 964 case RCTL_SUBJECT_TYPE_USER: 965 if (rule->rr_subject.rs_uip != NULL) 966 uihold(rule->rr_subject.rs_uip); 967 break; 968 case RCTL_SUBJECT_TYPE_LOGINCLASS: 969 if (rule->rr_subject.rs_loginclass != NULL) 970 loginclass_hold(rule->rr_subject.rs_loginclass); 971 break; 972 default: 973 panic("rctl_rule_acquire_subject: unknown subject type %d", 974 rule->rr_subject_type); 975 } 976 } 977 978 static void 979 rctl_rule_release_subject(struct rctl_rule *rule) 980 { 981 982 ASSERT_RACCT_ENABLED(); 983 984 switch (rule->rr_subject_type) { 985 case RCTL_SUBJECT_TYPE_UNDEFINED: 986 case RCTL_SUBJECT_TYPE_PROCESS: 987 break; 988 case RCTL_SUBJECT_TYPE_JAIL: 989 if (rule->rr_subject.rs_prison_racct != NULL) 990 prison_racct_free(rule->rr_subject.rs_prison_racct); 991 break; 992 case RCTL_SUBJECT_TYPE_USER: 993 if (rule->rr_subject.rs_uip != NULL) 994 uifree(rule->rr_subject.rs_uip); 995 break; 996 case RCTL_SUBJECT_TYPE_LOGINCLASS: 997 
if (rule->rr_subject.rs_loginclass != NULL) 998 loginclass_free(rule->rr_subject.rs_loginclass); 999 break; 1000 default: 1001 panic("rctl_rule_release_subject: unknown subject type %d", 1002 rule->rr_subject_type); 1003 } 1004 } 1005 1006 struct rctl_rule * 1007 rctl_rule_alloc(int flags) 1008 { 1009 struct rctl_rule *rule; 1010 1011 ASSERT_RACCT_ENABLED(); 1012 1013 rule = uma_zalloc(rctl_rule_zone, flags); 1014 if (rule == NULL) 1015 return (NULL); 1016 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1017 rule->rr_subject.rs_proc = NULL; 1018 rule->rr_subject.rs_uip = NULL; 1019 rule->rr_subject.rs_loginclass = NULL; 1020 rule->rr_subject.rs_prison_racct = NULL; 1021 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1022 rule->rr_resource = RACCT_UNDEFINED; 1023 rule->rr_action = RCTL_ACTION_UNDEFINED; 1024 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1025 refcount_init(&rule->rr_refcount, 1); 1026 1027 return (rule); 1028 } 1029 1030 struct rctl_rule * 1031 rctl_rule_duplicate(const struct rctl_rule *rule, int flags) 1032 { 1033 struct rctl_rule *copy; 1034 1035 ASSERT_RACCT_ENABLED(); 1036 1037 copy = uma_zalloc(rctl_rule_zone, flags); 1038 if (copy == NULL) 1039 return (NULL); 1040 copy->rr_subject_type = rule->rr_subject_type; 1041 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc; 1042 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip; 1043 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass; 1044 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct; 1045 copy->rr_per = rule->rr_per; 1046 copy->rr_resource = rule->rr_resource; 1047 copy->rr_action = rule->rr_action; 1048 copy->rr_amount = rule->rr_amount; 1049 refcount_init(©->rr_refcount, 1); 1050 rctl_rule_acquire_subject(copy); 1051 1052 return (copy); 1053 } 1054 1055 void 1056 rctl_rule_acquire(struct rctl_rule *rule) 1057 { 1058 1059 ASSERT_RACCT_ENABLED(); 1060 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1061 1062 refcount_acquire(&rule->rr_refcount); 1063 } 

/*
 * Taskqueue callback scheduled by rctl_rule_release() once the last
 * reference is gone: drops the reference on the rule's subject and
 * returns the rule to its UMA zone.  'context' is the rule; 'pending'
 * is not used.
 */
static void
rctl_rule_free(void *context, int pending)
{
	struct rctl_rule *rule;

	rule = (struct rctl_rule *)context;

	ASSERT_RACCT_ENABLED();
	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));

	/*
	 * We don't need locking here; rule is guaranteed to be inaccessible.
	 */

	rctl_rule_release_subject(rule);
	uma_zfree(rctl_rule_zone, rule);
}

/*
 * Drop one reference on 'rule'.  When the last reference goes away the
 * actual teardown is deferred to rctl_rule_free() via taskqueue_thread.
 */
void
rctl_rule_release(struct rctl_rule *rule)
{

	ASSERT_RACCT_ENABLED();
	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));

	if (refcount_release(&rule->rr_refcount)) {
		/*
		 * rctl_rule_release() is often called when iterating
		 * over all the uidinfo structures in the system,
		 * holding uihashtbl_lock.  Since rctl_rule_free()
		 * might end up calling uifree(), this would lead
		 * to lock recursion.  Use taskqueue to avoid this.
		 */
		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
	}
}

/*
 * Return 1 if every field of 'rule' is defined (subject type with a
 * non-NULL subject, resource, action, amount and "per"), 0 otherwise.
 * Panics on an unknown subject type.
 */
static int
rctl_rule_fully_specified(const struct rctl_rule *rule)
{

	ASSERT_RACCT_ENABLED();

	switch (rule->rr_subject_type) {
	case RCTL_SUBJECT_TYPE_UNDEFINED:
		return (0);
	case RCTL_SUBJECT_TYPE_PROCESS:
		if (rule->rr_subject.rs_proc == NULL)
			return (0);
		break;
	case RCTL_SUBJECT_TYPE_USER:
		if (rule->rr_subject.rs_uip == NULL)
			return (0);
		break;
	case RCTL_SUBJECT_TYPE_LOGINCLASS:
		if (rule->rr_subject.rs_loginclass == NULL)
			return (0);
		break;
	case RCTL_SUBJECT_TYPE_JAIL:
		if (rule->rr_subject.rs_prison_racct == NULL)
			return (0);
		break;
	default:
		panic("rctl_rule_fully_specified: unknown subject type %d",
		    rule->rr_subject_type);
	}
	if (rule->rr_resource == RACCT_UNDEFINED)
		return (0);
	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
		return (0);
	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
		return (0);
	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
		return (0);

	return (1);
}

static int
rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
{
	struct rctl_rule *rule;
	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
	    *amountstr, *perstr;
	id_t id;
	int error = 0;

	ASSERT_RACCT_ENABLED();

	rule = rctl_rule_alloc(M_WAITOK);

	subjectstr = strsep(&rulestr, ":");
	subject_idstr = strsep(&rulestr, ":");
	resourcestr = strsep(&rulestr, ":");
	actionstr = strsep(&rulestr, "=/");
	amountstr = strsep(&rulestr, "/");
	perstr = rulestr;

	if (subjectstr == NULL || subjectstr[0] == '\0')
		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
	else {
		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
		if (error != 0)
			goto out;
	}

	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
		rule->rr_subject.rs_proc = NULL;
		rule->rr_subject.rs_uip = NULL;
		rule->rr_subject.rs_loginclass = NULL;
		rule->rr_subject.rs_prison_racct = NULL;
	} else {
		switch (rule->rr_subject_type) {
		case RCTL_SUBJECT_TYPE_UNDEFINED:
			error = EINVAL;
			goto out;
		case RCTL_SUBJECT_TYPE_PROCESS:
			error = str2id(subject_idstr, &id);
			if (error != 0)
				goto out;
			sx_assert(&allproc_lock, SA_LOCKED);
			rule->rr_subject.rs_proc = pfind(id);
			if (rule->rr_subject.rs_proc == NULL) {
				error = ESRCH;
				goto out;
			}
			PROC_UNLOCK(rule->rr_subject.rs_proc);
			break;
		case RCTL_SUBJECT_TYPE_USER:
			error = str2id(subject_idstr, &id);
			if (error != 0)
				goto out;
			rule->rr_subject.rs_uip = uifind(id);
			break;
		case RCTL_SUBJECT_TYPE_LOGINCLASS:
			rule->rr_subject.rs_loginclass =
			    loginclass_find(subject_idstr);
			if (rule->rr_subject.rs_loginclass == NULL)
{ 1204 error = ENAMETOOLONG; 1205 goto out; 1206 } 1207 break; 1208 case RCTL_SUBJECT_TYPE_JAIL: 1209 rule->rr_subject.rs_prison_racct = 1210 prison_racct_find(subject_idstr); 1211 if (rule->rr_subject.rs_prison_racct == NULL) { 1212 error = ENAMETOOLONG; 1213 goto out; 1214 } 1215 break; 1216 default: 1217 panic("rctl_string_to_rule: unknown subject type %d", 1218 rule->rr_subject_type); 1219 } 1220 } 1221 1222 if (resourcestr == NULL || resourcestr[0] == '\0') 1223 rule->rr_resource = RACCT_UNDEFINED; 1224 else { 1225 error = str2value(resourcestr, &rule->rr_resource, 1226 resourcenames); 1227 if (error != 0) 1228 goto out; 1229 } 1230 1231 if (actionstr == NULL || actionstr[0] == '\0') 1232 rule->rr_action = RCTL_ACTION_UNDEFINED; 1233 else { 1234 error = str2value(actionstr, &rule->rr_action, actionnames); 1235 if (error != 0) 1236 goto out; 1237 } 1238 1239 if (amountstr == NULL || amountstr[0] == '\0') 1240 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1241 else { 1242 error = str2int64(amountstr, &rule->rr_amount); 1243 if (error != 0) 1244 goto out; 1245 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) { 1246 if (rule->rr_amount > INT64_MAX / 1000000) { 1247 error = ERANGE; 1248 goto out; 1249 } 1250 rule->rr_amount *= 1000000; 1251 } 1252 } 1253 1254 if (perstr == NULL || perstr[0] == '\0') 1255 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1256 else { 1257 error = str2value(perstr, &rule->rr_per, subjectnames); 1258 if (error != 0) 1259 goto out; 1260 } 1261 1262 out: 1263 if (error == 0) 1264 *rulep = rule; 1265 else 1266 rctl_rule_release(rule); 1267 1268 return (error); 1269 } 1270 1271 /* 1272 * Link a rule with all the subjects it applies to. 
1273 */ 1274 int 1275 rctl_rule_add(struct rctl_rule *rule) 1276 { 1277 struct proc *p; 1278 struct ucred *cred; 1279 struct uidinfo *uip; 1280 struct prison *pr; 1281 struct prison_racct *prr; 1282 struct loginclass *lc; 1283 struct rctl_rule *rule2; 1284 int match; 1285 1286 ASSERT_RACCT_ENABLED(); 1287 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 1288 1289 /* 1290 * Some rules just don't make sense, like "deny" rule for an undeniable 1291 * resource. The exception are the RSS and %CPU resources - they are 1292 * not deniable in the racct sense, but the limit is enforced in 1293 * a different way. 1294 */ 1295 if (rule->rr_action == RCTL_ACTION_DENY && 1296 !RACCT_IS_DENIABLE(rule->rr_resource) && 1297 rule->rr_resource != RACCT_RSS && 1298 rule->rr_resource != RACCT_PCTCPU) { 1299 return (EOPNOTSUPP); 1300 } 1301 1302 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1303 !RACCT_IS_DECAYING(rule->rr_resource)) { 1304 return (EOPNOTSUPP); 1305 } 1306 1307 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1308 rule->rr_resource == RACCT_PCTCPU) { 1309 return (EOPNOTSUPP); 1310 } 1311 1312 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS && 1313 RACCT_IS_SLOPPY(rule->rr_resource)) { 1314 return (EOPNOTSUPP); 1315 } 1316 1317 /* 1318 * Make sure there are no duplicated rules. Also, for the "deny" 1319 * rules, remove ones differing only by "amount". 1320 */ 1321 if (rule->rr_action == RCTL_ACTION_DENY) { 1322 rule2 = rctl_rule_duplicate(rule, M_WAITOK); 1323 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED; 1324 rctl_rule_remove(rule2); 1325 rctl_rule_release(rule2); 1326 } else 1327 rctl_rule_remove(rule); 1328 1329 switch (rule->rr_subject_type) { 1330 case RCTL_SUBJECT_TYPE_PROCESS: 1331 p = rule->rr_subject.rs_proc; 1332 KASSERT(p != NULL, ("rctl_rule_add: NULL proc")); 1333 1334 rctl_racct_add_rule(p->p_racct, rule); 1335 /* 1336 * In case of per-process rule, we don't have anything more 1337 * to do. 
1338 */ 1339 return (0); 1340 1341 case RCTL_SUBJECT_TYPE_USER: 1342 uip = rule->rr_subject.rs_uip; 1343 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip")); 1344 rctl_racct_add_rule(uip->ui_racct, rule); 1345 break; 1346 1347 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1348 lc = rule->rr_subject.rs_loginclass; 1349 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass")); 1350 rctl_racct_add_rule(lc->lc_racct, rule); 1351 break; 1352 1353 case RCTL_SUBJECT_TYPE_JAIL: 1354 prr = rule->rr_subject.rs_prison_racct; 1355 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr")); 1356 rctl_racct_add_rule(prr->prr_racct, rule); 1357 break; 1358 1359 default: 1360 panic("rctl_rule_add: unknown subject type %d", 1361 rule->rr_subject_type); 1362 } 1363 1364 /* 1365 * Now go through all the processes and add the new rule to the ones 1366 * it applies to. 1367 */ 1368 sx_assert(&allproc_lock, SA_LOCKED); 1369 FOREACH_PROC_IN_SYSTEM(p) { 1370 cred = p->p_ucred; 1371 switch (rule->rr_subject_type) { 1372 case RCTL_SUBJECT_TYPE_USER: 1373 if (cred->cr_uidinfo == rule->rr_subject.rs_uip || 1374 cred->cr_ruidinfo == rule->rr_subject.rs_uip) 1375 break; 1376 continue; 1377 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1378 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) 1379 break; 1380 continue; 1381 case RCTL_SUBJECT_TYPE_JAIL: 1382 match = 0; 1383 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { 1384 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { 1385 match = 1; 1386 break; 1387 } 1388 } 1389 if (match) 1390 break; 1391 continue; 1392 default: 1393 panic("rctl_rule_add: unknown subject type %d", 1394 rule->rr_subject_type); 1395 } 1396 1397 rctl_racct_add_rule(p->p_racct, rule); 1398 } 1399 1400 return (0); 1401 } 1402 1403 static void 1404 rctl_rule_pre_callback(void) 1405 { 1406 1407 RACCT_LOCK(); 1408 } 1409 1410 static void 1411 rctl_rule_post_callback(void) 1412 { 1413 1414 RACCT_UNLOCK(); 1415 } 1416 1417 static void 1418 rctl_rule_remove_callback(struct racct 
*racct, void *arg2, void *arg3) 1419 { 1420 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1421 int found = 0; 1422 1423 ASSERT_RACCT_ENABLED(); 1424 RACCT_LOCK_ASSERT(); 1425 1426 found += rctl_racct_remove_rules(racct, filter); 1427 1428 *((int *)arg3) += found; 1429 } 1430 1431 /* 1432 * Remove all rules that match the filter. 1433 */ 1434 int 1435 rctl_rule_remove(struct rctl_rule *filter) 1436 { 1437 struct proc *p; 1438 int found = 0; 1439 1440 ASSERT_RACCT_ENABLED(); 1441 1442 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS && 1443 filter->rr_subject.rs_proc != NULL) { 1444 p = filter->rr_subject.rs_proc; 1445 RACCT_LOCK(); 1446 found = rctl_racct_remove_rules(p->p_racct, filter); 1447 RACCT_UNLOCK(); 1448 if (found) 1449 return (0); 1450 return (ESRCH); 1451 } 1452 1453 loginclass_racct_foreach(rctl_rule_remove_callback, 1454 rctl_rule_pre_callback, rctl_rule_post_callback, 1455 filter, (void *)&found); 1456 ui_racct_foreach(rctl_rule_remove_callback, 1457 rctl_rule_pre_callback, rctl_rule_post_callback, 1458 filter, (void *)&found); 1459 prison_racct_foreach(rctl_rule_remove_callback, 1460 rctl_rule_pre_callback, rctl_rule_post_callback, 1461 filter, (void *)&found); 1462 1463 sx_assert(&allproc_lock, SA_LOCKED); 1464 RACCT_LOCK(); 1465 FOREACH_PROC_IN_SYSTEM(p) { 1466 found += rctl_racct_remove_rules(p->p_racct, filter); 1467 } 1468 RACCT_UNLOCK(); 1469 1470 if (found) 1471 return (0); 1472 return (ESRCH); 1473 } 1474 1475 /* 1476 * Appends a rule to the sbuf. 
1477 */ 1478 static void 1479 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule) 1480 { 1481 int64_t amount; 1482 1483 ASSERT_RACCT_ENABLED(); 1484 1485 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type)); 1486 1487 switch (rule->rr_subject_type) { 1488 case RCTL_SUBJECT_TYPE_PROCESS: 1489 if (rule->rr_subject.rs_proc == NULL) 1490 sbuf_printf(sb, ":"); 1491 else 1492 sbuf_printf(sb, "%d:", 1493 rule->rr_subject.rs_proc->p_pid); 1494 break; 1495 case RCTL_SUBJECT_TYPE_USER: 1496 if (rule->rr_subject.rs_uip == NULL) 1497 sbuf_printf(sb, ":"); 1498 else 1499 sbuf_printf(sb, "%d:", 1500 rule->rr_subject.rs_uip->ui_uid); 1501 break; 1502 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1503 if (rule->rr_subject.rs_loginclass == NULL) 1504 sbuf_printf(sb, ":"); 1505 else 1506 sbuf_printf(sb, "%s:", 1507 rule->rr_subject.rs_loginclass->lc_name); 1508 break; 1509 case RCTL_SUBJECT_TYPE_JAIL: 1510 if (rule->rr_subject.rs_prison_racct == NULL) 1511 sbuf_printf(sb, ":"); 1512 else 1513 sbuf_printf(sb, "%s:", 1514 rule->rr_subject.rs_prison_racct->prr_name); 1515 break; 1516 default: 1517 panic("rctl_rule_to_sbuf: unknown subject type %d", 1518 rule->rr_subject_type); 1519 } 1520 1521 amount = rule->rr_amount; 1522 if (amount != RCTL_AMOUNT_UNDEFINED && 1523 RACCT_IS_IN_MILLIONS(rule->rr_resource)) 1524 amount /= 1000000; 1525 1526 sbuf_printf(sb, "%s:%s=%jd", 1527 rctl_resource_name(rule->rr_resource), 1528 rctl_action_name(rule->rr_action), 1529 amount); 1530 1531 if (rule->rr_per != rule->rr_subject_type) 1532 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per)); 1533 } 1534 1535 /* 1536 * Routine used by RCTL syscalls to read in input string. 
1537 */ 1538 static int 1539 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen) 1540 { 1541 char *str; 1542 int error; 1543 1544 ASSERT_RACCT_ENABLED(); 1545 1546 if (inbuflen <= 0) 1547 return (EINVAL); 1548 if (inbuflen > RCTL_MAX_INBUFSIZE) 1549 return (E2BIG); 1550 1551 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK); 1552 error = copyinstr(inbufp, str, inbuflen, NULL); 1553 if (error != 0) { 1554 free(str, M_RCTL); 1555 return (error); 1556 } 1557 1558 *inputstr = str; 1559 1560 return (0); 1561 } 1562 1563 /* 1564 * Routine used by RCTL syscalls to write out output string. 1565 */ 1566 static int 1567 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen) 1568 { 1569 int error; 1570 1571 ASSERT_RACCT_ENABLED(); 1572 1573 if (outputsbuf == NULL) 1574 return (0); 1575 1576 sbuf_finish(outputsbuf); 1577 if (outbuflen < sbuf_len(outputsbuf) + 1) { 1578 sbuf_delete(outputsbuf); 1579 return (ERANGE); 1580 } 1581 error = copyout(sbuf_data(outputsbuf), outbufp, 1582 sbuf_len(outputsbuf) + 1); 1583 sbuf_delete(outputsbuf); 1584 return (error); 1585 } 1586 1587 static struct sbuf * 1588 rctl_racct_to_sbuf(struct racct *racct, int sloppy) 1589 { 1590 struct sbuf *sb; 1591 int64_t amount; 1592 int i; 1593 1594 ASSERT_RACCT_ENABLED(); 1595 1596 sb = sbuf_new_auto(); 1597 for (i = 0; i <= RACCT_MAX; i++) { 1598 if (sloppy == 0 && RACCT_IS_SLOPPY(i)) 1599 continue; 1600 RACCT_LOCK(); 1601 amount = racct->r_resources[i]; 1602 RACCT_UNLOCK(); 1603 if (RACCT_IS_IN_MILLIONS(i)) 1604 amount /= 1000000; 1605 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount); 1606 } 1607 sbuf_setpos(sb, sbuf_len(sb) - 1); 1608 return (sb); 1609 } 1610 1611 int 1612 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 1613 { 1614 struct rctl_rule *filter; 1615 struct sbuf *outputsbuf = NULL; 1616 struct proc *p; 1617 struct uidinfo *uip; 1618 struct loginclass *lc; 1619 struct prison_racct *prr; 1620 char *inputstr; 1621 int error; 1622 
1623 if (!racct_enable) 1624 return (ENOSYS); 1625 1626 error = priv_check(td, PRIV_RCTL_GET_RACCT); 1627 if (error != 0) 1628 return (error); 1629 1630 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1631 if (error != 0) 1632 return (error); 1633 1634 sx_slock(&allproc_lock); 1635 error = rctl_string_to_rule(inputstr, &filter); 1636 free(inputstr, M_RCTL); 1637 if (error != 0) { 1638 sx_sunlock(&allproc_lock); 1639 return (error); 1640 } 1641 1642 switch (filter->rr_subject_type) { 1643 case RCTL_SUBJECT_TYPE_PROCESS: 1644 p = filter->rr_subject.rs_proc; 1645 if (p == NULL) { 1646 error = EINVAL; 1647 goto out; 1648 } 1649 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0); 1650 break; 1651 case RCTL_SUBJECT_TYPE_USER: 1652 uip = filter->rr_subject.rs_uip; 1653 if (uip == NULL) { 1654 error = EINVAL; 1655 goto out; 1656 } 1657 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1); 1658 break; 1659 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1660 lc = filter->rr_subject.rs_loginclass; 1661 if (lc == NULL) { 1662 error = EINVAL; 1663 goto out; 1664 } 1665 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1); 1666 break; 1667 case RCTL_SUBJECT_TYPE_JAIL: 1668 prr = filter->rr_subject.rs_prison_racct; 1669 if (prr == NULL) { 1670 error = EINVAL; 1671 goto out; 1672 } 1673 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1); 1674 break; 1675 default: 1676 error = EINVAL; 1677 } 1678 out: 1679 rctl_rule_release(filter); 1680 sx_sunlock(&allproc_lock); 1681 if (error != 0) 1682 return (error); 1683 1684 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen); 1685 1686 return (error); 1687 } 1688 1689 static void 1690 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3) 1691 { 1692 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1693 struct rctl_rule_link *link; 1694 struct sbuf *sb = (struct sbuf *)arg3; 1695 1696 ASSERT_RACCT_ENABLED(); 1697 RACCT_LOCK_ASSERT(); 1698 1699 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 1700 if 
(!rctl_rule_matches(link->rrl_rule, filter)) 1701 continue; 1702 rctl_rule_to_sbuf(sb, link->rrl_rule); 1703 sbuf_printf(sb, ","); 1704 } 1705 } 1706 1707 int 1708 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 1709 { 1710 struct sbuf *sb; 1711 struct rctl_rule *filter; 1712 struct rctl_rule_link *link; 1713 struct proc *p; 1714 char *inputstr, *buf; 1715 size_t bufsize; 1716 int error; 1717 1718 if (!racct_enable) 1719 return (ENOSYS); 1720 1721 error = priv_check(td, PRIV_RCTL_GET_RULES); 1722 if (error != 0) 1723 return (error); 1724 1725 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1726 if (error != 0) 1727 return (error); 1728 1729 sx_slock(&allproc_lock); 1730 error = rctl_string_to_rule(inputstr, &filter); 1731 free(inputstr, M_RCTL); 1732 if (error != 0) { 1733 sx_sunlock(&allproc_lock); 1734 return (error); 1735 } 1736 1737 bufsize = uap->outbuflen; 1738 if (bufsize > rctl_maxbufsize) { 1739 sx_sunlock(&allproc_lock); 1740 return (E2BIG); 1741 } 1742 1743 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1744 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1745 KASSERT(sb != NULL, ("sbuf_new failed")); 1746 1747 FOREACH_PROC_IN_SYSTEM(p) { 1748 RACCT_LOCK(); 1749 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1750 /* 1751 * Non-process rules will be added to the buffer later. 1752 * Adding them here would result in duplicated output. 
1753 */ 1754 if (link->rrl_rule->rr_subject_type != 1755 RCTL_SUBJECT_TYPE_PROCESS) 1756 continue; 1757 if (!rctl_rule_matches(link->rrl_rule, filter)) 1758 continue; 1759 rctl_rule_to_sbuf(sb, link->rrl_rule); 1760 sbuf_printf(sb, ","); 1761 } 1762 RACCT_UNLOCK(); 1763 } 1764 1765 loginclass_racct_foreach(rctl_get_rules_callback, 1766 rctl_rule_pre_callback, rctl_rule_post_callback, 1767 filter, sb); 1768 ui_racct_foreach(rctl_get_rules_callback, 1769 rctl_rule_pre_callback, rctl_rule_post_callback, 1770 filter, sb); 1771 prison_racct_foreach(rctl_get_rules_callback, 1772 rctl_rule_pre_callback, rctl_rule_post_callback, 1773 filter, sb); 1774 if (sbuf_error(sb) == ENOMEM) { 1775 error = ERANGE; 1776 goto out; 1777 } 1778 1779 /* 1780 * Remove trailing ",". 1781 */ 1782 if (sbuf_len(sb) > 0) 1783 sbuf_setpos(sb, sbuf_len(sb) - 1); 1784 1785 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1786 out: 1787 rctl_rule_release(filter); 1788 sx_sunlock(&allproc_lock); 1789 free(buf, M_RCTL); 1790 return (error); 1791 } 1792 1793 int 1794 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 1795 { 1796 struct sbuf *sb; 1797 struct rctl_rule *filter; 1798 struct rctl_rule_link *link; 1799 char *inputstr, *buf; 1800 size_t bufsize; 1801 int error; 1802 1803 if (!racct_enable) 1804 return (ENOSYS); 1805 1806 error = priv_check(td, PRIV_RCTL_GET_LIMITS); 1807 if (error != 0) 1808 return (error); 1809 1810 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1811 if (error != 0) 1812 return (error); 1813 1814 sx_slock(&allproc_lock); 1815 error = rctl_string_to_rule(inputstr, &filter); 1816 free(inputstr, M_RCTL); 1817 if (error != 0) { 1818 sx_sunlock(&allproc_lock); 1819 return (error); 1820 } 1821 1822 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) { 1823 rctl_rule_release(filter); 1824 sx_sunlock(&allproc_lock); 1825 return (EINVAL); 1826 } 1827 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) { 1828 
rctl_rule_release(filter); 1829 sx_sunlock(&allproc_lock); 1830 return (EOPNOTSUPP); 1831 } 1832 if (filter->rr_subject.rs_proc == NULL) { 1833 rctl_rule_release(filter); 1834 sx_sunlock(&allproc_lock); 1835 return (EINVAL); 1836 } 1837 1838 bufsize = uap->outbuflen; 1839 if (bufsize > rctl_maxbufsize) { 1840 rctl_rule_release(filter); 1841 sx_sunlock(&allproc_lock); 1842 return (E2BIG); 1843 } 1844 1845 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1846 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1847 KASSERT(sb != NULL, ("sbuf_new failed")); 1848 1849 RACCT_LOCK(); 1850 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links, 1851 rrl_next) { 1852 rctl_rule_to_sbuf(sb, link->rrl_rule); 1853 sbuf_printf(sb, ","); 1854 } 1855 RACCT_UNLOCK(); 1856 if (sbuf_error(sb) == ENOMEM) { 1857 error = ERANGE; 1858 sbuf_delete(sb); 1859 goto out; 1860 } 1861 1862 /* 1863 * Remove trailing ",". 1864 */ 1865 if (sbuf_len(sb) > 0) 1866 sbuf_setpos(sb, sbuf_len(sb) - 1); 1867 1868 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1869 out: 1870 rctl_rule_release(filter); 1871 sx_sunlock(&allproc_lock); 1872 free(buf, M_RCTL); 1873 return (error); 1874 } 1875 1876 int 1877 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 1878 { 1879 struct rctl_rule *rule; 1880 char *inputstr; 1881 int error; 1882 1883 if (!racct_enable) 1884 return (ENOSYS); 1885 1886 error = priv_check(td, PRIV_RCTL_ADD_RULE); 1887 if (error != 0) 1888 return (error); 1889 1890 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1891 if (error != 0) 1892 return (error); 1893 1894 sx_slock(&allproc_lock); 1895 error = rctl_string_to_rule(inputstr, &rule); 1896 free(inputstr, M_RCTL); 1897 if (error != 0) { 1898 sx_sunlock(&allproc_lock); 1899 return (error); 1900 } 1901 /* 1902 * The 'per' part of a rule is optional. 
1903 */ 1904 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED && 1905 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) 1906 rule->rr_per = rule->rr_subject_type; 1907 1908 if (!rctl_rule_fully_specified(rule)) { 1909 error = EINVAL; 1910 goto out; 1911 } 1912 1913 error = rctl_rule_add(rule); 1914 1915 out: 1916 rctl_rule_release(rule); 1917 sx_sunlock(&allproc_lock); 1918 return (error); 1919 } 1920 1921 int 1922 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 1923 { 1924 struct rctl_rule *filter; 1925 char *inputstr; 1926 int error; 1927 1928 if (!racct_enable) 1929 return (ENOSYS); 1930 1931 error = priv_check(td, PRIV_RCTL_REMOVE_RULE); 1932 if (error != 0) 1933 return (error); 1934 1935 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1936 if (error != 0) 1937 return (error); 1938 1939 sx_slock(&allproc_lock); 1940 error = rctl_string_to_rule(inputstr, &filter); 1941 free(inputstr, M_RCTL); 1942 if (error != 0) { 1943 sx_sunlock(&allproc_lock); 1944 return (error); 1945 } 1946 1947 error = rctl_rule_remove(filter); 1948 rctl_rule_release(filter); 1949 sx_sunlock(&allproc_lock); 1950 1951 return (error); 1952 } 1953 1954 /* 1955 * Update RCTL rule list after credential change. 1956 */ 1957 void 1958 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred) 1959 { 1960 LIST_HEAD(, rctl_rule_link) newrules; 1961 struct rctl_rule_link *link, *newlink; 1962 struct uidinfo *newuip; 1963 struct loginclass *newlc; 1964 struct prison_racct *newprr; 1965 int rulecnt, i; 1966 1967 if (!racct_enable) 1968 return; 1969 1970 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 1971 1972 newuip = newcred->cr_ruidinfo; 1973 newlc = newcred->cr_loginclass; 1974 newprr = newcred->cr_prison->pr_prison_racct; 1975 1976 LIST_INIT(&newrules); 1977 1978 again: 1979 /* 1980 * First, count the rules that apply to the process with new 1981 * credentials. 
1982 */ 1983 rulecnt = 0; 1984 RACCT_LOCK(); 1985 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1986 if (link->rrl_rule->rr_subject_type == 1987 RCTL_SUBJECT_TYPE_PROCESS) 1988 rulecnt++; 1989 } 1990 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) 1991 rulecnt++; 1992 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) 1993 rulecnt++; 1994 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) 1995 rulecnt++; 1996 RACCT_UNLOCK(); 1997 1998 /* 1999 * Create temporary list. We've dropped the rctl_lock in order 2000 * to use M_WAITOK. 2001 */ 2002 for (i = 0; i < rulecnt; i++) { 2003 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 2004 newlink->rrl_rule = NULL; 2005 newlink->rrl_exceeded = 0; 2006 LIST_INSERT_HEAD(&newrules, newlink, rrl_next); 2007 } 2008 2009 newlink = LIST_FIRST(&newrules); 2010 2011 /* 2012 * Assign rules to the newly allocated list entries. 2013 */ 2014 RACCT_LOCK(); 2015 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 2016 if (link->rrl_rule->rr_subject_type == 2017 RCTL_SUBJECT_TYPE_PROCESS) { 2018 if (newlink == NULL) 2019 goto goaround; 2020 rctl_rule_acquire(link->rrl_rule); 2021 newlink->rrl_rule = link->rrl_rule; 2022 newlink->rrl_exceeded = link->rrl_exceeded; 2023 newlink = LIST_NEXT(newlink, rrl_next); 2024 rulecnt--; 2025 } 2026 } 2027 2028 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) { 2029 if (newlink == NULL) 2030 goto goaround; 2031 rctl_rule_acquire(link->rrl_rule); 2032 newlink->rrl_rule = link->rrl_rule; 2033 newlink->rrl_exceeded = link->rrl_exceeded; 2034 newlink = LIST_NEXT(newlink, rrl_next); 2035 rulecnt--; 2036 } 2037 2038 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) { 2039 if (newlink == NULL) 2040 goto goaround; 2041 rctl_rule_acquire(link->rrl_rule); 2042 newlink->rrl_rule = link->rrl_rule; 2043 newlink->rrl_exceeded = link->rrl_exceeded; 2044 newlink = LIST_NEXT(newlink, rrl_next); 2045 rulecnt--; 2046 } 2047 2048 
LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) { 2049 if (newlink == NULL) 2050 goto goaround; 2051 rctl_rule_acquire(link->rrl_rule); 2052 newlink->rrl_rule = link->rrl_rule; 2053 newlink->rrl_exceeded = link->rrl_exceeded; 2054 newlink = LIST_NEXT(newlink, rrl_next); 2055 rulecnt--; 2056 } 2057 2058 if (rulecnt == 0) { 2059 /* 2060 * Free the old rule list. 2061 */ 2062 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) { 2063 link = LIST_FIRST(&p->p_racct->r_rule_links); 2064 LIST_REMOVE(link, rrl_next); 2065 rctl_rule_release(link->rrl_rule); 2066 uma_zfree(rctl_rule_link_zone, link); 2067 } 2068 2069 /* 2070 * Replace lists and we're done. 2071 * 2072 * XXX: Is there any way to switch list heads instead 2073 * of iterating here? 2074 */ 2075 while (!LIST_EMPTY(&newrules)) { 2076 newlink = LIST_FIRST(&newrules); 2077 LIST_REMOVE(newlink, rrl_next); 2078 LIST_INSERT_HEAD(&p->p_racct->r_rule_links, 2079 newlink, rrl_next); 2080 } 2081 2082 RACCT_UNLOCK(); 2083 2084 return; 2085 } 2086 2087 goaround: 2088 RACCT_UNLOCK(); 2089 2090 /* 2091 * Rule list changed while we were not holding the rctl_lock. 2092 * Free the new list and try again. 2093 */ 2094 while (!LIST_EMPTY(&newrules)) { 2095 newlink = LIST_FIRST(&newrules); 2096 LIST_REMOVE(newlink, rrl_next); 2097 if (newlink->rrl_rule != NULL) 2098 rctl_rule_release(newlink->rrl_rule); 2099 uma_zfree(rctl_rule_link_zone, newlink); 2100 } 2101 2102 goto again; 2103 } 2104 2105 /* 2106 * Assign RCTL rules to the newly created process. 2107 */ 2108 int 2109 rctl_proc_fork(struct proc *parent, struct proc *child) 2110 { 2111 struct rctl_rule *rule; 2112 struct rctl_rule_link *link; 2113 int error; 2114 2115 ASSERT_RACCT_ENABLED(); 2116 RACCT_LOCK_ASSERT(); 2117 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent)); 2118 2119 LIST_INIT(&child->p_racct->r_rule_links); 2120 2121 /* 2122 * Go through limits applicable to the parent and assign them 2123 * to the child. 
Rules with 'process' subject have to be duplicated 2124 * in order to make their rr_subject point to the new process. 2125 */ 2126 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) { 2127 if (link->rrl_rule->rr_subject_type == 2128 RCTL_SUBJECT_TYPE_PROCESS) { 2129 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT); 2130 if (rule == NULL) 2131 goto fail; 2132 KASSERT(rule->rr_subject.rs_proc == parent, 2133 ("rule->rr_subject.rs_proc != parent")); 2134 rule->rr_subject.rs_proc = child; 2135 error = rctl_racct_add_rule_locked(child->p_racct, 2136 rule); 2137 rctl_rule_release(rule); 2138 if (error != 0) 2139 goto fail; 2140 } else { 2141 error = rctl_racct_add_rule_locked(child->p_racct, 2142 link->rrl_rule); 2143 if (error != 0) 2144 goto fail; 2145 } 2146 } 2147 2148 return (0); 2149 2150 fail: 2151 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) { 2152 link = LIST_FIRST(&child->p_racct->r_rule_links); 2153 LIST_REMOVE(link, rrl_next); 2154 rctl_rule_release(link->rrl_rule); 2155 uma_zfree(rctl_rule_link_zone, link); 2156 } 2157 2158 return (EAGAIN); 2159 } 2160 2161 /* 2162 * Release rules attached to the racct. 
2163 */ 2164 void 2165 rctl_racct_release(struct racct *racct) 2166 { 2167 struct rctl_rule_link *link; 2168 2169 ASSERT_RACCT_ENABLED(); 2170 RACCT_LOCK_ASSERT(); 2171 2172 while (!LIST_EMPTY(&racct->r_rule_links)) { 2173 link = LIST_FIRST(&racct->r_rule_links); 2174 LIST_REMOVE(link, rrl_next); 2175 rctl_rule_release(link->rrl_rule); 2176 uma_zfree(rctl_rule_link_zone, link); 2177 } 2178 } 2179 2180 static void 2181 rctl_init(void) 2182 { 2183 2184 if (!racct_enable) 2185 return; 2186 2187 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule), 2188 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2189 rctl_rule_link_zone = uma_zcreate("rctl_rule_link", 2190 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL, 2191 UMA_ALIGN_PTR, 0); 2192 2193 /* 2194 * Set default values, making sure not to overwrite the ones 2195 * fetched from tunables. Most of those could be set at the 2196 * declaration, except for the rctl_throttle_max - we cannot 2197 * set it there due to hz not being compile time constant. 
2198 */ 2199 if (rctl_throttle_min < 1) 2200 rctl_throttle_min = 1; 2201 if (rctl_throttle_max < rctl_throttle_min) 2202 rctl_throttle_max = 2 * hz; 2203 if (rctl_throttle_pct < 0) 2204 rctl_throttle_pct = 100; 2205 if (rctl_throttle_pct2 < 0) 2206 rctl_throttle_pct2 = 100; 2207 } 2208 2209 #else /* !RCTL */ 2210 2211 int 2212 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 2213 { 2214 2215 return (ENOSYS); 2216 } 2217 2218 int 2219 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 2220 { 2221 2222 return (ENOSYS); 2223 } 2224 2225 int 2226 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 2227 { 2228 2229 return (ENOSYS); 2230 } 2231 2232 int 2233 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 2234 { 2235 2236 return (ENOSYS); 2237 } 2238 2239 int 2240 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 2241 { 2242 2243 return (ENOSYS); 2244 } 2245 2246 #endif /* !RCTL */ 2247