1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2010 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * This software was developed by Edward Tomasz Napierala under sponsorship 8 * from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * $FreeBSD$ 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/bus.h> 39 #include <sys/malloc.h> 40 #include <sys/queue.h> 41 #include <sys/refcount.h> 42 #include <sys/jail.h> 43 #include <sys/kernel.h> 44 #include <sys/limits.h> 45 #include <sys/loginclass.h> 46 #include <sys/priv.h> 47 #include <sys/proc.h> 48 #include <sys/racct.h> 49 #include <sys/rctl.h> 50 #include <sys/resourcevar.h> 51 #include <sys/sx.h> 52 #include <sys/sysent.h> 53 #include <sys/sysproto.h> 54 #include <sys/systm.h> 55 #include <sys/types.h> 56 #include <sys/eventhandler.h> 57 #include <sys/lock.h> 58 #include <sys/mutex.h> 59 #include <sys/rwlock.h> 60 #include <sys/sbuf.h> 61 #include <sys/taskqueue.h> 62 #include <sys/tree.h> 63 #include <vm/uma.h> 64 65 #ifdef RCTL 66 #ifndef RACCT 67 #error "The RCTL option requires the RACCT option" 68 #endif 69 70 FEATURE(rctl, "Resource Limits"); 71 72 #define HRF_DEFAULT 0 73 #define HRF_DONT_INHERIT 1 74 #define HRF_DONT_ACCUMULATE 2 75 76 #define RCTL_MAX_INBUFSIZE 4 * 1024 77 #define RCTL_MAX_OUTBUFSIZE 16 * 1024 * 1024 78 #define RCTL_LOG_BUFSIZE 128 79 80 #define RCTL_PCPU_SHIFT (10 * 1000000) 81 82 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE; 83 static int rctl_log_rate_limit = 10; 84 static int rctl_devctl_rate_limit = 10; 85 86 /* 87 * Values below are initialized in rctl_init(). 88 */ 89 static int rctl_throttle_min = -1; 90 static int rctl_throttle_max = -1; 91 static int rctl_throttle_pct = -1; 92 static int rctl_throttle_pct2 = -1; 93 94 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS); 95 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS); 96 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS); 97 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS); 98 99 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits"); 100 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN, 101 &rctl_maxbufsize, 0, "Maximum output buffer size"); 102 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW, 103 &rctl_log_rate_limit, 0, "Maximum number of log messages per second"); 104 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN, 105 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second"); 106 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min, 107 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_min_sysctl, "IU", 108 "Shortest throttling duration, in hz"); 109 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min); 110 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max, 111 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_max_sysctl, "IU", 112 "Longest throttling duration, in hz"); 113 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max); 114 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct, 115 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct_sysctl, "IU", 116 "Throttling penalty for process consumption, in percent"); 117 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct); 118 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2, 119 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct2_sysctl, "IU", 120 "Throttling penalty for container consumption, in percent"); 121 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2); 122 123 /* 124 * 'rctl_rule_link' connects a rule with every racct it's related to. 125 * For example, rule 'user:X:openfiles:deny=N/process' is linked 126 * with uidinfo for user X, and to each process of that user. 127 */ 128 struct rctl_rule_link { 129 LIST_ENTRY(rctl_rule_link) rrl_next; 130 struct rctl_rule *rrl_rule; 131 int rrl_exceeded; 132 }; 133 134 struct dict { 135 const char *d_name; 136 int d_value; 137 }; 138 139 static struct dict subjectnames[] = { 140 { "process", RCTL_SUBJECT_TYPE_PROCESS }, 141 { "user", RCTL_SUBJECT_TYPE_USER }, 142 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS }, 143 { "jail", RCTL_SUBJECT_TYPE_JAIL }, 144 { NULL, -1 }}; 145 146 static struct dict resourcenames[] = { 147 { "cputime", RACCT_CPU }, 148 { "datasize", RACCT_DATA }, 149 { "stacksize", RACCT_STACK }, 150 { "coredumpsize", RACCT_CORE }, 151 { "memoryuse", RACCT_RSS }, 152 { "memorylocked", RACCT_MEMLOCK }, 153 { "maxproc", RACCT_NPROC }, 154 { "openfiles", RACCT_NOFILE }, 155 { "vmemoryuse", RACCT_VMEM }, 156 { "pseudoterminals", RACCT_NPTS }, 157 { "swapuse", RACCT_SWAP }, 158 { "nthr", RACCT_NTHR }, 159 { "msgqqueued", RACCT_MSGQQUEUED }, 160 { "msgqsize", RACCT_MSGQSIZE }, 161 { "nmsgq", RACCT_NMSGQ }, 162 { "nsem", RACCT_NSEM }, 163 { "nsemop", RACCT_NSEMOP }, 164 { "nshm", RACCT_NSHM }, 165 { "shmsize", RACCT_SHMSIZE }, 166 { "wallclock", RACCT_WALLCLOCK }, 167 { "pcpu", RACCT_PCTCPU }, 168 { "readbps", RACCT_READBPS }, 169 { "writebps", RACCT_WRITEBPS }, 170 { "readiops", RACCT_READIOPS }, 171 { "writeiops", RACCT_WRITEIOPS }, 172 { NULL, -1 }}; 173 174 static struct dict actionnames[] = { 175 { "sighup", RCTL_ACTION_SIGHUP }, 176 { "sigint", RCTL_ACTION_SIGINT }, 177 { "sigquit", RCTL_ACTION_SIGQUIT }, 178 { "sigill", RCTL_ACTION_SIGILL }, 179 { "sigtrap", RCTL_ACTION_SIGTRAP }, 180 { "sigabrt", RCTL_ACTION_SIGABRT }, 181 { "sigemt", RCTL_ACTION_SIGEMT }, 182 { "sigfpe", RCTL_ACTION_SIGFPE }, 183 { "sigkill", RCTL_ACTION_SIGKILL }, 184 { "sigbus", RCTL_ACTION_SIGBUS }, 185 { "sigsegv", RCTL_ACTION_SIGSEGV }, 186 { "sigsys", RCTL_ACTION_SIGSYS }, 187 { "sigpipe", RCTL_ACTION_SIGPIPE }, 188 { "sigalrm", RCTL_ACTION_SIGALRM }, 189 { "sigterm", RCTL_ACTION_SIGTERM }, 190 { "sigurg", RCTL_ACTION_SIGURG }, 191 { "sigstop", RCTL_ACTION_SIGSTOP }, 192 { "sigtstp", RCTL_ACTION_SIGTSTP }, 193 { "sigchld", RCTL_ACTION_SIGCHLD }, 194 { "sigttin", RCTL_ACTION_SIGTTIN }, 195 { "sigttou", RCTL_ACTION_SIGTTOU }, 196 { "sigio", RCTL_ACTION_SIGIO }, 197 { "sigxcpu", RCTL_ACTION_SIGXCPU }, 198 { "sigxfsz", RCTL_ACTION_SIGXFSZ }, 199 { "sigvtalrm", RCTL_ACTION_SIGVTALRM }, 200 { "sigprof", RCTL_ACTION_SIGPROF }, 201 { "sigwinch", RCTL_ACTION_SIGWINCH }, 202 { "siginfo", RCTL_ACTION_SIGINFO }, 203 { "sigusr1", RCTL_ACTION_SIGUSR1 }, 204 { "sigusr2", RCTL_ACTION_SIGUSR2 }, 205 { "sigthr", RCTL_ACTION_SIGTHR }, 206 { "deny", RCTL_ACTION_DENY }, 207 { "log", RCTL_ACTION_LOG }, 208 { "devctl", RCTL_ACTION_DEVCTL }, 209 { "throttle", RCTL_ACTION_THROTTLE }, 210 { NULL, -1 }}; 211 212 static void rctl_init(void); 213 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL); 214 215 static uma_zone_t rctl_rule_zone; 216 static uma_zone_t rctl_rule_link_zone; 217 218 static int rctl_rule_fully_specified(const struct rctl_rule *rule); 219 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule); 220 221 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits"); 222 223 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS) 224 { 225 int error, val = rctl_throttle_min; 226 227 error = sysctl_handle_int(oidp, &val, 0, req); 228 if (error || !req->newptr) 229 return (error); 230 if (val < 1 || val > rctl_throttle_max) 231 return (EINVAL); 232 233 RACCT_LOCK(); 234 rctl_throttle_min = val; 235 RACCT_UNLOCK(); 236 237 return (0); 238 } 239 240 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS) 241 { 242 int error, val = rctl_throttle_max; 243 244 error = sysctl_handle_int(oidp, &val, 0, req); 245 if (error || !req->newptr) 246 return (error); 247 if (val < rctl_throttle_min) 248 return (EINVAL); 249 250 RACCT_LOCK(); 251 rctl_throttle_max = val; 252 RACCT_UNLOCK(); 253 254 return (0); 255 } 256 257 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS) 258 { 259 int error, val = rctl_throttle_pct; 260 261 error = sysctl_handle_int(oidp, &val, 0, req); 262 if (error || !req->newptr) 263 return (error); 264 if (val < 0) 265 return (EINVAL); 266 267 RACCT_LOCK(); 268 rctl_throttle_pct = val; 269 RACCT_UNLOCK(); 270 271 return (0); 272 } 273 274 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS) 275 { 276 int error, val = rctl_throttle_pct2; 277 278 error = sysctl_handle_int(oidp, &val, 0, req); 279 if (error || !req->newptr) 280 return (error); 281 if (val < 0) 282 return (EINVAL); 283 284 RACCT_LOCK(); 285 rctl_throttle_pct2 = val; 286 RACCT_UNLOCK(); 287 288 return (0); 289 } 290 291 static const char * 292 rctl_subject_type_name(int subject) 293 { 294 int i; 295 296 for (i = 0; subjectnames[i].d_name != NULL; i++) { 297 if (subjectnames[i].d_value == subject) 298 return (subjectnames[i].d_name); 299 } 300 301 panic("rctl_subject_type_name: unknown subject type %d", subject); 302 } 303 304 static const char * 305 rctl_action_name(int action) 306 { 307 int i; 308 309 for (i = 0; actionnames[i].d_name != NULL; i++) { 310 if (actionnames[i].d_value == action) 311 return (actionnames[i].d_name); 312 } 313 314 panic("rctl_action_name: unknown action %d", action); 315 } 316 317 const char * 318 rctl_resource_name(int resource) 319 { 320 int i; 321 322 for (i = 0; resourcenames[i].d_name != NULL; i++) { 323 if (resourcenames[i].d_value == resource) 324 return (resourcenames[i].d_name); 325 } 326 327 panic("rctl_resource_name: unknown resource %d", resource); 328 } 329 330 static struct racct * 331 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule) 332 { 333 struct ucred *cred = p->p_ucred; 334 335 ASSERT_RACCT_ENABLED(); 336 RACCT_LOCK_ASSERT(); 337 338 switch (rule->rr_per) { 339 case RCTL_SUBJECT_TYPE_PROCESS: 340 return (p->p_racct); 341 case RCTL_SUBJECT_TYPE_USER: 342 return (cred->cr_ruidinfo->ui_racct); 343 case RCTL_SUBJECT_TYPE_LOGINCLASS: 344 return (cred->cr_loginclass->lc_racct); 345 case RCTL_SUBJECT_TYPE_JAIL: 346 return (cred->cr_prison->pr_prison_racct->prr_racct); 347 default: 348 panic("%s: unknown per %d", __func__, rule->rr_per); 349 } 350 } 351 352 /* 353 * Return the amount of resource that can be allocated by 'p' before 354 * hitting 'rule'. 355 */ 356 static int64_t 357 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule) 358 { 359 const struct racct *racct; 360 int64_t available; 361 362 ASSERT_RACCT_ENABLED(); 363 RACCT_LOCK_ASSERT(); 364 365 racct = rctl_proc_rule_to_racct(p, rule); 366 available = rule->rr_amount - racct->r_resources[rule->rr_resource]; 367 368 return (available); 369 } 370 371 /* 372 * Called every second for proc, uidinfo, loginclass, and jail containers. 373 * If the limit isn't exceeded, it decreases the usage amount to zero. 374 * Otherwise, it decreases it by the value of the limit. This way 375 * resource consumption exceeding the limit "carries over" to the next 376 * period. 377 */ 378 void 379 rctl_throttle_decay(struct racct *racct, int resource) 380 { 381 struct rctl_rule *rule; 382 struct rctl_rule_link *link; 383 int64_t minavailable; 384 385 ASSERT_RACCT_ENABLED(); 386 RACCT_LOCK_ASSERT(); 387 388 minavailable = INT64_MAX; 389 390 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 391 rule = link->rrl_rule; 392 393 if (rule->rr_resource != resource) 394 continue; 395 if (rule->rr_action != RCTL_ACTION_THROTTLE) 396 continue; 397 398 if (rule->rr_amount < minavailable) 399 minavailable = rule->rr_amount; 400 } 401 402 if (racct->r_resources[resource] < minavailable) { 403 racct->r_resources[resource] = 0; 404 } else { 405 /* 406 * Cap utilization counter at ten times the limit. Otherwise, 407 * if we changed the rule lowering the allowed amount, it could 408 * take unreasonably long time for the accumulated resource 409 * usage to drop. 410 */ 411 if (racct->r_resources[resource] > minavailable * 10) 412 racct->r_resources[resource] = minavailable * 10; 413 414 racct->r_resources[resource] -= minavailable; 415 } 416 } 417 418 /* 419 * Special version of rctl_get_available() for the %CPU resource. 420 * We slightly cheat here and return less than we normally would. 421 */ 422 int64_t 423 rctl_pcpu_available(const struct proc *p) { 424 struct rctl_rule *rule; 425 struct rctl_rule_link *link; 426 int64_t available, minavailable, limit; 427 428 ASSERT_RACCT_ENABLED(); 429 RACCT_LOCK_ASSERT(); 430 431 minavailable = INT64_MAX; 432 limit = 0; 433 434 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 435 rule = link->rrl_rule; 436 if (rule->rr_resource != RACCT_PCTCPU) 437 continue; 438 if (rule->rr_action != RCTL_ACTION_DENY) 439 continue; 440 available = rctl_available_resource(p, rule); 441 if (available < minavailable) { 442 minavailable = available; 443 limit = rule->rr_amount; 444 } 445 } 446 447 /* 448 * Return slightly less than actual value of the available 449 * %cpu resource. This makes %cpu throttling more aggressive 450 * and lets us act sooner than the limits are already exceeded. 451 */ 452 if (limit != 0) { 453 if (limit > 2 * RCTL_PCPU_SHIFT) 454 minavailable -= RCTL_PCPU_SHIFT; 455 else 456 minavailable -= (limit / 2); 457 } 458 459 return (minavailable); 460 } 461 462 static uint64_t 463 xadd(uint64_t a, uint64_t b) 464 { 465 uint64_t c; 466 467 c = a + b; 468 469 /* 470 * Detect overflow. 471 */ 472 if (c < a || c < b) 473 return (UINT64_MAX); 474 475 return (c); 476 } 477 478 static uint64_t 479 xmul(uint64_t a, uint64_t b) 480 { 481 482 if (b != 0 && a > UINT64_MAX / b) 483 return (UINT64_MAX); 484 485 return (a * b); 486 } 487 488 /* 489 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition 490 * to what it keeps allocated now. Returns non-zero if the allocation should 491 * be denied, 0 otherwise. 492 */ 493 int 494 rctl_enforce(struct proc *p, int resource, uint64_t amount) 495 { 496 static struct timeval log_lasttime, devctl_lasttime; 497 static int log_curtime = 0, devctl_curtime = 0; 498 struct rctl_rule *rule; 499 struct rctl_rule_link *link; 500 struct sbuf sb; 501 char *buf; 502 int64_t available; 503 uint64_t sleep_ms, sleep_ratio; 504 int should_deny = 0; 505 506 ASSERT_RACCT_ENABLED(); 507 RACCT_LOCK_ASSERT(); 508 509 /* 510 * There may be more than one matching rule; go through all of them. 511 * Denial should be done last, after logging and sending signals. 512 */ 513 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 514 rule = link->rrl_rule; 515 if (rule->rr_resource != resource) 516 continue; 517 518 available = rctl_available_resource(p, rule); 519 if (available >= (int64_t)amount) { 520 link->rrl_exceeded = 0; 521 continue; 522 } 523 524 switch (rule->rr_action) { 525 case RCTL_ACTION_DENY: 526 should_deny = 1; 527 continue; 528 case RCTL_ACTION_LOG: 529 /* 530 * If rrl_exceeded != 0, it means we've already 531 * logged a warning for this process. 532 */ 533 if (link->rrl_exceeded != 0) 534 continue; 535 536 /* 537 * If the process state is not fully initialized yet, 538 * we can't access most of the required fields, e.g. 539 * p->p_comm. This happens when called from fork1(). 540 * Ignore this rule for now; it will be processed just 541 * after fork, when called from racct_proc_fork_done(). 542 */ 543 if (p->p_state != PRS_NORMAL) 544 continue; 545 546 if (!ppsratecheck(&log_lasttime, &log_curtime, 547 rctl_log_rate_limit)) 548 continue; 549 550 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 551 if (buf == NULL) { 552 printf("rctl_enforce: out of memory\n"); 553 continue; 554 } 555 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 556 rctl_rule_to_sbuf(&sb, rule); 557 sbuf_finish(&sb); 558 printf("rctl: rule \"%s\" matched by pid %d " 559 "(%s), uid %d, jail %s\n", sbuf_data(&sb), 560 p->p_pid, p->p_comm, p->p_ucred->cr_uid, 561 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 562 sbuf_delete(&sb); 563 free(buf, M_RCTL); 564 link->rrl_exceeded = 1; 565 continue; 566 case RCTL_ACTION_DEVCTL: 567 if (link->rrl_exceeded != 0) 568 continue; 569 570 if (p->p_state != PRS_NORMAL) 571 continue; 572 573 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime, 574 rctl_devctl_rate_limit)) 575 continue; 576 577 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 578 if (buf == NULL) { 579 printf("rctl_enforce: out of memory\n"); 580 continue; 581 } 582 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 583 sbuf_printf(&sb, "rule="); 584 rctl_rule_to_sbuf(&sb, rule); 585 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s", 586 p->p_pid, p->p_ucred->cr_ruid, 587 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 588 sbuf_finish(&sb); 589 devctl_notify_f("RCTL", "rule", "matched", 590 sbuf_data(&sb), M_NOWAIT); 591 sbuf_delete(&sb); 592 free(buf, M_RCTL); 593 link->rrl_exceeded = 1; 594 continue; 595 case RCTL_ACTION_THROTTLE: 596 if (p->p_state != PRS_NORMAL) 597 continue; 598 599 /* 600 * Make the process sleep for a fraction of second 601 * proportional to the ratio of process' resource 602 * utilization compared to the limit. The point is 603 * to penalize resource hogs: processes that consume 604 * more of the available resources sleep for longer. 605 * 606 * We're trying to defer division until the very end, 607 * to minimize the rounding effects. The following 608 * calculation could have been written in a clearer 609 * way like this: 610 * 611 * sleep_ms = hz * p->p_racct->r_resources[resource] / 612 * rule->rr_amount; 613 * sleep_ms *= rctl_throttle_pct / 100; 614 * if (sleep_ms < rctl_throttle_min) 615 * sleep_ms = rctl_throttle_min; 616 * 617 */ 618 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]); 619 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100; 620 if (sleep_ms < rctl_throttle_min * rule->rr_amount) 621 sleep_ms = rctl_throttle_min * rule->rr_amount; 622 623 /* 624 * Multiply that by the ratio of the resource 625 * consumption for the container compared to the limit, 626 * squared. In other words, a process in a container 627 * that is two times over the limit will be throttled 628 * four times as much for hitting the same rule. The 629 * point is to penalize processes more if the container 630 * itself (eg certain UID or jail) is above the limit. 631 */ 632 if (available < 0) 633 sleep_ratio = -available / rule->rr_amount; 634 else 635 sleep_ratio = 0; 636 sleep_ratio = xmul(sleep_ratio, sleep_ratio); 637 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100; 638 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio)); 639 640 /* 641 * Finally the division. 642 */ 643 sleep_ms /= rule->rr_amount; 644 645 if (sleep_ms > rctl_throttle_max) 646 sleep_ms = rctl_throttle_max; 647 #if 0 648 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n", 649 __func__, p->p_pid, p->p_comm, 650 p->p_racct->r_resources[resource], 651 rule->rr_amount, (uintmax_t)sleep_ms, 652 (uintmax_t)sleep_ratio, (intmax_t)available); 653 #endif 654 655 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n", 656 __func__, (uintmax_t)sleep_ms, rctl_throttle_min)); 657 racct_proc_throttle(p, sleep_ms); 658 continue; 659 default: 660 if (link->rrl_exceeded != 0) 661 continue; 662 663 if (p->p_state != PRS_NORMAL) 664 continue; 665 666 KASSERT(rule->rr_action > 0 && 667 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX, 668 ("rctl_enforce: unknown action %d", 669 rule->rr_action)); 670 671 /* 672 * We're using the fact that RCTL_ACTION_SIG* values 673 * are equal to their counterparts from sys/signal.h. 674 */ 675 kern_psignal(p, rule->rr_action); 676 link->rrl_exceeded = 1; 677 continue; 678 } 679 } 680 681 if (should_deny) { 682 /* 683 * Return fake error code; the caller should change it 684 * into one proper for the situation - EFSIZ, ENOMEM etc. 685 */ 686 return (EDOOFUS); 687 } 688 689 return (0); 690 } 691 692 uint64_t 693 rctl_get_limit(struct proc *p, int resource) 694 { 695 struct rctl_rule *rule; 696 struct rctl_rule_link *link; 697 uint64_t amount = UINT64_MAX; 698 699 ASSERT_RACCT_ENABLED(); 700 RACCT_LOCK_ASSERT(); 701 702 /* 703 * There may be more than one matching rule; go through all of them. 704 * Denial should be done last, after logging and sending signals. 705 */ 706 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 707 rule = link->rrl_rule; 708 if (rule->rr_resource != resource) 709 continue; 710 if (rule->rr_action != RCTL_ACTION_DENY) 711 continue; 712 if (rule->rr_amount < amount) 713 amount = rule->rr_amount; 714 } 715 716 return (amount); 717 } 718 719 uint64_t 720 rctl_get_available(struct proc *p, int resource) 721 { 722 struct rctl_rule *rule; 723 struct rctl_rule_link *link; 724 int64_t available, minavailable, allocated; 725 726 minavailable = INT64_MAX; 727 728 ASSERT_RACCT_ENABLED(); 729 RACCT_LOCK_ASSERT(); 730 731 /* 732 * There may be more than one matching rule; go through all of them. 733 * Denial should be done last, after logging and sending signals. 734 */ 735 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 736 rule = link->rrl_rule; 737 if (rule->rr_resource != resource) 738 continue; 739 if (rule->rr_action != RCTL_ACTION_DENY) 740 continue; 741 available = rctl_available_resource(p, rule); 742 if (available < minavailable) 743 minavailable = available; 744 } 745 746 /* 747 * XXX: Think about this _hard_. 748 */ 749 allocated = p->p_racct->r_resources[resource]; 750 if (minavailable < INT64_MAX - allocated) 751 minavailable += allocated; 752 if (minavailable < 0) 753 minavailable = 0; 754 755 return (minavailable); 756 } 757 758 static int 759 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter) 760 { 761 762 ASSERT_RACCT_ENABLED(); 763 764 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) { 765 if (rule->rr_subject_type != filter->rr_subject_type) 766 return (0); 767 768 switch (filter->rr_subject_type) { 769 case RCTL_SUBJECT_TYPE_PROCESS: 770 if (filter->rr_subject.rs_proc != NULL && 771 rule->rr_subject.rs_proc != 772 filter->rr_subject.rs_proc) 773 return (0); 774 break; 775 case RCTL_SUBJECT_TYPE_USER: 776 if (filter->rr_subject.rs_uip != NULL && 777 rule->rr_subject.rs_uip != 778 filter->rr_subject.rs_uip) 779 return (0); 780 break; 781 case RCTL_SUBJECT_TYPE_LOGINCLASS: 782 if (filter->rr_subject.rs_loginclass != NULL && 783 rule->rr_subject.rs_loginclass != 784 filter->rr_subject.rs_loginclass) 785 return (0); 786 break; 787 case RCTL_SUBJECT_TYPE_JAIL: 788 if (filter->rr_subject.rs_prison_racct != NULL && 789 rule->rr_subject.rs_prison_racct != 790 filter->rr_subject.rs_prison_racct) 791 return (0); 792 break; 793 default: 794 panic("rctl_rule_matches: unknown subject type %d", 795 filter->rr_subject_type); 796 } 797 } 798 799 if (filter->rr_resource != RACCT_UNDEFINED) { 800 if (rule->rr_resource != filter->rr_resource) 801 return (0); 802 } 803 804 if (filter->rr_action != RCTL_ACTION_UNDEFINED) { 805 if (rule->rr_action != filter->rr_action) 806 return (0); 807 } 808 809 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) { 810 if (rule->rr_amount != filter->rr_amount) 811 return (0); 812 } 813 814 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) { 815 if (rule->rr_per != filter->rr_per) 816 return (0); 817 } 818 819 return (1); 820 } 821 822 static int 823 str2value(const char *str, int *value, struct dict *table) 824 { 825 int i; 826 827 if (value == NULL) 828 return (EINVAL); 829 830 for (i = 0; table[i].d_name != NULL; i++) { 831 if (strcasecmp(table[i].d_name, str) == 0) { 832 *value = table[i].d_value; 833 return (0); 834 } 835 } 836 837 return (EINVAL); 838 } 839 840 static int 841 str2id(const char *str, id_t *value) 842 { 843 char *end; 844 845 if (str == NULL) 846 return (EINVAL); 847 848 *value = strtoul(str, &end, 10); 849 if ((size_t)(end - str) != strlen(str)) 850 return (EINVAL); 851 852 return (0); 853 } 854 855 static int 856 str2int64(const char *str, int64_t *value) 857 { 858 char *end; 859 860 if (str == NULL) 861 return (EINVAL); 862 863 *value = strtoul(str, &end, 10); 864 if ((size_t)(end - str) != strlen(str)) 865 return (EINVAL); 866 867 if (*value < 0) 868 return (ERANGE); 869 870 return (0); 871 } 872 873 /* 874 * Connect the rule to the racct, increasing refcount for the rule. 875 */ 876 static void 877 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule) 878 { 879 struct rctl_rule_link *link; 880 881 ASSERT_RACCT_ENABLED(); 882 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 883 884 rctl_rule_acquire(rule); 885 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 886 link->rrl_rule = rule; 887 link->rrl_exceeded = 0; 888 889 RACCT_LOCK(); 890 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 891 RACCT_UNLOCK(); 892 } 893 894 static int 895 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule) 896 { 897 struct rctl_rule_link *link; 898 899 ASSERT_RACCT_ENABLED(); 900 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 901 RACCT_LOCK_ASSERT(); 902 903 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT); 904 if (link == NULL) 905 return (ENOMEM); 906 rctl_rule_acquire(rule); 907 link->rrl_rule = rule; 908 link->rrl_exceeded = 0; 909 910 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 911 912 return (0); 913 } 914 915 /* 916 * Remove limits for a rules matching the filter and release 917 * the refcounts for the rules, possibly freeing them. Returns 918 * the number of limit structures removed. 919 */ 920 static int 921 rctl_racct_remove_rules(struct racct *racct, 922 const struct rctl_rule *filter) 923 { 924 struct rctl_rule_link *link, *linktmp; 925 int removed = 0; 926 927 ASSERT_RACCT_ENABLED(); 928 RACCT_LOCK_ASSERT(); 929 930 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) { 931 if (!rctl_rule_matches(link->rrl_rule, filter)) 932 continue; 933 934 LIST_REMOVE(link, rrl_next); 935 rctl_rule_release(link->rrl_rule); 936 uma_zfree(rctl_rule_link_zone, link); 937 removed++; 938 } 939 return (removed); 940 } 941 942 static void 943 rctl_rule_acquire_subject(struct rctl_rule *rule) 944 { 945 946 ASSERT_RACCT_ENABLED(); 947 948 switch (rule->rr_subject_type) { 949 case RCTL_SUBJECT_TYPE_UNDEFINED: 950 case RCTL_SUBJECT_TYPE_PROCESS: 951 break; 952 case RCTL_SUBJECT_TYPE_JAIL: 953 if (rule->rr_subject.rs_prison_racct != NULL) 954 prison_racct_hold(rule->rr_subject.rs_prison_racct); 955 break; 956 case RCTL_SUBJECT_TYPE_USER: 957 if (rule->rr_subject.rs_uip != NULL) 958 uihold(rule->rr_subject.rs_uip); 959 break; 960 case RCTL_SUBJECT_TYPE_LOGINCLASS: 961 if (rule->rr_subject.rs_loginclass != NULL) 962 loginclass_hold(rule->rr_subject.rs_loginclass); 963 break; 964 default: 965 panic("rctl_rule_acquire_subject: unknown subject type %d", 966 rule->rr_subject_type); 967 } 968 } 969 970 static void 971 rctl_rule_release_subject(struct rctl_rule *rule) 972 { 973 974 ASSERT_RACCT_ENABLED(); 975 976 switch (rule->rr_subject_type) { 977 case RCTL_SUBJECT_TYPE_UNDEFINED: 978 case RCTL_SUBJECT_TYPE_PROCESS: 979 break; 980 case RCTL_SUBJECT_TYPE_JAIL: 981 if (rule->rr_subject.rs_prison_racct != NULL) 982 prison_racct_free(rule->rr_subject.rs_prison_racct); 983 break; 984 case RCTL_SUBJECT_TYPE_USER: 985 if (rule->rr_subject.rs_uip != NULL) 986 uifree(rule->rr_subject.rs_uip); 987 break; 988 case RCTL_SUBJECT_TYPE_LOGINCLASS: 989 if (rule->rr_subject.rs_loginclass != NULL) 990 loginclass_free(rule->rr_subject.rs_loginclass); 991 break; 992 default: 993 panic("rctl_rule_release_subject: unknown subject type %d", 994 rule->rr_subject_type); 995 } 996 } 997 998 struct rctl_rule * 999 rctl_rule_alloc(int flags) 1000 { 1001 struct rctl_rule *rule; 1002 1003 ASSERT_RACCT_ENABLED(); 1004 1005 rule = uma_zalloc(rctl_rule_zone, flags); 1006 if (rule == NULL) 1007 return (NULL); 1008 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1009 rule->rr_subject.rs_proc = NULL; 1010 rule->rr_subject.rs_uip = NULL; 1011 rule->rr_subject.rs_loginclass = NULL; 1012 rule->rr_subject.rs_prison_racct = NULL; 1013 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1014 rule->rr_resource = RACCT_UNDEFINED; 1015 rule->rr_action = RCTL_ACTION_UNDEFINED; 1016 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1017 refcount_init(&rule->rr_refcount, 1); 1018 1019 return (rule); 1020 } 1021 1022 struct rctl_rule * 1023 rctl_rule_duplicate(const struct rctl_rule *rule, int flags) 1024 { 1025 struct rctl_rule *copy; 1026 1027 ASSERT_RACCT_ENABLED(); 1028 1029 copy = uma_zalloc(rctl_rule_zone, flags); 1030 if (copy == NULL) 1031 return (NULL); 1032 copy->rr_subject_type = rule->rr_subject_type; 1033 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc; 1034 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip; 1035 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass; 1036 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct; 1037 copy->rr_per = rule->rr_per; 1038 copy->rr_resource = rule->rr_resource; 1039 copy->rr_action = rule->rr_action; 1040 copy->rr_amount = rule->rr_amount; 1041 refcount_init(©->rr_refcount, 1); 1042 rctl_rule_acquire_subject(copy); 1043 1044 return (copy); 1045 } 1046 1047 void 1048 rctl_rule_acquire(struct rctl_rule *rule) 1049 { 1050 1051 ASSERT_RACCT_ENABLED(); 1052 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1053 1054 refcount_acquire(&rule->rr_refcount); 1055 } 1056 1057 static void 1058 rctl_rule_free(void *context, int pending) 1059 { 1060 struct rctl_rule *rule; 1061 1062 rule = (struct rctl_rule *)context; 1063 1064 ASSERT_RACCT_ENABLED(); 1065 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0")); 1066 1067 /* 1068 * We don't need locking here; rule is guaranteed to be inaccessible. 1069 */ 1070 1071 rctl_rule_release_subject(rule); 1072 uma_zfree(rctl_rule_zone, rule); 1073 } 1074 1075 void 1076 rctl_rule_release(struct rctl_rule *rule) 1077 { 1078 1079 ASSERT_RACCT_ENABLED(); 1080 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1081 1082 if (refcount_release(&rule->rr_refcount)) { 1083 /* 1084 * rctl_rule_release() is often called when iterating 1085 * over all the uidinfo structures in the system, 1086 * holding uihashtbl_lock. Since rctl_rule_free() 1087 * might end up calling uifree(), this would lead 1088 * to lock recursion. Use taskqueue to avoid this. 1089 */ 1090 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule); 1091 taskqueue_enqueue(taskqueue_thread, &rule->rr_task); 1092 } 1093 } 1094 1095 static int 1096 rctl_rule_fully_specified(const struct rctl_rule *rule) 1097 { 1098 1099 ASSERT_RACCT_ENABLED(); 1100 1101 switch (rule->rr_subject_type) { 1102 case RCTL_SUBJECT_TYPE_UNDEFINED: 1103 return (0); 1104 case RCTL_SUBJECT_TYPE_PROCESS: 1105 if (rule->rr_subject.rs_proc == NULL) 1106 return (0); 1107 break; 1108 case RCTL_SUBJECT_TYPE_USER: 1109 if (rule->rr_subject.rs_uip == NULL) 1110 return (0); 1111 break; 1112 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1113 if (rule->rr_subject.rs_loginclass == NULL) 1114 return (0); 1115 break; 1116 case RCTL_SUBJECT_TYPE_JAIL: 1117 if (rule->rr_subject.rs_prison_racct == NULL) 1118 return (0); 1119 break; 1120 default: 1121 panic("rctl_rule_fully_specified: unknown subject type %d", 1122 rule->rr_subject_type); 1123 } 1124 if (rule->rr_resource == RACCT_UNDEFINED) 1125 return (0); 1126 if (rule->rr_action == RCTL_ACTION_UNDEFINED) 1127 return (0); 1128 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED) 1129 return (0); 1130 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED) 1131 return (0); 1132 1133 return (1); 1134 } 1135 1136 static int 1137 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep) 1138 { 1139 struct rctl_rule *rule; 1140 char *subjectstr, *subject_idstr, *resourcestr, *actionstr, 1141 *amountstr, *perstr; 1142 id_t id; 1143 int error = 0; 1144 1145 ASSERT_RACCT_ENABLED(); 1146 1147 rule = rctl_rule_alloc(M_WAITOK); 1148 1149 subjectstr = strsep(&rulestr, ":"); 1150 subject_idstr = strsep(&rulestr, ":"); 1151 resourcestr = strsep(&rulestr, ":"); 1152 actionstr = strsep(&rulestr, "=/"); 1153 amountstr = strsep(&rulestr, "/"); 1154 perstr = rulestr; 1155 1156 if (subjectstr == NULL || subjectstr[0] == '\0') 1157 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1158 else { 1159 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames); 1160 if (error != 0) 1161 goto out; 1162 } 1163 1164 if (subject_idstr == NULL || subject_idstr[0] == '\0') { 1165 rule->rr_subject.rs_proc = NULL; 1166 rule->rr_subject.rs_uip = NULL; 1167 rule->rr_subject.rs_loginclass = NULL; 1168 rule->rr_subject.rs_prison_racct = NULL; 1169 } else { 1170 switch (rule->rr_subject_type) { 1171 case RCTL_SUBJECT_TYPE_UNDEFINED: 1172 error = EINVAL; 1173 goto out; 1174 case RCTL_SUBJECT_TYPE_PROCESS: 1175 error = str2id(subject_idstr, &id); 1176 if (error != 0) 1177 goto out; 1178 sx_assert(&allproc_lock, SA_LOCKED); 1179 rule->rr_subject.rs_proc = pfind(id); 1180 if (rule->rr_subject.rs_proc == NULL) { 1181 error = ESRCH; 1182 goto out; 1183 } 1184 PROC_UNLOCK(rule->rr_subject.rs_proc); 1185 break; 1186 case RCTL_SUBJECT_TYPE_USER: 1187 error = str2id(subject_idstr, &id); 1188 if (error != 0) 1189 goto out; 1190 rule->rr_subject.rs_uip = uifind(id); 1191 break; 1192 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1193 rule->rr_subject.rs_loginclass = 1194 loginclass_find(subject_idstr); 1195 if (rule->rr_subject.rs_loginclass == NULL) { 1196 error = ENAMETOOLONG; 1197 goto out; 1198 } 1199 break; 1200 case RCTL_SUBJECT_TYPE_JAIL: 1201 rule->rr_subject.rs_prison_racct = 1202 prison_racct_find(subject_idstr); 1203 if (rule->rr_subject.rs_prison_racct == NULL) { 1204 error = ENAMETOOLONG; 1205 goto out; 1206 } 1207 break; 1208 default: 1209 panic("rctl_string_to_rule: unknown subject type %d", 1210 rule->rr_subject_type); 1211 } 1212 } 1213 1214 if (resourcestr == NULL || resourcestr[0] == '\0') 1215 rule->rr_resource = RACCT_UNDEFINED; 1216 else { 1217 error = str2value(resourcestr, &rule->rr_resource, 1218 resourcenames); 1219 if (error != 0) 1220 goto out; 1221 } 1222 1223 if (actionstr == NULL || actionstr[0] == '\0') 1224 rule->rr_action = RCTL_ACTION_UNDEFINED; 1225 else { 1226 error = str2value(actionstr, &rule->rr_action, actionnames); 1227 if (error != 0) 1228 goto out; 1229 } 1230 1231 if (amountstr == NULL || amountstr[0] == '\0') 1232 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1233 else { 1234 error = str2int64(amountstr, &rule->rr_amount); 1235 if (error != 0) 1236 goto out; 1237 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) { 1238 if (rule->rr_amount > INT64_MAX / 1000000) { 1239 error = ERANGE; 1240 goto out; 1241 } 1242 rule->rr_amount *= 1000000; 1243 } 1244 } 1245 1246 if (perstr == NULL || perstr[0] == '\0') 1247 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1248 else { 1249 error = str2value(perstr, &rule->rr_per, subjectnames); 1250 if (error != 0) 1251 goto out; 1252 } 1253 1254 out: 1255 if (error == 0) 1256 *rulep = rule; 1257 else 1258 rctl_rule_release(rule); 1259 1260 return (error); 1261 } 1262 1263 /* 1264 * Link a rule with all the subjects it applies to. 1265 */ 1266 int 1267 rctl_rule_add(struct rctl_rule *rule) 1268 { 1269 struct proc *p; 1270 struct ucred *cred; 1271 struct uidinfo *uip; 1272 struct prison *pr; 1273 struct prison_racct *prr; 1274 struct loginclass *lc; 1275 struct rctl_rule *rule2; 1276 int match; 1277 1278 ASSERT_RACCT_ENABLED(); 1279 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 1280 1281 /* 1282 * Some rules just don't make sense, like "deny" rule for an undeniable 1283 * resource. The exception are the RSS and %CPU resources - they are 1284 * not deniable in the racct sense, but the limit is enforced in 1285 * a different way. 1286 */ 1287 if (rule->rr_action == RCTL_ACTION_DENY && 1288 !RACCT_IS_DENIABLE(rule->rr_resource) && 1289 rule->rr_resource != RACCT_RSS && 1290 rule->rr_resource != RACCT_PCTCPU) { 1291 return (EOPNOTSUPP); 1292 } 1293 1294 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1295 !RACCT_IS_DECAYING(rule->rr_resource)) { 1296 return (EOPNOTSUPP); 1297 } 1298 1299 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1300 rule->rr_resource == RACCT_PCTCPU) { 1301 return (EOPNOTSUPP); 1302 } 1303 1304 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS && 1305 RACCT_IS_SLOPPY(rule->rr_resource)) { 1306 return (EOPNOTSUPP); 1307 } 1308 1309 /* 1310 * Make sure there are no duplicated rules. Also, for the "deny" 1311 * rules, remove ones differing only by "amount". 1312 */ 1313 if (rule->rr_action == RCTL_ACTION_DENY) { 1314 rule2 = rctl_rule_duplicate(rule, M_WAITOK); 1315 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED; 1316 rctl_rule_remove(rule2); 1317 rctl_rule_release(rule2); 1318 } else 1319 rctl_rule_remove(rule); 1320 1321 switch (rule->rr_subject_type) { 1322 case RCTL_SUBJECT_TYPE_PROCESS: 1323 p = rule->rr_subject.rs_proc; 1324 KASSERT(p != NULL, ("rctl_rule_add: NULL proc")); 1325 1326 rctl_racct_add_rule(p->p_racct, rule); 1327 /* 1328 * In case of per-process rule, we don't have anything more 1329 * to do. 1330 */ 1331 return (0); 1332 1333 case RCTL_SUBJECT_TYPE_USER: 1334 uip = rule->rr_subject.rs_uip; 1335 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip")); 1336 rctl_racct_add_rule(uip->ui_racct, rule); 1337 break; 1338 1339 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1340 lc = rule->rr_subject.rs_loginclass; 1341 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass")); 1342 rctl_racct_add_rule(lc->lc_racct, rule); 1343 break; 1344 1345 case RCTL_SUBJECT_TYPE_JAIL: 1346 prr = rule->rr_subject.rs_prison_racct; 1347 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr")); 1348 rctl_racct_add_rule(prr->prr_racct, rule); 1349 break; 1350 1351 default: 1352 panic("rctl_rule_add: unknown subject type %d", 1353 rule->rr_subject_type); 1354 } 1355 1356 /* 1357 * Now go through all the processes and add the new rule to the ones 1358 * it applies to. 1359 */ 1360 sx_assert(&allproc_lock, SA_LOCKED); 1361 FOREACH_PROC_IN_SYSTEM(p) { 1362 cred = p->p_ucred; 1363 switch (rule->rr_subject_type) { 1364 case RCTL_SUBJECT_TYPE_USER: 1365 if (cred->cr_uidinfo == rule->rr_subject.rs_uip || 1366 cred->cr_ruidinfo == rule->rr_subject.rs_uip) 1367 break; 1368 continue; 1369 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1370 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) 1371 break; 1372 continue; 1373 case RCTL_SUBJECT_TYPE_JAIL: 1374 match = 0; 1375 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { 1376 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { 1377 match = 1; 1378 break; 1379 } 1380 } 1381 if (match) 1382 break; 1383 continue; 1384 default: 1385 panic("rctl_rule_add: unknown subject type %d", 1386 rule->rr_subject_type); 1387 } 1388 1389 rctl_racct_add_rule(p->p_racct, rule); 1390 } 1391 1392 return (0); 1393 } 1394 1395 static void 1396 rctl_rule_pre_callback(void) 1397 { 1398 1399 RACCT_LOCK(); 1400 } 1401 1402 static void 1403 rctl_rule_post_callback(void) 1404 { 1405 1406 RACCT_UNLOCK(); 1407 } 1408 1409 static void 1410 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3) 1411 { 1412 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1413 int found = 0; 1414 1415 ASSERT_RACCT_ENABLED(); 1416 RACCT_LOCK_ASSERT(); 1417 1418 found += rctl_racct_remove_rules(racct, filter); 1419 1420 *((int *)arg3) += found; 1421 } 1422 1423 /* 1424 * Remove all rules that match the filter. 1425 */ 1426 int 1427 rctl_rule_remove(struct rctl_rule *filter) 1428 { 1429 struct proc *p; 1430 int found = 0; 1431 1432 ASSERT_RACCT_ENABLED(); 1433 1434 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS && 1435 filter->rr_subject.rs_proc != NULL) { 1436 p = filter->rr_subject.rs_proc; 1437 RACCT_LOCK(); 1438 found = rctl_racct_remove_rules(p->p_racct, filter); 1439 RACCT_UNLOCK(); 1440 if (found) 1441 return (0); 1442 return (ESRCH); 1443 } 1444 1445 loginclass_racct_foreach(rctl_rule_remove_callback, 1446 rctl_rule_pre_callback, rctl_rule_post_callback, 1447 filter, (void *)&found); 1448 ui_racct_foreach(rctl_rule_remove_callback, 1449 rctl_rule_pre_callback, rctl_rule_post_callback, 1450 filter, (void *)&found); 1451 prison_racct_foreach(rctl_rule_remove_callback, 1452 rctl_rule_pre_callback, rctl_rule_post_callback, 1453 filter, (void *)&found); 1454 1455 sx_assert(&allproc_lock, SA_LOCKED); 1456 RACCT_LOCK(); 1457 FOREACH_PROC_IN_SYSTEM(p) { 1458 found += rctl_racct_remove_rules(p->p_racct, filter); 1459 } 1460 RACCT_UNLOCK(); 1461 1462 if (found) 1463 return (0); 1464 return (ESRCH); 1465 } 1466 1467 /* 1468 * Appends a rule to the sbuf. 1469 */ 1470 static void 1471 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule) 1472 { 1473 int64_t amount; 1474 1475 ASSERT_RACCT_ENABLED(); 1476 1477 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type)); 1478 1479 switch (rule->rr_subject_type) { 1480 case RCTL_SUBJECT_TYPE_PROCESS: 1481 if (rule->rr_subject.rs_proc == NULL) 1482 sbuf_printf(sb, ":"); 1483 else 1484 sbuf_printf(sb, "%d:", 1485 rule->rr_subject.rs_proc->p_pid); 1486 break; 1487 case RCTL_SUBJECT_TYPE_USER: 1488 if (rule->rr_subject.rs_uip == NULL) 1489 sbuf_printf(sb, ":"); 1490 else 1491 sbuf_printf(sb, "%d:", 1492 rule->rr_subject.rs_uip->ui_uid); 1493 break; 1494 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1495 if (rule->rr_subject.rs_loginclass == NULL) 1496 sbuf_printf(sb, ":"); 1497 else 1498 sbuf_printf(sb, "%s:", 1499 rule->rr_subject.rs_loginclass->lc_name); 1500 break; 1501 case RCTL_SUBJECT_TYPE_JAIL: 1502 if (rule->rr_subject.rs_prison_racct == NULL) 1503 sbuf_printf(sb, ":"); 1504 else 1505 sbuf_printf(sb, "%s:", 1506 rule->rr_subject.rs_prison_racct->prr_name); 1507 break; 1508 default: 1509 panic("rctl_rule_to_sbuf: unknown subject type %d", 1510 rule->rr_subject_type); 1511 } 1512 1513 amount = rule->rr_amount; 1514 if (amount != RCTL_AMOUNT_UNDEFINED && 1515 RACCT_IS_IN_MILLIONS(rule->rr_resource)) 1516 amount /= 1000000; 1517 1518 sbuf_printf(sb, "%s:%s=%jd", 1519 rctl_resource_name(rule->rr_resource), 1520 rctl_action_name(rule->rr_action), 1521 amount); 1522 1523 if (rule->rr_per != rule->rr_subject_type) 1524 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per)); 1525 } 1526 1527 /* 1528 * Routine used by RCTL syscalls to read in input string. 1529 */ 1530 static int 1531 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen) 1532 { 1533 char *str; 1534 int error; 1535 1536 ASSERT_RACCT_ENABLED(); 1537 1538 if (inbuflen <= 0) 1539 return (EINVAL); 1540 if (inbuflen > RCTL_MAX_INBUFSIZE) 1541 return (E2BIG); 1542 1543 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK); 1544 error = copyinstr(inbufp, str, inbuflen, NULL); 1545 if (error != 0) { 1546 free(str, M_RCTL); 1547 return (error); 1548 } 1549 1550 *inputstr = str; 1551 1552 return (0); 1553 } 1554 1555 /* 1556 * Routine used by RCTL syscalls to write out output string. 1557 */ 1558 static int 1559 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen) 1560 { 1561 int error; 1562 1563 ASSERT_RACCT_ENABLED(); 1564 1565 if (outputsbuf == NULL) 1566 return (0); 1567 1568 sbuf_finish(outputsbuf); 1569 if (outbuflen < sbuf_len(outputsbuf) + 1) { 1570 sbuf_delete(outputsbuf); 1571 return (ERANGE); 1572 } 1573 error = copyout(sbuf_data(outputsbuf), outbufp, 1574 sbuf_len(outputsbuf) + 1); 1575 sbuf_delete(outputsbuf); 1576 return (error); 1577 } 1578 1579 static struct sbuf * 1580 rctl_racct_to_sbuf(struct racct *racct, int sloppy) 1581 { 1582 struct sbuf *sb; 1583 int64_t amount; 1584 int i; 1585 1586 ASSERT_RACCT_ENABLED(); 1587 1588 sb = sbuf_new_auto(); 1589 for (i = 0; i <= RACCT_MAX; i++) { 1590 if (sloppy == 0 && RACCT_IS_SLOPPY(i)) 1591 continue; 1592 RACCT_LOCK(); 1593 amount = racct->r_resources[i]; 1594 RACCT_UNLOCK(); 1595 if (RACCT_IS_IN_MILLIONS(i)) 1596 amount /= 1000000; 1597 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount); 1598 } 1599 sbuf_setpos(sb, sbuf_len(sb) - 1); 1600 return (sb); 1601 } 1602 1603 int 1604 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 1605 { 1606 struct rctl_rule *filter; 1607 struct sbuf *outputsbuf = NULL; 1608 struct proc *p; 1609 struct uidinfo *uip; 1610 struct loginclass *lc; 1611 struct prison_racct *prr; 1612 char *inputstr; 1613 int error; 1614 1615 if (!racct_enable) 1616 return (ENOSYS); 1617 1618 error = priv_check(td, PRIV_RCTL_GET_RACCT); 1619 if (error != 0) 1620 return (error); 1621 1622 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1623 if (error != 0) 1624 return (error); 1625 1626 sx_slock(&allproc_lock); 1627 error = rctl_string_to_rule(inputstr, &filter); 1628 free(inputstr, M_RCTL); 1629 if (error != 0) { 1630 sx_sunlock(&allproc_lock); 1631 return (error); 1632 } 1633 1634 switch (filter->rr_subject_type) { 1635 case RCTL_SUBJECT_TYPE_PROCESS: 1636 p = filter->rr_subject.rs_proc; 1637 if (p == NULL) { 1638 error = EINVAL; 1639 goto out; 1640 } 1641 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0); 1642 break; 1643 case RCTL_SUBJECT_TYPE_USER: 1644 uip = filter->rr_subject.rs_uip; 1645 if (uip == NULL) { 1646 error = EINVAL; 1647 goto out; 1648 } 1649 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1); 1650 break; 1651 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1652 lc = filter->rr_subject.rs_loginclass; 1653 if (lc == NULL) { 1654 error = EINVAL; 1655 goto out; 1656 } 1657 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1); 1658 break; 1659 case RCTL_SUBJECT_TYPE_JAIL: 1660 prr = filter->rr_subject.rs_prison_racct; 1661 if (prr == NULL) { 1662 error = EINVAL; 1663 goto out; 1664 } 1665 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1); 1666 break; 1667 default: 1668 error = EINVAL; 1669 } 1670 out: 1671 rctl_rule_release(filter); 1672 sx_sunlock(&allproc_lock); 1673 if (error != 0) 1674 return (error); 1675 1676 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen); 1677 1678 return (error); 1679 } 1680 1681 static void 1682 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3) 1683 { 1684 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1685 struct rctl_rule_link *link; 1686 struct sbuf *sb = (struct sbuf *)arg3; 1687 1688 ASSERT_RACCT_ENABLED(); 1689 RACCT_LOCK_ASSERT(); 1690 1691 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 1692 if (!rctl_rule_matches(link->rrl_rule, filter)) 1693 continue; 1694 rctl_rule_to_sbuf(sb, link->rrl_rule); 1695 sbuf_printf(sb, ","); 1696 } 1697 } 1698 1699 int 1700 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 1701 { 1702 struct sbuf *sb; 1703 struct rctl_rule *filter; 1704 struct rctl_rule_link *link; 1705 struct proc *p; 1706 char *inputstr, *buf; 1707 size_t bufsize; 1708 int error; 1709 1710 if (!racct_enable) 1711 return (ENOSYS); 1712 1713 error = priv_check(td, PRIV_RCTL_GET_RULES); 1714 if (error != 0) 1715 return (error); 1716 1717 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1718 if (error != 0) 1719 return (error); 1720 1721 sx_slock(&allproc_lock); 1722 error = rctl_string_to_rule(inputstr, &filter); 1723 free(inputstr, M_RCTL); 1724 if (error != 0) { 1725 sx_sunlock(&allproc_lock); 1726 return (error); 1727 } 1728 1729 bufsize = uap->outbuflen; 1730 if (bufsize > rctl_maxbufsize) { 1731 sx_sunlock(&allproc_lock); 1732 return (E2BIG); 1733 } 1734 1735 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1736 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1737 KASSERT(sb != NULL, ("sbuf_new failed")); 1738 1739 FOREACH_PROC_IN_SYSTEM(p) { 1740 RACCT_LOCK(); 1741 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1742 /* 1743 * Non-process rules will be added to the buffer later. 1744 * Adding them here would result in duplicated output. 1745 */ 1746 if (link->rrl_rule->rr_subject_type != 1747 RCTL_SUBJECT_TYPE_PROCESS) 1748 continue; 1749 if (!rctl_rule_matches(link->rrl_rule, filter)) 1750 continue; 1751 rctl_rule_to_sbuf(sb, link->rrl_rule); 1752 sbuf_printf(sb, ","); 1753 } 1754 RACCT_UNLOCK(); 1755 } 1756 1757 loginclass_racct_foreach(rctl_get_rules_callback, 1758 rctl_rule_pre_callback, rctl_rule_post_callback, 1759 filter, sb); 1760 ui_racct_foreach(rctl_get_rules_callback, 1761 rctl_rule_pre_callback, rctl_rule_post_callback, 1762 filter, sb); 1763 prison_racct_foreach(rctl_get_rules_callback, 1764 rctl_rule_pre_callback, rctl_rule_post_callback, 1765 filter, sb); 1766 if (sbuf_error(sb) == ENOMEM) { 1767 error = ERANGE; 1768 goto out; 1769 } 1770 1771 /* 1772 * Remove trailing ",". 1773 */ 1774 if (sbuf_len(sb) > 0) 1775 sbuf_setpos(sb, sbuf_len(sb) - 1); 1776 1777 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1778 out: 1779 rctl_rule_release(filter); 1780 sx_sunlock(&allproc_lock); 1781 free(buf, M_RCTL); 1782 return (error); 1783 } 1784 1785 int 1786 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 1787 { 1788 struct sbuf *sb; 1789 struct rctl_rule *filter; 1790 struct rctl_rule_link *link; 1791 char *inputstr, *buf; 1792 size_t bufsize; 1793 int error; 1794 1795 if (!racct_enable) 1796 return (ENOSYS); 1797 1798 error = priv_check(td, PRIV_RCTL_GET_LIMITS); 1799 if (error != 0) 1800 return (error); 1801 1802 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1803 if (error != 0) 1804 return (error); 1805 1806 sx_slock(&allproc_lock); 1807 error = rctl_string_to_rule(inputstr, &filter); 1808 free(inputstr, M_RCTL); 1809 if (error != 0) { 1810 sx_sunlock(&allproc_lock); 1811 return (error); 1812 } 1813 1814 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) { 1815 rctl_rule_release(filter); 1816 sx_sunlock(&allproc_lock); 1817 return (EINVAL); 1818 } 1819 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) { 1820 rctl_rule_release(filter); 1821 sx_sunlock(&allproc_lock); 1822 return (EOPNOTSUPP); 1823 } 1824 if (filter->rr_subject.rs_proc == NULL) { 1825 rctl_rule_release(filter); 1826 sx_sunlock(&allproc_lock); 1827 return (EINVAL); 1828 } 1829 1830 bufsize = uap->outbuflen; 1831 if (bufsize > rctl_maxbufsize) { 1832 rctl_rule_release(filter); 1833 sx_sunlock(&allproc_lock); 1834 return (E2BIG); 1835 } 1836 1837 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1838 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1839 KASSERT(sb != NULL, ("sbuf_new failed")); 1840 1841 RACCT_LOCK(); 1842 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links, 1843 rrl_next) { 1844 rctl_rule_to_sbuf(sb, link->rrl_rule); 1845 sbuf_printf(sb, ","); 1846 } 1847 RACCT_UNLOCK(); 1848 if (sbuf_error(sb) == ENOMEM) { 1849 error = ERANGE; 1850 sbuf_delete(sb); 1851 goto out; 1852 } 1853 1854 /* 1855 * Remove trailing ",". 1856 */ 1857 if (sbuf_len(sb) > 0) 1858 sbuf_setpos(sb, sbuf_len(sb) - 1); 1859 1860 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1861 out: 1862 rctl_rule_release(filter); 1863 sx_sunlock(&allproc_lock); 1864 free(buf, M_RCTL); 1865 return (error); 1866 } 1867 1868 int 1869 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 1870 { 1871 struct rctl_rule *rule; 1872 char *inputstr; 1873 int error; 1874 1875 if (!racct_enable) 1876 return (ENOSYS); 1877 1878 error = priv_check(td, PRIV_RCTL_ADD_RULE); 1879 if (error != 0) 1880 return (error); 1881 1882 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1883 if (error != 0) 1884 return (error); 1885 1886 sx_slock(&allproc_lock); 1887 error = rctl_string_to_rule(inputstr, &rule); 1888 free(inputstr, M_RCTL); 1889 if (error != 0) { 1890 sx_sunlock(&allproc_lock); 1891 return (error); 1892 } 1893 /* 1894 * The 'per' part of a rule is optional. 1895 */ 1896 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED && 1897 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) 1898 rule->rr_per = rule->rr_subject_type; 1899 1900 if (!rctl_rule_fully_specified(rule)) { 1901 error = EINVAL; 1902 goto out; 1903 } 1904 1905 error = rctl_rule_add(rule); 1906 1907 out: 1908 rctl_rule_release(rule); 1909 sx_sunlock(&allproc_lock); 1910 return (error); 1911 } 1912 1913 int 1914 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 1915 { 1916 struct rctl_rule *filter; 1917 char *inputstr; 1918 int error; 1919 1920 if (!racct_enable) 1921 return (ENOSYS); 1922 1923 error = priv_check(td, PRIV_RCTL_REMOVE_RULE); 1924 if (error != 0) 1925 return (error); 1926 1927 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1928 if (error != 0) 1929 return (error); 1930 1931 sx_slock(&allproc_lock); 1932 error = rctl_string_to_rule(inputstr, &filter); 1933 free(inputstr, M_RCTL); 1934 if (error != 0) { 1935 sx_sunlock(&allproc_lock); 1936 return (error); 1937 } 1938 1939 error = rctl_rule_remove(filter); 1940 rctl_rule_release(filter); 1941 sx_sunlock(&allproc_lock); 1942 1943 return (error); 1944 } 1945 1946 /* 1947 * Update RCTL rule list after credential change. 1948 */ 1949 void 1950 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred) 1951 { 1952 LIST_HEAD(, rctl_rule_link) newrules; 1953 struct rctl_rule_link *link, *newlink; 1954 struct uidinfo *newuip; 1955 struct loginclass *newlc; 1956 struct prison_racct *newprr; 1957 int rulecnt, i; 1958 1959 if (!racct_enable) 1960 return; 1961 1962 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 1963 1964 newuip = newcred->cr_ruidinfo; 1965 newlc = newcred->cr_loginclass; 1966 newprr = newcred->cr_prison->pr_prison_racct; 1967 1968 LIST_INIT(&newrules); 1969 1970 again: 1971 /* 1972 * First, count the rules that apply to the process with new 1973 * credentials. 1974 */ 1975 rulecnt = 0; 1976 RACCT_LOCK(); 1977 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1978 if (link->rrl_rule->rr_subject_type == 1979 RCTL_SUBJECT_TYPE_PROCESS) 1980 rulecnt++; 1981 } 1982 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) 1983 rulecnt++; 1984 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) 1985 rulecnt++; 1986 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) 1987 rulecnt++; 1988 RACCT_UNLOCK(); 1989 1990 /* 1991 * Create temporary list. We've dropped the rctl_lock in order 1992 * to use M_WAITOK. 1993 */ 1994 for (i = 0; i < rulecnt; i++) { 1995 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 1996 newlink->rrl_rule = NULL; 1997 newlink->rrl_exceeded = 0; 1998 LIST_INSERT_HEAD(&newrules, newlink, rrl_next); 1999 } 2000 2001 newlink = LIST_FIRST(&newrules); 2002 2003 /* 2004 * Assign rules to the newly allocated list entries. 2005 */ 2006 RACCT_LOCK(); 2007 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 2008 if (link->rrl_rule->rr_subject_type == 2009 RCTL_SUBJECT_TYPE_PROCESS) { 2010 if (newlink == NULL) 2011 goto goaround; 2012 rctl_rule_acquire(link->rrl_rule); 2013 newlink->rrl_rule = link->rrl_rule; 2014 newlink->rrl_exceeded = link->rrl_exceeded; 2015 newlink = LIST_NEXT(newlink, rrl_next); 2016 rulecnt--; 2017 } 2018 } 2019 2020 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) { 2021 if (newlink == NULL) 2022 goto goaround; 2023 rctl_rule_acquire(link->rrl_rule); 2024 newlink->rrl_rule = link->rrl_rule; 2025 newlink->rrl_exceeded = link->rrl_exceeded; 2026 newlink = LIST_NEXT(newlink, rrl_next); 2027 rulecnt--; 2028 } 2029 2030 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) { 2031 if (newlink == NULL) 2032 goto goaround; 2033 rctl_rule_acquire(link->rrl_rule); 2034 newlink->rrl_rule = link->rrl_rule; 2035 newlink->rrl_exceeded = link->rrl_exceeded; 2036 newlink = LIST_NEXT(newlink, rrl_next); 2037 rulecnt--; 2038 } 2039 2040 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) { 2041 if (newlink == NULL) 2042 goto goaround; 2043 rctl_rule_acquire(link->rrl_rule); 2044 newlink->rrl_rule = link->rrl_rule; 2045 newlink->rrl_exceeded = link->rrl_exceeded; 2046 newlink = LIST_NEXT(newlink, rrl_next); 2047 rulecnt--; 2048 } 2049 2050 if (rulecnt == 0) { 2051 /* 2052 * Free the old rule list. 2053 */ 2054 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) { 2055 link = LIST_FIRST(&p->p_racct->r_rule_links); 2056 LIST_REMOVE(link, rrl_next); 2057 rctl_rule_release(link->rrl_rule); 2058 uma_zfree(rctl_rule_link_zone, link); 2059 } 2060 2061 /* 2062 * Replace lists and we're done. 2063 * 2064 * XXX: Is there any way to switch list heads instead 2065 * of iterating here? 2066 */ 2067 while (!LIST_EMPTY(&newrules)) { 2068 newlink = LIST_FIRST(&newrules); 2069 LIST_REMOVE(newlink, rrl_next); 2070 LIST_INSERT_HEAD(&p->p_racct->r_rule_links, 2071 newlink, rrl_next); 2072 } 2073 2074 RACCT_UNLOCK(); 2075 2076 return; 2077 } 2078 2079 goaround: 2080 RACCT_UNLOCK(); 2081 2082 /* 2083 * Rule list changed while we were not holding the rctl_lock. 2084 * Free the new list and try again. 2085 */ 2086 while (!LIST_EMPTY(&newrules)) { 2087 newlink = LIST_FIRST(&newrules); 2088 LIST_REMOVE(newlink, rrl_next); 2089 if (newlink->rrl_rule != NULL) 2090 rctl_rule_release(newlink->rrl_rule); 2091 uma_zfree(rctl_rule_link_zone, newlink); 2092 } 2093 2094 goto again; 2095 } 2096 2097 /* 2098 * Assign RCTL rules to the newly created process. 2099 */ 2100 int 2101 rctl_proc_fork(struct proc *parent, struct proc *child) 2102 { 2103 struct rctl_rule *rule; 2104 struct rctl_rule_link *link; 2105 int error; 2106 2107 ASSERT_RACCT_ENABLED(); 2108 RACCT_LOCK_ASSERT(); 2109 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent)); 2110 2111 LIST_INIT(&child->p_racct->r_rule_links); 2112 2113 /* 2114 * Go through limits applicable to the parent and assign them 2115 * to the child. Rules with 'process' subject have to be duplicated 2116 * in order to make their rr_subject point to the new process. 2117 */ 2118 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) { 2119 if (link->rrl_rule->rr_subject_type == 2120 RCTL_SUBJECT_TYPE_PROCESS) { 2121 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT); 2122 if (rule == NULL) 2123 goto fail; 2124 KASSERT(rule->rr_subject.rs_proc == parent, 2125 ("rule->rr_subject.rs_proc != parent")); 2126 rule->rr_subject.rs_proc = child; 2127 error = rctl_racct_add_rule_locked(child->p_racct, 2128 rule); 2129 rctl_rule_release(rule); 2130 if (error != 0) 2131 goto fail; 2132 } else { 2133 error = rctl_racct_add_rule_locked(child->p_racct, 2134 link->rrl_rule); 2135 if (error != 0) 2136 goto fail; 2137 } 2138 } 2139 2140 return (0); 2141 2142 fail: 2143 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) { 2144 link = LIST_FIRST(&child->p_racct->r_rule_links); 2145 LIST_REMOVE(link, rrl_next); 2146 rctl_rule_release(link->rrl_rule); 2147 uma_zfree(rctl_rule_link_zone, link); 2148 } 2149 2150 return (EAGAIN); 2151 } 2152 2153 /* 2154 * Release rules attached to the racct. 2155 */ 2156 void 2157 rctl_racct_release(struct racct *racct) 2158 { 2159 struct rctl_rule_link *link; 2160 2161 ASSERT_RACCT_ENABLED(); 2162 RACCT_LOCK_ASSERT(); 2163 2164 while (!LIST_EMPTY(&racct->r_rule_links)) { 2165 link = LIST_FIRST(&racct->r_rule_links); 2166 LIST_REMOVE(link, rrl_next); 2167 rctl_rule_release(link->rrl_rule); 2168 uma_zfree(rctl_rule_link_zone, link); 2169 } 2170 } 2171 2172 static void 2173 rctl_init(void) 2174 { 2175 2176 if (!racct_enable) 2177 return; 2178 2179 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule), 2180 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2181 rctl_rule_link_zone = uma_zcreate("rctl_rule_link", 2182 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL, 2183 UMA_ALIGN_PTR, 0); 2184 2185 /* 2186 * Set default values, making sure not to overwrite the ones 2187 * fetched from tunables. Most of those could be set at the 2188 * declaration, except for the rctl_throttle_max - we cannot 2189 * set it there due to hz not being compile time constant. 2190 */ 2191 if (rctl_throttle_min < 1) 2192 rctl_throttle_min = 1; 2193 if (rctl_throttle_max < rctl_throttle_min) 2194 rctl_throttle_max = 2 * hz; 2195 if (rctl_throttle_pct < 0) 2196 rctl_throttle_pct = 100; 2197 if (rctl_throttle_pct2 < 0) 2198 rctl_throttle_pct2 = 100; 2199 } 2200 2201 #else /* !RCTL */ 2202 2203 int 2204 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 2205 { 2206 2207 return (ENOSYS); 2208 } 2209 2210 int 2211 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 2212 { 2213 2214 return (ENOSYS); 2215 } 2216 2217 int 2218 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 2219 { 2220 2221 return (ENOSYS); 2222 } 2223 2224 int 2225 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 2226 { 2227 2228 return (ENOSYS); 2229 } 2230 2231 int 2232 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 2233 { 2234 2235 return (ENOSYS); 2236 } 2237 2238 #endif /* !RCTL */ 2239