1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2010 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * This software was developed by Edward Tomasz Napierala under sponsorship 8 * from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * $FreeBSD$ 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/bus.h> 39 #include <sys/malloc.h> 40 #include <sys/queue.h> 41 #include <sys/refcount.h> 42 #include <sys/jail.h> 43 #include <sys/kernel.h> 44 #include <sys/limits.h> 45 #include <sys/loginclass.h> 46 #include <sys/priv.h> 47 #include <sys/proc.h> 48 #include <sys/racct.h> 49 #include <sys/rctl.h> 50 #include <sys/resourcevar.h> 51 #include <sys/sx.h> 52 #include <sys/sysent.h> 53 #include <sys/sysproto.h> 54 #include <sys/systm.h> 55 #include <sys/types.h> 56 #include <sys/eventhandler.h> 57 #include <sys/lock.h> 58 #include <sys/mutex.h> 59 #include <sys/rwlock.h> 60 #include <sys/sbuf.h> 61 #include <sys/taskqueue.h> 62 #include <sys/tree.h> 63 #include <vm/uma.h> 64 65 #ifdef RCTL 66 #ifndef RACCT 67 #error "The RCTL option requires the RACCT option" 68 #endif 69 70 FEATURE(rctl, "Resource Limits"); 71 72 #define HRF_DEFAULT 0 73 #define HRF_DONT_INHERIT 1 74 #define HRF_DONT_ACCUMULATE 2 75 76 #define RCTL_MAX_INBUFSIZE 4 * 1024 77 #define RCTL_MAX_OUTBUFSIZE 16 * 1024 * 1024 78 #define RCTL_LOG_BUFSIZE 128 79 80 #define RCTL_PCPU_SHIFT (10 * 1000000) 81 82 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE; 83 static int rctl_log_rate_limit = 10; 84 static int rctl_devctl_rate_limit = 10; 85 86 /* 87 * Values below are initialized in rctl_init(). 88 */ 89 static int rctl_throttle_min = -1; 90 static int rctl_throttle_max = -1; 91 static int rctl_throttle_pct = -1; 92 static int rctl_throttle_pct2 = -1; 93 94 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS); 95 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS); 96 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS); 97 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS); 98 99 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 100 "Resource Limits"); 101 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN, 102 &rctl_maxbufsize, 0, "Maximum output buffer size"); 103 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW, 104 &rctl_log_rate_limit, 0, "Maximum number of log messages per second"); 105 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN, 106 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second"); 107 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min, 108 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 109 &rctl_throttle_min_sysctl, "IU", 110 "Shortest throttling duration, in hz"); 111 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min); 112 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max, 113 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 114 &rctl_throttle_max_sysctl, "IU", 115 "Longest throttling duration, in hz"); 116 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max); 117 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct, 118 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 119 &rctl_throttle_pct_sysctl, "IU", 120 "Throttling penalty for process consumption, in percent"); 121 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct); 122 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2, 123 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 124 &rctl_throttle_pct2_sysctl, "IU", 125 "Throttling penalty for container consumption, in percent"); 126 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2); 127 128 /* 129 * 'rctl_rule_link' connects a rule with every racct it's related to. 130 * For example, rule 'user:X:openfiles:deny=N/process' is linked 131 * with uidinfo for user X, and to each process of that user. 132 */ 133 struct rctl_rule_link { 134 LIST_ENTRY(rctl_rule_link) rrl_next; 135 struct rctl_rule *rrl_rule; 136 int rrl_exceeded; 137 }; 138 139 struct dict { 140 const char *d_name; 141 int d_value; 142 }; 143 144 static struct dict subjectnames[] = { 145 { "process", RCTL_SUBJECT_TYPE_PROCESS }, 146 { "user", RCTL_SUBJECT_TYPE_USER }, 147 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS }, 148 { "jail", RCTL_SUBJECT_TYPE_JAIL }, 149 { NULL, -1 }}; 150 151 static struct dict resourcenames[] = { 152 { "cputime", RACCT_CPU }, 153 { "datasize", RACCT_DATA }, 154 { "stacksize", RACCT_STACK }, 155 { "coredumpsize", RACCT_CORE }, 156 { "memoryuse", RACCT_RSS }, 157 { "memorylocked", RACCT_MEMLOCK }, 158 { "maxproc", RACCT_NPROC }, 159 { "openfiles", RACCT_NOFILE }, 160 { "vmemoryuse", RACCT_VMEM }, 161 { "pseudoterminals", RACCT_NPTS }, 162 { "swapuse", RACCT_SWAP }, 163 { "nthr", RACCT_NTHR }, 164 { "msgqqueued", RACCT_MSGQQUEUED }, 165 { "msgqsize", RACCT_MSGQSIZE }, 166 { "nmsgq", RACCT_NMSGQ }, 167 { "nsem", RACCT_NSEM }, 168 { "nsemop", RACCT_NSEMOP }, 169 { "nshm", RACCT_NSHM }, 170 { "shmsize", RACCT_SHMSIZE }, 171 { "wallclock", RACCT_WALLCLOCK }, 172 { "pcpu", RACCT_PCTCPU }, 173 { "readbps", RACCT_READBPS }, 174 { "writebps", RACCT_WRITEBPS }, 175 { "readiops", RACCT_READIOPS }, 176 { "writeiops", RACCT_WRITEIOPS }, 177 { NULL, -1 }}; 178 179 static struct dict actionnames[] = { 180 { "sighup", RCTL_ACTION_SIGHUP }, 181 { "sigint", RCTL_ACTION_SIGINT }, 182 { "sigquit", RCTL_ACTION_SIGQUIT }, 183 { "sigill", RCTL_ACTION_SIGILL }, 184 { "sigtrap", RCTL_ACTION_SIGTRAP }, 185 { "sigabrt", RCTL_ACTION_SIGABRT }, 186 { "sigemt", RCTL_ACTION_SIGEMT }, 187 { "sigfpe", RCTL_ACTION_SIGFPE }, 188 { "sigkill", RCTL_ACTION_SIGKILL }, 189 { "sigbus", RCTL_ACTION_SIGBUS }, 190 { "sigsegv", RCTL_ACTION_SIGSEGV }, 191 { "sigsys", RCTL_ACTION_SIGSYS }, 192 { "sigpipe", RCTL_ACTION_SIGPIPE }, 193 { "sigalrm", RCTL_ACTION_SIGALRM }, 194 { "sigterm", RCTL_ACTION_SIGTERM }, 195 { "sigurg", RCTL_ACTION_SIGURG }, 196 { "sigstop", RCTL_ACTION_SIGSTOP }, 197 { "sigtstp", RCTL_ACTION_SIGTSTP }, 198 { "sigchld", RCTL_ACTION_SIGCHLD }, 199 { "sigttin", RCTL_ACTION_SIGTTIN }, 200 { "sigttou", RCTL_ACTION_SIGTTOU }, 201 { "sigio", RCTL_ACTION_SIGIO }, 202 { "sigxcpu", RCTL_ACTION_SIGXCPU }, 203 { "sigxfsz", RCTL_ACTION_SIGXFSZ }, 204 { "sigvtalrm", RCTL_ACTION_SIGVTALRM }, 205 { "sigprof", RCTL_ACTION_SIGPROF }, 206 { "sigwinch", RCTL_ACTION_SIGWINCH }, 207 { "siginfo", RCTL_ACTION_SIGINFO }, 208 { "sigusr1", RCTL_ACTION_SIGUSR1 }, 209 { "sigusr2", RCTL_ACTION_SIGUSR2 }, 210 { "sigthr", RCTL_ACTION_SIGTHR }, 211 { "deny", RCTL_ACTION_DENY }, 212 { "log", RCTL_ACTION_LOG }, 213 { "devctl", RCTL_ACTION_DEVCTL }, 214 { "throttle", RCTL_ACTION_THROTTLE }, 215 { NULL, -1 }}; 216 217 static void rctl_init(void); 218 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL); 219 220 static uma_zone_t rctl_rule_zone; 221 static uma_zone_t rctl_rule_link_zone; 222 223 static int rctl_rule_fully_specified(const struct rctl_rule *rule); 224 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule); 225 226 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits"); 227 228 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS) 229 { 230 int error, val = rctl_throttle_min; 231 232 error = sysctl_handle_int(oidp, &val, 0, req); 233 if (error || !req->newptr) 234 return (error); 235 if (val < 1 || val > rctl_throttle_max) 236 return (EINVAL); 237 238 RACCT_LOCK(); 239 rctl_throttle_min = val; 240 RACCT_UNLOCK(); 241 242 return (0); 243 } 244 245 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS) 246 { 247 int error, val = rctl_throttle_max; 248 249 error = sysctl_handle_int(oidp, &val, 0, req); 250 if (error || !req->newptr) 251 return (error); 252 if (val < rctl_throttle_min) 253 return (EINVAL); 254 255 RACCT_LOCK(); 256 rctl_throttle_max = val; 257 RACCT_UNLOCK(); 258 259 return (0); 260 } 261 262 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS) 263 { 264 int error, val = rctl_throttle_pct; 265 266 error = sysctl_handle_int(oidp, &val, 0, req); 267 if (error || !req->newptr) 268 return (error); 269 if (val < 0) 270 return (EINVAL); 271 272 RACCT_LOCK(); 273 rctl_throttle_pct = val; 274 RACCT_UNLOCK(); 275 276 return (0); 277 } 278 279 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS) 280 { 281 int error, val = rctl_throttle_pct2; 282 283 error = sysctl_handle_int(oidp, &val, 0, req); 284 if (error || !req->newptr) 285 return (error); 286 if (val < 0) 287 return (EINVAL); 288 289 RACCT_LOCK(); 290 rctl_throttle_pct2 = val; 291 RACCT_UNLOCK(); 292 293 return (0); 294 } 295 296 static const char * 297 rctl_subject_type_name(int subject) 298 { 299 int i; 300 301 for (i = 0; subjectnames[i].d_name != NULL; i++) { 302 if (subjectnames[i].d_value == subject) 303 return (subjectnames[i].d_name); 304 } 305 306 panic("rctl_subject_type_name: unknown subject type %d", subject); 307 } 308 309 static const char * 310 rctl_action_name(int action) 311 { 312 int i; 313 314 for (i = 0; actionnames[i].d_name != NULL; i++) { 315 if (actionnames[i].d_value == action) 316 return (actionnames[i].d_name); 317 } 318 319 panic("rctl_action_name: unknown action %d", action); 320 } 321 322 const char * 323 rctl_resource_name(int resource) 324 { 325 int i; 326 327 for (i = 0; resourcenames[i].d_name != NULL; i++) { 328 if (resourcenames[i].d_value == resource) 329 return (resourcenames[i].d_name); 330 } 331 332 panic("rctl_resource_name: unknown resource %d", resource); 333 } 334 335 static struct racct * 336 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule) 337 { 338 struct ucred *cred = p->p_ucred; 339 340 ASSERT_RACCT_ENABLED(); 341 RACCT_LOCK_ASSERT(); 342 343 switch (rule->rr_per) { 344 case RCTL_SUBJECT_TYPE_PROCESS: 345 return (p->p_racct); 346 case RCTL_SUBJECT_TYPE_USER: 347 return (cred->cr_ruidinfo->ui_racct); 348 case RCTL_SUBJECT_TYPE_LOGINCLASS: 349 return (cred->cr_loginclass->lc_racct); 350 case RCTL_SUBJECT_TYPE_JAIL: 351 return (cred->cr_prison->pr_prison_racct->prr_racct); 352 default: 353 panic("%s: unknown per %d", __func__, rule->rr_per); 354 } 355 } 356 357 /* 358 * Return the amount of resource that can be allocated by 'p' before 359 * hitting 'rule'. 360 */ 361 static int64_t 362 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule) 363 { 364 const struct racct *racct; 365 int64_t available; 366 367 ASSERT_RACCT_ENABLED(); 368 RACCT_LOCK_ASSERT(); 369 370 racct = rctl_proc_rule_to_racct(p, rule); 371 available = rule->rr_amount - racct->r_resources[rule->rr_resource]; 372 373 return (available); 374 } 375 376 /* 377 * Called every second for proc, uidinfo, loginclass, and jail containers. 378 * If the limit isn't exceeded, it decreases the usage amount to zero. 379 * Otherwise, it decreases it by the value of the limit. This way 380 * resource consumption exceeding the limit "carries over" to the next 381 * period. 382 */ 383 void 384 rctl_throttle_decay(struct racct *racct, int resource) 385 { 386 struct rctl_rule *rule; 387 struct rctl_rule_link *link; 388 int64_t minavailable; 389 390 ASSERT_RACCT_ENABLED(); 391 RACCT_LOCK_ASSERT(); 392 393 minavailable = INT64_MAX; 394 395 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 396 rule = link->rrl_rule; 397 398 if (rule->rr_resource != resource) 399 continue; 400 if (rule->rr_action != RCTL_ACTION_THROTTLE) 401 continue; 402 403 if (rule->rr_amount < minavailable) 404 minavailable = rule->rr_amount; 405 } 406 407 if (racct->r_resources[resource] < minavailable) { 408 racct->r_resources[resource] = 0; 409 } else { 410 /* 411 * Cap utilization counter at ten times the limit. Otherwise, 412 * if we changed the rule lowering the allowed amount, it could 413 * take unreasonably long time for the accumulated resource 414 * usage to drop. 415 */ 416 if (racct->r_resources[resource] > minavailable * 10) 417 racct->r_resources[resource] = minavailable * 10; 418 419 racct->r_resources[resource] -= minavailable; 420 } 421 } 422 423 /* 424 * Special version of rctl_get_available() for the %CPU resource. 425 * We slightly cheat here and return less than we normally would. 426 */ 427 int64_t 428 rctl_pcpu_available(const struct proc *p) { 429 struct rctl_rule *rule; 430 struct rctl_rule_link *link; 431 int64_t available, minavailable, limit; 432 433 ASSERT_RACCT_ENABLED(); 434 RACCT_LOCK_ASSERT(); 435 436 minavailable = INT64_MAX; 437 limit = 0; 438 439 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 440 rule = link->rrl_rule; 441 if (rule->rr_resource != RACCT_PCTCPU) 442 continue; 443 if (rule->rr_action != RCTL_ACTION_DENY) 444 continue; 445 available = rctl_available_resource(p, rule); 446 if (available < minavailable) { 447 minavailable = available; 448 limit = rule->rr_amount; 449 } 450 } 451 452 /* 453 * Return slightly less than actual value of the available 454 * %cpu resource. This makes %cpu throttling more aggressive 455 * and lets us act sooner than the limits are already exceeded. 456 */ 457 if (limit != 0) { 458 if (limit > 2 * RCTL_PCPU_SHIFT) 459 minavailable -= RCTL_PCPU_SHIFT; 460 else 461 minavailable -= (limit / 2); 462 } 463 464 return (minavailable); 465 } 466 467 static uint64_t 468 xadd(uint64_t a, uint64_t b) 469 { 470 uint64_t c; 471 472 c = a + b; 473 474 /* 475 * Detect overflow. 476 */ 477 if (c < a || c < b) 478 return (UINT64_MAX); 479 480 return (c); 481 } 482 483 static uint64_t 484 xmul(uint64_t a, uint64_t b) 485 { 486 487 if (b != 0 && a > UINT64_MAX / b) 488 return (UINT64_MAX); 489 490 return (a * b); 491 } 492 493 /* 494 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition 495 * to what it keeps allocated now. Returns non-zero if the allocation should 496 * be denied, 0 otherwise. 497 */ 498 int 499 rctl_enforce(struct proc *p, int resource, uint64_t amount) 500 { 501 static struct timeval log_lasttime, devctl_lasttime; 502 static int log_curtime = 0, devctl_curtime = 0; 503 struct rctl_rule *rule; 504 struct rctl_rule_link *link; 505 struct sbuf sb; 506 char *buf; 507 int64_t available; 508 uint64_t sleep_ms, sleep_ratio; 509 int should_deny = 0; 510 511 ASSERT_RACCT_ENABLED(); 512 RACCT_LOCK_ASSERT(); 513 514 /* 515 * There may be more than one matching rule; go through all of them. 516 * Denial should be done last, after logging and sending signals. 517 */ 518 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 519 rule = link->rrl_rule; 520 if (rule->rr_resource != resource) 521 continue; 522 523 available = rctl_available_resource(p, rule); 524 if (available >= (int64_t)amount) { 525 link->rrl_exceeded = 0; 526 continue; 527 } 528 529 switch (rule->rr_action) { 530 case RCTL_ACTION_DENY: 531 should_deny = 1; 532 continue; 533 case RCTL_ACTION_LOG: 534 /* 535 * If rrl_exceeded != 0, it means we've already 536 * logged a warning for this process. 537 */ 538 if (link->rrl_exceeded != 0) 539 continue; 540 541 /* 542 * If the process state is not fully initialized yet, 543 * we can't access most of the required fields, e.g. 544 * p->p_comm. This happens when called from fork1(). 545 * Ignore this rule for now; it will be processed just 546 * after fork, when called from racct_proc_fork_done(). 547 */ 548 if (p->p_state != PRS_NORMAL) 549 continue; 550 551 if (!ppsratecheck(&log_lasttime, &log_curtime, 552 rctl_log_rate_limit)) 553 continue; 554 555 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 556 if (buf == NULL) { 557 printf("rctl_enforce: out of memory\n"); 558 continue; 559 } 560 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 561 rctl_rule_to_sbuf(&sb, rule); 562 sbuf_finish(&sb); 563 printf("rctl: rule \"%s\" matched by pid %d " 564 "(%s), uid %d, jail %s\n", sbuf_data(&sb), 565 p->p_pid, p->p_comm, p->p_ucred->cr_uid, 566 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 567 sbuf_delete(&sb); 568 free(buf, M_RCTL); 569 link->rrl_exceeded = 1; 570 continue; 571 case RCTL_ACTION_DEVCTL: 572 if (link->rrl_exceeded != 0) 573 continue; 574 575 if (p->p_state != PRS_NORMAL) 576 continue; 577 578 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime, 579 rctl_devctl_rate_limit)) 580 continue; 581 582 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 583 if (buf == NULL) { 584 printf("rctl_enforce: out of memory\n"); 585 continue; 586 } 587 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 588 sbuf_printf(&sb, "rule="); 589 rctl_rule_to_sbuf(&sb, rule); 590 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s", 591 p->p_pid, p->p_ucred->cr_ruid, 592 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 593 sbuf_finish(&sb); 594 devctl_notify_f("RCTL", "rule", "matched", 595 sbuf_data(&sb), M_NOWAIT); 596 sbuf_delete(&sb); 597 free(buf, M_RCTL); 598 link->rrl_exceeded = 1; 599 continue; 600 case RCTL_ACTION_THROTTLE: 601 if (p->p_state != PRS_NORMAL) 602 continue; 603 604 /* 605 * Make the process sleep for a fraction of second 606 * proportional to the ratio of process' resource 607 * utilization compared to the limit. The point is 608 * to penalize resource hogs: processes that consume 609 * more of the available resources sleep for longer. 610 * 611 * We're trying to defer division until the very end, 612 * to minimize the rounding effects. The following 613 * calculation could have been written in a clearer 614 * way like this: 615 * 616 * sleep_ms = hz * p->p_racct->r_resources[resource] / 617 * rule->rr_amount; 618 * sleep_ms *= rctl_throttle_pct / 100; 619 * if (sleep_ms < rctl_throttle_min) 620 * sleep_ms = rctl_throttle_min; 621 * 622 */ 623 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]); 624 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100; 625 if (sleep_ms < rctl_throttle_min * rule->rr_amount) 626 sleep_ms = rctl_throttle_min * rule->rr_amount; 627 628 /* 629 * Multiply that by the ratio of the resource 630 * consumption for the container compared to the limit, 631 * squared. In other words, a process in a container 632 * that is two times over the limit will be throttled 633 * four times as much for hitting the same rule. The 634 * point is to penalize processes more if the container 635 * itself (eg certain UID or jail) is above the limit. 636 */ 637 if (available < 0) 638 sleep_ratio = -available / rule->rr_amount; 639 else 640 sleep_ratio = 0; 641 sleep_ratio = xmul(sleep_ratio, sleep_ratio); 642 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100; 643 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio)); 644 645 /* 646 * Finally the division. 647 */ 648 sleep_ms /= rule->rr_amount; 649 650 if (sleep_ms > rctl_throttle_max) 651 sleep_ms = rctl_throttle_max; 652 #if 0 653 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n", 654 __func__, p->p_pid, p->p_comm, 655 p->p_racct->r_resources[resource], 656 rule->rr_amount, (uintmax_t)sleep_ms, 657 (uintmax_t)sleep_ratio, (intmax_t)available); 658 #endif 659 660 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n", 661 __func__, (uintmax_t)sleep_ms, rctl_throttle_min)); 662 racct_proc_throttle(p, sleep_ms); 663 continue; 664 default: 665 if (link->rrl_exceeded != 0) 666 continue; 667 668 if (p->p_state != PRS_NORMAL) 669 continue; 670 671 KASSERT(rule->rr_action > 0 && 672 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX, 673 ("rctl_enforce: unknown action %d", 674 rule->rr_action)); 675 676 /* 677 * We're using the fact that RCTL_ACTION_SIG* values 678 * are equal to their counterparts from sys/signal.h. 679 */ 680 kern_psignal(p, rule->rr_action); 681 link->rrl_exceeded = 1; 682 continue; 683 } 684 } 685 686 if (should_deny) { 687 /* 688 * Return fake error code; the caller should change it 689 * into one proper for the situation - EFSIZ, ENOMEM etc. 690 */ 691 return (EDOOFUS); 692 } 693 694 return (0); 695 } 696 697 uint64_t 698 rctl_get_limit(struct proc *p, int resource) 699 { 700 struct rctl_rule *rule; 701 struct rctl_rule_link *link; 702 uint64_t amount = UINT64_MAX; 703 704 ASSERT_RACCT_ENABLED(); 705 RACCT_LOCK_ASSERT(); 706 707 /* 708 * There may be more than one matching rule; go through all of them. 709 * Denial should be done last, after logging and sending signals. 710 */ 711 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 712 rule = link->rrl_rule; 713 if (rule->rr_resource != resource) 714 continue; 715 if (rule->rr_action != RCTL_ACTION_DENY) 716 continue; 717 if (rule->rr_amount < amount) 718 amount = rule->rr_amount; 719 } 720 721 return (amount); 722 } 723 724 uint64_t 725 rctl_get_available(struct proc *p, int resource) 726 { 727 struct rctl_rule *rule; 728 struct rctl_rule_link *link; 729 int64_t available, minavailable, allocated; 730 731 minavailable = INT64_MAX; 732 733 ASSERT_RACCT_ENABLED(); 734 RACCT_LOCK_ASSERT(); 735 736 /* 737 * There may be more than one matching rule; go through all of them. 738 * Denial should be done last, after logging and sending signals. 739 */ 740 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 741 rule = link->rrl_rule; 742 if (rule->rr_resource != resource) 743 continue; 744 if (rule->rr_action != RCTL_ACTION_DENY) 745 continue; 746 available = rctl_available_resource(p, rule); 747 if (available < minavailable) 748 minavailable = available; 749 } 750 751 /* 752 * XXX: Think about this _hard_. 753 */ 754 allocated = p->p_racct->r_resources[resource]; 755 if (minavailable < INT64_MAX - allocated) 756 minavailable += allocated; 757 if (minavailable < 0) 758 minavailable = 0; 759 760 return (minavailable); 761 } 762 763 static int 764 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter) 765 { 766 767 ASSERT_RACCT_ENABLED(); 768 769 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) { 770 if (rule->rr_subject_type != filter->rr_subject_type) 771 return (0); 772 773 switch (filter->rr_subject_type) { 774 case RCTL_SUBJECT_TYPE_PROCESS: 775 if (filter->rr_subject.rs_proc != NULL && 776 rule->rr_subject.rs_proc != 777 filter->rr_subject.rs_proc) 778 return (0); 779 break; 780 case RCTL_SUBJECT_TYPE_USER: 781 if (filter->rr_subject.rs_uip != NULL && 782 rule->rr_subject.rs_uip != 783 filter->rr_subject.rs_uip) 784 return (0); 785 break; 786 case RCTL_SUBJECT_TYPE_LOGINCLASS: 787 if (filter->rr_subject.rs_loginclass != NULL && 788 rule->rr_subject.rs_loginclass != 789 filter->rr_subject.rs_loginclass) 790 return (0); 791 break; 792 case RCTL_SUBJECT_TYPE_JAIL: 793 if (filter->rr_subject.rs_prison_racct != NULL && 794 rule->rr_subject.rs_prison_racct != 795 filter->rr_subject.rs_prison_racct) 796 return (0); 797 break; 798 default: 799 panic("rctl_rule_matches: unknown subject type %d", 800 filter->rr_subject_type); 801 } 802 } 803 804 if (filter->rr_resource != RACCT_UNDEFINED) { 805 if (rule->rr_resource != filter->rr_resource) 806 return (0); 807 } 808 809 if (filter->rr_action != RCTL_ACTION_UNDEFINED) { 810 if (rule->rr_action != filter->rr_action) 811 return (0); 812 } 813 814 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) { 815 if (rule->rr_amount != filter->rr_amount) 816 return (0); 817 } 818 819 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) { 820 if (rule->rr_per != filter->rr_per) 821 return (0); 822 } 823 824 return (1); 825 } 826 827 static int 828 str2value(const char *str, int *value, struct dict *table) 829 { 830 int i; 831 832 if (value == NULL) 833 return (EINVAL); 834 835 for (i = 0; table[i].d_name != NULL; i++) { 836 if (strcasecmp(table[i].d_name, str) == 0) { 837 *value = table[i].d_value; 838 return (0); 839 } 840 } 841 842 return (EINVAL); 843 } 844 845 static int 846 str2id(const char *str, id_t *value) 847 { 848 char *end; 849 850 if (str == NULL) 851 return (EINVAL); 852 853 *value = strtoul(str, &end, 10); 854 if ((size_t)(end - str) != strlen(str)) 855 return (EINVAL); 856 857 return (0); 858 } 859 860 static int 861 str2int64(const char *str, int64_t *value) 862 { 863 char *end; 864 865 if (str == NULL) 866 return (EINVAL); 867 868 *value = strtoul(str, &end, 10); 869 if ((size_t)(end - str) != strlen(str)) 870 return (EINVAL); 871 872 if (*value < 0) 873 return (ERANGE); 874 875 return (0); 876 } 877 878 /* 879 * Connect the rule to the racct, increasing refcount for the rule. 880 */ 881 static void 882 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule) 883 { 884 struct rctl_rule_link *link; 885 886 ASSERT_RACCT_ENABLED(); 887 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 888 889 rctl_rule_acquire(rule); 890 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 891 link->rrl_rule = rule; 892 link->rrl_exceeded = 0; 893 894 RACCT_LOCK(); 895 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 896 RACCT_UNLOCK(); 897 } 898 899 static int 900 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule) 901 { 902 struct rctl_rule_link *link; 903 904 ASSERT_RACCT_ENABLED(); 905 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 906 RACCT_LOCK_ASSERT(); 907 908 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT); 909 if (link == NULL) 910 return (ENOMEM); 911 rctl_rule_acquire(rule); 912 link->rrl_rule = rule; 913 link->rrl_exceeded = 0; 914 915 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 916 917 return (0); 918 } 919 920 /* 921 * Remove limits for a rules matching the filter and release 922 * the refcounts for the rules, possibly freeing them. Returns 923 * the number of limit structures removed. 924 */ 925 static int 926 rctl_racct_remove_rules(struct racct *racct, 927 const struct rctl_rule *filter) 928 { 929 struct rctl_rule_link *link, *linktmp; 930 int removed = 0; 931 932 ASSERT_RACCT_ENABLED(); 933 RACCT_LOCK_ASSERT(); 934 935 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) { 936 if (!rctl_rule_matches(link->rrl_rule, filter)) 937 continue; 938 939 LIST_REMOVE(link, rrl_next); 940 rctl_rule_release(link->rrl_rule); 941 uma_zfree(rctl_rule_link_zone, link); 942 removed++; 943 } 944 return (removed); 945 } 946 947 static void 948 rctl_rule_acquire_subject(struct rctl_rule *rule) 949 { 950 951 ASSERT_RACCT_ENABLED(); 952 953 switch (rule->rr_subject_type) { 954 case RCTL_SUBJECT_TYPE_UNDEFINED: 955 case RCTL_SUBJECT_TYPE_PROCESS: 956 break; 957 case RCTL_SUBJECT_TYPE_JAIL: 958 if (rule->rr_subject.rs_prison_racct != NULL) 959 prison_racct_hold(rule->rr_subject.rs_prison_racct); 960 break; 961 case RCTL_SUBJECT_TYPE_USER: 962 if (rule->rr_subject.rs_uip != NULL) 963 uihold(rule->rr_subject.rs_uip); 964 break; 965 case RCTL_SUBJECT_TYPE_LOGINCLASS: 966 if (rule->rr_subject.rs_loginclass != NULL) 967 loginclass_hold(rule->rr_subject.rs_loginclass); 968 break; 969 default: 970 panic("rctl_rule_acquire_subject: unknown subject type %d", 971 rule->rr_subject_type); 972 } 973 } 974 975 static void 976 rctl_rule_release_subject(struct rctl_rule *rule) 977 { 978 979 ASSERT_RACCT_ENABLED(); 980 981 switch (rule->rr_subject_type) { 982 case RCTL_SUBJECT_TYPE_UNDEFINED: 983 case RCTL_SUBJECT_TYPE_PROCESS: 984 break; 985 case RCTL_SUBJECT_TYPE_JAIL: 986 if (rule->rr_subject.rs_prison_racct != NULL) 987 prison_racct_free(rule->rr_subject.rs_prison_racct); 988 break; 989 case RCTL_SUBJECT_TYPE_USER: 990 if (rule->rr_subject.rs_uip != NULL) 991 uifree(rule->rr_subject.rs_uip); 992 break; 993 case RCTL_SUBJECT_TYPE_LOGINCLASS: 994 if (rule->rr_subject.rs_loginclass != NULL) 995 loginclass_free(rule->rr_subject.rs_loginclass); 996 break; 997 default: 998 panic("rctl_rule_release_subject: unknown subject type %d", 999 rule->rr_subject_type); 1000 } 1001 } 1002 1003 struct rctl_rule * 1004 rctl_rule_alloc(int flags) 1005 { 1006 struct rctl_rule *rule; 1007 1008 ASSERT_RACCT_ENABLED(); 1009 1010 rule = uma_zalloc(rctl_rule_zone, flags); 1011 if (rule == NULL) 1012 return (NULL); 1013 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1014 rule->rr_subject.rs_proc = NULL; 1015 rule->rr_subject.rs_uip = NULL; 1016 rule->rr_subject.rs_loginclass = NULL; 1017 rule->rr_subject.rs_prison_racct = NULL; 1018 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1019 rule->rr_resource = RACCT_UNDEFINED; 1020 rule->rr_action = RCTL_ACTION_UNDEFINED; 1021 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1022 refcount_init(&rule->rr_refcount, 1); 1023 1024 return (rule); 1025 } 1026 1027 struct rctl_rule * 1028 rctl_rule_duplicate(const struct rctl_rule *rule, int flags) 1029 { 1030 struct rctl_rule *copy; 1031 1032 ASSERT_RACCT_ENABLED(); 1033 1034 copy = uma_zalloc(rctl_rule_zone, flags); 1035 if (copy == NULL) 1036 return (NULL); 1037 copy->rr_subject_type = rule->rr_subject_type; 1038 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc; 1039 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip; 1040 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass; 1041 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct; 1042 copy->rr_per = rule->rr_per; 1043 copy->rr_resource = rule->rr_resource; 1044 copy->rr_action = rule->rr_action; 1045 copy->rr_amount = rule->rr_amount; 1046 refcount_init(©->rr_refcount, 1); 1047 rctl_rule_acquire_subject(copy); 1048 1049 return (copy); 1050 } 1051 1052 void 1053 rctl_rule_acquire(struct rctl_rule *rule) 1054 { 1055 1056 ASSERT_RACCT_ENABLED(); 1057 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1058 1059 refcount_acquire(&rule->rr_refcount); 1060 } 1061 1062 static void 1063 rctl_rule_free(void *context, int pending) 1064 { 1065 struct rctl_rule *rule; 1066 1067 rule = (struct rctl_rule *)context; 1068 1069 ASSERT_RACCT_ENABLED(); 1070 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0")); 1071 1072 /* 1073 * We don't need locking here; rule is guaranteed to be inaccessible. 1074 */ 1075 1076 rctl_rule_release_subject(rule); 1077 uma_zfree(rctl_rule_zone, rule); 1078 } 1079 1080 void 1081 rctl_rule_release(struct rctl_rule *rule) 1082 { 1083 1084 ASSERT_RACCT_ENABLED(); 1085 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1086 1087 if (refcount_release(&rule->rr_refcount)) { 1088 /* 1089 * rctl_rule_release() is often called when iterating 1090 * over all the uidinfo structures in the system, 1091 * holding uihashtbl_lock. Since rctl_rule_free() 1092 * might end up calling uifree(), this would lead 1093 * to lock recursion. Use taskqueue to avoid this. 1094 */ 1095 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule); 1096 taskqueue_enqueue(taskqueue_thread, &rule->rr_task); 1097 } 1098 } 1099 1100 static int 1101 rctl_rule_fully_specified(const struct rctl_rule *rule) 1102 { 1103 1104 ASSERT_RACCT_ENABLED(); 1105 1106 switch (rule->rr_subject_type) { 1107 case RCTL_SUBJECT_TYPE_UNDEFINED: 1108 return (0); 1109 case RCTL_SUBJECT_TYPE_PROCESS: 1110 if (rule->rr_subject.rs_proc == NULL) 1111 return (0); 1112 break; 1113 case RCTL_SUBJECT_TYPE_USER: 1114 if (rule->rr_subject.rs_uip == NULL) 1115 return (0); 1116 break; 1117 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1118 if (rule->rr_subject.rs_loginclass == NULL) 1119 return (0); 1120 break; 1121 case RCTL_SUBJECT_TYPE_JAIL: 1122 if (rule->rr_subject.rs_prison_racct == NULL) 1123 return (0); 1124 break; 1125 default: 1126 panic("rctl_rule_fully_specified: unknown subject type %d", 1127 rule->rr_subject_type); 1128 } 1129 if (rule->rr_resource == RACCT_UNDEFINED) 1130 return (0); 1131 if (rule->rr_action == RCTL_ACTION_UNDEFINED) 1132 return (0); 1133 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED) 1134 return (0); 1135 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED) 1136 return (0); 1137 1138 return (1); 1139 } 1140 1141 static int 1142 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep) 1143 { 1144 struct rctl_rule *rule; 1145 char *subjectstr, *subject_idstr, *resourcestr, *actionstr, 1146 *amountstr, *perstr; 1147 id_t id; 1148 int error = 0; 1149 1150 ASSERT_RACCT_ENABLED(); 1151 1152 rule = rctl_rule_alloc(M_WAITOK); 1153 1154 subjectstr = strsep(&rulestr, ":"); 1155 subject_idstr = strsep(&rulestr, ":"); 1156 resourcestr = strsep(&rulestr, ":"); 1157 actionstr = strsep(&rulestr, "=/"); 1158 amountstr = strsep(&rulestr, "/"); 1159 perstr = rulestr; 1160 1161 if (subjectstr == NULL || subjectstr[0] == '\0') 1162 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1163 else { 1164 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames); 1165 if (error != 0) 1166 goto out; 1167 } 1168 1169 if (subject_idstr == NULL || subject_idstr[0] == '\0') { 1170 rule->rr_subject.rs_proc = NULL; 1171 rule->rr_subject.rs_uip = NULL; 1172 rule->rr_subject.rs_loginclass = NULL; 1173 rule->rr_subject.rs_prison_racct = NULL; 1174 } else { 1175 switch (rule->rr_subject_type) { 1176 case RCTL_SUBJECT_TYPE_UNDEFINED: 1177 error = EINVAL; 1178 goto out; 1179 case RCTL_SUBJECT_TYPE_PROCESS: 1180 error = str2id(subject_idstr, &id); 1181 if (error != 0) 1182 goto out; 1183 sx_assert(&allproc_lock, SA_LOCKED); 1184 rule->rr_subject.rs_proc = pfind(id); 1185 if (rule->rr_subject.rs_proc == NULL) { 1186 error = ESRCH; 1187 goto out; 1188 } 1189 PROC_UNLOCK(rule->rr_subject.rs_proc); 1190 break; 1191 case RCTL_SUBJECT_TYPE_USER: 1192 error = str2id(subject_idstr, &id); 1193 if (error != 0) 1194 goto out; 1195 rule->rr_subject.rs_uip = uifind(id); 1196 break; 1197 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1198 rule->rr_subject.rs_loginclass = 1199 loginclass_find(subject_idstr); 1200 if (rule->rr_subject.rs_loginclass == NULL) { 1201 error = ENAMETOOLONG; 1202 goto out; 1203 } 1204 break; 1205 case RCTL_SUBJECT_TYPE_JAIL: 1206 rule->rr_subject.rs_prison_racct = 1207 prison_racct_find(subject_idstr); 1208 if (rule->rr_subject.rs_prison_racct == NULL) { 1209 error = ENAMETOOLONG; 1210 goto out; 1211 } 1212 break; 1213 default: 1214 panic("rctl_string_to_rule: unknown subject type %d", 1215 rule->rr_subject_type); 1216 } 1217 } 1218 1219 if (resourcestr == NULL || resourcestr[0] == '\0') 1220 rule->rr_resource = RACCT_UNDEFINED; 1221 else { 1222 error = str2value(resourcestr, &rule->rr_resource, 1223 resourcenames); 1224 if (error != 0) 1225 goto out; 1226 } 1227 1228 if (actionstr == NULL || actionstr[0] == '\0') 1229 rule->rr_action = RCTL_ACTION_UNDEFINED; 1230 else { 1231 error = str2value(actionstr, &rule->rr_action, actionnames); 1232 if (error != 0) 1233 goto out; 1234 } 1235 1236 if (amountstr == NULL || amountstr[0] == '\0') 1237 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1238 else { 1239 error = str2int64(amountstr, &rule->rr_amount); 1240 if (error != 0) 1241 goto out; 1242 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) { 1243 if (rule->rr_amount > INT64_MAX / 1000000) { 1244 error = ERANGE; 1245 goto out; 1246 } 1247 rule->rr_amount *= 1000000; 1248 } 1249 } 1250 1251 if (perstr == NULL || perstr[0] == '\0') 1252 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1253 else { 1254 error = str2value(perstr, &rule->rr_per, subjectnames); 1255 if (error != 0) 1256 goto out; 1257 } 1258 1259 out: 1260 if (error == 0) 1261 *rulep = rule; 1262 else 1263 rctl_rule_release(rule); 1264 1265 return (error); 1266 } 1267 1268 /* 1269 * Link a rule with all the subjects it applies to. 1270 */ 1271 int 1272 rctl_rule_add(struct rctl_rule *rule) 1273 { 1274 struct proc *p; 1275 struct ucred *cred; 1276 struct uidinfo *uip; 1277 struct prison *pr; 1278 struct prison_racct *prr; 1279 struct loginclass *lc; 1280 struct rctl_rule *rule2; 1281 int match; 1282 1283 ASSERT_RACCT_ENABLED(); 1284 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 1285 1286 /* 1287 * Some rules just don't make sense, like "deny" rule for an undeniable 1288 * resource. The exception are the RSS and %CPU resources - they are 1289 * not deniable in the racct sense, but the limit is enforced in 1290 * a different way. 1291 */ 1292 if (rule->rr_action == RCTL_ACTION_DENY && 1293 !RACCT_IS_DENIABLE(rule->rr_resource) && 1294 rule->rr_resource != RACCT_RSS && 1295 rule->rr_resource != RACCT_PCTCPU) { 1296 return (EOPNOTSUPP); 1297 } 1298 1299 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1300 !RACCT_IS_DECAYING(rule->rr_resource)) { 1301 return (EOPNOTSUPP); 1302 } 1303 1304 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1305 rule->rr_resource == RACCT_PCTCPU) { 1306 return (EOPNOTSUPP); 1307 } 1308 1309 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS && 1310 RACCT_IS_SLOPPY(rule->rr_resource)) { 1311 return (EOPNOTSUPP); 1312 } 1313 1314 /* 1315 * Make sure there are no duplicated rules. Also, for the "deny" 1316 * rules, remove ones differing only by "amount". 1317 */ 1318 if (rule->rr_action == RCTL_ACTION_DENY) { 1319 rule2 = rctl_rule_duplicate(rule, M_WAITOK); 1320 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED; 1321 rctl_rule_remove(rule2); 1322 rctl_rule_release(rule2); 1323 } else 1324 rctl_rule_remove(rule); 1325 1326 switch (rule->rr_subject_type) { 1327 case RCTL_SUBJECT_TYPE_PROCESS: 1328 p = rule->rr_subject.rs_proc; 1329 KASSERT(p != NULL, ("rctl_rule_add: NULL proc")); 1330 1331 rctl_racct_add_rule(p->p_racct, rule); 1332 /* 1333 * In case of per-process rule, we don't have anything more 1334 * to do. 1335 */ 1336 return (0); 1337 1338 case RCTL_SUBJECT_TYPE_USER: 1339 uip = rule->rr_subject.rs_uip; 1340 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip")); 1341 rctl_racct_add_rule(uip->ui_racct, rule); 1342 break; 1343 1344 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1345 lc = rule->rr_subject.rs_loginclass; 1346 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass")); 1347 rctl_racct_add_rule(lc->lc_racct, rule); 1348 break; 1349 1350 case RCTL_SUBJECT_TYPE_JAIL: 1351 prr = rule->rr_subject.rs_prison_racct; 1352 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr")); 1353 rctl_racct_add_rule(prr->prr_racct, rule); 1354 break; 1355 1356 default: 1357 panic("rctl_rule_add: unknown subject type %d", 1358 rule->rr_subject_type); 1359 } 1360 1361 /* 1362 * Now go through all the processes and add the new rule to the ones 1363 * it applies to. 1364 */ 1365 sx_assert(&allproc_lock, SA_LOCKED); 1366 FOREACH_PROC_IN_SYSTEM(p) { 1367 cred = p->p_ucred; 1368 switch (rule->rr_subject_type) { 1369 case RCTL_SUBJECT_TYPE_USER: 1370 if (cred->cr_uidinfo == rule->rr_subject.rs_uip || 1371 cred->cr_ruidinfo == rule->rr_subject.rs_uip) 1372 break; 1373 continue; 1374 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1375 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) 1376 break; 1377 continue; 1378 case RCTL_SUBJECT_TYPE_JAIL: 1379 match = 0; 1380 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { 1381 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { 1382 match = 1; 1383 break; 1384 } 1385 } 1386 if (match) 1387 break; 1388 continue; 1389 default: 1390 panic("rctl_rule_add: unknown subject type %d", 1391 rule->rr_subject_type); 1392 } 1393 1394 rctl_racct_add_rule(p->p_racct, rule); 1395 } 1396 1397 return (0); 1398 } 1399 1400 static void 1401 rctl_rule_pre_callback(void) 1402 { 1403 1404 RACCT_LOCK(); 1405 } 1406 1407 static void 1408 rctl_rule_post_callback(void) 1409 { 1410 1411 RACCT_UNLOCK(); 1412 } 1413 1414 static void 1415 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3) 1416 { 1417 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1418 int found = 0; 1419 1420 ASSERT_RACCT_ENABLED(); 1421 RACCT_LOCK_ASSERT(); 1422 1423 found += rctl_racct_remove_rules(racct, filter); 1424 1425 *((int *)arg3) += found; 1426 } 1427 1428 /* 1429 * Remove all rules that match the filter. 1430 */ 1431 int 1432 rctl_rule_remove(struct rctl_rule *filter) 1433 { 1434 struct proc *p; 1435 int found = 0; 1436 1437 ASSERT_RACCT_ENABLED(); 1438 1439 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS && 1440 filter->rr_subject.rs_proc != NULL) { 1441 p = filter->rr_subject.rs_proc; 1442 RACCT_LOCK(); 1443 found = rctl_racct_remove_rules(p->p_racct, filter); 1444 RACCT_UNLOCK(); 1445 if (found) 1446 return (0); 1447 return (ESRCH); 1448 } 1449 1450 loginclass_racct_foreach(rctl_rule_remove_callback, 1451 rctl_rule_pre_callback, rctl_rule_post_callback, 1452 filter, (void *)&found); 1453 ui_racct_foreach(rctl_rule_remove_callback, 1454 rctl_rule_pre_callback, rctl_rule_post_callback, 1455 filter, (void *)&found); 1456 prison_racct_foreach(rctl_rule_remove_callback, 1457 rctl_rule_pre_callback, rctl_rule_post_callback, 1458 filter, (void *)&found); 1459 1460 sx_assert(&allproc_lock, SA_LOCKED); 1461 RACCT_LOCK(); 1462 FOREACH_PROC_IN_SYSTEM(p) { 1463 found += rctl_racct_remove_rules(p->p_racct, filter); 1464 } 1465 RACCT_UNLOCK(); 1466 1467 if (found) 1468 return (0); 1469 return (ESRCH); 1470 } 1471 1472 /* 1473 * Appends a rule to the sbuf. 1474 */ 1475 static void 1476 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule) 1477 { 1478 int64_t amount; 1479 1480 ASSERT_RACCT_ENABLED(); 1481 1482 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type)); 1483 1484 switch (rule->rr_subject_type) { 1485 case RCTL_SUBJECT_TYPE_PROCESS: 1486 if (rule->rr_subject.rs_proc == NULL) 1487 sbuf_printf(sb, ":"); 1488 else 1489 sbuf_printf(sb, "%d:", 1490 rule->rr_subject.rs_proc->p_pid); 1491 break; 1492 case RCTL_SUBJECT_TYPE_USER: 1493 if (rule->rr_subject.rs_uip == NULL) 1494 sbuf_printf(sb, ":"); 1495 else 1496 sbuf_printf(sb, "%d:", 1497 rule->rr_subject.rs_uip->ui_uid); 1498 break; 1499 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1500 if (rule->rr_subject.rs_loginclass == NULL) 1501 sbuf_printf(sb, ":"); 1502 else 1503 sbuf_printf(sb, "%s:", 1504 rule->rr_subject.rs_loginclass->lc_name); 1505 break; 1506 case RCTL_SUBJECT_TYPE_JAIL: 1507 if (rule->rr_subject.rs_prison_racct == NULL) 1508 sbuf_printf(sb, ":"); 1509 else 1510 sbuf_printf(sb, "%s:", 1511 rule->rr_subject.rs_prison_racct->prr_name); 1512 break; 1513 default: 1514 panic("rctl_rule_to_sbuf: unknown subject type %d", 1515 rule->rr_subject_type); 1516 } 1517 1518 amount = rule->rr_amount; 1519 if (amount != RCTL_AMOUNT_UNDEFINED && 1520 RACCT_IS_IN_MILLIONS(rule->rr_resource)) 1521 amount /= 1000000; 1522 1523 sbuf_printf(sb, "%s:%s=%jd", 1524 rctl_resource_name(rule->rr_resource), 1525 rctl_action_name(rule->rr_action), 1526 amount); 1527 1528 if (rule->rr_per != rule->rr_subject_type) 1529 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per)); 1530 } 1531 1532 /* 1533 * Routine used by RCTL syscalls to read in input string. 1534 */ 1535 static int 1536 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen) 1537 { 1538 char *str; 1539 int error; 1540 1541 ASSERT_RACCT_ENABLED(); 1542 1543 if (inbuflen <= 0) 1544 return (EINVAL); 1545 if (inbuflen > RCTL_MAX_INBUFSIZE) 1546 return (E2BIG); 1547 1548 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK); 1549 error = copyinstr(inbufp, str, inbuflen, NULL); 1550 if (error != 0) { 1551 free(str, M_RCTL); 1552 return (error); 1553 } 1554 1555 *inputstr = str; 1556 1557 return (0); 1558 } 1559 1560 /* 1561 * Routine used by RCTL syscalls to write out output string. 1562 */ 1563 static int 1564 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen) 1565 { 1566 int error; 1567 1568 ASSERT_RACCT_ENABLED(); 1569 1570 if (outputsbuf == NULL) 1571 return (0); 1572 1573 sbuf_finish(outputsbuf); 1574 if (outbuflen < sbuf_len(outputsbuf) + 1) { 1575 sbuf_delete(outputsbuf); 1576 return (ERANGE); 1577 } 1578 error = copyout(sbuf_data(outputsbuf), outbufp, 1579 sbuf_len(outputsbuf) + 1); 1580 sbuf_delete(outputsbuf); 1581 return (error); 1582 } 1583 1584 static struct sbuf * 1585 rctl_racct_to_sbuf(struct racct *racct, int sloppy) 1586 { 1587 struct sbuf *sb; 1588 int64_t amount; 1589 int i; 1590 1591 ASSERT_RACCT_ENABLED(); 1592 1593 sb = sbuf_new_auto(); 1594 for (i = 0; i <= RACCT_MAX; i++) { 1595 if (sloppy == 0 && RACCT_IS_SLOPPY(i)) 1596 continue; 1597 RACCT_LOCK(); 1598 amount = racct->r_resources[i]; 1599 RACCT_UNLOCK(); 1600 if (RACCT_IS_IN_MILLIONS(i)) 1601 amount /= 1000000; 1602 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount); 1603 } 1604 sbuf_setpos(sb, sbuf_len(sb) - 1); 1605 return (sb); 1606 } 1607 1608 int 1609 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 1610 { 1611 struct rctl_rule *filter; 1612 struct sbuf *outputsbuf = NULL; 1613 struct proc *p; 1614 struct uidinfo *uip; 1615 struct loginclass *lc; 1616 struct prison_racct *prr; 1617 char *inputstr; 1618 int error; 1619 1620 if (!racct_enable) 1621 return (ENOSYS); 1622 1623 error = priv_check(td, PRIV_RCTL_GET_RACCT); 1624 if (error != 0) 1625 return (error); 1626 1627 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1628 if (error != 0) 1629 return (error); 1630 1631 sx_slock(&allproc_lock); 1632 error = rctl_string_to_rule(inputstr, &filter); 1633 free(inputstr, M_RCTL); 1634 if (error != 0) { 1635 sx_sunlock(&allproc_lock); 1636 return (error); 1637 } 1638 1639 switch (filter->rr_subject_type) { 1640 case RCTL_SUBJECT_TYPE_PROCESS: 1641 p = filter->rr_subject.rs_proc; 1642 if (p == NULL) { 1643 error = EINVAL; 1644 goto out; 1645 } 1646 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0); 1647 break; 1648 case RCTL_SUBJECT_TYPE_USER: 1649 uip = filter->rr_subject.rs_uip; 1650 if (uip == NULL) { 1651 error = EINVAL; 1652 goto out; 1653 } 1654 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1); 1655 break; 1656 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1657 lc = filter->rr_subject.rs_loginclass; 1658 if (lc == NULL) { 1659 error = EINVAL; 1660 goto out; 1661 } 1662 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1); 1663 break; 1664 case RCTL_SUBJECT_TYPE_JAIL: 1665 prr = filter->rr_subject.rs_prison_racct; 1666 if (prr == NULL) { 1667 error = EINVAL; 1668 goto out; 1669 } 1670 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1); 1671 break; 1672 default: 1673 error = EINVAL; 1674 } 1675 out: 1676 rctl_rule_release(filter); 1677 sx_sunlock(&allproc_lock); 1678 if (error != 0) 1679 return (error); 1680 1681 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen); 1682 1683 return (error); 1684 } 1685 1686 static void 1687 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3) 1688 { 1689 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1690 struct rctl_rule_link *link; 1691 struct sbuf *sb = (struct sbuf *)arg3; 1692 1693 ASSERT_RACCT_ENABLED(); 1694 RACCT_LOCK_ASSERT(); 1695 1696 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 1697 if (!rctl_rule_matches(link->rrl_rule, filter)) 1698 continue; 1699 rctl_rule_to_sbuf(sb, link->rrl_rule); 1700 sbuf_printf(sb, ","); 1701 } 1702 } 1703 1704 int 1705 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 1706 { 1707 struct sbuf *sb; 1708 struct rctl_rule *filter; 1709 struct rctl_rule_link *link; 1710 struct proc *p; 1711 char *inputstr, *buf; 1712 size_t bufsize; 1713 int error; 1714 1715 if (!racct_enable) 1716 return (ENOSYS); 1717 1718 error = priv_check(td, PRIV_RCTL_GET_RULES); 1719 if (error != 0) 1720 return (error); 1721 1722 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1723 if (error != 0) 1724 return (error); 1725 1726 sx_slock(&allproc_lock); 1727 error = rctl_string_to_rule(inputstr, &filter); 1728 free(inputstr, M_RCTL); 1729 if (error != 0) { 1730 sx_sunlock(&allproc_lock); 1731 return (error); 1732 } 1733 1734 bufsize = uap->outbuflen; 1735 if (bufsize > rctl_maxbufsize) { 1736 sx_sunlock(&allproc_lock); 1737 return (E2BIG); 1738 } 1739 1740 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1741 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1742 KASSERT(sb != NULL, ("sbuf_new failed")); 1743 1744 FOREACH_PROC_IN_SYSTEM(p) { 1745 RACCT_LOCK(); 1746 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1747 /* 1748 * Non-process rules will be added to the buffer later. 1749 * Adding them here would result in duplicated output. 1750 */ 1751 if (link->rrl_rule->rr_subject_type != 1752 RCTL_SUBJECT_TYPE_PROCESS) 1753 continue; 1754 if (!rctl_rule_matches(link->rrl_rule, filter)) 1755 continue; 1756 rctl_rule_to_sbuf(sb, link->rrl_rule); 1757 sbuf_printf(sb, ","); 1758 } 1759 RACCT_UNLOCK(); 1760 } 1761 1762 loginclass_racct_foreach(rctl_get_rules_callback, 1763 rctl_rule_pre_callback, rctl_rule_post_callback, 1764 filter, sb); 1765 ui_racct_foreach(rctl_get_rules_callback, 1766 rctl_rule_pre_callback, rctl_rule_post_callback, 1767 filter, sb); 1768 prison_racct_foreach(rctl_get_rules_callback, 1769 rctl_rule_pre_callback, rctl_rule_post_callback, 1770 filter, sb); 1771 if (sbuf_error(sb) == ENOMEM) { 1772 error = ERANGE; 1773 goto out; 1774 } 1775 1776 /* 1777 * Remove trailing ",". 1778 */ 1779 if (sbuf_len(sb) > 0) 1780 sbuf_setpos(sb, sbuf_len(sb) - 1); 1781 1782 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1783 out: 1784 rctl_rule_release(filter); 1785 sx_sunlock(&allproc_lock); 1786 free(buf, M_RCTL); 1787 return (error); 1788 } 1789 1790 int 1791 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 1792 { 1793 struct sbuf *sb; 1794 struct rctl_rule *filter; 1795 struct rctl_rule_link *link; 1796 char *inputstr, *buf; 1797 size_t bufsize; 1798 int error; 1799 1800 if (!racct_enable) 1801 return (ENOSYS); 1802 1803 error = priv_check(td, PRIV_RCTL_GET_LIMITS); 1804 if (error != 0) 1805 return (error); 1806 1807 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1808 if (error != 0) 1809 return (error); 1810 1811 sx_slock(&allproc_lock); 1812 error = rctl_string_to_rule(inputstr, &filter); 1813 free(inputstr, M_RCTL); 1814 if (error != 0) { 1815 sx_sunlock(&allproc_lock); 1816 return (error); 1817 } 1818 1819 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) { 1820 rctl_rule_release(filter); 1821 sx_sunlock(&allproc_lock); 1822 return (EINVAL); 1823 } 1824 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) { 1825 rctl_rule_release(filter); 1826 sx_sunlock(&allproc_lock); 1827 return (EOPNOTSUPP); 1828 } 1829 if (filter->rr_subject.rs_proc == NULL) { 1830 rctl_rule_release(filter); 1831 sx_sunlock(&allproc_lock); 1832 return (EINVAL); 1833 } 1834 1835 bufsize = uap->outbuflen; 1836 if (bufsize > rctl_maxbufsize) { 1837 rctl_rule_release(filter); 1838 sx_sunlock(&allproc_lock); 1839 return (E2BIG); 1840 } 1841 1842 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1843 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1844 KASSERT(sb != NULL, ("sbuf_new failed")); 1845 1846 RACCT_LOCK(); 1847 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links, 1848 rrl_next) { 1849 rctl_rule_to_sbuf(sb, link->rrl_rule); 1850 sbuf_printf(sb, ","); 1851 } 1852 RACCT_UNLOCK(); 1853 if (sbuf_error(sb) == ENOMEM) { 1854 error = ERANGE; 1855 sbuf_delete(sb); 1856 goto out; 1857 } 1858 1859 /* 1860 * Remove trailing ",". 1861 */ 1862 if (sbuf_len(sb) > 0) 1863 sbuf_setpos(sb, sbuf_len(sb) - 1); 1864 1865 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1866 out: 1867 rctl_rule_release(filter); 1868 sx_sunlock(&allproc_lock); 1869 free(buf, M_RCTL); 1870 return (error); 1871 } 1872 1873 int 1874 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 1875 { 1876 struct rctl_rule *rule; 1877 char *inputstr; 1878 int error; 1879 1880 if (!racct_enable) 1881 return (ENOSYS); 1882 1883 error = priv_check(td, PRIV_RCTL_ADD_RULE); 1884 if (error != 0) 1885 return (error); 1886 1887 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1888 if (error != 0) 1889 return (error); 1890 1891 sx_slock(&allproc_lock); 1892 error = rctl_string_to_rule(inputstr, &rule); 1893 free(inputstr, M_RCTL); 1894 if (error != 0) { 1895 sx_sunlock(&allproc_lock); 1896 return (error); 1897 } 1898 /* 1899 * The 'per' part of a rule is optional. 1900 */ 1901 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED && 1902 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) 1903 rule->rr_per = rule->rr_subject_type; 1904 1905 if (!rctl_rule_fully_specified(rule)) { 1906 error = EINVAL; 1907 goto out; 1908 } 1909 1910 error = rctl_rule_add(rule); 1911 1912 out: 1913 rctl_rule_release(rule); 1914 sx_sunlock(&allproc_lock); 1915 return (error); 1916 } 1917 1918 int 1919 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 1920 { 1921 struct rctl_rule *filter; 1922 char *inputstr; 1923 int error; 1924 1925 if (!racct_enable) 1926 return (ENOSYS); 1927 1928 error = priv_check(td, PRIV_RCTL_REMOVE_RULE); 1929 if (error != 0) 1930 return (error); 1931 1932 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1933 if (error != 0) 1934 return (error); 1935 1936 sx_slock(&allproc_lock); 1937 error = rctl_string_to_rule(inputstr, &filter); 1938 free(inputstr, M_RCTL); 1939 if (error != 0) { 1940 sx_sunlock(&allproc_lock); 1941 return (error); 1942 } 1943 1944 error = rctl_rule_remove(filter); 1945 rctl_rule_release(filter); 1946 sx_sunlock(&allproc_lock); 1947 1948 return (error); 1949 } 1950 1951 /* 1952 * Update RCTL rule list after credential change. 1953 */ 1954 void 1955 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred) 1956 { 1957 LIST_HEAD(, rctl_rule_link) newrules; 1958 struct rctl_rule_link *link, *newlink; 1959 struct uidinfo *newuip; 1960 struct loginclass *newlc; 1961 struct prison_racct *newprr; 1962 int rulecnt, i; 1963 1964 if (!racct_enable) 1965 return; 1966 1967 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 1968 1969 newuip = newcred->cr_ruidinfo; 1970 newlc = newcred->cr_loginclass; 1971 newprr = newcred->cr_prison->pr_prison_racct; 1972 1973 LIST_INIT(&newrules); 1974 1975 again: 1976 /* 1977 * First, count the rules that apply to the process with new 1978 * credentials. 1979 */ 1980 rulecnt = 0; 1981 RACCT_LOCK(); 1982 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1983 if (link->rrl_rule->rr_subject_type == 1984 RCTL_SUBJECT_TYPE_PROCESS) 1985 rulecnt++; 1986 } 1987 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) 1988 rulecnt++; 1989 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) 1990 rulecnt++; 1991 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) 1992 rulecnt++; 1993 RACCT_UNLOCK(); 1994 1995 /* 1996 * Create temporary list. We've dropped the rctl_lock in order 1997 * to use M_WAITOK. 1998 */ 1999 for (i = 0; i < rulecnt; i++) { 2000 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 2001 newlink->rrl_rule = NULL; 2002 newlink->rrl_exceeded = 0; 2003 LIST_INSERT_HEAD(&newrules, newlink, rrl_next); 2004 } 2005 2006 newlink = LIST_FIRST(&newrules); 2007 2008 /* 2009 * Assign rules to the newly allocated list entries. 2010 */ 2011 RACCT_LOCK(); 2012 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 2013 if (link->rrl_rule->rr_subject_type == 2014 RCTL_SUBJECT_TYPE_PROCESS) { 2015 if (newlink == NULL) 2016 goto goaround; 2017 rctl_rule_acquire(link->rrl_rule); 2018 newlink->rrl_rule = link->rrl_rule; 2019 newlink->rrl_exceeded = link->rrl_exceeded; 2020 newlink = LIST_NEXT(newlink, rrl_next); 2021 rulecnt--; 2022 } 2023 } 2024 2025 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) { 2026 if (newlink == NULL) 2027 goto goaround; 2028 rctl_rule_acquire(link->rrl_rule); 2029 newlink->rrl_rule = link->rrl_rule; 2030 newlink->rrl_exceeded = link->rrl_exceeded; 2031 newlink = LIST_NEXT(newlink, rrl_next); 2032 rulecnt--; 2033 } 2034 2035 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) { 2036 if (newlink == NULL) 2037 goto goaround; 2038 rctl_rule_acquire(link->rrl_rule); 2039 newlink->rrl_rule = link->rrl_rule; 2040 newlink->rrl_exceeded = link->rrl_exceeded; 2041 newlink = LIST_NEXT(newlink, rrl_next); 2042 rulecnt--; 2043 } 2044 2045 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) { 2046 if (newlink == NULL) 2047 goto goaround; 2048 rctl_rule_acquire(link->rrl_rule); 2049 newlink->rrl_rule = link->rrl_rule; 2050 newlink->rrl_exceeded = link->rrl_exceeded; 2051 newlink = LIST_NEXT(newlink, rrl_next); 2052 rulecnt--; 2053 } 2054 2055 if (rulecnt == 0) { 2056 /* 2057 * Free the old rule list. 2058 */ 2059 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) { 2060 link = LIST_FIRST(&p->p_racct->r_rule_links); 2061 LIST_REMOVE(link, rrl_next); 2062 rctl_rule_release(link->rrl_rule); 2063 uma_zfree(rctl_rule_link_zone, link); 2064 } 2065 2066 /* 2067 * Replace lists and we're done. 2068 * 2069 * XXX: Is there any way to switch list heads instead 2070 * of iterating here? 2071 */ 2072 while (!LIST_EMPTY(&newrules)) { 2073 newlink = LIST_FIRST(&newrules); 2074 LIST_REMOVE(newlink, rrl_next); 2075 LIST_INSERT_HEAD(&p->p_racct->r_rule_links, 2076 newlink, rrl_next); 2077 } 2078 2079 RACCT_UNLOCK(); 2080 2081 return; 2082 } 2083 2084 goaround: 2085 RACCT_UNLOCK(); 2086 2087 /* 2088 * Rule list changed while we were not holding the rctl_lock. 2089 * Free the new list and try again. 2090 */ 2091 while (!LIST_EMPTY(&newrules)) { 2092 newlink = LIST_FIRST(&newrules); 2093 LIST_REMOVE(newlink, rrl_next); 2094 if (newlink->rrl_rule != NULL) 2095 rctl_rule_release(newlink->rrl_rule); 2096 uma_zfree(rctl_rule_link_zone, newlink); 2097 } 2098 2099 goto again; 2100 } 2101 2102 /* 2103 * Assign RCTL rules to the newly created process. 2104 */ 2105 int 2106 rctl_proc_fork(struct proc *parent, struct proc *child) 2107 { 2108 struct rctl_rule *rule; 2109 struct rctl_rule_link *link; 2110 int error; 2111 2112 ASSERT_RACCT_ENABLED(); 2113 RACCT_LOCK_ASSERT(); 2114 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent)); 2115 2116 LIST_INIT(&child->p_racct->r_rule_links); 2117 2118 /* 2119 * Go through limits applicable to the parent and assign them 2120 * to the child. Rules with 'process' subject have to be duplicated 2121 * in order to make their rr_subject point to the new process. 2122 */ 2123 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) { 2124 if (link->rrl_rule->rr_subject_type == 2125 RCTL_SUBJECT_TYPE_PROCESS) { 2126 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT); 2127 if (rule == NULL) 2128 goto fail; 2129 KASSERT(rule->rr_subject.rs_proc == parent, 2130 ("rule->rr_subject.rs_proc != parent")); 2131 rule->rr_subject.rs_proc = child; 2132 error = rctl_racct_add_rule_locked(child->p_racct, 2133 rule); 2134 rctl_rule_release(rule); 2135 if (error != 0) 2136 goto fail; 2137 } else { 2138 error = rctl_racct_add_rule_locked(child->p_racct, 2139 link->rrl_rule); 2140 if (error != 0) 2141 goto fail; 2142 } 2143 } 2144 2145 return (0); 2146 2147 fail: 2148 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) { 2149 link = LIST_FIRST(&child->p_racct->r_rule_links); 2150 LIST_REMOVE(link, rrl_next); 2151 rctl_rule_release(link->rrl_rule); 2152 uma_zfree(rctl_rule_link_zone, link); 2153 } 2154 2155 return (EAGAIN); 2156 } 2157 2158 /* 2159 * Release rules attached to the racct. 2160 */ 2161 void 2162 rctl_racct_release(struct racct *racct) 2163 { 2164 struct rctl_rule_link *link; 2165 2166 ASSERT_RACCT_ENABLED(); 2167 RACCT_LOCK_ASSERT(); 2168 2169 while (!LIST_EMPTY(&racct->r_rule_links)) { 2170 link = LIST_FIRST(&racct->r_rule_links); 2171 LIST_REMOVE(link, rrl_next); 2172 rctl_rule_release(link->rrl_rule); 2173 uma_zfree(rctl_rule_link_zone, link); 2174 } 2175 } 2176 2177 static void 2178 rctl_init(void) 2179 { 2180 2181 if (!racct_enable) 2182 return; 2183 2184 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule), 2185 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2186 rctl_rule_link_zone = uma_zcreate("rctl_rule_link", 2187 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL, 2188 UMA_ALIGN_PTR, 0); 2189 2190 /* 2191 * Set default values, making sure not to overwrite the ones 2192 * fetched from tunables. Most of those could be set at the 2193 * declaration, except for the rctl_throttle_max - we cannot 2194 * set it there due to hz not being compile time constant. 2195 */ 2196 if (rctl_throttle_min < 1) 2197 rctl_throttle_min = 1; 2198 if (rctl_throttle_max < rctl_throttle_min) 2199 rctl_throttle_max = 2 * hz; 2200 if (rctl_throttle_pct < 0) 2201 rctl_throttle_pct = 100; 2202 if (rctl_throttle_pct2 < 0) 2203 rctl_throttle_pct2 = 100; 2204 } 2205 2206 #else /* !RCTL */ 2207 2208 int 2209 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 2210 { 2211 2212 return (ENOSYS); 2213 } 2214 2215 int 2216 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 2217 { 2218 2219 return (ENOSYS); 2220 } 2221 2222 int 2223 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 2224 { 2225 2226 return (ENOSYS); 2227 } 2228 2229 int 2230 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 2231 { 2232 2233 return (ENOSYS); 2234 } 2235 2236 int 2237 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 2238 { 2239 2240 return (ENOSYS); 2241 } 2242 2243 #endif /* !RCTL */ 2244